In [None]:
import os

from pathlib import Path

import matplotlib.pyplot as plt
import xarray as xr

from sdm_eurec4a import REPOSITORY_PATH
from sdm_eurec4a.visulization import (
    set_custom_rcParams,
    handler_map_alpha,
    adjust_lightness_array,
    plot_colors,
)
from sdm_eurec4a.reductions import x_y_flatten, shape_dim_as_dataarray
from sdm_eurec4a.input_processing.models import linear_func, split_linear_func
from matplotlib import rc

import numpy as np
from lmfit import Model

In [None]:
# Root of the repository; all data and figure paths in this notebook are built from it.
REPOSITORY_ROOT = REPOSITORY_PATH()
# Uncomment to check which directory was resolved on this machine.
# print(REPOSITORY_ROOT)

C:\Users\Niebaum\Documents\Repositories\sdm-eurec4a


In [None]:
# Start from matplotlib's default style before the project settings are applied.
plt.style.use("default")

# Colour list used for all plots below, plus a darker variant (lightness factor 0.6)
# that is used for the fitted lines.
default_colors = set_custom_rcParams()
dark_colors = adjust_lightness_array(default_colors, 0.6)

# Show both palettes for a quick visual check.
for palette in (default_colors, dark_colors):
    plot_colors(palette)

# rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
## for Palatino and other serif fonts use:
# rc('font',**{'family':'serif','serif':['Palatino']})
# Render text with matplotlib's own engine instead of an external LaTeX installation.
rc("text", usetex=False)


# Directory that receives every figure produced by this notebook.
fig_path = REPOSITORY_ROOT / Path("results/CLEO/initilization/fitting_thermo_profiles")
fig_path.mkdir(parents=True, exist_ok=True)

### Load datasets

In [None]:
# Mask and cloud id: they select the identified-clouds file and name the figure sub-directory.
mask_name = "cloud_mask"
chosen_id = 1421
# mask_name = "rain_mask"
# chosen_id = 77

subfig_path = fig_path / Path(f"{mask_name}_{chosen_id}")
subfig_path.mkdir(parents=True, exist_ok=True)

# Clouds identified with the chosen mask.
identified_clouds = xr.open_dataset(
    REPOSITORY_ROOT
    / Path("data/observation/cloud_composite/processed/identified_clouds")
    / Path(f"identified_clouds_{mask_name}.nc")
)
# select only clouds which are between 800 and 1100 m
# identified_clouds = identified_clouds.where(
#     (identified_clouds.alt >= 800) & (identified_clouds.alt <= 1100), drop=True
# )

# Distances between identified clouds (IC) and dropsondes (DS), going by the file name.
distance_IC_DS = xr.open_dataset(
    REPOSITORY_ROOT / Path("data/observation/combined/distance/distances_IC_DS.nc")
)

# Cloud composite, opened lazily in chunks of 1000 time steps.
cloud_composite = xr.open_dataset(
    REPOSITORY_ROOT / Path("data/observation/cloud_composite/processed/cloud_composite.nc"),
    chunks={"time": 1000},
)

# Dropsondes: index by launch time (renamed to "time"), sort chronologically and
# keep the whole time axis in a single chunk.
drop_sondes = (
    xr.open_dataset(
        REPOSITORY_ROOT
        / Path("data/observation/dropsonde/Level_3/EUREC4A_JOANNE_Dropsonde-RD41_Level_3_v2.0.0.nc")
    )
    .rename({"launch_time": "time"})
    .swap_dims({"sonde_id": "time"})
    .sortby("time")
    .chunk({"time": -1})
)

## Preprocess dataset

**Create altitude 2D array**
using ``shape_dim_as_dataarray``
**Convert q units** 
from $[kg/kg]$  to $[g/kg]$

In [None]:
# Keep only the sondes whose launch time falls into the one-hour example window.
example_sondes = drop_sondes.sel(time=slice("2020-01-24T13:00:00", "2020-01-24T14:00:00"))
# Restrict to alt <= 1500 (presumably metres - confirm); drop=True removes everything else.
example_sondes = example_sondes.where((example_sondes.alt <= 1500), drop=True)
# Altitude as an array shaped like "ta", so it can be paired element-wise with the data.
example_sondes["alt_array"] = shape_dim_as_dataarray(example_sondes["ta"], output_dim="alt")
# Convert q from kg/kg to g/kg. The in-place multiplication is deliberate:
# the variable is updated where it is and only the unit attribute is adjusted afterwards.
example_sondes["q"] *= 1e3
example_sondes["q"].attrs.update({"units": "g kg-1"})
# Show the converted variable as the cell output.
example_sondes["q"]

Next, use a forced fit: the split altitude is fixed to the mean of the split altitudes fitted for the individual variables.

In [None]:
# NOTE(review): this list is reassigned (with "rh" added) in the model cell before it is first used.
variable_names = ["ta", "q", "theta"]

In [None]:
fig, axs = plt.subplots(ncols=3, nrows=1, figsize=(12, 4), sharey=True)

scatter_style = dict(alpha=0.75)

ax_q, ax_ta, ax_theta = axs
# The panels share the y-axis, so only the left-most one is labelled.
ax_q.set_ylabel("alt [m]")

# One entry per panel: (axis, variable, legend label, x-axis label, colour).
profile_panels = (
    (ax_q, "q", "Spec. Hum.", "Specific humidity [g/kg]", default_colors[0]),
    (ax_ta, "ta", "Air Temperature", "Air Temperature [K]", default_colors[1]),
    (ax_theta, "theta", "Pot. Temperature", "Pot. Temperature [K]", default_colors[2]),
)

for ax, var_name, label, xlabel, color in profile_panels:
    # Both arrays are transposed the same way; matplotlib then draws one line per column
    # (presumably one line per sonde - confirm the dimension order).
    ax.plot(
        example_sondes[var_name].T,
        example_sondes["alt_array"].T,
        color=color,
        label=label,
        **scatter_style,
    )
    ax.set_xlabel(xlabel)
    # Colour the x-axis label and ticks like the plotted variable.
    ax.xaxis.label.set_color(color)
    ax.tick_params(axis="x", colors=color)

#### Fit the ``split_linear_func``
For this, use as split height the mean of the `x_split` values obtained by fitting each variable with a freely varying `x_split`.

#### Use a non varying split altitude. 
This is the mean of all split values of the different variables

create the model and parameters

In [None]:
# Variables whose vertical profiles are fitted.
variable_names = ["ta", "q", "theta", "rh"]
# lmfit model around split_linear_func; "x" (the altitude) is the only independent variable.
model = Model(split_linear_func, independent_vars=["x"])
# Start values of the fit.
# The bounds of x_split have to enclose its start value. The previous upper bound (400)
# lay below the start value (1000), so the fit started pinned at the bound and returned
# the same x_split (~398) for every variable.
# The upper bound now matches the altitude cut (alt <= 1500) applied to the sondes.
params = model.make_params(
    f_0={"value": 0.017},
    x_split={"value": 1000, "min": 0, "max": 1500},
    slope_1={"value": 1e-2},
    slope_2={"value": 1e-3},
)

In [None]:
parameters = dict()
x_split_list = list()

# The altitude array is the same for every variable, so it is materialised only once.
alt_values = example_sondes["alt_array"].compute().data

# First pass: fit every variable with a freely varying x_split and collect the results.
for var in variable_names:
    result_model = model.fit(
        data=example_sondes[var].compute().data,
        params=params,
        x=alt_values,
        nan_policy="omit",
        method="leastsq",
    )
    x_split_list.append(result_model.params["x_split"].value)

x_split_list = np.array(x_split_list)
print(f"x_split has mean {np.mean(x_split_list)} and std {np.std(x_split_list)}")
print("The mean will be used")
x_split = np.mean(x_split_list)

# Second pass: x_split is fixed to the mean of the first pass.
# The fixed value is set on a copy, so `params` keeps its free x_split and re-running
# this cell repeats the first pass unchanged instead of starting from a fixed x_split.
params_fixed_split = params.copy()
params_fixed_split.add("x_split", value=x_split, vary=False)

for var in variable_names:
    result_model = model.fit(
        data=example_sondes[var].compute().data,
        params=params_fixed_split,
        x=alt_values,
        nan_policy="omit",
        method="leastsq",
    )
    # Evaluate the fit on the 1D altitude coordinate and store it next to the data
    fitted = result_model.eval(x=example_sondes.alt, flatten=False)
    example_sondes[f"fit_{var}"] = (("alt"), fitted)

    # Store the parameters
    parameters[var] = result_model.params

# Print the fitted parameters of every variable
for var in variable_names:
    print(var)
    parameters[var].pretty_print(columns=["value", "stderr", "vary"])

x_split has mean 398.36991676094465 and std 0.0
The mean will be used
ta
Name        Value   Stderr     Vary
f_0         299.3  0.05277     True
slope_1  -0.009923 0.000169     True
slope_2  -0.005289 4.746e-05     True
x_split     398.4        0    False
q
Name        Value   Stderr     Vary
f_0         16.68   0.1277     True
slope_1  -0.0002116 0.0004099     True
slope_2  -0.007158 0.0001165     True
x_split     398.4        0    False
theta
Name        Value   Stderr     Vary
f_0         298.1  0.05428     True
slope_1  -0.0003803 0.0001743     True
slope_2  0.004524 4.953e-05     True
x_split     398.4        0    False
rh
Name        Value   Stderr     Vary
f_0        0.7751  0.01066     True
slope_1  0.0004715 3.412e-05     True
slope_2  -0.0002798 9.584e-06     True
x_split     398.4        0    False


In [None]:
fig, axs = plt.subplots(ncols=3, nrows=1, figsize=(12, 4), sharey=True)

scatter_style = dict(alpha=0.3, marker="o", s=2)

ax_q, ax_ta, ax_theta = axs
# The panels share the y-axis, so only the left-most one is labelled.
ax_q.set_ylabel("alt [m]")

# One entry per panel: (axis, variable, legend label, title, x-axis label, colour index).
fit_panels = (
    (ax_q, "q", "Spec. Hum.", "Specific humidity", "Specific humidity [g/kg]", 0),
    (ax_ta, "ta", "Air Temperature", "Air Temperature", "Air Temperature [K]", 1),
    (ax_theta, "theta", "Pot. Temperature", "Pot. Temperature", "Pot. Temperature [K]", 2),
)

for ax, var_name, label, title, xlabel, color_idx in fit_panels:
    # Observations as points ...
    ax.scatter(
        example_sondes[var_name],
        example_sondes["alt_array"],
        color=default_colors[color_idx],
        label=label,
        **scatter_style,
    )
    # ... and the fitted profile as a line in the darker shade of the same colour.
    ax.plot(
        example_sondes[f"fit_{var_name}"],
        example_sondes["alt"],
        color=dark_colors[color_idx],
        label="fit",
    )

    ax.set_title(title)
    ax.set_xlabel(xlabel)
    # Colour the x-axis label and ticks like the plotted variable.
    ax.xaxis.label.set_color(default_colors[color_idx])
    ax.tick_params(axis="x", colors=default_colors[color_idx])

    # Mark the split altitude used by the fit.
    ax.axhline(x_split, color="black", linestyle="--", label="split level")
    ax.legend(handler_map=handler_map_alpha())


fig.suptitle("Fit of the thermodynamic profiles")
fig.tight_layout()
# Save the figure as a raster and as a vector image.
for extension in ("png", "svg"):
    fig.savefig(subfig_path / Path(f"fit_thermo_profiles.{extension}"), dpi=300)

The forced fit of using the same altitude for the split does not work.

In [None]:
def double_split_linear(
    x1: np.ndarray,
    x2: np.ndarray,
    f_0: float = 2,
    g_0: float = 2,
    f_slope_1: float = 1,
    f_slope_2: float = 2,
    g_slope_1: float = -4,
    g_slope_2: float = 0,
    x_split: float = 800,
    flatten: bool = False,
    axis: int = 0,
    order: str = "F",
):
    """
    Evaluate two ``split_linear_func`` profiles that share the same ``x_split``.

    The profile ``f`` is evaluated on ``x1`` with ``f_0``, ``f_slope_1`` and ``f_slope_2``,
    the profile ``g`` on ``x2`` with ``g_0``, ``g_slope_1`` and ``g_slope_2``.

    Parameters
    ----------
    x1, x2 : np.ndarray
        Inputs of the first and the second profile.
    f_0, g_0 : float
        ``f_0`` argument passed to ``split_linear_func`` for ``f`` and ``g``.
    f_slope_1, f_slope_2, g_slope_1, g_slope_2 : float
        ``slope_1`` / ``slope_2`` arguments passed to ``split_linear_func``.
    x_split : float
        Split point, identical for both profiles.
    flatten : bool
        If True, the stacked result is returned flattened with the given ``order``.
    axis : int
        Axis along which the two profiles are stacked.
    order : str
        Memory order used by ``flatten`` (only relevant if ``flatten`` is True).

    Returns
    -------
    np.ndarray
        ``f`` and ``g`` stacked along ``axis``, optionally flattened.
    """

    # Keyword sets of the two profiles; the split point is added for both below.
    profile_kwargs = (
        dict(x=x1, f_0=f_0, slope_1=f_slope_1, slope_2=f_slope_2),
        dict(x=x2, f_0=g_0, slope_1=g_slope_1, slope_2=g_slope_2),
    )
    stacked = np.stack(
        [split_linear_func(x_split=x_split, **kwargs) for kwargs in profile_kwargs],
        axis=axis,
    )
    return stacked.flatten(order=order) if flatten else stacked


def ndim_split_linear_func(
    x: np.ndarray,
    f_0: np.ndarray,
    slopes_1: np.ndarray,
    slopes_2: np.ndarray,
    x_split: float = 800,
    axis: int = 0,
    order: str = "F",
):
    """
    Evaluate one ``split_linear_func`` profile per slice of ``x`` along ``axis``.

    Slice ``i`` of ``x`` (taken along ``axis``) is evaluated with ``f_0[i]``,
    ``slopes_1[i]`` and ``slopes_2[i]``; all slices share the same ``x_split``.

    Parameters
    ----------
    x : array-like
        Input values, at least 1D. Lists are accepted and converted to arrays.
    f_0, slopes_1, slopes_2 : array-like
        1D sequences with one entry per slice, i.e. of length ``x.shape[axis]``.
    x_split : float
        Split point shared by all profiles.
    axis : int
        Axis of ``x`` that enumerates the profiles.
    order : str
        Currently unused; kept so that the signature stays unchanged.

    Returns
    -------
    np.ndarray
        The evaluated profiles stacked along ``axis``.

    Raises
    ------
    AssertionError
        If ``x`` is 0-dimensional or the per-profile sequences do not have
        the shape ``(x.shape[axis],)``.
    """

    # Convert first: the dimension check below needs array attributes, and plain
    # lists used to fail on ``x.ndim`` before they were converted.
    x = np.asarray(x)
    f_0 = np.asarray(f_0)
    slopes_1 = np.asarray(slopes_1)
    slopes_2 = np.asarray(slopes_2)
    axis = int(axis)

    assert x.ndim != 0, "x must be a at least 1D array"

    assert (
        (x.shape[axis],) == f_0.shape == slopes_1.shape == slopes_2.shape
    ), f"f_0, slopes_1, slopes_2 must have the same shape as x[axis]. But have x:{x.shape[axis]}, axis:{axis}, f_0:{f_0.shape}, slopes_1:{slopes_1.shape}, slopes_2:{slopes_2.shape}"

    # One profile per slice along ``axis``, each with its own offset and slopes.
    results = [
        split_linear_func(
            x=np.take(x, idx, axis=axis),
            f_0=f_0[idx],
            slope_1=slopes_1[idx],
            slope_2=slopes_2[idx],
            x_split=x_split,
        )
        for idx in range(x.shape[axis])
    ]

    return np.stack(results, axis=axis)


def double_split_linear_func(
    x: np.ndarray,
    f_0: float,
    g_0: float,
    f_slope_1: float,
    f_slope_2: float,
    g_slope_1: float,
    g_slope_2: float,
    x_split: float = 800,
    axis: int = 0,
    flatten: bool = False,
):
    """
    Evaluate two split-linear profiles ``f`` and ``g`` that share ``x_split``.

    The work is delegated to ``ndim_split_linear_func`` with the parameters of
    ``f`` as first and those of ``g`` as second entry.

    Parameters
    ----------
    x : np.ndarray
        2D input whose ``axis`` has length 2, or a 1D array of even length.
        A 1D array is reshaped to ``(2, -1)``: first half for ``f``, second half
        for ``g``. This layout corresponds to ``axis=0``.
    f_0, g_0 : float
        ``f_0`` value of the first and second profile.
    f_slope_1, f_slope_2, g_slope_1, g_slope_2 : float
        Slopes of the first and second profile.
    x_split : float
        Split point shared by both profiles.
    axis : int
        Axis of ``x`` that enumerates the two profiles.
    flatten : bool
        If True, the result is returned flattened (C order).

    Returns
    -------
    np.ndarray
        The two evaluated profiles stacked along ``axis``, optionally flattened.

    Raises
    ------
    ValueError
        If ``x`` is 1D but cannot be reshaped to ``(2, -1)``, or if ``x`` is
        neither 1D nor 2D.
    """

    x = np.asarray(x)

    if x.ndim == 1:
        try:
            x = x.reshape((2, -1))
        except ValueError as err:
            # Report the real shape; the message used to print the placeholder literally.
            raise ValueError(
                f"x must be a 2D array or one should be able to reshape it to a 2D array. But have x.shape = {x.shape}"
            ) from err
    elif x.ndim != 2:
        raise ValueError(f"x must be not bigger than 2D array. But have x.ndim = {x.ndim}")

    params = dict(
        f_0=np.array([f_0, g_0]),
        slopes_1=np.array([f_slope_1, g_slope_1]),
        slopes_2=np.array([f_slope_2, g_slope_2]),
        x_split=x_split,
    )

    result = ndim_split_linear_func(x=x, axis=axis, **params)
    if flatten:
        result = result.flatten()
    return result


def triple_split_linear_func(
    x: np.ndarray,
    f_0: float,
    g_0: float,
    h_0: float,
    f_slope_1: float,
    f_slope_2: float,
    g_slope_1: float,
    g_slope_2: float,
    h_slope_1: float,
    h_slope_2: float,
    x_split: float = 800,
    axis: int = 0,
    flatten: bool = False,
):
    """
    Evaluate three split-linear profiles ``f``, ``g`` and ``h`` that share ``x_split``.

    The work is delegated to ``ndim_split_linear_func`` with the parameters of
    ``f``, ``g`` and ``h`` as first, second and third entry.

    Parameters
    ----------
    x : np.ndarray
        2D input whose ``axis`` has length 3, or a 1D array whose length is a
        multiple of 3. A 1D array is reshaped to ``(3, -1)``, i.e. one third per
        profile. This layout corresponds to ``axis=0``.
    f_0, g_0, h_0 : float
        ``f_0`` value of the three profiles.
    f_slope_1, f_slope_2, g_slope_1, g_slope_2, h_slope_1, h_slope_2 : float
        Slopes of the three profiles.
    x_split : float
        Split point shared by all profiles.
    axis : int
        Axis of ``x`` that enumerates the three profiles.
    flatten : bool
        If True, the result is returned flattened (C order). Defaults to False,
        which keeps the previous behaviour; mirrors ``double_split_linear_func``.

    Returns
    -------
    np.ndarray
        The three evaluated profiles stacked along ``axis``, optionally flattened.

    Raises
    ------
    ValueError
        If ``x`` is 1D but cannot be reshaped to ``(3, -1)``, or if ``x`` is
        neither 1D nor 2D.
    """

    x = np.asarray(x)

    if x.ndim == 1:
        try:
            x = x.reshape((3, -1))
        except ValueError as err:
            # Report the real shape; the message used to print the placeholder literally.
            raise ValueError(
                f"x must be a 2D array or one should be able to reshape it to a 2D array. But have x.ndim = {x.ndim} and x.shape = {x.shape}"
            ) from err
    elif x.ndim != 2:
        raise ValueError(
            f"x must be a 2D array or one should be able to reshape it to a 2D array. But have x.ndim = {x.ndim} and x.shape = {x.shape}"
        )

    params = dict(
        f_0=np.array([f_0, g_0, h_0]),
        slopes_1=np.array([f_slope_1, g_slope_1, h_slope_1]),
        slopes_2=np.array([f_slope_2, g_slope_2, h_slope_2]),
        x_split=x_split,
    )

    result = ndim_split_linear_func(x=x, axis=axis, **params)
    if flatten:
        result = result.flatten()
    return result


# Synthetic input: the integer ramp 0..10, stacked once per profile.
x = np.arange(0, 11, 1)
x_test = np.stack([x] * 2)
x_test_tri = np.stack([x] * 3)

# Coefficients of the profiles f, g and h: (f_0 value, slope 1, slope 2).
f_0, f_slope_1, f_slope_2 = 2, 1, 2
g_0, g_slope_1, g_slope_2 = 2, 1, 3
h_0, h_slope_1, h_slope_2 = 4, 1, 2


# NOTE(review): this rebinds x_split, i.e. it replaces the split altitude obtained from
# the profile fits. Re-running the fit plot after this cell would draw the split at 5.
x_split = 5

# Arguments that the two-profile and the three-profile call have in common.
shared_kwargs = dict(
    f_0=f_0,
    g_0=g_0,
    f_slope_1=f_slope_1,
    f_slope_2=f_slope_2,
    g_slope_1=g_slope_1,
    g_slope_2=g_slope_2,
    x_split=x_split,
)

result = double_split_linear_func(x=x_test, **shared_kwargs)
result_tri = triple_split_linear_func(
    x=x_test_tri,
    h_0=h_0,
    h_slope_1=h_slope_1,
    h_slope_2=h_slope_2,
    **shared_kwargs,
)

In [None]:
# Wrap the two- and three-profile functions as lmfit models; "x" is the only independent variable.
model_double = Model(double_split_linear_func, independent_vars=["x"])
model_triple = Model(triple_split_linear_func, independent_vars=["x"])

# Only x_split gets an explicit start value and bounds here; the bounds 0..10 match
# the range of the synthetic ramp used for testing.
# NOTE(review): the remaining parameters have no start values - confirm this is intended.
params_double = model_double.make_params(
    x_split={"value": 2, "min": 0, "max": 10},
)

# Same start value and bounds for x_split; `axis` and `flatten` are passed as fixed
# (non-varying) entries.
# NOTE(review): `flatten` only has an effect if triple_split_linear_func accepts such
# an argument - confirm.
params_triple = model_triple.make_params(
    x_split={"value": 2, "min": 0, "max": 10},
    axis={"value": 0, "vary": False},
    flatten={"value": True, "vary": False},
)

# Disabled trial fit of the two-profile model on the synthetic data.
# NOTE(review): `data` and `x` look swapped here - `result` was computed from `x_test`,
# so one would expect data=result and x=x_test. Confirm before re-enabling.
# model_double.fit(
#     data=x_test.flatten(),
#     params=params_double,
#     x=result.flatten(),
#     nan_policy="omit",
#     method="leastsq",
# )

''