In [None]:
import numpy as np
import xarray as xr
from pathlib import Path
from scipy.optimize import Bounds
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from sdm_eurec4a import RepositoryPath
from sdm_eurec4a.conversions import relative_humidity_partial_density, relative_humidity_dewpoint
import sdm_eurec4a.calculations as sdm_conversions
from sdm_eurec4a.visulization import set_custom_rcParams, label_from_attrs
from sdm_eurec4a.reductions import mean_and_stderror_of_mean
from sdm_eurec4a.identifications import match_clouds_and_dropsondes, match_clouds_and_cloudcomposite

from typing import Union, Tuple, Dict

import sdm_eurec4a.input_processing.models as smodels

# Apply the project's matplotlib settings. The returned object is indexed as
# ``default_colors[0]`` ... ``default_colors[3]`` by the plotting cells below.
default_colors = set_custom_rcParams()
# Repository/data locations are taken from the "levante" RepositoryPath entry.
RP = RepositoryPath("levante")
repo_dir = RP.repo_dir
data_dir = RP.data_dir

# Figure directory of this notebook; created (including parents) if missing.
fig_dir = repo_dir / "notebooks/issues/114"
fig_dir.mkdir(exist_ok=True, parents=True)

In [None]:
def dict_to_Dataset(d: dict, new_coords: dict) -> xr.Dataset:
    """
    Build an xarray Dataset from a plain dictionary of values.

    Every entry of ``d`` is wrapped in an ``xr.DataArray`` that receives
    ``new_coords`` as coordinates; the resulting arrays become the data
    variables of the returned Dataset.

    Parameters
    ----------
    d : dict
        Mapping of variable name to value.
    new_coords : dict
        Mapping of coordinate name to coordinate values, passed unchanged to
        every ``xr.DataArray``.

    Returns
    -------
    xr.Dataset
        Dataset with one data variable per key of ``d``.

    Notes
    -----
    Values that are ``np.ndarray``, ``list`` or ``xr.DataArray`` are wrapped
    without an explicit ``dims`` argument. All other values are wrapped with
    ``dims`` set to the keys of ``new_coords``.
    """
    array_like_types = (np.ndarray, list, xr.DataArray)
    dimension_names = list(new_coords.keys())

    data_vars = {}
    for name, value in d.items():
        array_kwargs = {"coords": new_coords}
        if not isinstance(value, array_like_types):
            # Only non-array values get their dimensions named explicitly.
            array_kwargs["dims"] = dimension_names
        data_vars[name] = xr.DataArray(value, **array_kwargs)

    return xr.Dataset(data_vars=data_vars)

In [None]:
# Open all observational inputs from the repository's data directory.
# NOTE(review): contents are described from the file names only — confirm.

# Processed dropsonde data.
drop_sondes = xr.open_dataset(repo_dir / "data/observation/dropsonde/processed/drop_sondes.nc")
# Distances between dropsondes and identified clusters (rain mask 5).
distance = xr.open_dataset(
    repo_dir
    / "data/observation/combined/distance/distance_dropsondes_identified_clusters_rain_mask_5.nc"
)
# Processed SAFIRE-CORE data.
safire = xr.open_dataset(repo_dir / "data/observation/safire_core/processed/safire_core.nc")
# Cloud composite in SI units (file version 20241025).
cloud_composite = xr.open_dataset(
    repo_dir / "data/observation/cloud_composite/processed/cloud_composite_SI_units_20241025.nc"
)
# Clusters identified in the cloud composite (rain mask 5).
identified_clusters = xr.open_dataset(
    repo_dir
    / "data/observation/cloud_composite/processed/identified_clusters/identified_clusters_rain_mask_5.nc"
)


def select_subset(
    ds: "xr.Dataset",
    altitude_name="altitude",
    drop=False,
    min_altitude=500,
    max_altitude=1200,
):
    """
    Keep only the part of ``ds`` that lies inside an altitude band.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset containing the altitude variable ``altitude_name``.
    altitude_name : str, optional
        Name of the altitude variable in ``ds``. Default is ``"altitude"``.
    drop : bool, optional
        Forwarded to ``ds.where``. Default is ``False``.
    min_altitude, max_altitude : float, optional
        Inclusive lower and upper limit of the altitude band, in the units of
        ``ds[altitude_name]``. Defaults are 500 and 1200, the values that were
        previously hard-coded.

    Returns
    -------
    The result of ``ds.where(mask, drop=drop)``, where ``mask`` is true for
    ``min_altitude <= altitude <= max_altitude``.
    """
    altitude = ds[altitude_name]
    mask = (altitude <= max_altitude) & (altitude >= min_altitude)
    return ds.where(mask, drop=drop)


# Restrict the aircraft datasets to the altitude band selected by ``select_subset``.
# Only the identified clusters are actually shortened (drop=True).
cloud_composite = select_subset(cloud_composite, "altitude")
identified_clusters = select_subset(identified_clusters, "altitude", drop=True)
safire = select_subset(safire, "altitude")

# Specific gas constant of water vapour in J kg^-1 K^-1.
SPECIFIC_GAS_CONSTANT_WATER_VAPOR = 461.5

# Derive one relative-humidity variable per absolute-humidity sensor. Each
# derived variable inherits the ``source`` attribute of *its own* sensor
# (previously sensor 1 was tagged with the source of sensor 2).
for sensor_index in (1, 2):
    absolute_humidity_name = f"absolute_humidity_{sensor_index}"
    relative_humidity_name = f"relative_humidity_{sensor_index}"

    # ``relative_humidity`` stays a notebook-level name because a later cell
    # reads it; after the loop it holds the result for sensor 2, as before.
    relative_humidity = relative_humidity_partial_density(
        temperature=safire["temperature"],
        partial_density=safire[absolute_humidity_name],
        specific_gas_constant=SPECIFIC_GAS_CONSTANT_WATER_VAPOR,
    )
    safire[relative_humidity_name] = relative_humidity
    safire[relative_humidity_name].attrs.update(
        source=safire[absolute_humidity_name].attrs["source"],
    )

# Use the SAFIRE-CORE relative humidity values

In [None]:
# Time stamps present in both datasets (np.intersect1d returns sorted unique values).
shared_times = np.intersect1d(safire["time"], cloud_composite["time"])
# Both datasets restricted to the common time stamps, so they can be compared sample by sample.
safire_shared = safire.sel(time=shared_times)
cloud_composite_shared = cloud_composite.sel(time=shared_times)

### Liquid water content

The liquid water content of the SAFIRE-CORE dataset does not look reliable: it contains negative values!

In [None]:
# Scatter plot of the two liquid-water-content measurements at the shared time stamps:
# SAFIRE-CORE on the x-axis, cloud composite on the y-axis.
fig, ax = plt.subplots()
ax.scatter(
    x=safire_shared["liquid_water_content"],
    y=cloud_composite_shared["liquid_water_content"],
    marker=".",
    alpha=0.2,
)
# Axis labels are built from the variables' attributes and prefixed with the data source.
ax.set_xlabel("SAFIRE-CORE " + label_from_attrs(safire_shared["liquid_water_content"]))
ax.set_ylabel("COMPOSITE " + label_from_attrs(cloud_composite_shared["liquid_water_content"]))

Text(0, 0.5, 'COMPOSITE Liquid water content $\\left[  g m^{-3}  \\right]$')

In [None]:
# 2D histogram of SAFIRE-CORE vs. cloud-composite liquid water content.

# Histogram limits as [[xmin, xmax], [ymin, ymax]]. Deliberately NOT called
# ``range``: that would shadow the builtin for every later cell of the notebook.
hist_range = [[-0.05, 3], [-0.05, 3]]

fig = plt.figure()
gs = fig.add_gridspec(1, 11, hspace=1, wspace=1)

ax_cc = fig.add_subplot(gs[0:9])
cax = fig.add_subplot(gs[-1])

_, _, _, mappable = ax_cc.hist2d(
    safire_shared["liquid_water_content"],
    cloud_composite_shared["liquid_water_content"],
    range=hist_range,
    bins=100,
    norm=mcolors.LogNorm(vmax=100, vmin=1),
    cmap="plasma",
)

fig.colorbar(mappable=mappable, cax=cax, label="Occurrence")

ax_cc.set_xlabel("SAFIRE-CORE " + label_from_attrs(safire_shared["liquid_water_content"]))
ax_cc.set_ylabel("COMPOSITE " + label_from_attrs(cloud_composite_shared["liquid_water_content"]))

# Correlation between the two measurements, shown in the title.
corr = xr.corr(safire_shared["liquid_water_content"], cloud_composite_shared["liquid_water_content"])

fig.suptitle(f"Liquid Water Content comparison with R = {corr.data:.2f}", fontsize="medium")
# 1:1 reference line.
ax_cc.plot((0, 3), (0, 3), "k--");

[<matplotlib.lines.Line2D at 0x7fff8c2f3290>]

##### Lag or smoothing does not help the correlation

In [None]:
# Correlation of the 10-step rolling means as a function of a time shift.
lags = np.arange(-20, 21, 1)

ds1 = safire_shared["liquid_water_content"].rolling(time=10).mean(skipna=True)
ds2 = cloud_composite_shared["liquid_water_content"].rolling(time=10).mean(skipna=True)

corr, corr_cc, corr_ds = [], [], []
for lag in lags:
    ds1_lagged = ds1.shift(time=lag)
    # cross-correlation: shifted SAFIRE-CORE against the composite
    corr.append(xr.corr(ds1_lagged, ds2))
    # auto-correlation of the composite
    corr_cc.append(xr.corr(ds2.shift(time=lag), ds2))
    # auto-correlation of SAFIRE-CORE
    corr_ds.append(xr.corr(ds1_lagged, ds1))

fig, ax = plt.subplots()
for values, marker, label in (
    (corr, "o", "SAFIRE-CORE and COMPOSITE"),
    (corr_cc, "x", "Auto COMPOSITE"),
    (corr_ds, "+", "Auto SAFIRE-CORE"),
):
    ax.scatter(lags, values, label=label, marker=marker)
ax.set_xlabel("Lag (time steps)")
ax.set_ylabel("Correlation coefficient")
ax.legend()
ax.set_title("Correlation between Liquid Water Content with Different Lags")

Text(0.5, 1.0, 'Correlation between Liquid Water Content with Different Lags')

In [None]:
# Correlation as a function of the rolling-mean window size.
rolling = (1, 2, 3, 4, 5, 6, 10, 20, 30, 40, 100)

ds1 = safire_shared["liquid_water_content"]
ds2 = cloud_composite_shared["liquid_water_content"]

corr, corr_both = [], []
for roll in rolling:
    ds1_roll = ds1.rolling(time=roll).mean(skipna=True)
    ds2_roll = ds2.rolling(time=roll).mean(skipna=True)

    # only SAFIRE-CORE smoothed
    corr.append(xr.corr(ds1_roll, ds2))
    # both series smoothed with the same window
    corr_both.append(xr.corr(ds1_roll, ds2_roll))

fig, ax = plt.subplots()
for values, marker, label in (
    (corr, "o", "SAFIRE-CORE rolling mean"),
    (corr_both, "X", "Both with rolling mean"),
):
    ax.scatter(rolling, values, label=label, marker=marker)
ax.set_xlabel("Rolling mean window size")
ax.set_ylabel("Correlation coefficient")
ax.legend()
ax.set_title("Correlation between Liquid Water Content with Different rolling mean window sizes")

Text(0.5, 1.0, 'Correlation between Liquid Water Content with Different rolling mean window sizes')

In [None]:
# Same window-size scan as in the previous cell, but with the SAFIRE-CORE
# series additionally shifted by ``lag`` time steps.
rolling = (1, 2, 3, 4, 5, 6, 10, 20, 30, 40, 100)

corr = []
corr_both = []

lag = -2
ds1 = safire_shared["liquid_water_content"]
ds2 = cloud_composite_shared["liquid_water_content"]
for roll in rolling:
    # NOTE(review): ``rolling(...)`` is used with its default (non-centred)
    # window, which by itself moves the smoothed signal in time — confirm that
    # this is intended when combining it with an explicit lag.
    ds1_roll_lagged = ds1.rolling(time=roll).mean(skipna=True).shift(time=lag)
    ds2_roll = ds2.rolling(time=roll).mean(skipna=True)

    # only SAFIRE-CORE smoothed (and shifted)
    corr.append(xr.corr(ds1_roll_lagged, ds2))
    # both series smoothed, SAFIRE-CORE shifted
    corr_both.append(xr.corr(ds1_roll_lagged, ds2_roll))

fig, ax = plt.subplots()
ax.scatter(rolling, corr, label="SAFIRE-CORE rolling mean", marker="o")
ax.scatter(rolling, corr_both, marker="X", label="Both with rolling mean")
ax.set_xlabel("Rolling mean window size")
ax.set_ylabel("Correlation coefficient")
ax.legend()
# The lag is taken from the variable so that title and computation cannot diverge.
ax.set_title(
    f"Correlation between Liquid Water Content for different rolling mean window sizes (lag {lag})"
);

Text(0.5, 1.0, 'Correlation between Liquid Water Content with Different with lag -2 and rolling mean window sizes')

### Relative humidity to LWC relation

We can see that there are pretty high relative humidity values in the ``SAFIRE-CORE`` dataset.

In [None]:
# Relative humidity (sensor 1) against liquid water content:
# left panel cloud-composite LWC, right panel SAFIRE-CORE LWC.
rh_name = "relative_humidity_1"

ds = safire_shared
cc = cloud_composite_shared

rh = ds[rh_name]

# Percentage of the finite RH samples above 100 % and above 102 %.
n_finite = np.isfinite(rh).sum()
above_102 = (rh > 102).sum()
above_100 = (rh > 100).sum()

fraction_100 = 100 * above_100 / n_finite
fraction_102 = 100 * above_102 / n_finite

fig = plt.figure()
gs = fig.add_gridspec(11, 21, hspace=1, wspace=1)

ax_cc = fig.add_subplot(gs[1:, 0:9])
ax_ds = fig.add_subplot(gs[1:, 10:19], sharey=ax_cc, sharex=ax_cc)
cax = fig.add_subplot(gs[1:, -1])

# Shared histogram settings so that both panels use the same bins and colour scale.
style = dict(
    range=[[10, 130], [-0.55, 2.5]],
    bins=[30, 50],
    norm=mcolors.LogNorm(vmax=1e3, vmin=1),
    cmap="plasma",
)

ax_cc.hist2d(rh, cc["liquid_water_content"], **style)
_, _, _, mappable = ax_ds.hist2d(rh, ds["liquid_water_content"], **style)

fig.colorbar(mappable=mappable, cax=cax, label="Occurrence")

# Reference lines: saturation (RH = 100 %) and zero liquid water content.
for ax in [ax_cc, ax_ds]:
    ax.axvline(100, color="black", linestyle="--")
    ax.axhline(0.0, color="black", linestyle="--", alpha=0.5)

ax_cc.set_xlabel(label_from_attrs(rh))
ax_cc.set_ylabel(label_from_attrs(ds["liquid_water_content"]))
ax_ds.set_xlabel(label_from_attrs(rh))
ax_cc.set_title("Cloud Composite LWC")
ax_ds.set_title("SAFIRE-CORE LWC")

title = f"Relative Humidity vs Liquid Water Content | {rh.attrs['source']}"
title += f"\n{fraction_100.data:.2f}% of meas. have RH > 100%"
title += f"\n{fraction_102.data:.2f}% of meas. have RH > 102%"

fig.suptitle(
    title,
    fontsize="medium",
);

Text(0.5, 0.98, 'Relative Humidity vs Liquid Water Content |  Li-Cor : LI-7500A: 75H-2079\n3.50% of meas. have RH > 100%\n1.45% of meas. have RH > 102%')

In [None]:
# Relative humidity (sensor 2) against liquid water content:
# left panel cloud-composite LWC, right panel SAFIRE-CORE LWC.
rh_name = "relative_humidity_2"

ds = safire_shared
cc = cloud_composite_shared

rh = ds[rh_name]

# Percentage of the finite RH samples above 100 % and above 102 %.
n_finite = np.isfinite(rh).sum()
above_102 = (rh > 102).sum()
above_100 = (rh > 100).sum()

fraction_100 = 100 * above_100 / n_finite
fraction_102 = 100 * above_102 / n_finite

fig = plt.figure()
gs = fig.add_gridspec(11, 21, hspace=1, wspace=1)

ax_cc = fig.add_subplot(gs[1:, 0:9])
ax_ds = fig.add_subplot(gs[1:, 10:19], sharey=ax_cc, sharex=ax_cc)
cax = fig.add_subplot(gs[1:, -1])

# Shared histogram settings so that both panels use the same bins and colour scale.
style = dict(
    range=[[10, 130], [-0.55, 2.5]],
    bins=[30, 50],
    norm=mcolors.LogNorm(vmax=1e3, vmin=1),
    cmap="plasma",
)

ax_cc.hist2d(rh, cc["liquid_water_content"], **style)
_, _, _, mappable = ax_ds.hist2d(rh, ds["liquid_water_content"], **style)

fig.colorbar(mappable=mappable, cax=cax, label="Occurrence")

# Reference lines: saturation (RH = 100 %) and zero liquid water content.
for ax in [ax_cc, ax_ds]:
    ax.axvline(100, color="black", linestyle="--")
    ax.axhline(0.0, color="black", linestyle="--", alpha=0.5)

ax_cc.set_xlabel(label_from_attrs(rh))
ax_cc.set_ylabel(label_from_attrs(ds["liquid_water_content"]))
ax_ds.set_xlabel(label_from_attrs(rh))
ax_cc.set_title("Cloud Composite LWC")
ax_ds.set_title("SAFIRE-CORE LWC")

title = f"Relative Humidity vs Liquid Water Content | {rh.attrs['source']}"
title += f"\n{fraction_100.data:.2f}% of meas. have RH > 100%"
title += f"\n{fraction_102.data:.2f}% of meas. have RH > 102%"

fig.suptitle(
    title,
    fontsize="medium",
);

Text(0.5, 0.98, 'Relative Humidity vs Liquid Water Content |  Li-Cor : LI-7500A: 75H-2079\n0.14% of meas. have RH > 100%\n0.00% of meas. have RH > 102%')

In [None]:
# Direct comparison of the two derived relative-humidity variables.
ds = safire_shared

fig = plt.figure()
gs = fig.add_gridspec(9, 11, hspace=1, wspace=1)

ax = fig.add_subplot(gs[1:, 0:9])
cax = fig.add_subplot(gs[1:, -1])

style = dict(
    range=[[20, 130], [20, 130]],
    bins=[50, 50],
    norm=mcolors.LogNorm(vmax=1e3, vmin=1),
    cmap="plasma",
)

_, _, _, mappable = ax.hist2d(ds["relative_humidity_1"], ds["relative_humidity_2"], **style)

fig.colorbar(mappable=mappable, cax=cax, label="Occurrence")

# Saturation (RH = 100 %) for each of the two variables.
ax.axvline(100, color="black", linestyle="--")
ax.axhline(100, color="black", linestyle="--")

# Prefix the labels with the sensor: the attribute-based label alone does not
# say which of the two variables is on which axis.
ax.set_xlabel("Sensor 1: " + label_from_attrs(ds["relative_humidity_1"]))
ax.set_ylabel("Sensor 2: " + label_from_attrs(ds["relative_humidity_2"]));

Text(0, 0.5, 'Relative humidity $\\left[  \\%  \\right]$')

### Single flight track

We can see very high relative humidity values, exceeding 110%, which should not be the case.
There can be multiple explanations for this:
1. The calculations we use are not accurate enough. Maybe we need to consider the temperature dependency of $L_V$ more carefully. But we already use the most accurate estimation, by Murphy and Koop!
2. The temperature and absolute humidity values are not well related to each other. BUT the values exceed 100% for a consistent time period.

In [None]:
# Time window of the single flight segment analysed in the following cells.
single_flight_time_slice = slice("2020-02-13T08:45:00", "2020-02-13T11:00")
# Short aliases for the three datasets restricted to that window:
# ds = SAFIRE-CORE, cc = cloud composite, ic = identified clusters.
ds = safire.sel(time=single_flight_time_slice)
cc = cloud_composite.sel(time=single_flight_time_slice)
ic = identified_clusters.sel(time=single_flight_time_slice)

Liquid water content

In [None]:
# Horizontal flight track of the selected segment, with the positions of the identified clusters.
plt.figure(figsize=(3, 3))
plt.plot(
    ds["longitude"],
    ds["latitude"],
    marker="None",
    label="Flight track",
)
plt.scatter(
    ic["longitude"],
    ic["latitude"],
    color="k",
    label="identified clusters",
)

# NOTE(review): both artists carry a label but no legend is drawn — confirm whether one is wanted.
plt.xlabel("Longitude")
plt.ylabel("Latitude")
plt.title("Flight track and identified clusters")

Text(0.5, 1.0, 'Flight track and identified clusters')

In [None]:
# Time series of liquid water content for the selected flight segment, one line per dataset.
fig, ax = plt.subplots()

ax.plot(
    ds["time"],
    ds["liquid_water_content"],
    label="SAFIRE-CORE",
)

ax.plot(
    cc["time"],
    cc["liquid_water_content"],
    label="Composite",
)

ax.set_xlabel("Time")
# The y-label is taken from the SAFIRE-CORE variable's attributes and used for both lines.
ax.set_ylabel(label_from_attrs(ds["liquid_water_content"]))
ax.legend()
# Optional axis tweaks, currently disabled:
# ax.set_yscale("log")
# ax.set_ylim(0, None)

<matplotlib.legend.Legend at 0x7fffa087d370>

In [None]:
# Relative humidity (left axis) and absolute humidity (right axis) along the
# selected flight segment, one panel per humidity sensor.
fig, axs = plt.subplots(nrows=2, sharex=True, figsize=(9, 6))

# Shade the identified cloud periods in both panels. Only the first span of a
# panel gets a label so that the legend contains a single "Rain Clouds" entry.
for ax in axs:
    for span_index, (s, e) in enumerate(zip(ic["start"], ic["end"])):
        label = "Rain Clouds" if span_index == 0 else None
        ax.axvspan(s.data, e.data, color="grey", alpha=0.5, label=label)
twinx_axs = [ax.twinx() for ax in axs]

for panel, sensor in enumerate((1, 2)):
    rh = ds[f"relative_humidity_{sensor}"]
    supersaturated = rh > 100

    rh.plot(
        ax=axs[panel],
        linestyle="-",
        marker="None",
        label="RH",
        color=default_colors[0],
    )
    rh.where(supersaturated).plot(
        ax=axs[panel],
        linestyle="None",
        marker=".",
        label="RH > 100 %",
        color=default_colors[1],
    )

    # Absolute humidity scaled by 1e3 and labelled as g/m^3.
    ah = 1e3 * ds[f"absolute_humidity_{sensor}"]
    ah.attrs.update(units="g/m^3", long_name="Absolute humidity")

    ah.plot(
        ax=twinx_axs[panel],
        linestyle="-",
        marker="None",
        label="AH",
        color=default_colors[2],
    )
    ah.where(supersaturated).plot(
        ax=twinx_axs[panel],
        linestyle="None",
        marker=".",
        label="AH for RH > 100 %",
        color=default_colors[3],
    )

    # The left axis shows relative humidity in BOTH panels (the second panel
    # used to be labelled with the absolute-humidity label).
    axs[panel].set_ylabel(label_from_attrs(rh))
    axs[panel].set_title(ds[f"absolute_humidity_{sensor}"].attrs["comment"])

fig.suptitle(
    f"SAFIRE-CORE relative humidity and absolute humidity\nFlight number {ds.flight_number.mean().data}",
    fontsize="medium",
)
axs[1].set_xlabel("Time")
axs[0].legend(loc="lower left")
twinx_axs[0].legend(loc="lower right")

fig.tight_layout()

In [None]:
# Single-panel version of the relative/absolute humidity time series, for sensor 1 only.
fig, axs = plt.subplots(nrows=1, sharex=True, figsize=(9, 4.5))
# Wrap the single Axes in a list so the code below can index it as axs[0].
axs = [axs]
twinx_axs = [ax.twinx() for ax in axs]

# Relative humidity on the left axis; samples above 100 % are highlighted with markers.
ds["relative_humidity_1"].plot(
    ax=axs[0],
    linestyle="-",
    marker="None",
    label="RH",
    color=default_colors[0],
)
ds["relative_humidity_1"].where(ds["relative_humidity_1"] > 100).plot(
    ax=axs[0],
    linestyle="None",
    marker=".",
    label="RH > 100 %",
    color=default_colors[1],
)

# Absolute humidity scaled by 1e3 and labelled as g/m^3, drawn on the right axis.
ah = 1e3 * ds["absolute_humidity_1"]
ah.attrs.update(units="g/m^3", long_name="Absolute humidity")

ah.plot(
    ax=twinx_axs[0],
    linestyle="-",
    marker="None",
    label="AH",
    color=default_colors[2],
)
# Absolute humidity at the samples where the relative humidity exceeds 100 %.
ah.where(ds["relative_humidity_1"] > 100).plot(
    ax=twinx_axs[0],
    linestyle="None",
    marker=".",
    label="AH for RH > 100 %",
    color=default_colors[3],
)

# The flight number shown in the title is the mean of ``flight_number`` over the selection.
fig.suptitle(
    f"SAFIRE-CORE relative humidity and absolute humidity\nFlight number {ds.flight_number.mean().data}",
    fontsize="medium",
)
axs[0].set_ylabel(label_from_attrs(ds["relative_humidity_1"]))
axs[0].set_xlabel("Time")
axs[0].legend(loc="lower left")
twinx_axs[0].legend(loc="lower right")

# The panel title is the ``comment`` attribute of the absolute-humidity variable.
axs[0].set_title(ds["absolute_humidity_1"].attrs["comment"])

fig.tight_layout()

In [None]:
# Plot the trajectory of the ATR aircraft in 3D, coloured by relative humidity.

longitude = ds["longitude"]
latitude = ds["latitude"]
altitude = ds["altitude"]
# Colour: 10-step rolling mean of the sensor-1 relative humidity.
color = ds["relative_humidity_1"].rolling(time=10).mean()
# Samples whose smoothed relative humidity reaches saturation.
saturated = color >= 100

fig = plt.figure(figsize=(8, 6))
gs = fig.add_gridspec(10, 100)

ax = fig.add_subplot(gs[:, :], projection="3d")
cax = fig.add_subplot(gs[2:-2, -3:])

sc = ax.scatter(longitude, latitude, altitude, c=color, cmap="Blues", marker=".")
ax.scatter(
    longitude.where(saturated),
    latitude.where(saturated),
    altitude.where(saturated),
    color="red",
    marker="X",
    zorder=10,
)
# Label the colorbar from the variable that is actually plotted instead of a
# notebook-level ``relative_humidity`` left over from an earlier cell.
fig.colorbar(cax=cax, mappable=sc, label=label_from_attrs(ds["relative_humidity_1"]))

ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
ax.set_zlabel("Altitude")

# Distinct (non-NaN) flight numbers within the selection, shown in the title.
flight_number = ds["flight_number"]
flight_Numbers = np.unique(flight_number[~np.isnan(flight_number)])
ax.set_title(f"ATR trajectory with relative humidity for flight NUM {flight_Numbers}");

# fig.tight_layout()

Text(0.5, 0.92, 'ATR trajectory with relative humidity for flight NUM [19.]')

# Optimize the linear fits of the thermodynamic profiles

### Make sure that we can reconstruct the specific humidity from the relative humidity

In [None]:
# NOTE: these imports live here (not in the first cell) because later cells
# rely on ``reload`` and ``tqdm`` being defined by this cell, and because
# ``sdm_conversions`` is rebound here to ``sdm_eurec4a.conversions``.
from importlib import reload

from tqdm import tqdm

import sdm_eurec4a.conversions as sdm_conversions

# Pick up local edits of the conversions module without restarting the kernel.
reload(sdm_conversions)

q_v = drop_sondes["specific_humidity"]
t = drop_sondes["air_temperature"]
p = drop_sondes["pressure"]

# Relative humidity from (T, p, q_v), once per ``simplified`` setting.
rh1 = sdm_conversions.relative_humidity_from_tps(
    specific_humidity=q_v, temperature=t, pressure=p, simplified=True
)

# NOTE(review): ``rh2`` is computed but not used in the check below — confirm
# whether the non-simplified round trip should be tested as well.
rh2 = sdm_conversions.relative_humidity_from_tps(
    specific_humidity=q_v, temperature=t, pressure=p, simplified=False
)

# Round trip: q_v -> RH -> q_v with the same ``simplified`` setting.
q_v1 = sdm_conversions.specific_humidity_from_relative_humidity_temperature_pressure(
    relative_humidity=rh1, temperature=t, pressure=p, simplified=True
)

# Use the magnitude of the relative deviation: the maximum of the signed
# deviation would ignore every point where the reconstruction is too small.
max_relative_error = np.abs((q_v1 - q_v) / q_v).max()

print(f"Max relative error while reconstructing specific humidity: {max_relative_error.data:.2e}")

Max relative error while reconstructing specific humidity: 3.09e-07


identified_clusters

In [None]:
# Relative-humidity profiles of the dropsondes matched to ONE identified
# cloud, together with the SAFIRE-CORE relative humidity measured in that cloud.

# Fixed seed: "Restart & Run All" always inspects the same cloud, so the
# figure — and a possible "no dropsondes" error — is reproducible.
CLOUD_SELECTION_SEED = 42
rng = np.random.default_rng(CLOUD_SELECTION_SEED)
cloud_id = rng.choice(identified_clusters["cloud_id"].values)

ic = identified_clusters.swap_dims({"time": "cloud_id"}).sel(cloud_id=cloud_id)

# Dropsondes matched with the tighter temporal/spatial limits ...
drop_sondes_selected = match_clouds_and_dropsondes(
    ds_clouds=ic,
    ds_sonde=drop_sondes,
    ds_distance=distance,
    max_temporal_distance=np.timedelta64(2, "h"),
    max_spatial_distance=0.5e2,
)
if drop_sondes_selected["time"].size == 0:
    raise ValueError(
        f"No dropsondes found for the selected cloud (cloud_id={cloud_id}); "
        "change CLOUD_SELECTION_SEED to inspect another cloud"
    )

# ... and with the wider limits, drawn fainter as background.
drop_sondes_selected_large = match_clouds_and_dropsondes(
    ds_clouds=ic,
    ds_sonde=drop_sondes,
    ds_distance=distance,
    max_temporal_distance=np.timedelta64(12, "h"),
    max_spatial_distance=1e2,
)

cloud_composite_selected = match_clouds_and_cloudcomposite(
    ds_clouds=ic,
    ds_cloudcomposite=cloud_composite,
)
safire_selected = safire.sel(time=cloud_composite_selected["time"])


def _plot_mean_relative_humidity(ax, relative_humidity, altitude, color):
    """Draw mean +/- standard deviation of ``relative_humidity`` at the mean ``altitude``."""
    ax.errorbar(
        x=relative_humidity.mean(skipna=True),
        xerr=relative_humidity.std(skipna=True),
        y=altitude.mean(),
        marker=".",
        alpha=0.3,
        color=color,
    )


fig, ax = plt.subplots()

ax.plot(
    drop_sondes_selected["relative_humidity"].T,
    drop_sondes_selected["altitude"],
    color="grey",
    alpha=0.5,
)
ax.plot(
    drop_sondes_selected_large["relative_humidity"].T,
    drop_sondes_selected_large["altitude"],
    color="grey",
    alpha=0.1,
)

# Mean altitude of the cloud-composite samples of the selected cloud.
ax.axhline(
    cloud_composite_selected["altitude"].mean(), color="grey", linestyle="-", alpha=0.3, zorder=0
)

# The error bars show the standard deviation. The standard error of the mean
# that used to be computed first was overwritten before it was ever plotted.
_plot_mean_relative_humidity(
    ax, safire_selected["relative_humidity_1"], safire_selected["altitude"], color="red"
)
_plot_mean_relative_humidity(
    ax, safire_selected["relative_humidity_2"], safire_selected["altitude"], color="blue"
)

ax.set_xlabel("Relative humidity")
ax.set_ylabel("Altitude")
ax.set_ylim(0, 1200);

(0.0, 1200.0)

# Relative humidity

In [None]:
# For every identified cloud with enough matched dropsondes, collect the mean
# SAFIRE-CORE relative humidity and the time-mean dropsonde humidity profile.

# Minimum number of matched dropsondes required to keep a cloud.
MIN_MATCHED_DROPSONDES = 3

list_safire_humidity = []
list_drop_sondes_humidity = []
cloud_ids = []

# Loop-invariant: index the clusters by cloud_id once instead of per iteration.
clusters_by_cloud_id = identified_clusters.swap_dims({"time": "cloud_id"})

for cloud_id in identified_clusters["cloud_id"]:

    ic = clusters_by_cloud_id.sel(cloud_id=cloud_id)
    # SAFIRE-CORE samples between the start and end time of the cloud.
    sc = safire.sel(time=slice(ic["start"], ic["end"]))
    ds = match_clouds_and_dropsondes(
        ds_clouds=ic,
        ds_sonde=drop_sondes,
        ds_distance=distance,
        max_temporal_distance=np.timedelta64(3, "h"),
        max_spatial_distance=1e2,
    )
    if ds["time"].size < MIN_MATCHED_DROPSONDES:
        continue

    cloud_ids.append(cloud_id)
    list_safire_humidity.append(sc["relative_humidity_1"].mean())
    list_drop_sondes_humidity.append(ds["relative_humidity"].mean("time"))

In [None]:
# Combine the per-cloud results into labelled arrays.

# One mean SAFIRE-CORE relative humidity per cloud.
safire_humidity = xr.DataArray(
    list_safire_humidity,
    dims=("cloud_id",),
    coords=dict(cloud_id=cloud_ids),
)

# Raw strings: "\%" in a normal string literal is an invalid escape sequence
# and triggers a warning; r"\%" is the same two characters without it.
safire_humidity.attrs.update(
    units=r"\%",
    long_name="Mean relative humidity",
    comment="Mean relative humidity of SAFIRE-CORE measurements for each identified cloud",
)

# One time-mean dropsonde profile (along altitude) per cloud.
drop_sondes_humidity = xr.DataArray(
    list_drop_sondes_humidity,
    dims=("cloud_id", "altitude"),
    coords=dict(
        cloud_id=cloud_ids,
        altitude=drop_sondes["altitude"],
    ),
)

drop_sondes_humidity.attrs.update(
    units=r"\%",
    long_name="Mean relative humidity",
    comment="Mean relative humidity of dropsondes measurements for each identified cloud",
)

  units="\%",
  units="\%",


In [None]:
class InitialAndBounds(object):

    def __init__(self, x0, bounds):
        self.x0 = x0
        self.bounds = bounds

    def __call__(self, *args, **kwargs):
        return self.x0, self.bounds


class RelativeHumidityIB(InitialAndBounds):
    """
    Initial guess and bounds for the two free parameters of the relative humidity fit.

    The parameters are ordered ``(f_0, slope_1)``. Each one is specified below as a
    ``(lower bound, initial value, upper bound)`` triple.
    """

    def __init__(self):
        # f_0: presumably the relative humidity at the surface. The values are on
        # the 0-100 scale (percent), matching ``saturation_value=100`` used for the fit.
        f_0 = np.array([60, 85, 100])
        # slope_1: change per metre of altitude, written as "per 1000 m" fractions.
        # NOTE(review): presumably percent per metre - confirm against the model.
        slope_1 = np.array([20 / 1000, 40 / 1000, 100 / 1000])

        # Initialise through the base class so the attributes are set in one place.
        super().__init__(
            x0=(f_0[1], slope_1[1]),
            bounds=Bounds(
                lb=[f_0[0], slope_1[0]],
                ub=[f_0[2], slope_1[2]],
            ),
        )

### Unweighted

In [None]:
# Reload the models module so that edits to its source are picked up by the running kernel.
# NOTE(review): `reload` is not imported in the notebook's first cell; presumably
# `importlib.reload` is imported in another cell - confirm.
reload(smodels)

<module 'sdm_eurec4a.input_processing.models' from '/home/m/m301096/repositories/sdm-eurec4a/src/sdm_eurec4a/input_processing/models.py'>

In [None]:
list_prediction = []
list_parameters = []

for cloud_id in tqdm(cloud_ids):

    cloud_id = int(cloud_id)
    ic = identified_clusters.swap_dims({"time": "cloud_id"}).sel(cloud_id=cloud_id)

    # Same dropsonde selection criteria as used when `cloud_ids` was built.
    sondes = match_clouds_and_dropsondes(
        ds_clouds=ic,
        ds_sonde=drop_sondes,
        ds_distance=distance,
        max_temporal_distance=np.timedelta64(3, "h"),
        max_spatial_distance=1e2,
    )

    # Observations (y) and their altitudes (t), both laid out as (time, altitude).
    y_train = sondes["relative_humidity"]
    t_train = sondes["altitude"].expand_dims(time=sondes["time"])

    y_train = y_train.transpose("time", "altitude")
    t_train = t_train.transpose("time", "altitude")

    # One instance provides both the initial guess and the bounds.
    initial = RelativeHumidityIB()

    # Only altitudes 200-500 are used for training; all samples are weighted equally.
    relative_humidity_fit = smodels.FixedSaturatedLinearLeastSquare(
        name="test",
        x0=initial.x0,
        bounds=initial.bounds,
        y_train=y_train.sel(altitude=slice(200, 500)),
        t_train=t_train.sel(altitude=slice(200, 500)),
        saturation_value=100,
        weight=None,
    )
    relative_humidity_fit.fit(3)

    parameters = relative_humidity_fit.full_parameters

    # Store the fitted parameters with a length-one `cloud_id` dimension so
    # the per-cloud datasets can be concatenated afterwards.
    list_parameters.append(
        dict_to_Dataset(
            d=parameters,
            new_coords=dict(cloud_id=[cloud_id]),
        )
    )

    # Evaluate the fitted profile on the full dropsonde altitude grid.
    dimension = sondes["altitude"].astype(float)
    dimension, prediction = relative_humidity_fit.predict(dimension)

    prediction = xr.DataArray(prediction, dims=("altitude",), coords=dict(altitude=dimension))
    prediction = prediction.expand_dims(cloud_id=(cloud_id,))

    list_prediction.append(prediction)


# Combine the per-cloud fitted relative humidity profiles and parameters.
relative_humidity_unweighted_fitted = xr.concat(
    list_prediction,
    dim="cloud_id",
)
# Raw string: "\%" in a normal string literal is an invalid escape sequence.
relative_humidity_unweighted_fitted.attrs.update(long_name="relative humidity fitted data", units=r"\%")
relative_humidity_unweighted_parameters = xr.concat(
    list_parameters,
    dim="cloud_id",
)
relative_humidity_unweighted_parameters["x_split"].attrs.update(
    long_name="RH saturation altitude", units="m"
)

  relative_humidity_unweighted_fitted.attrs.update(long_name="relative humidity fitted data", units="\%")
  0%|          | 0/260 [00:00<?, ?it/s]

100%|██████████| 260/260 [00:13<00:00, 19.54it/s]


In [None]:
fig, ax = plt.subplots()

# One line per cloud: fitted relative humidity on the x-axis, altitude on the y-axis.
ax.plot(
    relative_humidity_unweighted_fitted.T,
    relative_humidity_unweighted_fitted["altitude"],
    color="k",
    alpha=0.2,
)

# Axis labels follow the plotted data (x = relative humidity, y = altitude).
ax.set_xlabel(label_from_attrs(relative_humidity_unweighted_fitted))
ax.set_ylabel(label_from_attrs(relative_humidity_unweighted_fitted["altitude"]))

ax.set_ylim(0, 1500)

ax.set_title(
    f"Fitted Relative Humidity profiles for all {len(relative_humidity_unweighted_fitted['cloud_id'])} clouds"
)

Text(0.5, 1.0, 'Fitted Relative Humidity profiles for all 260 clouds')

In [None]:
fig, ax = plt.subplots()

# x: saturation altitude from the unweighted fit, y: altitude of the identified cloud.
saturation_altitude = relative_humidity_unweighted_parameters["x_split"]
cloud_altitude = identified_clusters.swap_dims({"time": "cloud_id"}).sel(cloud_id=cloud_ids)["altitude"]
ax.scatter(saturation_altitude, cloud_altitude, color=default_colors[1], alpha=0.5)

# 1:1 reference line.
ax.plot([500, 1400], [500, 1400], color="black", linestyle="--")

ax.set(
    xlabel=label_from_attrs(saturation_altitude),
    ylabel=label_from_attrs(identified_clusters["altitude"]),
    title="RH saturation altitude from JOANNE\n vs. cloud altitude from SAFIRE-CORE",
)
ax.set_xlim(400, 1600)
ax.set_ylim(400, 1600)

(400.0, 1600.0)

In [None]:
fig, ax = plt.subplots()

# Altitude of each identified cloud that has a fit.
altitudes = identified_clusters.swap_dims({"time": "cloud_id"})["altitude"].sel(cloud_id=cloud_ids)

# x: SAFIRE mean RH per cloud,
# y: unweighted fit evaluated at the altitude level nearest to the cloud altitude.
ax.hist2d(
    safire_humidity.sel(cloud_id=cloud_ids),
    relative_humidity_unweighted_fitted.sel(altitude=altitudes, method="nearest"),
    bins=np.arange(60, 110, 2),
    cmap="Reds",
)
ax.set_xlabel(label_from_attrs(safire_humidity))
ax.set_ylabel(label_from_attrs(relative_humidity_unweighted_fitted))
# 1:1 reference line.
ax.plot([60, 110], [60, 110], color="black", linestyle="--")
# The title names the quantities that are compared here (it previously
# described the saturation-altitude scatter plot).
ax.set_title("RH at cloud observed altitude \n Fitted RH (unweighted) vs SAFIRE mean")

Text(0.5, 1.0, 'RH saturation altitude from JOANNE\n vs. cloud altitude from SAFIRE-CORE')

In [None]:
fig, ax = plt.subplots()

cloud_id = relative_humidity_unweighted_fitted["cloud_id"]
# Altitude of each identified cloud that has a fit.
altitudes = identified_clusters.swap_dims({"time": "cloud_id"})["altitude"].sel(cloud_id=cloud_id)

# x: mean dropsonde RH, y: unweighted fit; both taken at the altitude level
# nearest to the cloud altitude.
ax.hist2d(
    drop_sondes_humidity.sel(cloud_id=cloud_id).sel(altitude=altitudes, method="nearest"),
    relative_humidity_unweighted_fitted.sel(altitude=altitudes, method="nearest"),
    bins=np.arange(60, 110, 2),
    cmap="Reds",
)
# The x-axis shows dropsonde data, so its label comes from the dropsonde array
# (it was previously taken from the SAFIRE array).
ax.set_xlabel(label_from_attrs(drop_sondes_humidity))
ax.set_ylabel(label_from_attrs(relative_humidity_unweighted_fitted))
# 1:1 reference line.
ax.plot([60, 110], [60, 110], color="black", linestyle="--")
ax.set_title("RH at cloud observed altitude \n Fitted RH (unweighted) vs JOANNE mean")

Text(0.5, 1.0, 'RH saturation altitude from JOANNE\n vs. cloud altitude from SAFIRE-CORE')

In [None]:
fig, ax = plt.subplots()

cloud_id = relative_humidity_unweighted_fitted["cloud_id"]
# Altitude of each identified cloud that has a fit.
altitudes = identified_clusters.swap_dims({"time": "cloud_id"})["altitude"].sel(cloud_id=cloud_id)

# Distribution of the unweighted fit evaluated at the altitude level nearest
# to each cloud altitude.
ax.hist(
    relative_humidity_unweighted_fitted.sel(altitude=altitudes, method="nearest"),
    color=default_colors[1],
    alpha=0.5,
)
# A histogram has the binned quantity on x and the counts on y.
ax.set_xlabel(label_from_attrs(relative_humidity_unweighted_fitted))
ax.set_ylabel("Number of clouds")
ax.set_title("Fitted RH (unweighted) at cloud observed altitude")

Text(0.5, 1.0, 'RH saturation altitude from JOANNE\n vs. cloud altitude from SAFIRE-CORE')

### Weighted

In [None]:
list_prediction = []
list_parameters = []

# Offset of the normalised weights: the sonde with the lowest mean relative
# humidity gets the weight a / (1 + a), the one with the highest mean gets 1.
a = 0.3

for cloud_id in tqdm(cloud_ids):

    cloud_id = int(cloud_id)
    ic = identified_clusters.swap_dims({"time": "cloud_id"}).sel(cloud_id=cloud_id)

    # Same dropsonde selection criteria as used when `cloud_ids` was built.
    sondes = match_clouds_and_dropsondes(
        ds_clouds=ic,
        ds_sonde=drop_sondes,
        ds_distance=distance,
        max_temporal_distance=np.timedelta64(3, "h"),
        max_spatial_distance=1e2,
    )

    time = sondes["time"]

    y_train = sondes["relative_humidity"]
    t_train = sondes["altitude"].expand_dims(time=time)

    # Observations (y) and their altitudes (t) as (time, altitude), restricted
    # to altitudes 100-700; the weights are derived from this range.
    y_train = y_train.transpose("time", "altitude").sel(altitude=slice(100, 700))
    t_train = t_train.transpose("time", "altitude").sel(altitude=slice(100, 700))

    altitude = y_train["altitude"]

    # One weight per sonde, increasing with the sonde's mean relative humidity.
    w = y_train.mean(dim="altitude")
    w = w - w.min()
    w_max = w.max()
    # If all sondes have the same mean, the range is zero; skip the division
    # so that every sonde gets the same weight instead of NaN.
    if w_max > 0:
        w = w / w_max
    w = (w + a) / (1 + a)

    # Broadcast the per-sonde weight to every altitude level.
    weight = w.expand_dims(altitude=altitude)
    weight = weight.transpose("time", "altitude")

    # One instance provides both the initial guess and the bounds.
    initial = RelativeHumidityIB()

    # Only altitudes 200-500 are used for training.
    relative_humidity_fit = smodels.FixedSaturatedLinearLeastSquare(
        name="test",
        x0=initial.x0,
        bounds=initial.bounds,
        y_train=y_train.sel(altitude=slice(200, 500)),
        t_train=t_train.sel(altitude=slice(200, 500)),
        weight=weight.sel(altitude=slice(200, 500)),
        saturation_value=100,
    )
    relative_humidity_fit.fit(3)

    parameters = relative_humidity_fit.full_parameters

    # Store the fitted parameters with a length-one `cloud_id` dimension so
    # the per-cloud datasets can be concatenated afterwards.
    list_parameters.append(
        dict_to_Dataset(
            d=parameters,
            new_coords=dict(cloud_id=[cloud_id]),
        )
    )

    # Evaluate the fitted profile on the full dropsonde altitude grid.
    dimension = sondes["altitude"].astype(float)
    dimension, prediction = relative_humidity_fit.predict(dimension)

    prediction = xr.DataArray(prediction, dims=("altitude",), coords=dict(altitude=dimension))
    prediction = prediction.expand_dims(cloud_id=(cloud_id,))

    list_prediction.append(prediction)


# Combine the per-cloud fitted relative humidity profiles and parameters.
relative_humidity_fitted = xr.concat(
    list_prediction,
    dim="cloud_id",
)
# Raw string: "\%" in a normal string literal is an invalid escape sequence.
relative_humidity_fitted.attrs.update(long_name="relative humidity fitted data", units=r"\%")
relative_humidity_parameters = xr.concat(
    list_parameters,
    dim="cloud_id",
)
relative_humidity_parameters["x_split"].attrs.update(long_name="RH saturation altitude", units="m")

  relative_humidity_fitted.attrs.update(long_name="relative humidity fitted data", units="\%")
100%|██████████| 260/260 [00:21<00:00, 12.31it/s]


In [None]:
fig, ax = plt.subplots()

# All weighted-fit profiles overlaid, one translucent line per cloud.
fitted_altitude = relative_humidity_fitted["altitude"]
ax.plot(relative_humidity_fitted.T, fitted_altitude, color="k", alpha=0.2, linestyle="-")

ax.set(
    xlabel=label_from_attrs(relative_humidity_fitted),
    ylabel=label_from_attrs(fitted_altitude),
    title="",
)
ax.set_ylim(0, 1500)

fig.savefig(fig_dir / "relative_humidity_fit.png", dpi=400)

In [None]:
# Pick one fitted cloud at random as an example.
# NOTE(review): no seed is set in this cell, so the example differs between runs.
cloud_id = np.random.choice(relative_humidity_fitted["cloud_id"].values)

ic = identified_clusters.swap_dims({"time": "cloud_id"}).sel(cloud_id=cloud_id)

# Sondes within the 3 h window used for the fits ...
drop_sondes_selected = match_clouds_and_dropsondes(
    ds_clouds=ic,
    ds_sonde=drop_sondes,
    ds_distance=distance,
    max_temporal_distance=np.timedelta64(3, "h"),
    max_spatial_distance=1e2,
)
# ... and within a wider 12 h window, drawn fainter for context.
drop_sondes_selected_large = match_clouds_and_dropsondes(
    ds_clouds=ic,
    ds_sonde=drop_sondes,
    ds_distance=distance,
    max_temporal_distance=np.timedelta64(12, "h"),
    max_spatial_distance=1e2,
)

if drop_sondes_selected["time"].size == 0:
    raise ValueError("No dropsondes found for the selected cloud")

cloud_composite_selected = match_clouds_and_cloudcomposite(
    ds_clouds=ic,
    ds_cloudcomposite=cloud_composite,
)

# SAFIRE samples at the time steps of the matched cloud composite data.
safire_selected = safire.sel(time=cloud_composite_selected["time"])


fig, ax = plt.subplots()

# Individual dropsonde profiles.
ax.plot(
    drop_sondes_selected["relative_humidity"].T,
    drop_sondes_selected["altitude"],
    color="grey",
    alpha=0.5,
)
ax.plot(
    drop_sondes_selected_large["relative_humidity"].T,
    drop_sondes_selected_large["altitude"],
    color="grey",
    alpha=0.1,
)

# Mean altitude of the matched cloud composite measurements.
ax.axhline(
    cloud_composite_selected["altitude"].mean(),
    color="k",
    linestyle=":",
    alpha=1,
    zorder=0,
    label="ATR altitude",
)

# Both SAFIRE humidity variables: mean over the selection with the standard
# deviation as error bar. (The standard error of the mean was previously
# computed as well but overwritten before use, so that call is dropped.)
for variable, marker, color, label in (
    ("relative_humidity_1", "x", "red", "RH WVSS2"),
    ("relative_humidity_2", "o", "blue", "RH LICOR"),
):
    safire_rh = safire_selected[variable]
    ax.errorbar(
        x=safire_rh.mean(skipna=True),
        xerr=safire_rh.std(skipna=True),
        y=safire_selected["altitude"].mean(),
        marker=marker,
        alpha=0.9,
        color=color,
        label=label,
    )

# Weighted fit for the selected cloud.
ax.plot(
    relative_humidity_fitted.sel(cloud_id=cloud_id),
    relative_humidity_fitted["altitude"],
    color="k",
    alpha=1,
    linestyle="--",
)

ax.legend()
ax.set_ylim(0, 1200)
ax.set_xlim(50, 120)
ax.set_ylabel(r"Altitude [$m$]")
ax.set_xlabel(label_from_attrs(relative_humidity_fitted))
ax.set_title(f"Cloud {cloud_id}")

fig.savefig(fig_dir / "relative_humidity_fit_example.png", dpi=400)

In [None]:
fig, ax = plt.subplots()

# x: saturation altitude from the weighted fit, y: altitude of the identified cloud.
saturation_altitude = relative_humidity_parameters["x_split"]
cloud_altitude = identified_clusters.swap_dims({"time": "cloud_id"}).sel(cloud_id=cloud_ids)["altitude"]
ax.scatter(saturation_altitude, cloud_altitude, color=default_colors[1], alpha=0.5)

# 1:1 reference line.
ax.plot([500, 1500], [500, 1500], color="black", linestyle="--")

ax.set(
    xlabel=label_from_attrs(saturation_altitude),
    ylabel=label_from_attrs(identified_clusters["altitude"]),
    title="RH saturation altitude from JOANNE\n vs. cloud altitude from SAFIRE-CORE",
)
ax.set_xlim(400, 1600)
ax.set_ylim(400, 1600)

fig.savefig(fig_dir / "saturation_altitude_comparison.png", dpi=400)

In [None]:
fig, ax = plt.subplots()

cloud_id = relative_humidity_fitted["cloud_id"]
# Altitude of each identified cloud that has a fit.
altitudes = identified_clusters.swap_dims({"time": "cloud_id"})["altitude"].sel(cloud_id=cloud_id)

# x: SAFIRE mean RH per cloud,
# y: weighted fit evaluated at the altitude level nearest to the cloud altitude.
safire_rh = safire_humidity.sel(cloud_id=cloud_id)
fitted_rh = relative_humidity_fitted.sel(altitude=altitudes, method="nearest")

# The last element returned by hist2d is the image used for the colorbar.
sc = ax.hist2d(safire_rh, fitted_rh, bins=np.arange(60, 110, 2), cmap="Reds")[-1]
fig.colorbar(mappable=sc, ax=ax, label="Number of clouds")

# 1:1 reference line.
ax.plot([60, 110], [60, 110], color="black", linestyle="--")

ax.set(
    xlabel="SAFIRE RH [%]",
    ylabel="Fitted " + label_from_attrs(relative_humidity_fitted),
    title="RH at cloud observed altitude \n Fitted RH vs SAFIRE mean",
)
fig.savefig(fig_dir / "RH_saturation_altitude_SAFIRE.png", dpi=400)

In [None]:
fig, ax = plt.subplots()

cloud_id = relative_humidity_fitted["cloud_id"]
# Altitude of each identified cloud that has a fit.
altitudes = identified_clusters.swap_dims({"time": "cloud_id"})["altitude"].sel(cloud_id=cloud_id)

# x: mean dropsonde RH, y: weighted fit; both taken at the altitude level
# nearest to the cloud altitude.
sonde_rh = drop_sondes_humidity.sel(cloud_id=cloud_id).sel(altitude=altitudes, method="nearest")
fitted_rh = relative_humidity_fitted.sel(altitude=altitudes, method="nearest")

# The last element returned by hist2d is the image used for the colorbar.
sc = ax.hist2d(sonde_rh, fitted_rh, bins=np.arange(60, 110, 2), cmap="Reds")[-1]
fig.colorbar(mappable=sc, ax=ax, label="Number of clouds")

# 1:1 reference line.
ax.plot([60, 110], [60, 110], color="black", linestyle="--")

ax.set(
    xlabel="Dropsondes RH [%]",
    ylabel="Fitted " + label_from_attrs(relative_humidity_fitted),
    title="RH at cloud observed altitude \n Fitted RH vs JOANNE mean",
)
fig.savefig(fig_dir / "RH_saturation_altitude_JOANNE.png", dpi=400)

In [None]:
fig, ax = plt.subplots()

cloud_id = relative_humidity_fitted["cloud_id"]
# Altitude of each identified cloud that has a fit.
altitudes = identified_clusters.swap_dims({"time": "cloud_id"})["altitude"].sel(cloud_id=cloud_id)

# Distribution of the weighted fit evaluated at the altitude level nearest to
# each cloud altitude.
ax.hist(
    relative_humidity_fitted.sel(altitude=altitudes, method="nearest"),
    color=default_colors[1],
    alpha=0.5,
)
ax.set_xlabel(label_from_attrs(relative_humidity_fitted))
ax.set_ylabel("Number of clouds")
# The title describes the histogram shown here (it previously repeated the
# title of the saturation-altitude scatter plot).
ax.set_title("Fitted RH at cloud observed altitude")

fig.savefig(fig_dir / "saturation_altitude_hist.png", dpi=400)

# Potential temperature

In [None]:
list_prediction = []
list_parameters = []

for cloud_id in tqdm(relative_humidity_parameters["cloud_id"]):

    # Parameters of the weighted relative humidity fit for this cloud; only
    # `x_split` (the RH saturation altitude) is used below.
    wp = relative_humidity_parameters.sel(cloud_id=cloud_id)

    cloud_id = int(cloud_id)
    ic = identified_clusters.swap_dims({"time": "cloud_id"}).sel(cloud_id=cloud_id)

    # Same dropsonde selection criteria as used for the relative humidity fits.
    sondes = match_clouds_and_dropsondes(
        ds_clouds=ic,
        ds_sonde=drop_sondes,
        ds_distance=distance,
        max_temporal_distance=np.timedelta64(3, "h"),
        max_spatial_distance=1e2,
    )

    pt = sondes["potential_temperature"]

    # At and below the split altitude: mean over all sondes and altitude levels.
    lower_pt = pt.where(pt["altitude"] <= wp["x_split"])
    pt_mean, pt_sem = mean_and_stderror_of_mean(lower_pt, dims=("altitude", "time"))

    # At and above the split altitude: data for the linear fit.
    train_data = pt.where(pt["altitude"] >= wp["x_split"], drop=True)

    y_train = train_data.copy()
    t_train = train_data["altitude"]
    t_train = t_train.expand_dims(time=y_train["time"])

    t_train = t_train.transpose("time", "altitude")
    y_train = y_train.transpose("time", "altitude")

    # The bounds of the first parameter are pt_mean +/- 1e-12, which pins it
    # to the mean found below the split; only the slope is free to vary.
    x0 = [pt_mean, 20 / 1000]
    bounds = Bounds(
        lb=[pt_mean - 1e-12, 0 / 1000],
        ub=[pt_mean + 1e-12, 1000 / 1000],
    )
    initial_parameters = InitialAndBounds(x0, bounds)

    temperature_fit = smodels.LinearLeastSquare(
        name="potential temperature",
        t_train=t_train,
        y_train=y_train,
        x0=initial_parameters.x0,
        bounds=initial_parameters.bounds,
        fit_kwargs=dict(),
    )

    temperature_fit.fit(3)

    # Turn the linear-fit result into split-linear parameters (zero slope
    # below the split, fitted slope above it). A new dict is built so that the
    # mapping held by the fit object is not modified in place.
    fitted = temperature_fit.parameters
    parameters = {key: value for key, value in fitted.items() if key != "slope"}
    parameters.update(
        slope_1=0,
        slope_2=fitted["slope"],
        x_split=float(wp["x_split"].values),
    )

    # Store the parameters with a length-one `cloud_id` dimension so the
    # per-cloud datasets can be concatenated afterwards.
    list_parameters.append(
        dict_to_Dataset(
            d=parameters,
            new_coords=dict(cloud_id=[cloud_id]),
        )
    )

    # Evaluate the split-linear profile on the full dropsonde altitude grid.
    dimension = sondes["altitude"].astype(float)
    prediction = smodels.split_linear_func(dimension.values, **parameters)

    prediction = xr.DataArray(prediction, dims=("altitude",), coords=dict(altitude=dimension))
    prediction = prediction.expand_dims(cloud_id=(cloud_id,))

    list_prediction.append(prediction)


# Combine the per-cloud fitted potential temperature profiles and parameters.
potential_fitted_data = xr.concat(
    list_prediction,
    dim="cloud_id",
)
potential_fitted_data.attrs.update(long_name="potential temperature", units="K")
potential_parameters = xr.concat(
    list_parameters,
    dim="cloud_id",
)

  0%|          | 0/260 [00:00<?, ?it/s]

100%|██████████| 260/260 [00:17<00:00, 14.45it/s]


In [None]:
fig, ax = plt.subplots()

# All fitted potential temperature profiles overlaid, one translucent line per cloud.
fitted_altitude = potential_fitted_data["altitude"]
ax.plot(potential_fitted_data.T, fitted_altitude, color="k", alpha=0.2, linestyle="-")

ax.set(
    xlabel=label_from_attrs(potential_fitted_data),
    ylabel=label_from_attrs(fitted_altitude),
    title="",
)
ax.set_ylim(0, 1500)
ax.set_xlim(297, 303)

fig.savefig(fig_dir / "potential_temperature_fits.png", dpi=400)

# Pressure

In [None]:
list_prediction = []
list_parameters = []

for cloud_id in tqdm(relative_humidity_parameters["cloud_id"]):

    cloud_id = int(cloud_id)
    ic = identified_clusters.swap_dims({"time": "cloud_id"}).sel(cloud_id=cloud_id)

    # Same dropsonde selection criteria as used for the relative humidity fits.
    sondes = match_clouds_and_dropsondes(
        ds_clouds=ic,
        ds_sonde=drop_sondes,
        ds_distance=distance,
        max_temporal_distance=np.timedelta64(3, "h"),
        max_spatial_distance=1e2,
    )

    # Pressure observations between altitudes 0 and 1500.
    train_data = sondes["pressure"].sel(altitude=slice(0, 1500))

    y_train = train_data.copy()
    t_train = train_data["altitude"]
    t_train = t_train.expand_dims(time=y_train["time"])

    t_train = t_train.transpose("time", "altitude")
    y_train = y_train.transpose("time", "altitude")

    # Unbounded linear fit, starting from 101430 with a slope of -10 per unit altitude.
    x0 = [101430, -10e3 / 1e3]
    bounds = Bounds(
        lb=[-np.inf, -np.inf],
        ub=[np.inf, np.inf],
    )

    pressure_fit = smodels.LinearLeastSquare(
        name="pressure",
        t_train=t_train,
        y_train=y_train,
        x0=x0,
        bounds=bounds,
        fit_kwargs=dict(),
    )

    pressure_fit.fit(1)

    parameters = pressure_fit.parameters

    # Store the parameters with a length-one `cloud_id` dimension so the
    # per-cloud datasets can be concatenated afterwards.
    list_parameters.append(
        dict_to_Dataset(
            d=parameters,
            new_coords=dict(cloud_id=[cloud_id]),
        )
    )

    # Evaluate the linear profile on the full dropsonde altitude grid.
    dimension = sondes["altitude"].astype(float)
    prediction = smodels.linear_func(dimension.values, **parameters)

    prediction = xr.DataArray(prediction, dims=("altitude",), coords=dict(altitude=dimension))
    prediction = prediction.expand_dims(cloud_id=(cloud_id,))

    list_prediction.append(prediction)


# Combine the per-cloud fitted pressure profiles and parameters.
pressure_fitted_data = xr.concat(
    list_prediction,
    dim="cloud_id",
)
# Label the data as pressure (it was previously tagged as potential
# temperature in K). The units are taken from the dropsonde variable.
# NOTE(review): the "Pa" fallback is inferred from the initial guess of 101430 - confirm.
pressure_fitted_data.attrs.update(
    long_name="pressure",
    units=drop_sondes["pressure"].attrs.get("units", "Pa"),
)
pressure_parameters = xr.concat(
    list_parameters,
    dim="cloud_id",
)

100%|██████████| 260/260 [00:11<00:00, 23.24it/s]


In [None]:
fig, ax = plt.subplots()

# All fitted pressure profiles overlaid, one translucent line per cloud.
fitted_altitude = pressure_fitted_data["altitude"]
ax.plot(pressure_fitted_data.T, fitted_altitude, color="k", alpha=0.2, linestyle="-")

# Time-mean observed pressure profile for comparison.
# NOTE(review): `sondes` is not defined in this cell; on a top-to-bottom run it
# is the value left behind by the last iteration of the preceding fit loop.
mean_pressure = sondes["pressure"].mean("time")
mean_pressure.sel(altitude=slice(0, 1200)).plot(ax=ax, y="altitude")

# Set after the xarray plot call so these labels and the empty title take effect.
ax.set(
    xlabel=label_from_attrs(pressure_fitted_data),
    ylabel=label_from_attrs(fitted_altitude),
    title="",
)
ax.set_ylim(0, 1500)
ax.set_xlim(8.5e4, 10e4)

fig.savefig(fig_dir / "pressure_temperature_fits.png", dpi=400)

# Save the fitted parameters to the storage

In [None]:
# Write the fitted parameter datasets to the model input directory.
output_dir = data_dir / "model/input_v4.0"
# Create the directory if needed (as is done for `fig_dir`), so that the
# writes below do not fail on a fresh checkout.
output_dir.mkdir(exist_ok=True, parents=True)

relative_humidity_parameters.to_netcdf(output_dir / "relative_humidity_parameters.nc")
potential_parameters.to_netcdf(output_dir / "potential_temperature_parameters.nc")
pressure_parameters.to_netcdf(output_dir / "pressure_parameters.nc")