In [None]:
import numpy as np
import xarray as xr
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from sdm_eurec4a import RepositoryPath
from sdm_eurec4a.conversions import relative_humidity_partial_density, relative_humidity_dewpoint
import sdm_eurec4a.calculations as sdm_conversions
from sdm_eurec4a.visulization import set_custom_rcParams, label_from_attrs
from sdm_eurec4a.reductions import mean_and_stderror_of_mean
from sdm_eurec4a.identifications import match_clouds_and_dropsondes, match_clouds_and_cloudcomposite

import sdm_eurec4a.input_processing.models as sdm_models

default_colors = set_custom_rcParams()
repo_dir = RepositoryPath("nils_private").get_repo_dir()

In [None]:
drop_sondes = xr.open_dataset(repo_dir / "data/observation/dropsonde/processed/drop_sondes.nc")
distance = xr.open_dataset(
    repo_dir
    / "data/observation/combined/distance/distance_dropsondes_identified_clusters_rain_mask_5.nc"
)
safire = xr.open_dataset(repo_dir / "data/observation/safire_core/processed/safire_core.nc")
cloud_composite = xr.open_dataset(
    repo_dir / "data/observation/cloud_composite/processed/cloud_composite_SI_units_20241025.nc"
)
identified_clusters = xr.open_dataset(
    repo_dir
    / "data/observation/cloud_composite/processed/identified_clusters/identified_clusters_rain_mask_5.nc"
)


def select_subset(
    ds: "xr.Dataset",
    altitude_name="altitude",
    drop=False,
    min_altitude=500,
    max_altitude=1200,
):
    """
    Mask a dataset to a closed altitude band.

    Every entry whose ``ds[altitude_name]`` value lies outside
    ``[min_altitude, max_altitude]`` is masked with ``ds.where``.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset that contains the altitude variable.
    altitude_name : str, optional
        Name of the altitude variable in ``ds``.
    drop : bool, optional
        Forwarded unchanged to ``ds.where``.
    min_altitude, max_altitude : float, optional
        Inclusive lower and upper limit, given in the same units as
        ``ds[altitude_name]``. The defaults (500 and 1200) reproduce the
        band that used to be hard-coded here.

    Returns
    -------
    The result of ``ds.where(mask, drop=drop)``.
    """
    altitude = ds[altitude_name]
    mask = (altitude <= max_altitude) & (altitude >= min_altitude)
    return ds.where(mask, drop=drop)


cloud_composite = select_subset(cloud_composite, "altitude")
identified_clusters = select_subset(identified_clusters, "altitude", drop=True)
safire = select_subset(safire, "altitude")

# Specific gas constant passed to the conversion
# (presumably that of water vapour in J kg^-1 K^-1 -- TODO confirm).
SPECIFIC_GAS_CONSTANT_WATER_VAPOR = 461.5

# Derive one relative humidity variable per SAFIRE-CORE absolute humidity
# variable. Looping over the sensor index guarantees that each relative
# humidity inherits the `source` attribute of the absolute humidity it was
# computed from (sensor 1 used to be tagged with the source of sensor 2).
for sensor_id in (1, 2):
    absolute_humidity_name = f"absolute_humidity_{sensor_id}"
    relative_humidity_name = f"relative_humidity_{sensor_id}"

    # `relative_humidity` stays bound to the result of the last sensor (2),
    # exactly as before.
    safire[relative_humidity_name] = relative_humidity = relative_humidity_partial_density(
        temperature=safire["temperature"],
        partial_density=safire[absolute_humidity_name],
        specific_gas_constant=SPECIFIC_GAS_CONSTANT_WATER_VAPOR,
    )
    safire[relative_humidity_name].attrs.update(
        source=safire[absolute_humidity_name].attrs["source"],
    )

# Use the SAFIRE-CORE relative humidity values

In [None]:
shared_times = np.intersect1d(safire["time"], cloud_composite["time"])
safire_shared = safire.sel(time=shared_times)
cloud_composite_shared = cloud_composite.sel(time=shared_times)

### Liquid water content

The liquid water content reported by SAFIRE-CORE does not look reliable: it contains negative values, which are physically impossible.

In [None]:
fig, ax = plt.subplots()
ax.scatter(
    x=safire_shared["liquid_water_content"],
    y=cloud_composite_shared["liquid_water_content"],
    marker=".",
    alpha=0.2,
)
ax.set_xlabel("SAFIRE-CORE " + label_from_attrs(safire_shared["liquid_water_content"]))
ax.set_ylabel("COMPOSITE " + label_from_attrs(cloud_composite_shared["liquid_water_content"]))

Text(0, 0.5, 'COMPOSITE Liquid water content $\\left[  g m^{-3}  \\right]$')

In [None]:
# 2-D histogram of SAFIRE-CORE against cloud composite liquid water content.
fig = plt.figure()
gs = fig.add_gridspec(1, 11, hspace=1, wspace=1)

ax_cc = fig.add_subplot(gs[0:9])
cax = fig.add_subplot(gs[-1])

# Histogram extent as [[x_min, x_max], [y_min, y_max]].
# Deliberately not called `range`: a module-level `range` would shadow the
# builtin for every cell executed afterwards.
lwc_hist_range = [[-0.05, 3], [-0.05, 3]]

_, _, _, mappable = ax_cc.hist2d(
    safire_shared["liquid_water_content"],
    cloud_composite_shared["liquid_water_content"],
    range=lwc_hist_range,
    bins=100,
    norm=mcolors.LogNorm(vmax=100, vmin=1),
    cmap="plasma",
)

fig.colorbar(mappable=mappable, cax=cax, label="Occurence")

ax_cc.set_xlabel("SAFIRE-CORE " + label_from_attrs(safire_shared["liquid_water_content"]))
ax_cc.set_ylabel("COMPOSITE " + label_from_attrs(cloud_composite_shared["liquid_water_content"]))

# Correlation between both liquid water content records.
corr = xr.corr(safire_shared["liquid_water_content"], cloud_composite_shared["liquid_water_content"])

fig.suptitle(f"Liquid Water Content comparison with R = {corr.data:.2f}", fontsize="medium")

# 1:1 line as visual reference for perfect agreement.
ax_cc.plot((0, 3), (0, 3), "k--");

[<matplotlib.lines.Line2D at 0x2824d666ef0>]

##### Lag or smoothing does not help the correlation

In [None]:
lags = np.arange(-20, 21, 1)
corr = []
corr_cc = []
corr_ds = []


ds1 = safire_shared["liquid_water_content"].rolling(time=10).mean(skipna=True)
ds2 = cloud_composite_shared["liquid_water_content"].rolling(time=10).mean(skipna=True)

for lag in lags:
    corr.append(
        xr.corr(
            ds1.shift(time=lag),
            ds2,
        )
    )
    corr_cc.append(
        xr.corr(
            ds2.shift(time=lag),
            ds2,
        )
    )
    corr_ds.append(
        xr.corr(
            ds1.shift(time=lag),
            ds1,
        )
    )

fig, ax = plt.subplots()
ax.scatter(lags, corr, label="SAFIRE-CORE and COMPOSITE", marker="o")
ax.scatter(lags, corr_cc, marker="x", label="Auto COMPOSITE")
ax.scatter(lags, corr_ds, marker="+", label="Auto SAFIRE-CORE")
ax.set_xlabel("Lag (time steps)")
ax.set_ylabel("Correlation coefficient")
ax.legend()
ax.set_title("Correlation between Liquid Water Content with Different Lags")

Text(0.5, 1.0, 'Correlation between Liquid Water Content with Different Lags')

In [None]:
rolling = (1, 2, 3, 4, 5, 6, 10, 20, 30, 40, 100)

corr = []
corr_both = []


ds1 = safire_shared["liquid_water_content"]
ds2 = cloud_composite_shared["liquid_water_content"]
for roll in rolling:
    ds1_roll = ds1.rolling(time=roll).mean(skipna=True)
    ds2_roll = ds2.rolling(time=roll).mean(skipna=True)

    corr.append(
        xr.corr(
            ds1_roll,
            ds2,
        )
    )
    corr_both.append(
        xr.corr(
            ds1_roll,
            ds2_roll,
        )
    )

fig, ax = plt.subplots()
ax.scatter(rolling, corr, label="SAFIRE-CORE rolling mean", marker="o")
ax.scatter(rolling, corr_both, marker="X", label="Both with rolling mean")
ax.set_xlabel("Rolling mean window size")
ax.set_ylabel("Correlation coefficient")
ax.legend()
ax.set_title("Correlation between Liquid Water Content with Different rolling mean window sizes")

Text(0.5, 1.0, 'Correlation between Liquid Water Content with Different rolling mean window sizes')

In [None]:
rolling = (1, 2, 3, 4, 5, 6, 10, 20, 30, 40, 100)

corr = []
corr_both = []

lag = -2
ds1 = safire_shared["liquid_water_content"]
ds2 = cloud_composite_shared["liquid_water_content"]
for roll in rolling:
    ds1_roll = ds1.rolling(time=roll).mean(skipna=True)
    ds2_roll = ds2.rolling(time=roll).mean(skipna=True)

    corr.append(
        xr.corr(
            ds1_roll.shift(time=lag),
            ds2,
        )
    )
    corr_both.append(
        xr.corr(
            ds1_roll.shift(time=lag),
            ds2_roll,
        )
    )

fig, ax = plt.subplots()
ax.scatter(rolling, corr, label="SAFIRE-CORE rolling mean", marker="o")
ax.scatter(rolling, corr_both, marker="X", label="Both with rolling mean")
ax.set_xlabel("Rolling mean window size")
ax.set_ylabel("Correlation coefficient")
ax.legend()
ax.set_title(
    "Correlation between Liquid Water Content with Different with lag -2 and rolling mean window sizes"
)

Text(0.5, 1.0, 'Correlation between Liquid Water Content with Different with lag -2 and rolling mean window sizes')

### Relative humidity to LWC relation

We can see that there are pretty high relative humidity values in the ``SAFIRE-CORE`` dataset.

In [None]:
mask = safire_shared["relative_humidity_1"] > 100

ds = safire_shared  # .where(mask)
cc = cloud_composite_shared  # .sel(time = ds.time)

above_102 = (ds["relative_humidity_1"] > 102).sum()
above_100 = (ds["relative_humidity_1"] > 100).sum()

fraction_100 = 100 * above_100 / np.isfinite(ds["relative_humidity_1"]).sum()
fraction_102 = 100 * above_102 / np.isfinite(ds["relative_humidity_1"]).sum()

# fig, axs = plt.subplots(ncols = 2, figsize = (9, 6))

fig = plt.figure()
gs = fig.add_gridspec(11, 21, hspace=1, wspace=1)

ax_cc = fig.add_subplot(gs[1:, 0:9])
ax_ds = fig.add_subplot(gs[1:, 10:19], sharey=ax_cc, sharex=ax_cc)
cax = fig.add_subplot(gs[1:, -1])

style = dict(
    range=[[10, 130], [-0.55, 2.5]],
    bins=[30, 50],
    norm=mcolors.LogNorm(vmax=1e3, vmin=1),
    cmap="plasma",
)


ax_cc.hist2d(ds["relative_humidity_1"], cc["liquid_water_content"], **style)
_, _, _, mappable = ax_ds.hist2d(ds["relative_humidity_1"], ds["liquid_water_content"], **style)

fig.colorbar(mappable=mappable, cax=cax, label="Occurence")

for ax in [ax_cc, ax_ds]:
    ax.axvline(100, color="black", linestyle="--")
    ax.axhline(0.0, color="black", linestyle="--", alpha=0.5)


# ax.set_yscale('log')
ax_cc.set_xlabel(label_from_attrs(ds["relative_humidity_1"]))
ax_cc.set_ylabel(label_from_attrs(ds["liquid_water_content"]))
ax_ds.set_xlabel(label_from_attrs(ds["relative_humidity_1"]))
# ax_ds.set_ylabel(label_from_attrs(ds["liquid_water_content"]))
ax_cc.set_title("Cloud Composite LWC")
ax_ds.set_title("SAFIRE-CORE LWC")

title = f"Relative Humidity vs Liquid Water Content | {ds['relative_humidity_1'].attrs['source']}"
title += f"\n{fraction_100.data:.2f}% of meas. have RH > 100%"
title += f"\n{fraction_102.data:.2f}% of meas. have RH > 102%"

fig.suptitle(
    title,
    fontsize="medium",
)
# fig.tight_layout()

Text(0.5, 0.98, 'Relative Humidity vs Liquid Water Content |  Li-Cor : LI-7500A: 75H-2079\n3.50% of meas. have RH > 100%\n1.45% of meas. have RH > 102%')

In [None]:
mask = safire_shared["relative_humidity_2"] > 100

ds = safire_shared  # .where(mask)
cc = cloud_composite_shared  # .sel(time = ds.time)

above_102 = (ds["relative_humidity_2"] > 102).sum()
above_100 = (ds["relative_humidity_2"] > 100).sum()

fraction_100 = 100 * above_100 / np.isfinite(ds["relative_humidity_2"]).sum()
fraction_102 = 100 * above_102 / np.isfinite(ds["relative_humidity_2"]).sum()

# fig, axs = plt.subplots(ncols = 2, figsize = (9, 6))

fig = plt.figure()
gs = fig.add_gridspec(11, 21, hspace=1, wspace=1)

ax_cc = fig.add_subplot(gs[1:, 0:9])
ax_ds = fig.add_subplot(gs[1:, 10:19], sharey=ax_cc, sharex=ax_cc)
cax = fig.add_subplot(gs[1:, -1])

style = dict(
    range=[[10, 130], [-0.55, 2.5]],
    bins=[30, 50],
    norm=mcolors.LogNorm(vmax=1e3, vmin=1),
    cmap="plasma",
)


ax_cc.hist2d(ds["relative_humidity_2"], cc["liquid_water_content"], **style)
_, _, _, mappable = ax_ds.hist2d(ds["relative_humidity_2"], ds["liquid_water_content"], **style)

fig.colorbar(mappable=mappable, cax=cax, label="Occurence")

for ax in [ax_cc, ax_ds]:
    ax.axvline(100, color="black", linestyle="--")
    ax.axhline(0.0, color="black", linestyle="--", alpha=0.5)


# ax.set_yscale('log')
ax_cc.set_xlabel(label_from_attrs(ds["relative_humidity_2"]))
ax_cc.set_ylabel(label_from_attrs(ds["liquid_water_content"]))
ax_ds.set_xlabel(label_from_attrs(ds["relative_humidity_2"]))
# ax_ds.set_ylabel(label_from_attrs(ds["liquid_water_content"]))
ax_cc.set_title("Cloud Composite LWC")
ax_ds.set_title("SAFIRE-CORE LWC")

title = f"Relative Humidity vs Liquid Water Content | {ds['relative_humidity_2'].attrs['source']}"
title += f"\n{fraction_100.data:.2f}% of meas. have RH > 100%"
title += f"\n{fraction_102.data:.2f}% of meas. have RH > 102%"

fig.suptitle(
    title,
    fontsize="medium",
)

# fig.tight_layout()

Text(0.5, 0.98, 'Relative Humidity vs Liquid Water Content |  Li-Cor : LI-7500A: 75H-2079\n0.14% of meas. have RH > 100%\n0.00% of meas. have RH > 102%')

In [None]:
ds = safire_shared

fig = plt.figure()
gs = fig.add_gridspec(9, 11, hspace=1, wspace=1)

ax = fig.add_subplot(gs[1:, 0:9])
cax = fig.add_subplot(gs[1:, -1])

style = dict(
    range=[[20, 130], [20, 130]],
    bins=[50, 50],
    norm=mcolors.LogNorm(vmax=1e3, vmin=1),
    cmap="plasma",
)


_, _, _, mappable = ax.hist2d(ds["relative_humidity_1"], ds["relative_humidity_2"], **style)

fig.colorbar(mappable=mappable, cax=cax, label="Occurence")

ax.axvline(100, color="black", linestyle="--")
ax.axhline(100, color="black", linestyle="--")
# ax.axhline(0.0, color="black", linestyle="--", alpha=0.5)


# ax.set_yscale('log')
ax.set_xlabel(label_from_attrs(ds["relative_humidity_1"]))
ax.set_ylabel(label_from_attrs(ds["relative_humidity_2"]))
# ax_ds.set_ylabel(label_from_attrs(ds["liquid_water_content"]))

# fig.suptitle(
#     fontsize="medium",
# )
# fig.tight_layout()

Text(0, 0.5, 'Relative humidity $\\left[  \\%  \\right]$')

### Single flight track

We can see very high relative humidity values, exceeding 110%, which should not be the case.
There can be multiple explanations for this:
1. The calculation itself is inaccurate. We might need to account more carefully for the temperature dependency of $L_V$, although we already use the most accurate estimate, by Murphy and Koop.
2. The temperature and absolute humidity measurements are not consistent with each other. However, the values exceed 100% over a continuous time period, which argues against random measurement noise.

In [None]:
single_flight_time_slice = slice("2020-02-13T08:45:00", "2020-02-13T11:00")
ds = safire.sel(time=single_flight_time_slice)
cc = cloud_composite.sel(time=single_flight_time_slice)
ic = identified_clusters.sel(time=single_flight_time_slice)

Liquid water content

In [None]:
plt.figure(figsize=(3, 3))
plt.plot(
    ds["longitude"],
    ds["latitude"],
    marker="None",
    label="Flight track",
)
plt.scatter(
    ic["longitude"],
    ic["latitude"],
    color="k",
    label="identified clusters",
)

plt.xlabel("Longitude")
plt.ylabel("Latitude")
plt.title("Flight track and identified clusters")

Text(0.5, 1.0, 'Flight track and identified clusters')

In [None]:
fig, ax = plt.subplots()

ax.plot(
    ds["time"],
    ds["liquid_water_content"],
    label="SAFIRE-CORE",
)

ax.plot(
    cc["time"],
    cc["liquid_water_content"],
    label="Composite",
)

ax.set_xlabel("Time")
ax.set_ylabel(label_from_attrs(ds["liquid_water_content"]))
ax.legend()
# ax.set_yscale("log")
# ax.set_ylim(0, None)

<matplotlib.legend.Legend at 0x28363a72f20>

In [None]:
# Time series of relative humidity (left axis) and absolute humidity (right
# axis) for both humidity variables: sensor 1 in the upper panel, sensor 2 in
# the lower panel.
fig, axs = plt.subplots(nrows=2, sharex=True, figsize=(9, 6))

# Shade the time span of every identified cluster in both panels. Only the
# first span of a panel carries a label, so a legend shows a single entry.
for ax in axs:
    for i, (s, e) in enumerate(zip(ic["start"], ic["end"])):
        label = "Rain Clouds" if i == 0 else None
        ax.axvspan(s.data, e.data, color="grey", alpha=0.5, label=label)
twinx_axs = [ax.twinx() for ax in axs]

for ax, twin_ax, sensor_id in zip(axs, twinx_axs, (1, 2)):
    rh = ds[f"relative_humidity_{sensor_id}"]
    supersaturated = rh > 100

    rh.plot(
        ax=ax,
        linestyle="-",
        marker="None",
        label="RH",
        color=default_colors[0],
    )
    # Highlight the samples above 100 % with markers.
    rh.where(supersaturated).plot(
        ax=ax,
        linestyle="None",
        marker=".",
        label="RH > 100 %",
        color=default_colors[1],
    )

    # Scaled by 1e3 and labelled g/m^3
    # (presumably the stored values are in kg/m^3 -- TODO confirm).
    ah = 1e3 * ds[f"absolute_humidity_{sensor_id}"]
    ah.attrs.update(units="g/m^3", long_name="Absolute humidity")

    ah.plot(
        ax=twin_ax,
        linestyle="-",
        marker="None",
        label="AH",
        color=default_colors[2],
    )
    ah.where(supersaturated).plot(
        ax=twin_ax,
        linestyle="None",
        marker=".",
        label="AH for RH > 100 %",
        color=default_colors[3],
    )

    # The left axis shows relative humidity in BOTH panels; the lower panel
    # used to be labelled with the absolute humidity label by mistake.
    ax.set_ylabel(label_from_attrs(rh))
    ax.set_title(ds[f"absolute_humidity_{sensor_id}"].attrs["comment"])

fig.suptitle(
    f"SAFIRE-CORE relative humidity and absolute humidity\nFlight number {ds.flight_number.mean().data}",
    fontsize="medium",
)
axs[1].set_xlabel("Time")
axs[0].legend(loc="lower left")
twinx_axs[0].legend(loc="lower right")

fig.tight_layout()

In [None]:
fig, axs = plt.subplots(nrows=1, sharex=True, figsize=(9, 4.5))
axs = [axs]
twinx_axs = [ax.twinx() for ax in axs]

ds["relative_humidity_1"].plot(
    ax=axs[0],
    linestyle="-",
    marker="None",
    label="RH",
    color=default_colors[0],
)
ds["relative_humidity_1"].where(ds["relative_humidity_1"] > 100).plot(
    ax=axs[0],
    linestyle="None",
    marker=".",
    label="RH > 100 %",
    color=default_colors[1],
)

ah = 1e3 * ds["absolute_humidity_1"]
ah.attrs.update(units="g/m^3", long_name="Absolute humidity")

ah.plot(
    ax=twinx_axs[0],
    linestyle="-",
    marker="None",
    label="AH",
    color=default_colors[2],
)
ah.where(ds["relative_humidity_1"] > 100).plot(
    ax=twinx_axs[0],
    linestyle="None",
    marker=".",
    label="AH for RH > 100 %",
    color=default_colors[3],
)

fig.suptitle(
    f"SAFIRE-CORE relative humidity and absolute humidity\nFlight number {ds.flight_number.mean().data}",
    fontsize="medium",
)
axs[0].set_ylabel(label_from_attrs(ds["relative_humidity_1"]))
axs[0].set_xlabel("Time")
axs[0].legend(loc="lower left")
twinx_axs[0].legend(loc="lower right")

axs[0].set_title(ds["absolute_humidity_1"].attrs["comment"])

fig.tight_layout()

In [None]:
# Plot the trajectory of the ATR aircraft in 3D, coloured by relative humidity.

longitude = ds["longitude"]
latitude = ds["latitude"]
altitude = ds["altitude"]
# Rolling mean (window of 10 time steps) of the sensor-1 relative humidity.
color = ds["relative_humidity_1"].rolling(time=10).mean()
# Samples whose smoothed relative humidity reaches at least 100 %.
saturated = color >= 100


fig = plt.figure(figsize=(8, 6))
gs = fig.add_gridspec(10, 100)

ax = fig.add_subplot(gs[:, :], projection="3d")
cax = fig.add_subplot(gs[2:-2, -3:])

sc = ax.scatter(longitude, latitude, altitude, c=color, cmap="Blues", marker=".")
ax.scatter(
    longitude.where(saturated),
    latitude.where(saturated),
    altitude.where(saturated),
    color="red",
    marker="X",
    zorder=10,
)
# Label the colorbar from the variable that is actually plotted, instead of
# the global `relative_humidity` left over from the data-loading cell (which
# holds the sensor-2 result).
fig.colorbar(cax=cax, mappable=sc, label=label_from_attrs(ds["relative_humidity_1"]))

ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
ax.set_zlabel("Altitude")

# Unique, non-NaN flight numbers contained in the selected time slice.
flight_Numbers = np.unique((ds["flight_number"])[~np.isnan((ds["flight_number"]))])
ax.set_title(f"ATR trajectory with relative humidity for flight NUM {flight_Numbers}")

# fig.tight_layout()

Text(0.5, 0.92, 'ATR trajectory with relative humidity for flight NUM [19.]')

# Optimize the linear fits of the thermodynamic profiles

### Make sure that we can reconstruct the specific humidity from the relative humidity

In [None]:
# NOTE(review): this rebinds `sdm_conversions`, which the first cell bound to
# `sdm_eurec4a.calculations`. From here on the name refers to
# `sdm_eurec4a.conversions` -- confirm that this is intended.
import sdm_eurec4a.conversions as sdm_conversions
from importlib import reload

# Pick up local edits of the module without restarting the kernel.
reload(sdm_conversions)
from tqdm import tqdm

q_v = drop_sondes["specific_humidity"]
t = drop_sondes["air_temperature"]
p = drop_sondes["pressure"]

rh1 = sdm_conversions.relative_humidity_from_tps(
    specific_humidity=q_v, temperature=t, pressure=p, simplified=True
)

rh2 = sdm_conversions.relative_humidity_from_tps(
    specific_humidity=q_v, temperature=t, pressure=p, simplified=False
)

# Round trip: specific humidity -> relative humidity -> specific humidity.
q_v1 = sdm_conversions.specific_humidity_from_relative_humidity_temperature_pressure(
    relative_humidity=rh1, temperature=t, pressure=p, simplified=True
)

# Use the magnitude of the relative error: with the signed error, a large
# negative deviation would never show up in the maximum.
max_relative_error = np.max(np.abs((q_v1 - q_v) / q_v))

print(f"Max relative error while reconstructing specific humidity: {max_relative_error.data:.2e}")

Max relative error while reconstructing specific humidity: 3.09e-07


identified_clusters

In [None]:
cloud_id = np.random.choice(identified_clusters["cloud_id"].values)


# time_slice = slice(ic.start.data, ic.end.data)
# extended_time_slice = slice(ic.start.data - np.timedelta64(20, "s"), ic.end.data + np.timedelta64(20, "s"))
# extended_time_slice = time_slice
# ic = identified_clusters.sel(time = single_flight_time_slice)
ic = identified_clusters.swap_dims({"time": "cloud_id"}).sel(cloud_id=cloud_id)

drop_sondes_selected = match_clouds_and_dropsondes(
    ds_clouds=ic,
    ds_sonde=drop_sondes,
    ds_distance=distance,
    max_temporal_distance=np.timedelta64(2, "h"),
    max_spatial_distance=0.5e2,
)
drop_sondes_selected_large = match_clouds_and_dropsondes(
    ds_clouds=ic,
    ds_sonde=drop_sondes,
    ds_distance=distance,
    max_temporal_distance=np.timedelta64(12, "h"),
    max_spatial_distance=1e2,
)

if drop_sondes_selected["time"].size == 0:
    raise ValueError("No dropsondes found for the selected cloud")

cloud_composite_selected = match_clouds_and_cloudcomposite(
    ds_clouds=ic,
    ds_cloudcomposite=cloud_composite,
)


# cloud_composite_selected = cloud_composite_selected.sel(time = time_slice)
safire_selected = safire.sel(time=cloud_composite_selected["time"])

plt.plot(
    1e2 * drop_sondes_selected["relative_humidity"].T,
    drop_sondes_selected["altitude"],
    color="grey",
    alpha=0.5,
)

plt.plot(
    1e2 * drop_sondes_selected_large["relative_humidity"].T,
    drop_sondes_selected_large["altitude"],
    color="grey",
    alpha=0.1,
)


plt.axhline(
    cloud_composite_selected["altitude"].mean(), color="grey", linestyle="-", alpha=0.3, zorder=0
)

m, sem = mean_and_stderror_of_mean(safire_selected["relative_humidity_1"], dims=("time",))
m, sem = safire_selected["relative_humidity_1"].mean(skipna=True), safire_selected[
    "relative_humidity_1"
].std(skipna=True)

plt.errorbar(
    x=m,
    xerr=sem,
    y=safire_selected["altitude"].mean(),
    marker=".",
    alpha=0.3,
    color="red",
)

m, sem = mean_and_stderror_of_mean(safire_selected["relative_humidity_2"], dims=("time",))
m, sem = safire_selected["relative_humidity_2"].mean(skipna=True), safire_selected[
    "relative_humidity_2"
].std(skipna=True)
plt.errorbar(
    x=m,
    xerr=sem,
    y=safire_selected["altitude"].mean(),
    marker=".",
    alpha=0.3,
    color="blue",
)
plt.ylim(0, 1200)

(0.0, 1200.0)

# All clouds

In [None]:
# Collect, for every identified cloud, the mean SAFIRE-CORE relative humidity
# during the cloud and the mean relative humidity profile of the matched
# dropsondes.
safire_humidity = []
drop_sondes_humidity = []

# Re-indexing by cloud_id does not depend on the loop variable: do it once.
clusters_by_cloud_id = identified_clusters.swap_dims({"time": "cloud_id"})

for cloud_id in identified_clusters["cloud_id"]:

    ic = clusters_by_cloud_id.sel(cloud_id=cloud_id)
    # SAFIRE-CORE samples between the start and the end of the cloud.
    sc = safire.sel(time=slice(ic["start"], ic["end"]))
    ds = match_clouds_and_dropsondes(
        ds_clouds=ic,
        ds_sonde=drop_sondes,
        ds_distance=distance,
        max_temporal_distance=np.timedelta64(3, "h"),
        max_spatial_distance=1e2,
    )
    safire_humidity.append(sc["relative_humidity_1"].mean())
    drop_sondes_humidity.append(ds["relative_humidity"].mean("time"))

In [None]:
safire_humidity = xr.DataArray(
    safire_humidity,
    dims=("cloud_id",),
    coords=dict(cloud_id=identified_clusters.swap_dims({"time": "cloud_id"})["cloud_id"]),
)
drop_sondes_humidity = xr.DataArray(
    drop_sondes_humidity,
    dims=("cloud_id", "altitude"),
    coords=dict(
        cloud_id=identified_clusters.swap_dims({"time": "cloud_id"})["cloud_id"],
        altitude=drop_sondes["altitude"],
    ),
)

In [None]:
from scipy.optimize import Bounds


class InitialAndBounds:
    """
    Pair of an initial parameter guess and the bounds belonging to it.

    Both values are stored as the attributes ``x0`` and ``bounds``. Calling
    the instance returns them as the tuple ``(x0, bounds)``; any positional
    or keyword arguments given to the call are accepted and ignored.
    """

    def __init__(self, x0, bounds):
        self.x0, self.bounds = x0, bounds

    def __call__(self, *args, **kwargs):
        pair = (self.x0, self.bounds)
        return pair


class RelativeHumidityIB(InitialAndBounds):
    """
    Initial guess and bounds of the two relative humidity fit parameters.

    The parameter order is ``(f_0, slope_1)`` for ``x0`` as well as for the
    lower and upper bounds.
    """

    def __init__(self):
        # Each triple reads (lower bound, initial guess, upper bound).
        # f_0: surface humidity (original note: "in 1").
        f_0_lower, f_0_start, f_0_upper = 0.6, 0.85, 1
        # slope_1: slope of the profile (original note: "in 1 / 1000 m").
        slope_lower, slope_start, slope_upper = 0.2 / 1000, 0.4 / 1000, 1 / 1000

        limits = Bounds(
            lb=[f_0_lower, slope_lower],
            ub=[f_0_upper, slope_upper],
        )

        super().__init__(
            x0=(f_0_start, slope_start),
            bounds=limits,
        )

### Unweighted

In [None]:
# Fit a relative humidity profile to the dropsondes matched to every cloud.
# `result` maps str(cloud_id) to the fitted parameters.
result = {}

# Re-indexing by cloud_id does not depend on the loop variable: do it once.
clusters_by_cloud_id = identified_clusters.swap_dims({"time": "cloud_id"})

for cloud_id in tqdm(identified_clusters["cloud_id"]):

    cloud_id = int(cloud_id)
    ic = clusters_by_cloud_id.sel(cloud_id=cloud_id)

    ds = match_clouds_and_dropsondes(
        ds_clouds=ic,
        ds_sonde=drop_sondes,
        ds_distance=distance,
        max_temporal_distance=np.timedelta64(3, "h"),
        max_spatial_distance=1e2,
    )

    # Clouds with three or fewer matched dropsondes are skipped.
    if ds["time"].size <= 3:
        continue

    y_train = ds["relative_humidity"]
    # Broadcast the altitude along time so it has the same dims as y_train.
    t_train = ds["altitude"].expand_dims(time=ds["time"])

    y_train = y_train.transpose("time", "altitude")
    t_train = t_train.transpose("time", "altitude")

    # One fresh helper per fit supplies both the initial guess and the bounds.
    initial_and_bounds = RelativeHumidityIB()

    # Only the altitude labels selected by slice(200, 500) are used for
    # training.
    thermo_fit = sdm_models.FixedSaturatedLinearLeastSquare(
        name="test",
        x0=initial_and_bounds.x0,
        bounds=initial_and_bounds.bounds,
        y_train=y_train.sel(altitude=slice(200, 500)),
        t_train=t_train.sel(altitude=slice(200, 500)),
    )

    thermo_fit.fit(1)

    result[str(cloud_id)] = thermo_fit.parameters

100%|██████████| 317/317 [00:40<00:00,  7.92it/s]


In [None]:
# Evaluate every fitted profile on the dropsonde altitude grid and compute
# the altitude at which the fitted line reaches saturation (value 1).
fitted_relative_humidity = []
x_split = []

# The altitude grid is the same for every cloud.
x = drop_sondes["altitude"]

for parameters in result.values():
    # Altitude at which f_0 + slope_1 * x equals 1.
    x_s = (1 - parameters["f_0"]) / parameters["slope_1"]

    y = sdm_models.saturated_linear_func(
        x=x,
        saturation_value=1,
        **parameters,
    )
    # Scale by 100 to match the "%" unit set below.
    y = 100 * y
    attrs = {
        "long_name": "Relative Humidity",
        # Raw string: "\%" is not a valid escape sequence in a normal string
        # literal and triggers a warning; the resulting text is unchanged.
        "units": r"\%",
    }
    y = xr.DataArray(y, dims=x.dims, coords=x.coords, attrs=attrs)
    fitted_relative_humidity.append(y)
    x_split.append(x_s)


fitted_relative_humidity = xr.concat(fitted_relative_humidity, dim="cloud_id")
# The keys of `result` are the cloud ids stored as strings.
fitted_relative_humidity["cloud_id"] = np.array(list(result.keys())).astype(int)
x_split = xr.DataArray(x_split, dims="cloud_id")
x_split.attrs.update(
    long_name="RH saturation altitude",
    units="m",
)
x_split["cloud_id"] = fitted_relative_humidity["cloud_id"]

In [None]:
# All fitted relative humidity profiles: relative humidity on the x axis,
# altitude on the y axis.
fig, ax = plt.subplots()

ax.plot(
    fitted_relative_humidity.T,
    fitted_relative_humidity["altitude"],
    color="k",
    alpha=0.2,
)

# The labels follow the plotted data (they used to be swapped): x is the
# relative humidity, y is the altitude.
ax.set_xlabel(label_from_attrs(fitted_relative_humidity))
ax.set_ylabel(label_from_attrs(fitted_relative_humidity["altitude"]))

ax.set_ylim(0, 1800)

ax.set_title(f"Fitted Relative Humidity profiles for all {len(result)} clouds")

Text(0.5, 1.0, 'Fitted Relative Humidity profiles for all 254 clouds')

In [None]:
fig, ax = plt.subplots()

ax.scatter(
    x_split,
    identified_clusters.swap_dims({"time": "cloud_id"}).sel(cloud_id=x_split["cloud_id"])["altitude"],
    color=default_colors[1],
    alpha=0.5,
)
ax.set_xlabel(label_from_attrs(x_split))
ax.set_ylabel(label_from_attrs(identified_clusters["altitude"]))
ax.plot([500, 1400], [500, 1400], color="black", linestyle="--")
ax.set_title("RH saturation altitude from JOANNE\n vs. cloud altitude from SAFIRE-CORE")
ax.set_xlim(400, 1600)
ax.set_ylim(400, 1600)

(400.0, 1600.0)

In [None]:
# Compare, per cloud, the mean SAFIRE-CORE relative humidity with the fitted
# profile evaluated at the altitude level nearest to the cloud altitude.
fig, ax = plt.subplots()

cloud_id = fitted_relative_humidity["cloud_id"]
altitudes = identified_clusters.swap_dims({"time": "cloud_id"})["altitude"].sel(cloud_id=cloud_id)


ax.hist2d(
    safire_humidity.sel(cloud_id=cloud_id),
    fitted_relative_humidity.sel(altitude=altitudes, method="nearest"),
    bins=np.arange(60, 110, 2),
)
# `safire_humidity` is built from means of safire["relative_humidity_1"], so
# the label is taken from that variable.
ax.set_xlabel("SAFIRE-CORE " + label_from_attrs(safire["relative_humidity_1"]))
ax.set_ylabel("Fitted " + label_from_attrs(fitted_relative_humidity))
# 1:1 line as visual reference for perfect agreement.
ax.plot([60, 110], [60, 110], color="black", linestyle="--")
# The previous title was copied from the saturation-altitude plot and did not
# describe this figure.
ax.set_title("Relative humidity at cloud altitude\nSAFIRE-CORE vs. fitted JOANNE profile")

Text(0.5, 1.0, 'RH saturation altitude from JOANNE\n vs. cloud altitude from SAFIRE-CORE')

In [None]:
# Compare, per cloud, the mean dropsonde relative humidity with the fitted
# profile, both taken at the altitude level nearest to the cloud altitude.
fig, ax = plt.subplots()

cloud_id = fitted_relative_humidity["cloud_id"]
altitudes = identified_clusters.swap_dims({"time": "cloud_id"})["altitude"].sel(cloud_id=cloud_id)


ax.hist2d(
    # Scaled by 1e2 so that it is comparable with the fitted values, which
    # were scaled by 100 as well.
    1e2 * drop_sondes_humidity.sel(cloud_id=cloud_id).sel(altitude=altitudes, method="nearest"),
    fitted_relative_humidity.sel(altitude=altitudes, method="nearest"),
    bins=np.arange(60, 110, 2),
)
# The x data are dropsonde means, not SAFIRE-CORE values.
ax.set_xlabel("Dropsonde mean " + label_from_attrs(fitted_relative_humidity))
ax.set_ylabel("Fitted " + label_from_attrs(fitted_relative_humidity))
# 1:1 line as visual reference for perfect agreement.
ax.plot([60, 110], [60, 110], color="black", linestyle="--")
# The previous title was copied from the saturation-altitude plot and did not
# describe this figure.
ax.set_title("Relative humidity at cloud altitude\nJOANNE dropsonde mean vs. fitted JOANNE profile")

Text(0.5, 1.0, 'RH saturation altitude from JOANNE\n vs. cloud altitude from SAFIRE-CORE')

In [None]:
# Distribution of the fitted relative humidity evaluated at the altitude
# level nearest to each cloud altitude (one value per cloud).
fig, ax = plt.subplots()

cloud_id = fitted_relative_humidity["cloud_id"]
altitudes = identified_clusters.swap_dims({"time": "cloud_id"})["altitude"].sel(cloud_id=cloud_id)


ax.hist(
    fitted_relative_humidity.sel(altitude=altitudes, method="nearest"),
    color=default_colors[1],
    alpha=0.5,
)
# The x axis shows the fitted relative humidity and the y axis the bin
# counts; the previous labels and title belonged to the saturation-altitude
# scatter plot.
ax.set_xlabel(label_from_attrs(fitted_relative_humidity))
ax.set_ylabel("Number of clouds")
ax.set_title("Fitted relative humidity from JOANNE\nat the cloud altitude from SAFIRE-CORE")

Text(0.5, 1.0, 'RH saturation altitude from JOANNE\n vs. cloud altitude from SAFIRE-CORE')

### Weighted

In [None]:
from typing import Tuple, Dict, Union, Callable, TypedDict
import numpy as np
import xarray as xr
from scipy.optimize import least_squares, Bounds
from inspect import signature


def __annotation_dict__(func: Callable):
    """
    Map every parameter name of ``func`` to its annotation.

    Parameters
    ----------
    func : Callable
        Callable whose signature is inspected with ``inspect.signature``.

    Returns
    -------
    dict
        A plain dictionary ``{parameter name: annotation}`` in signature
        order. Parameters without an annotation map to the ``annotation``
        value that ``inspect`` reports for them (``inspect.Parameter.empty``).
    """
    parameters = signature(func).parameters
    return {name: parameter.annotation for name, parameter in parameters.items()}


class LeastSquareFit:
    """
    Least-squares fit of a model function, based on scipy.optimize.least_squares.

    The model has the form ``y = F(t, *x)``: ``t`` is the independent variable, ``y`` the
    dependent variable and ``x`` the vector of parameters to estimate. The first argument of
    the model function has to be ``t``; every further argument is one fit parameter.
    Multidimensional training data is supported, it is flattened before the fit.
    The cost function returns the residuals between data and model which are minimised.

    Attributes:
        name (str): The name of the fitting instance.
        func (Callable): The model function to fit.
        cost_func (Callable): The cost function to minimize.
        x0 (np.ndarray): Initial guess for the parameters.
        bounds (Bounds): Bounds on the parameters.
        t_train (Union[np.ndarray, xr.DataArray]): Training data for the independent variable.
        y_train (Union[np.ndarray, xr.DataArray]): Training data for the dependent variable.
        fit_kwargs (Dict): Additional keyword arguments for the least_squares function.
        plot_kwargs (Dict): Additional keyword arguments for plotting.
        fit_result: The result of the latest fit, ``None`` before the first fit.

    Methods:
        fit(repetitions: int = 1):
            Perform the fitting process. Can repeat the fitting multiple times.

        predict(t_test):
            Evaluate the model with the current parameters for given test data.
    """

    def __init__(
        self,
        name: str,
        func: Callable,
        x0: np.ndarray,
        bounds: Bounds,
        t_train: "Union[np.ndarray, xr.DataArray]",
        y_train: "Union[np.ndarray, xr.DataArray]",
        cost_func: Union[Callable, None] = None,
        fit_kwargs: Union[Dict, None] = None,
        plot_kwargs: Union[Dict, None] = None,
    ):
        """
        Initialize the LeastSquareFit instance.

        Parameters:
            name (str): The name of the fitting instance.
            func (Callable): The model function to fit.
            x0 (np.ndarray): Initial guess for the parameters.
            bounds (Bounds): Bounds on the parameters.
            t_train (Union[np.ndarray, xr.DataArray]): Training data for the independent variable.
            y_train (Union[np.ndarray, xr.DataArray]): Training data for the dependent variable.
            cost_func (Callable or None): The cost function to minimize. If None, the default
                cost function based on the model function is used.
            fit_kwargs (Dict or None): Additional keyword arguments for the least_squares
                function. None is treated as an empty dictionary.
            plot_kwargs (Dict or None): Additional keyword arguments for plotting.
                None is treated as an empty dictionary.

        Raises:
            ValueError: If the length of x0 does not match the number of model parameters.
        """
        self.name = name
        self.func = func

        # None selects the default residual function, see the cost_func setter
        self.cost_func = cost_func
        # x0 is validated against the signature of func, so func has to be set before
        self.x0 = x0
        self.bounds = bounds
        self.t_train = t_train
        self.y_train = y_train
        # a fresh dictionary per instance instead of a mutable default shared by all instances
        self.fit_kwargs = {} if fit_kwargs is None else fit_kwargs
        self.plot_kwargs = {} if plot_kwargs is None else plot_kwargs
        self.fit_result = None

    def __default_cost_func__(self, x: np.ndarray, t: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
        """
        The default cost function: residuals between the data and the model.

        Parameters:
            x (np.ndarray): The parameters to estimate.
            t (np.ndarray): The independent variable.
            y (np.ndarray): The dependent variable.

        Returns:
            np.ndarray: The flattened difference ``y - func(t, *x)`` without NaN entries.
        """
        residuals = np.ravel(y - self.func(t, *x))

        # only use the non-NaN values
        return residuals[~np.isnan(residuals)]

    # The model function
    @property
    def func(self) -> Callable:
        """The model function to fit."""
        return self._func

    @func.setter
    def func(self, func: Callable):
        self._func = func

    # The cost function to minimize in the least squares fitting
    @property
    def cost_func(self):
        """
        The cost function to minimize.
        If no cost function is provided, a default cost function is used based on the model function.
        It is explained in the __default_cost_func__ method.
        """
        return self._cost_func

    @cost_func.setter
    def cost_func(self, cost_func: Union[Callable, None]):
        # set the cost function to the default cost function if None is given
        if cost_func is None:
            self._cost_func = self.__default_cost_func__
        else:
            self._cost_func = cost_func

    # ------------
    # Properties for parameters
    @property
    def x0(self) -> np.ndarray:
        """Initial guess for the parameters."""
        return self._x0

    @x0.setter
    def x0(self, x0: np.ndarray):
        # validate that x0 fits the model function;
        # the first argument of the model is the independent variable and not a parameter
        keys = list(self.ParameterDict.keys())
        if len(x0) != len(keys) - 1:
            raise ValueError(
                f"Initial guess x0 has {len(x0)} elements, but the model function has {len(keys)-1} parameters with parameter keys {keys[1:]}"
            )

        self._x0 = x0

    @property
    def x_guess(self):
        """
        The current guess of the parameters.
        This is x0 before the first fit and the result of the latest fit afterwards.
        This property is read-only.
        """
        if self.fit_result is None:
            return self.x0
        return self.fit_result.x

    def __x_to_parameters__(self, x: np.ndarray) -> dict:
        """This function converts the list of parameter values x to a dictionary with the parameter names given by the function signature."""
        # ignore the first argument, as it is the independent variable
        keys = list(self.ParameterDict.keys())[1:]

        return dict(zip(keys, x))

    @property
    def parameters(self):
        """
        Current parameters for the model function: x0 before the first fit, the fitted values afterwards.
        The parameters are stored in a dictionary with the parameter names given by the function signature.
        This property is read-only.
        """
        return self.__x_to_parameters__(self.x_guess)

    @property
    def ParameterDict(self):
        """
        Dictionary of all arguments of the model function (including the independent
        variable as first entry) mapped to their annotations.
        This property is read-only.
        """
        return __annotation_dict__(self.func)

    @property
    def bounds(self) -> Bounds:
        """
        Bounds on the parameters.
        This is a Bounds object from scipy.optimize.
        """
        return self._bounds

    @bounds.setter
    def bounds(self, bounds: Bounds):
        self._bounds = bounds

    def fit(self, repetitions: int = 1):
        """
        Perform the fitting process. Can repeat the fitting multiple times.
        Every repetition starts from the result of the previous one (see x_guess).

        Parameters:
            repetitions (int): The number of times to repeat the fitting process. Default is 1.

        Returns:
            The result of the latest fit (also stored in ``fit_result``).
        """
        # the flattened training data is the same for every repetition
        t_flat = np.ravel(self.t_train)
        y_flat = np.ravel(self.y_train)

        for _ in range(repetitions):
            self.fit_result = least_squares(
                self.cost_func,
                x0=self.x_guess,
                bounds=self.bounds,
                args=(t_flat, y_flat),
                **self.fit_kwargs,
            )

        return self.fit_result

    def predict(
        self, t_test: "Union[np.ndarray, xr.DataArray]"
    ) -> "Tuple[Union[np.ndarray, xr.DataArray], Union[np.ndarray, xr.DataArray]]":
        """
        Evaluate the model function with the current parameters for given test data.

        Parameters:
            t_test (Union[np.ndarray, xr.DataArray]): Test data for the independent variable.

        Returns:
            Tuple[Union[np.ndarray, xr.DataArray], Union[np.ndarray, xr.DataArray]]: The test data and the predicted dependent variable.
        """
        return t_test, self.func(t_test, **self.parameters)


def linear_func(x: np.ndarray, f_0: float = 2, slope: float = 1):
    """
    Straight line with y-intercept ``f_0`` and gradient ``slope``.

    :math:`y = slope * x + f_0`
    """
    scaled = slope * x
    return scaled + f_0


def split_linear_func(
    x: np.ndarray, f_0: float = 2, slope_1: float = 1, slope_2: float = 2, x_split: float = 800
):
    """
    Continuous piecewise linear function whose slope changes at ``x_split``.

    :math:`y = slope_1 * x + f_0` for x <= x_split
    :math:`y = slope_2 * x + f_0 + (slope_1 - slope_2) * x_split` for x > x_split

    The intercept of the second segment is chosen such that both segments meet at ``x_split``.

    Parameters
    ----------
    x : np.ndarray
        The input array
    f_0 : float, optional
        The y-intercept of the first segment, by default 2
    slope_1 : float, optional
        The slope of the first linear function, by default 1
    slope_2 : float, optional
        The slope of the second linear function, by default 2
    x_split : float, optional
        The x value at which the slope changes, by default 800

    Returns
    -------
    np.ndarray
        The piecewise linear function evaluated at ``x``.
        Entries where ``x`` is NaN are NaN.

    Examples
    --------
    >>> x = np.array([0, 400, 800, 900])
    >>> split_linear_func(x, f_0=2, slope_1=1, slope_2=2, x_split=800)
    array([   2.,  402.,  802., 1002.])
    """
    below = x <= x_split
    above = x > x_split

    # Each segment is evaluated only on its own part of x, the rest is NaN for now.
    x_1 = np.where(below, x, np.nan)
    x_2 = np.where(above, x, np.nan)

    y_1 = linear_func(x=x_1, f_0=f_0, slope=slope_1)
    y_2 = linear_func(x=x_2, f_0=f_0 + (slope_1 - slope_2) * x_split, slope=slope_2)

    # Set each segment to zero on the part that belongs to the other segment, so that the
    # sum picks exactly one segment per entry. A NaN in x belongs to neither part and
    # therefore stays NaN in both segments.
    y_1 = np.where(above, 0, y_1)
    y_2 = np.where(below, 0, y_2)
    return y_1 + y_2


def saturated_linear_func(
    x: np.ndarray, f_0: float = 2, slope_1: float = 1, saturation_value: float = 1
):
    """
    Linear function ``slope_1 * x + f_0`` which stays constant at ``saturation_value``
    beyond the position where the line reaches this value.

    Parameters
    ----------
    x : np.ndarray
        The input array
    f_0 : float, optional
        The y-intercept, by default 2
    slope_1 : float, optional
        The slope of the linear part, by default 1
    saturation_value : float, optional
        The value at which the function saturates, by default 1

    Returns
    -------
    np.ndarray
        The saturated linear function evaluated at ``x``
    """
    # position at which the linear part reaches the saturation value
    saturation_x = (saturation_value - f_0) / slope_1

    # a second segment with slope zero keeps the function constant beyond that position
    segments = dict(f_0=f_0, slope_1=slope_1, slope_2=0, x_split=saturation_x)
    return split_linear_func(x=x, **segments)


class FixedSaturatedLinearLeastSquare(LeastSquareFit):
    """
    A class to perform least squares fitting for a saturated linear function for a fixed saturation value.
    Only the y-intercept ``f_0`` and the slope ``slope_1`` are fitted.
    Optionally, every residual is multiplied by a weight.

    Attributes:
        name (str): The name of the fitting instance.
        func (Callable): The model function to fit.
        cost_func (Callable): The cost function to minimize.
        x0 (np.ndarray): Initial guess for the parameters (f_0, slope_1).
        bounds (Bounds): Bounds on the parameters.
        t_train (Union[np.ndarray, xr.DataArray]): Training data for the independent variable.
        y_train (Union[np.ndarray, xr.DataArray]): Training data for the dependent variable.
        weight (np.ndarray or None): Weights of the residuals, None for an unweighted fit.
        fit_kwargs (Dict): Additional keyword arguments for the least_squares function.
        plot_kwargs (Dict): Additional keyword arguments for plotting.
        fit_result: The result of the fitting process.
    """

    def __init__(
        self,
        name: str,
        x0: np.ndarray,
        bounds: Bounds,
        t_train: np.ndarray,
        y_train: np.ndarray,
        weight: Union[np.ndarray, None] = None,
        saturation_values: float = 1,
        fit_kwargs: Union[Dict, None] = None,
        plot_kwargs: Union[Dict, None] = None,
    ):
        """
        Initialize the FixedSaturatedLinearLeastSquare instance.

        Parameters:
            name (str): The name of the fitting instance.
            x0 (np.ndarray): Initial guess for the parameters (f_0, slope_1).
            bounds (Bounds): Bounds on the parameters.
            t_train (np.ndarray): Training data for the independent variable.
            y_train (np.ndarray): Training data for the dependent variable.
            weight (np.ndarray or None): Weights with the same shape as t_train.
                If None, the unweighted default cost function is used.
            saturation_values (float): The fixed saturation value for the function.
            fit_kwargs (Dict or None): Additional keyword arguments for the least_squares
                function. None is treated as an empty dictionary.
            plot_kwargs (Dict or None): Additional keyword arguments for plotting.
                None is treated as an empty dictionary.

        Raises:
            AssertionError: If weight is given and its shape differs from the shape of t_train.
        """

        def fixed_saturation(x, f_0, slope_1):
            """
            Saturated linear function with the saturation value fixed to ``saturation_values``.
            """

            return saturated_linear_func(x, f_0, slope_1, saturation_value=saturation_values)

        # The base class installs the unweighted default cost function.
        # Fresh dictionaries are passed instead of sharing mutable default arguments.
        super().__init__(
            name=name,
            func=fixed_saturation,
            x0=x0,
            bounds=bounds,
            t_train=t_train,
            y_train=y_train,
            fit_kwargs={} if fit_kwargs is None else fit_kwargs,
            plot_kwargs={} if plot_kwargs is None else plot_kwargs,
        )

        if weight is not None:
            np.testing.assert_array_equal(t_train.shape, weight.shape)
            self.weight = weight
            self.cost_func = self.__weighted_cost_function__
        else:
            # Keep the default cost function of the base class. It must be referenced with
            # trailing underscores: a name with only leading underscores is name-mangled
            # inside a class body and would raise an AttributeError here.
            self.weight = None
            self.cost_func = self.__default_cost_func__

    def __weighted_cost_function__(
        self,
        x: np.ndarray,
        t: np.ndarray,
        y: np.ndarray,
        **kwargs,
    ) -> np.ndarray:
        """
        The weighted cost function: residuals multiplied by the normalised weights.

        Entries with a NaN residual are dropped from residuals and weights. The remaining
        weights are divided by their sum before they are applied.
        NOTE(review): the weights multiply the residuals themselves, so they enter a sum of
        squared residuals quadratically - confirm that this is intended.

        Parameters:
            x (np.ndarray): The parameters to estimate.
            t (np.ndarray): The independent variable.
            y (np.ndarray): The dependent variable.

        Returns:
            np.ndarray: The weighted, flattened difference ``y - func(t, *x)`` without NaN entries.
        """
        residuals = np.ravel(y - self.func(t, *x))
        weight = np.ravel(self.weight)

        # only use the entries with a non-NaN residual
        valid = ~np.isnan(residuals)
        residuals = residuals[valid]
        weight = weight[valid]
        weight = weight / np.nansum(weight)

        return np.ravel(residuals * weight)

In [None]:
# Index the cloud/dropsonde distance dataset by cloud id instead of the cloud time dimension.
dims_to_swap = {"time_identified_clouds": "cloud_id"}
distance_swap = distance.swap_dims(dims_to_swap)
distance_swap

In [None]:
# Fit a saturated linear relative humidity profile to the dropsondes around each cloud.
# ``result`` maps the cloud id (as string) to the fitted parameters.
result = {}

# The dimension swap does not depend on the loop variable, so it is done once up front.
clusters_by_id = identified_clusters.swap_dims({"time": "cloud_id"})

# Altitude layers, given as labels of the dropsonde ``altitude`` coordinate.
weight_layer = slice(100, 700)  # layer whose mean RH defines the weight of a sonde
fit_layer = slice(200, 500)  # layer on which the function is fitted

for cloud_id in tqdm(clusters_by_id["cloud_id"]):

    cloud_id = int(cloud_id)
    ic = clusters_by_id.sel(cloud_id=cloud_id)

    sondes = match_clouds_and_dropsondes(
        ds_clouds=ic,
        ds_sonde=drop_sondes,
        ds_distance=distance,
        max_temporal_distance=np.timedelta64(3, "h"),
        max_spatial_distance=1e2,
    )

    # Clouds with three or fewer matched dropsondes are skipped.
    if sondes["time"].size <= 3:
        continue

    time = sondes["time"]
    altitude = sondes["altitude"]

    # Dependent (RH) and independent (altitude) variable, both with dims (time, altitude).
    y_train = sondes["relative_humidity"].transpose("time", "altitude")
    t_train = sondes["altitude"].expand_dims(time=time).transpose("time", "altitude")

    # One weight per sonde: its mean RH in the weight layer, min-max normalised over all
    # matched sondes, so the sonde with the lowest mean gets 0 and the highest gets 1.
    # (A weighting by temporal/spatial distance to the cloud was an earlier alternative
    # and is not used.)
    w = y_train.sel(altitude=weight_layer).mean(dim="altitude")
    w = w - w.min()
    w = w / w.max()
    weight = w.expand_dims(altitude=altitude).transpose("time", "altitude")

    # Restrict the fit to the fit layer.
    y_train = y_train.sel(altitude=fit_layer)
    t_train = t_train.sel(altitude=fit_layer)
    weight = weight.sel(altitude=fit_layer)

    thermo_fit = FixedSaturatedLinearLeastSquare(
        name="test",
        x0=RelativeHumidityIB().x0,
        bounds=RelativeHumidityIB().bounds,
        y_train=y_train,
        t_train=t_train,
        weight=weight,
    )

    # Repeated fit, each repetition starts from the previous result.
    thermo_fit.fit(10)

    result[str(cloud_id)] = thermo_fit.parameters


cloud_ids = np.array(list(result.keys())).astype(int)

100%|██████████| 317/317 [00:39<00:00,  7.98it/s]


In [None]:
# Evaluate the fitted saturated linear function of every cloud on the dropsonde altitude
# grid and collect the altitude at which each fit reaches saturation.
saturation_value = 1
x = drop_sondes["altitude"]

fitted_relative_humidity = []
x_split = []
for cloud_id in cloud_ids:

    parameters = result[str(cloud_id)]
    # altitude at which the linear part reaches the saturation value
    x_s = (saturation_value - parameters["f_0"]) / parameters["slope_1"]

    y = sdm_models.saturated_linear_func(
        x=x,
        saturation_value=saturation_value,
        **parameters,
    )
    # scale the fit, which saturates at 1, to percent
    y = 100 * y
    attrs = {
        "long_name": "Relative Humidity",
        # raw string: "\%" without the r prefix is an invalid escape sequence
        "units": r"\%",
    }
    y = xr.DataArray(y, dims=x.dims, coords=x.coords, attrs=attrs)
    fitted_relative_humidity.append(y)
    x_split.append(x_s)


fitted_relative_humidity = xr.concat(fitted_relative_humidity, dim="cloud_id")
fitted_relative_humidity["cloud_id"] = cloud_ids

x_split = xr.DataArray(
    x_split,
    dims="cloud_id",
    coords=dict(cloud_id=cloud_ids),
    attrs=dict(long_name="RH saturation altitude", units="m"),
)

In [None]:
fig, ax = plt.subplots()

# One line per cloud: the fitted relative humidity profile against altitude.
ax.plot(
    fitted_relative_humidity.T,
    fitted_relative_humidity["altitude"],
    color="k",
    alpha=0.2,
    linestyle="-",
)
ax.set_xlabel(label_from_attrs(fitted_relative_humidity))
ax.set_ylabel(r"Altitude [$m$]")
ax.set_title("Fitted RH profiles of all clouds")
ax.set_ylim(0, 1500)

(0.0, 1500.0)

In [None]:
# Compare the fitted RH profile of one cloud with the dropsonde and aircraft measurements.
# NOTE(review): the cloud is drawn at random; unless a seed is set elsewhere, the figure
# differs between runs.
cloud_id = np.random.choice(np.array(list(result.keys())).astype(int))

ic = identified_clusters.swap_dims({"time": "cloud_id"}).sel(cloud_id=cloud_id)

# Dropsondes within 2 h of the cloud and, for context, within 12 h.
drop_sondes_selected = match_clouds_and_dropsondes(
    ds_clouds=ic,
    ds_sonde=drop_sondes,
    ds_distance=distance,
    max_temporal_distance=np.timedelta64(2, "h"),
    max_spatial_distance=1e2,
)
drop_sondes_selected_large = match_clouds_and_dropsondes(
    ds_clouds=ic,
    ds_sonde=drop_sondes,
    ds_distance=distance,
    max_temporal_distance=np.timedelta64(12, "h"),
    max_spatial_distance=1e2,
)

if drop_sondes_selected["time"].size == 0:
    raise ValueError("No dropsondes found for the selected cloud")

cloud_composite_selected = match_clouds_and_cloudcomposite(
    ds_clouds=ic,
    ds_cloudcomposite=cloud_composite,
)

# Aircraft measurements at the time steps of the selected cloud.
safire_selected = safire.sel(time=cloud_composite_selected["time"])


fig, ax = plt.subplots()

ax.plot(
    1e2 * drop_sondes_selected["relative_humidity"].T,
    drop_sondes_selected["altitude"],
    color="grey",
    alpha=0.5,
)

ax.plot(
    1e2 * drop_sondes_selected_large["relative_humidity"].T,
    drop_sondes_selected_large["altitude"],
    color="grey",
    alpha=0.1,
)


ax.axhline(
    cloud_composite_selected["altitude"].mean(),
    color="k",
    linestyle=":",
    alpha=1,
    zorder=0,
    label="ATR altitude",
)

# Mean and standard deviation of both aircraft humidity measurements at the mean flight
# altitude. The error bar is the standard deviation, not the standard error of the mean.
safire_altitude = safire_selected["altitude"].mean()
for variable, marker, color, label in (
    ("relative_humidity_1", "x", "red", "RH WVSS2"),
    ("relative_humidity_2", "o", "blue", "RH LICOR"),
):
    rh_mean = safire_selected[variable].mean(skipna=True)
    rh_std = safire_selected[variable].std(skipna=True)
    ax.errorbar(
        x=rh_mean,
        xerr=rh_std,
        y=safire_altitude,
        marker=marker,
        alpha=0.9,
        color=color,
        label=label,
    )

# Fitted profile of the selected cloud.
ax.plot(
    fitted_relative_humidity.sel(cloud_id=cloud_id),
    fitted_relative_humidity["altitude"],
    color="k",
    alpha=1,
    linestyle="--",
)

ax.legend()
ax.set_ylim(0, 1200)
ax.set_xlim(50, 120)
ax.set_ylabel(r"Altitude [$m$]")
ax.set_xlabel(label_from_attrs(fitted_relative_humidity))
ax.set_title(f"Cloud {cloud_id}")

Text(0.5, 1.0, 'Cloud 302')

In [None]:
fig, ax = plt.subplots()

# Altitude of each cloud for which a saturation altitude was fitted.
cloud_altitude = identified_clusters.swap_dims({"time": "cloud_id"}).sel(cloud_id=x_split["cloud_id"])[
    "altitude"
]
ax.scatter(x_split, cloud_altitude, color=default_colors[1], alpha=0.5)

# 1:1 reference line
one_to_one = [500, 1500]
ax.plot(one_to_one, one_to_one, color="black", linestyle="--")

ax.set_title("RH saturation altitude from JOANNE\n vs. cloud altitude from SAFIRE-CORE")
ax.set_xlabel(label_from_attrs(x_split))
ax.set_ylabel(label_from_attrs(identified_clusters["altitude"]))
ax.set_xlim(400, 1600)
ax.set_ylim(400, 1600)

(400.0, 1600.0)

In [None]:
fig, ax = plt.subplots()

# Altitude of every cloud for which a fitted humidity profile exists.
cloud_id = fitted_relative_humidity["cloud_id"]
altitudes = identified_clusters.swap_dims({"time": "cloud_id"})["altitude"].sel(cloud_id=cloud_id)

# Aircraft RH of each cloud against the fitted RH at the level nearest to the cloud altitude.
ax.hist2d(
    safire_humidity.sel(cloud_id=cloud_id),
    fitted_relative_humidity.sel(altitude=altitudes, method="nearest"),
    bins=np.arange(60, 110, 2),
)
# 1:1 reference line
ax.plot([60, 110], [60, 110], color="black", linestyle="--")

ax.set_xlabel(label_from_attrs(safire_humidity))
ax.set_ylabel(label_from_attrs(fitted_relative_humidity))
ax.set_title("RH from SAFIRE-CORE\n vs. fitted RH from JOANNE at cloud altitude")

Text(0.5, 1.0, 'RH saturation altitude from JOANNE\n vs. cloud altitude from SAFIRE-CORE')

In [None]:
fig, ax = plt.subplots()

# Altitude of every cloud for which a fitted humidity profile exists.
cloud_id = fitted_relative_humidity["cloud_id"]
altitudes = identified_clusters.swap_dims({"time": "cloud_id"})["altitude"].sel(cloud_id=cloud_id)

# Dropsonde RH against fitted RH, both taken at the altitude level nearest to each cloud.
# The factor 1e2 presumably converts the dropsonde RH from a fraction to percent so that it
# matches the fitted profile - TODO confirm against the definition of drop_sondes_humidity.
ax.hist2d(
    1e2 * drop_sondes_humidity.sel(cloud_id=cloud_id).sel(altitude=altitudes, method="nearest"),
    fitted_relative_humidity.sel(altitude=altitudes, method="nearest"),
    bins=np.arange(60, 110, 2),
)
# 1:1 reference line
ax.plot([60, 110], [60, 110], color="black", linestyle="--")

# The x axis shows dropsonde data, so it must not be labelled from the SAFIRE attributes.
ax.set_xlabel(r"Dropsonde relative humidity [$\%$]")
ax.set_ylabel(label_from_attrs(fitted_relative_humidity))
ax.set_title("Dropsonde RH from JOANNE\n vs. fitted RH at cloud altitude")

Text(0.5, 1.0, 'RH saturation altitude from JOANNE\n vs. cloud altitude from SAFIRE-CORE')

In [None]:
fig, ax = plt.subplots()

# Altitude of every cloud for which a fitted humidity profile exists.
cloud_id = fitted_relative_humidity["cloud_id"]
altitudes = identified_clusters.swap_dims({"time": "cloud_id"})["altitude"].sel(cloud_id=cloud_id)

# Distribution of the fitted RH evaluated at the level nearest to each cloud's altitude.
ax.hist(
    fitted_relative_humidity.sel(altitude=altitudes, method="nearest"),
    color=default_colors[1],
    alpha=0.5,
)
# The histogram shows relative humidity (x) against the number of clouds (y).
ax.set_xlabel(label_from_attrs(fitted_relative_humidity))
ax.set_ylabel("Counts")
ax.set_title("Fitted RH from JOANNE\n at cloud altitude")

Text(0.5, 1.0, 'RH saturation altitude from JOANNE\n vs. cloud altitude from SAFIRE-CORE')