In [None]:
from sdm_eurec4a import slurm_cluster as scluster

# Options for the dask cluster that is launched through SLURM.
# The scheduler dashboard is served on port 8686.
slurm_cluster_options = dict(
    scale=1,
    processes=16,
    walltime="00:35:00",
    memory="16GB",
    scheduler_options={"dashboard_address": ":8686"},
)

# Start the cluster and keep both the client and the cluster handle.
client, cluster = scluster.init_dask_slurm_cluster(**slurm_cluster_options)

#!/usr/bin/env bash

#SBATCH -J dask-worker
#SBATCH -e /scratch/m/m301096/dask_logs/dask-worker-%J.err
#SBATCH -o /scratch/m/m301096/dask_logs/dask-worker-%J.out
#SBATCH -p compute
#SBATCH -A um1487
#SBATCH -n 1
#SBATCH --cpus-per-task=16
#SBATCH --mem=15G
#SBATCH -t 00:35:00

/work/um1487/m301096/conda/envs/sdm_pysd_python312/bin/python -m distributed.cli.dask_worker tcp://10.128.6.211:41675 --name dummy-name --nthreads 1 --memory-limit 0.93GiB --nworkers 16 --nanny --death-timeout 60 --local-directory /scratch/m/m301096/dask_temp --interface ib0



In [None]:
from ruamel.yaml import YAML
import numpy as np
import xarray as xr
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import seaborn as sns

# Cubehelix palette returned as a matplotlib colormap (``as_cmap=True``).
strength_cmap = sns.cubehelix_palette(as_cmap=True, start=0.5, rot=-0.5)

from sdm_eurec4a.visulization import (
    set_custom_rcParams,
    set_paper_rcParams,
    label_from_attrs,
    adjust_lightness_array,
    plot_one_one,
    handler_map_alpha,
    save_figure,
    add_subplotlabel,
)
from sdm_eurec4a import RepositoryPath
from sdm_eurec4a import data_loading
from sdm_eurec4a.reductions import mean_and_stderror_of_mean
from sdm_eurec4a.conversions import (
    msd_from_psd_dataarray,
    potential_temperature_from_temperature_pressure,
    relative_humidity_from_tps,
)
from sdm_eurec4a.input_processing import models as smodels
from sdm_eurec4a.identifications import match_clouds_and_cloudcomposite, match_clouds_and_dropsondes
from sdm_eurec4a.constants import TimeSlices


# Apply the paper plotting style and keep the value it returns as the default colors.
default_colors = set_paper_rcParams()
# Lightness-adjusted variant of the default colors (factor 0.75).
default_dark_colors = adjust_lightness_array(default_colors, 0.75)

# Repository locations for the "levante" machine configuration.
RepoPaths = RepositoryPath("levante")

# Input locations: observations, model input parameters and CLEO model output.
OBS_data_dir = RepoPaths.data_dir
input_data_dir = OBS_data_dir / Path("model/input_v4.2")
CLEO_data_dir = RepoPaths.CLEO_data_dir / Path("output_v4.2")

# Output directory for every figure of this notebook.  It is defined and created
# exactly once (the cell previously did this twice with identical arguments).
# ``parents=False``: the parent "paper-v4.2" directory must already exist,
# otherwise ``mkdir`` raises FileNotFoundError.
fig_dir = RepoPaths.fig_dir / Path("paper-v4.2/observations")
fig_dir.mkdir(exist_ok=True, parents=False)

<!-- ## Data loading -->

In [None]:
# --- Locations of the observational input files ------------------------------
cloud_composite_file = OBS_data_dir / Path(
    "observation/cloud_composite/processed/cloud_composite_SI_units_20241025.nc"
)
dropsonde_file = OBS_data_dir / Path("observation/dropsonde/processed/drop_sondes.nc")
distance_file = OBS_data_dir / Path(
    "observation/combined/distance/distance_dropsondes_identified_clusters_rain_mask_5.nc"
)
identified_clusters_file = OBS_data_dir / Path(
    "observation/cloud_composite/processed/identified_clusters/identified_clusters_rain_mask_5.nc"
)

# --- Open the datasets ---------------------------------------------------------
cloud_composite = xr.open_dataset(cloud_composite_file)
dropsonde = xr.open_dataset(dropsonde_file)
ds_distances = xr.open_dataset(distance_file)
# The identified clusters are indexed by cloud id instead of time.
identified_clusters = xr.open_dataset(identified_clusters_file).swap_dims({"time": "cloud_id"})

# --- Prepare the cloud composite -----------------------------------------------
# Radius expanded along the time dimension, stored as an extra variable.
cloud_composite["radius2D"] = cloud_composite["radius"].expand_dims(time=cloud_composite["time"])
cloud_composite = cloud_composite.transpose("radius", ...)

# Scale the mass size distribution by 1e3 and label it as g m^-3 m^-1
# (presumably kg -> g, since the input file is named "SI_units" -- TODO confirm).
# NOTE(review): the key written here is "unit", while the other cells of this
# notebook write "units" -- confirm which key the plotting helpers read.
msd_attrs = dict(cloud_composite["mass_size_distribution"].attrs)
msd_attrs["unit"] = "g m^{-3} m^{-1}"
msd_scaled = cloud_composite["mass_size_distribution"] * 1e3
msd_scaled.attrs = msd_attrs
cloud_composite["mass_size_distribution"] = msd_scaled

# Keep only radii of at least 10e-6 (radius coordinate units as stored in the file).
cloud_composite = cloud_composite.sel(radius=slice(10e-6, None))

# --- Filter the identified clusters --------------------------------------------
# NOTE(review): ``.dt.seconds`` is the seconds component of the timedelta, not the
# total duration -- identical only for durations shorter than one day.
long_enough = identified_clusters.duration.dt.seconds >= 3
below_upper_limit = identified_clusters.altitude < 1200
above_lower_limit = identified_clusters.altitude > 500
identified_clusters = identified_clusters.where(
    long_enough & below_upper_limit & above_lower_limit,
    drop=True,
)

In [None]:
# Read the cloud ids listed under "valid_cloud_ids" in the YAML file that sits
# next to the CLEO output.  The "safe" loader is requested explicitly; when
# not specified, ruamel defaults to 'rt' (round-trip).
yaml_loader = YAML(typ="safe")
valid_ids_document = yaml_loader.load(CLEO_data_dir / Path("valid_cloud_ids.yaml"))
valid_cloud_ids = valid_ids_document["valid_cloud_ids"]

# Restrict the identified clusters to those ids.
identified_clusters = identified_clusters.sel(cloud_id=valid_cloud_ids)

In [None]:
def _concat_over_clouds(per_cloud, long_name, units):
    """Concatenate per-cloud results along ``cloud_id`` and set their plot metadata.

    Parameters
    ----------
    per_cloud : list of xarray.DataArray
        One entry per cloud, each carrying a length-one ``cloud_id`` dimension.
    long_name : str
        Value stored in ``attrs["long_name"]`` of the result.
    units : str
        Value stored in ``attrs["units"]`` of the result.

    Returns
    -------
    xarray.DataArray
        The concatenated array; its attrs are replaced by ``long_name`` and ``units``.
    """
    combined = xr.concat(per_cloud, dim="cloud_id")
    combined.attrs = dict(long_name=long_name, units=units)
    return combined


# Per-cloud temporal mean and standard error of the mean (SEM) of:
#   * the liquid water content variable of the cloud composite,
#   * the mass summed over all bins with radius >= 50e-6,
#   * the number concentration per radius bin,
#   * the number concentration summed over all bins with radius >= 50e-6.
list_lwc, list_lwc_sem = [], []
list_lwc_50um, list_lwc_50um_sem = [], []
list_nbc, list_nbc_sem = [], []
list_nbc_50um, list_nbc_50um_sem = [], []

for cloud_id in identified_clusters["cloud_id"]:
    # Cloud composite measurements that belong to this cloud.
    cc = match_clouds_and_cloudcomposite(
        ds_clouds=identified_clusters.sel(cloud_id=cloud_id),
        ds_cloudcomposite=cloud_composite,
    )
    # Add a length-one cloud_id dimension so the results can be concatenated later.
    cc = cc.expand_dims(cloud_id=[int(cloud_id.values)])

    lwc = cc["liquid_water_content"]
    lwc_mean, lwc_sem = mean_and_stderror_of_mean(lwc, dims=("time",))
    list_lwc.append(lwc_mean)
    list_lwc_sem.append(lwc_sem)

    # Distribution times bin width gives the content of each radius bin.
    mass_per_bin = cc["mass_size_distribution"] * cc["bin_width"]
    lwc_above_50um = mass_per_bin.sel(radius=slice(50e-6, None)).sum("radius")
    lwc_above_50um_mean, lwc_above_50um_sem = mean_and_stderror_of_mean(lwc_above_50um, dims=("time",))
    list_lwc_50um.append(lwc_above_50um_mean)
    list_lwc_50um_sem.append(lwc_above_50um_sem)

    # Per-bin number concentration; computed once and reused for the >= 50e-6 sum.
    nbc = cc["particle_size_distribution"] * cc["bin_width"]
    nbc_mean, nbc_sem = mean_and_stderror_of_mean(nbc, dims=("time",))
    list_nbc.append(nbc_mean)
    list_nbc_sem.append(nbc_sem)

    nbc_above_50um = nbc.sel(radius=slice(50e-6, None)).sum("radius")
    nbc_above_50um_mean, nbc_above_50um_sem = mean_and_stderror_of_mean(nbc_above_50um, dims=("time",))
    list_nbc_50um.append(nbc_above_50um_mean)
    list_nbc_50um_sem.append(nbc_above_50um_sem)


# NOTE(review): "liquid_water_content" is labelled g m^{-3} here without being
# rescaled in this cell, whereas the mass size distribution was multiplied by 1e3
# during loading -- confirm that the stored liquid water content is already in g m^-3.
da_lwc = _concat_over_clouds(list_lwc, "Liquid water content", "g m^{-3}")
da_lwc_sem = _concat_over_clouds(
    list_lwc_sem, "Standard error of the mean of the liquid water content", "g m^{-3}"
)
da_lwc_50um = _concat_over_clouds(list_lwc_50um, "Rain Water Content", "g m^{-3}")
da_lwc_50um_sem = _concat_over_clouds(
    list_lwc_50um_sem, "Standard error of the mean of the Rain Water Content", "g m^{-3}"
)
da_nbc = _concat_over_clouds(list_nbc, "Number concentration", "m^{-3}")
da_nbc_sem = _concat_over_clouds(
    list_nbc_sem, "Standard error of the mean of the number concentration", "m^{-3}"
)
da_nbc_50um = _concat_over_clouds(list_nbc_50um, "Number concentration above 50 µm", "m^{-3}")
da_nbc_50um_sem = _concat_over_clouds(
    list_nbc_50um_sem,
    "Standard error of the mean of the number concentration above 50 µm",
    "m^{-3}",
)

# All per-cloud statistics in one dataset indexed by cloud_id.
ds_observations_backup = xr.Dataset(
    dict(
        liquid_water_content=da_lwc,
        liquid_water_content_sem=da_lwc_sem,
        liquid_water_content_50um=da_lwc_50um,
        liquid_water_content_50um_sem=da_lwc_50um_sem,
        number_concentration=da_nbc,
        number_concentration_sem=da_nbc_sem,
        number_concentration_50um=da_nbc_50um,
        number_concentration_50um_sem=da_nbc_50um_sem,
    )
)

In [None]:
# ds_observations_backup["liquid_water_content"].plot()
# ds_observations_backup["liquid_water_content_50um"].plot()

In [None]:
# Work on the observation statistics computed above (same object, no copy).
ds_observations = ds_observations_backup

# Load the CLEO model output for the "null_microphysics" setup.  Calling the
# loader returns a pair: the dataset and a second dataset used below as its
# standard error (``ds_cleo_sem``).
cleo_dataset = data_loading.CleoDataset(data_dir=CLEO_data_dir, microphysics=("null_microphysics",))
ds_cleo, ds_cleo_sem = cleo_dataset()

# Scale the model liquid water content by 1e3 and relabel it in g m^-3.
lwc_attrs = {
    **ds_cleo["liquid_water_content"].attrs,
    "units": "g m^{-3}",
    "long_name": "Rain Water Content",
}
ds_cleo["liquid_water_content"] = ds_cleo["liquid_water_content"] * 1e3
ds_cleo["liquid_water_content"].attrs = lwc_attrs

# Keep only the clouds present in the valid id list, the observations and the
# model output, in ascending id order.
observed_ids = set(ds_observations["cloud_id"].data)
modelled_ids = set(ds_cleo["cloud_id"].data)
ids = sorted(set(valid_cloud_ids) & (observed_ids & modelled_ids))

ds_observations = ds_observations.sel(cloud_id=ids)
ds_cleo = ds_cleo.sel(cloud_id=ids)
ds_cleo_sem = ds_cleo_sem.sel(cloud_id=ids)

null_microphysics


In [None]:
# Observed rain water content ("liquid_water_content_50um") against the modelled
# cloud liquid water content, one point per cloud, on linear axes.
x = ds_observations["liquid_water_content_50um"]
xerr = ds_observations["liquid_water_content_50um_sem"]
y = ds_cleo["cloud_liquid_water_content"].sel(microphysics="null_microphysics")
yerr = ds_cleo_sem["cloud_liquid_water_content"].sel(microphysics="null_microphysics")

# Mark the model variable as such in the axis label.
y.attrs.update(long_name="Model " + y.attrs["long_name"])

# Pearson correlation across clouds, on the raw values and on their logarithms.
# The 1e-28 offset keeps the logarithm finite where a value is exactly zero.
corr = xr.corr(x, y, dim="cloud_id")
corr_loglog = xr.corr(np.log(x + 1e-28), np.log(y + 1e-28), dim="cloud_id")

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 4))

ax.scatter(x=x, y=y, marker=".", color="k", alpha=0.75)
# Faint error bars behind the points.
ax.errorbar(x=x, y=y, xerr=xerr, yerr=yerr, fmt=".", color="k", alpha=0.1)

ax.set_xlim(0, 1)
ax.set_ylim(0, 1)

plot_one_one(ax, color="grey", linestyle="--")
ax.set_xlabel(label_from_attrs(x, name_width=25))
ax.set_ylabel(label_from_attrs(y, name_width=20))
ax.set_title(f"Correlation: {corr.values:.2f}\nCorrelation log-log: {corr_loglog.values:.2f}")
fig.tight_layout()

# The "-linear" suffix keeps this figure from being overwritten by the symlog
# version of the same variable pair, which is saved as f"{x.name}-{y.name}".
save_figure(fig, fig_dir / f"{x.name}-{y.name}-linear")

In [None]:
# Observed rain water content against the modelled cloud liquid water content,
# one point per cloud, on symmetric-log axes.
rwc_obs = ds_observations["liquid_water_content_50um"]
rwc_obs_sem = ds_observations["liquid_water_content_50um_sem"]
lwc_model = ds_cleo["cloud_liquid_water_content"].sel(microphysics="null_microphysics")
lwc_model_sem = ds_cleo_sem["cloud_liquid_water_content"].sel(microphysics="null_microphysics")

# Mark the model variable as such in the axis label.
lwc_model.attrs["long_name"] = "Model " + lwc_model.attrs["long_name"]

# Pearson correlation across clouds, raw and in log space (offset avoids log(0)).
pearson = xr.corr(rwc_obs, lwc_model, dim="cloud_id")
pearson_loglog = xr.corr(np.log(rwc_obs + 1e-28), np.log(lwc_model + 1e-28), dim="cloud_id")

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 4))

# Points and their (faint) error bars share positions and color.
shared_style = dict(x=rwc_obs, y=lwc_model, color="k")
ax.scatter(marker=".", alpha=0.75, **shared_style)
ax.errorbar(xerr=rwc_obs_sem, yerr=lwc_model_sem, fmt=".", alpha=0.1, **shared_style)

# Symmetric-log scaling with a linear region below 1e-4 on both axes.
for set_scale in (ax.set_xscale, ax.set_yscale):
    set_scale("symlog", linthresh=1e-4)
ax.set_xlim(0, 5)
ax.set_ylim(0, 5)

plot_one_one(ax, color="grey", linestyle="--")
ax.set_xlabel(label_from_attrs(rwc_obs, name_width=25))
ax.set_ylabel(label_from_attrs(lwc_model, name_width=20))
ax.set_title(f"Correlation: {pearson.values:.2f}\nCorrelation log-log: {pearson_loglog.values:.2f}")
fig.tight_layout()

save_figure(fig, fig_dir / f"{rwc_obs.name}-{lwc_model.name}")

In [None]:
# Observed liquid water content against the modelled cloud liquid water content,
# one point per cloud, on symmetric-log axes.
lwc_obs = ds_observations["liquid_water_content"]
lwc_obs_sem = ds_observations["liquid_water_content_sem"]
lwc_model = ds_cleo["cloud_liquid_water_content"].sel(microphysics="null_microphysics")
lwc_model.attrs["long_name"] = "Model " + lwc_model.attrs["long_name"]
lwc_model_sem = ds_cleo_sem["cloud_liquid_water_content"].sel(microphysics="null_microphysics")

# Pearson correlation across clouds, raw and in log space (offset avoids log(0)).
pearson = xr.corr(lwc_obs, lwc_model, dim="cloud_id")
pearson_loglog = xr.corr(np.log(lwc_obs + 1e-28), np.log(lwc_model + 1e-28), dim="cloud_id")

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 4))

# Solid points first, then faint error bars at the same positions.
ax.scatter(x=lwc_obs, y=lwc_model, marker=".", color="k", alpha=0.75)
ax.errorbar(
    x=lwc_obs,
    y=lwc_model,
    xerr=lwc_obs_sem,
    yerr=lwc_model_sem,
    fmt=".",
    color="k",
    alpha=0.1,
)

# Symmetric-log scaling with a linear region below 1e-4 on both axes.
symlog_options = dict(linthresh=1e-4)
ax.set_xscale("symlog", **symlog_options)
ax.set_yscale("symlog", **symlog_options)
ax.set_xlim(0, 5)
ax.set_ylim(0, 5)
plot_one_one(ax, color="grey", linestyle="--")

ax.set_xlabel(label_from_attrs(lwc_obs, name_width=25))
ax.set_ylabel(label_from_attrs(lwc_model, name_width=20))
ax.set_title(f"Correlation: {pearson.values:.2f}\nCorrelation log-log: {pearson_loglog.values:.2f}")
fig.tight_layout()

save_figure(fig, fig_dir / f"{lwc_obs.name}-{lwc_model.name}")

In [None]:
# Two observed quantities against each other, one point per cloud, on
# symmetric-log axes: liquid water content (x) and rain water content (y).
lwc_obs, lwc_obs_sem = (
    ds_observations["liquid_water_content"],
    ds_observations["liquid_water_content_sem"],
)
rwc_obs, rwc_obs_sem = (
    ds_observations["liquid_water_content_50um"],
    ds_observations["liquid_water_content_50um_sem"],
)

# Pearson correlation across clouds, raw and in log space (offset avoids log(0)).
pearson = xr.corr(lwc_obs, rwc_obs, dim="cloud_id")
pearson_loglog = xr.corr(np.log(lwc_obs + 1e-28), np.log(rwc_obs + 1e-28), dim="cloud_id")

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 4))

# Solid points first, then faint error bars at the same positions.
positions = dict(x=lwc_obs, y=rwc_obs)
ax.scatter(marker=".", color="k", alpha=0.75, **positions)
ax.errorbar(xerr=lwc_obs_sem, yerr=rwc_obs_sem, fmt=".", color="k", alpha=0.1, **positions)

# Symmetric-log scaling with a linear region below 1e-4 on both axes.
ax.set_xscale("symlog", linthresh=1e-4)
ax.set_yscale("symlog", linthresh=1e-4)
ax.set_xlim(0, 5)
ax.set_ylim(0, 5)
plot_one_one(ax, color="grey", linestyle="--")

ax.set_xlabel(label_from_attrs(lwc_obs, name_width=25))
ax.set_ylabel(label_from_attrs(rwc_obs))
ax.set_title(f"Correlation: {pearson.values:.2f}\nCorrelation log-log: {pearson_loglog.values:.2f}")
fig.tight_layout()

save_figure(fig, fig_dir / f"{lwc_obs.name}-{rwc_obs.name}")

In [None]:
# Observed number concentration above 50 µm against the modelled number
# concentration in the ``max_gridbox`` gridbox, one point per cloud, linear axes.
x = ds_observations["number_concentration_50um"]
xerr = ds_observations["number_concentration_50um_sem"]

# Model: multiplicity per gridbox volume, taken in ``max_gridbox`` and summed over
# all radius bins.  The quotient of two differently named arrays has no name, so
# it is named explicitly; otherwise the saved file name would contain "None".
# NOTE(review): x only counts radii >= 50e-6 while y sums every model radius
# bin -- confirm that the model bins cover the same size range.
y = (
    (ds_cleo["xi_temporal_mean"] / ds_cleo["gridbox_volume"])
    .sel(gridbox=ds_cleo["max_gridbox"])
    .sum("radius_bins")
    .sel(microphysics="null_microphysics")
    .rename("number_concentration")
)
y.attrs.update(units="m^{-3}", long_name="Number concentration")
# Same reduction applied to the standard-error dataset.
yerr = (
    (ds_cleo_sem["xi_temporal_mean"] / ds_cleo_sem["gridbox_volume"])
    .sel(gridbox=ds_cleo_sem["max_gridbox"])
    .sum("radius_bins")
).sel(microphysics="null_microphysics")

# Pearson correlation across clouds, raw and in log space (offset avoids log(0)).
corr = xr.corr(x + 1e-28, y + 1e-28, dim="cloud_id")
corr_loglog = xr.corr(np.log(x + 1e-28), np.log(y + 1e-28), dim="cloud_id")

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 4))

ax.scatter(x=x, y=y, marker=".", color="k", alpha=0.75)
# Faint error bars behind the points.
ax.errorbar(x=x, y=y, xerr=xerr, yerr=yerr, fmt=".", color="k", alpha=0.1)

ax.set_xlim(0, 2e4)
ax.set_ylim(0, 2e4)
plot_one_one(ax, color="grey", linestyle="--")
ax.set_xlabel(label_from_attrs(x, name_width=25))
ax.set_ylabel(label_from_attrs(y, name_width=20))
ax.set_title(f"Correlation: {corr.values:.2f}\nCorrelation log-log: {corr_loglog.values:.2f}")
fig.tight_layout()

# The "-linear" suffix keeps this figure separate from the log-log version of
# the same variable pair.
save_figure(fig, fig_dir / f"{x.name}-{y.name}-linear")

In [None]:
# Observed number concentration above 50 µm against the modelled number
# concentration in the ``max_gridbox`` gridbox, one point per cloud, log-log axes.
x = ds_observations["number_concentration_50um"]
xerr = ds_observations["number_concentration_50um_sem"]

# Model: multiplicity per gridbox volume, taken in ``max_gridbox`` and summed over
# all radius bins.  The quotient of two differently named arrays has no name, so
# it is named explicitly; otherwise the saved file name would contain "None".
# NOTE(review): x only counts radii >= 50e-6 while y sums every model radius
# bin -- confirm that the model bins cover the same size range.
y = (
    (ds_cleo["xi_temporal_mean"] / ds_cleo["gridbox_volume"])
    .sel(gridbox=ds_cleo["max_gridbox"])
    .sum("radius_bins")
    .sel(microphysics="null_microphysics")
    .rename("number_concentration")
)
y.attrs.update(units="m^{-3}", long_name="Number concentration")
# Same reduction applied to the standard-error dataset.
yerr = (
    (ds_cleo_sem["xi_temporal_mean"] / ds_cleo_sem["gridbox_volume"])
    .sel(gridbox=ds_cleo_sem["max_gridbox"])
    .sum("radius_bins")
).sel(microphysics="null_microphysics")

# Pearson correlation across clouds, raw and in log space (offset avoids log(0)).
corr = xr.corr(x + 1e-28, y + 1e-28, dim="cloud_id")
corr_loglog = xr.corr(np.log(x + 1e-28), np.log(y + 1e-28), dim="cloud_id")

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 4))

ax.scatter(x=x, y=y, marker=".", color="k", alpha=0.75)
# Faint error bars behind the points.
ax.errorbar(x=x, y=y, xerr=xerr, yerr=yerr, fmt=".", color="k", alpha=0.1)

# Logarithmic scaling on both axes; values of exactly zero are not shown.
ax.loglog()
ax.set_xlim(1e-1, 2e4)
ax.set_ylim(1e-1, 2e4)
plot_one_one(ax, color="grey", linestyle="--")
ax.set_xlabel(label_from_attrs(x, name_width=25))
ax.set_ylabel(label_from_attrs(y, name_width=20))
ax.set_title(f"Correlation: {corr.values:.2f}\nCorrelation log-log: {corr_loglog.values:.2f}")
fig.tight_layout()

save_figure(fig, fig_dir / f"{x.name}-{y.name}")

In [None]:
# Particle size distribution parameters from the model input directory, restricted
# to the clouds in ``ids``.  Two parameter files are read: the default one and
# the "linear_space" one.
ds_parameters = xr.open_dataset(
    input_data_dir / "particle_size_distribution_parameters.nc"
).sel(cloud_id=ids)

ds_parameters_linear = xr.open_dataset(
    input_data_dir / "particle_size_distribution_parameters_linear_space.nc"
).sel(cloud_id=ids)

In [None]:
# Geometrically spaced radius grid with 100 points between 50e-6 and 3e-3.
radius = np.geomspace(50e-6, 3e-3, 100)

# The grid as a DataArray (values equal to its own coordinate), repeated for
# every cloud id of the model output.
t_test = xr.DataArray(radius, dims="radius", coords={"radius": radius}).expand_dims(
    cloud_id=ds_cleo["cloud_id"]
)

# Bin widths from central differences: half the distance between the two
# neighbouring grid points.  The first and last point have only one neighbour
# (NaN after the shift) and are filled by linear extrapolation.
radius_coord = t_test["radius"]
next_radius = radius_coord.shift(radius=-1)
previous_radius = radius_coord.shift(radius=1)
w_test = (next_radius - previous_radius) / 2
w_test = w_test.interpolate_na("radius", method="linear", fill_value="extrapolate")

In [None]:
# Evaluate the double log-normal distribution on the test radius grid using the
# stored parameters of every cloud.
ln_normal_parameters = {
    name: ds_parameters[name]
    for name in ("mu1", "mu2", "sigma1", "sigma2", "scale_factor1", "scale_factor2")
}
ds_fitted: xr.DataArray = smodels.double_ln_normal_distribution(t=t_test, **ln_normal_parameters)

# Distribution times bin width, and the mass size distribution derived from it.
fitted_psd = ds_fitted * w_test
fitted_msd = msd_from_psd_dataarray(ds_fitted * w_test)

In [None]:
# Evaluate the double log-normal distribution on the test radius grid using the
# "linear space" parameter set (geometric means and geometric standard deviations).
# Maps each keyword of the model function to its variable in the parameter file.
geometric_parameter_names = dict(
    mu1="geometric_mean1",
    mu2="geometric_mean2",
    sigma1="geometric_std_dev1",
    sigma2="geometric_std_dev2",
    scale1="scale_factor1",
    scale2="scale_factor2",
)
ds_fitted_linear: xr.DataArray = smodels.double_log_normal_distribution_all(
    x=t_test,
    **{
        keyword: ds_parameters_linear[variable]
        for keyword, variable in geometric_parameter_names.items()
    },
    parameter_space="geometric",
    x_space="linear",
)

# Distribution times bin width, and the mass size distribution derived from it.
fitted_linear_psd = ds_fitted_linear * w_test
fitted_linear_msd = msd_from_psd_dataarray(ds_fitted_linear * w_test)

In [None]:
# Parameters of the potential temperature and relative humidity profiles, read
# from the model input directory.
ds_potential_temperature_parameters, ds_relative_humidity_parameters = (
    xr.open_dataset(input_data_dir / file_name)
    for file_name in (
        "potential_temperature_parameters.nc",
        "relative_humidity_parameters.nc",
    )
)

In [None]:
def _split_linear_profile(ds_profile_parameters):
    """Evaluate ``smodels.split_linear_func`` on the dropsonde altitudes.

    The parameters ``f_0``, ``slope_1``, ``slope_2`` and ``x_split`` are read from
    ``ds_profile_parameters``; the result is restricted to altitudes 0 to 1200.
    """
    profile = smodels.split_linear_func(
        x=dropsonde["altitude"],
        **{
            name: ds_profile_parameters[name]
            for name in ("f_0", "slope_1", "slope_2", "x_split")
        },
    )
    return profile.sel(altitude=slice(0, 1200))


da_potential_temperature = _split_linear_profile(ds_potential_temperature_parameters)
da_relative_humidity = _split_linear_profile(ds_relative_humidity_parameters)

<!-- # Random cloud example -->

In [None]:
# Three-panel figure for one example cloud: particle size distribution (left),
# potential temperature profile (middle) and relative humidity profile (right).

# Fixed seed so that the same cloud is drawn on every run.
# NOTE(review): the cloud is drawn from ``valid_cloud_ids`` while ``ds_cleo`` and the
# fitted datasets were restricted to ``ids``; a drawn id that is missing from ``ids``
# would make the ``.sel(cloud_id=cloud_id)`` calls below fail -- confirm both agree.
np.random.seed(72)  # cloud_id = 470
# np.random.seed(1)
# np.random.seed(2)
# np.random.seed(3)
cloud_id = np.random.choice(valid_cloud_ids, 1)[0]
# cloud_id = 556

# The PSD panel is twice as wide as each of the two profile panels.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(8, 3), width_ratios=(1, 0.5, 0.5))

ax_psd: plt.Axes = axs[0]
ax_pt: plt.Axes = axs[1]
ax_rh: plt.Axes = axs[2]

# PARTICLE SIZE DISTRIBUTION

# Cloud composite measurements that belong to the selected cloud.
ds_observed_cloud = match_clouds_and_cloudcomposite(
    ds_clouds=identified_clusters.sel(cloud_id=cloud_id),
    ds_cloudcomposite=cloud_composite,
)
# Radius scaled by 1e6 for plotting (the axis is labelled in µm below).
x_observed = ds_observed_cloud["radius"] * 1e6
y_observed = ds_observed_cloud["particle_size_distribution"]


# Model output in the ``max_gridbox`` gridbox for the selected cloud.
ds_cleo_cloud = ds_cleo.sel(gridbox=ds_cleo["max_gridbox"])
ds_cleo_cloud = ds_cleo_cloud.sel(cloud_id=cloud_id)
x_cleo = ds_cleo_cloud["radius_bins"]

# Bin widths of the model radius bins from central differences; the two edge
# bins (NaN after the shift) are filled by linear extrapolation.
w_cleo = (x_cleo.shift(radius_bins=-1) - x_cleo.shift(radius_bins=1)) / 2
w_cleo = w_cleo.interpolate_na("radius_bins", method="linear", fill_value="extrapolate")
w_cleo = w_cleo * 1e-6  # convert µm to m

# Multiplicity per bin width and gridbox volume, for the first microphysics setup.
y_cleo = ds_cleo_cloud["xi_temporal_mean"] / w_cleo / ds_cleo_cloud["gridbox_volume"]
y_cleo = y_cleo.isel(microphysics=0)
y_cleo.attrs.update(
    units="m^{-3} m^{-1}",
    long_name="Number concentration",
)

# NOTE(review): the fitted PSD (x_fitted, y_fitted) is prepared here but is not
# drawn on ax_psd anywhere in this cell.
ds_fitted_cloud = ds_fitted_linear.sel(cloud_id=cloud_id)
y_fitted = ds_fitted_cloud
x_fitted = ds_fitted_cloud["radius"] * 1e6
# ax_psd.set_title("Observed PSD")

# plot observed and fitted PSD
# Individual measurements as faint grey dots ...
ax_psd.plot(
    x_observed,
    y_observed,
    linestyle="",
    marker=".",
    markersize=2,
    color="grey",
    alpha=0.3,
)
# ... and their temporal mean as a black line with markers.
ax_psd.plot(
    x_observed,
    y_observed.mean("time"),
    linestyle="-",
    linewidth=1,
    marker="o",
    markersize=3,
    color="k",
    alpha=0.75,
)

# Model PSD as a dashed red line.
ax_psd.plot(x_cleo, y_cleo.T, linestyle="--", color="red")

# Vertical dotted line at the model's "cloud_mass_radius_mean".
# NOTE(review): confirm this variable is in the same units as the µm x-axis.
ax_psd.axvline(ds_cleo_cloud["cloud_mass_radius_mean"], color="k", linestyle=":")


ax_psd.set_xscale("log")

# ax_psd.set_yscale("log")
# ax_psd.set_ylim(1e-2, 1e12)

ax_psd.set_ylim(0, 20e6)
# ax_psd.set_xlim(0, 3000)
# ax_psd.set_ylim(0, 5e6)
ax_psd.set_xlabel(r"Radius $[\mu m]$")
ax_psd.set_ylabel(r"PSD $[m^{-3} m^{-1}]$")


# DROPSONDE

# Dropsondes matched to the selected cloud within the given temporal (3 h) and
# spatial (100, units as expected by the helper) distance, limited to 0-1200 altitude.
ds_dropsonde_cloud = match_clouds_and_dropsondes(
    ds_clouds=identified_clusters.sel(cloud_id=cloud_id),
    ds_sonde=dropsonde,
    ds_distance=ds_distances,
    max_temporal_distance=np.timedelta64("3", "h"),
    max_spatial_distance=100,
)
ds_dropsonde_cloud = ds_dropsonde_cloud.sel(altitude=slice(0, 1200))


# Potential temperature panel: profiles are plotted with altitude on the y-axis.
y_observed = ds_dropsonde_cloud["altitude"]
x_observed = ds_dropsonde_cloud["potential_temperature"].transpose(..., "time")

# Model potential temperature; only used by the commented-out plot call below.
x_cleo = potential_temperature_from_temperature_pressure(
    air_temperature=ds_cleo["air_temperature"].sel(cloud_id=cloud_id),
    pressure=ds_cleo["pressure"].sel(cloud_id=cloud_id),
    pressure_reference=101300,
)
y_cleo = ds_cleo["gridbox_coord3"].sel(cloud_id=cloud_id)

# Split-linear profile evaluated from the stored parameters of this cloud.
x_fitted = da_potential_temperature.sel(cloud_id=cloud_id)
y_fitted = da_potential_temperature["altitude"]

# Individual dropsonde profiles in grey, their mean over time in black.
ax_pt.plot(
    x_observed,
    y_observed,
    linestyle="-",
    color="grey",
    alpha=0.3,
)
ax_pt.plot(x_observed.mean("time"), y_observed, linestyle="-", color="k", alpha=0.75, lw=2)
# ax_pt.plot(
#     x_cleo.T,
#     y_cleo.T,
#     linestyle = "--",
#     color = 'red'
# );

# Split-linear profile as a dashed red line.
ax_pt.plot(x_fitted.T, y_fitted.T, linestyle="--", color="red")

ax_pt.set_xlabel("Pot. Temp. $[K]$")
ax_pt.set_ylabel("Altitude $[m]$")

# Relative humidity panel, built the same way as the potential temperature panel.
y_observed = ds_dropsonde_cloud["altitude"]
x_observed = ds_dropsonde_cloud["relative_humidity"].transpose(..., "time")

# Model relative humidity; only used by the commented-out plot call below.
x_cleo = relative_humidity_from_tps(
    temperature=ds_cleo["air_temperature"].sel(cloud_id=cloud_id),
    pressure=ds_cleo["pressure"].sel(cloud_id=cloud_id),
    specific_humidity=ds_cleo["specific_mass_vapour"].sel(cloud_id=cloud_id),
)
y_cleo = ds_cleo["gridbox_coord3"].sel(cloud_id=cloud_id)

x_fitted = da_relative_humidity.sel(cloud_id=cloud_id)
y_fitted = da_relative_humidity["altitude"]

ax_rh.plot(
    x_observed,
    y_observed,
    linestyle="-",
    color="grey",
    alpha=0.3,
)
ax_rh.plot(x_observed.mean("time"), y_observed, linestyle="-", color="k", alpha=0.75, lw=2)
# ax_rh.plot(
#     x_cleo.T,
#     y_cleo.T,
#     linestyle = "--",
#     color = 'red'
# );
ax_rh.plot(x_fitted.T, y_fitted.T, linestyle="--", color="red")

ax_rh.set_xlabel(r"Rel. Hum. $[\%]$")
# Reuse the tick positions of the potential temperature panel with empty labels,
# so the altitude ticks are drawn but not labelled a second time.
yticks = ax_pt.get_yticks()
ax_rh.set_yticks(yticks, np.full_like(yticks, "", dtype=str))
# ax_rh.set_ylabel(label_from_attrs(y_observed))

for _ax in [ax_pt, ax_rh]:
    # Red dotted line: "x_split" of the relative humidity parameters for this
    # cloud (drawn on both profile panels).
    _ax.axhline(
        ds_relative_humidity_parameters["x_split"].sel(cloud_id=cloud_id), color="red", linestyle=":"
    )

    # Black dotted line: mean altitude of the cloud composite measurements.
    _ax.axhline(ds_observed_cloud["altitude"].mean("time"), color="k", linestyle=":")
    _ax.set_ylim(0, 1200)

# fig.tight_layout()
# ax_pt.set_ylim(0, None)

add_subplotlabel(axs=axs)

# Panel titles give the number of time steps that entered each panel.
ax_psd.set_title(f"Measurements {len(ds_observed_cloud['time'].data)}")
ax_pt.set_title(f"Measurements {len(ds_dropsonde_cloud['time'].data)}")
ax_rh.set_title(f"Measurements {len(ds_dropsonde_cloud['time'].data)}")

fig.tight_layout()
save_figure(fig, fig_dir / f"observations-model-psd-pt-rh-cloud_{cloud_id}")