In [None]:
from ruamel.yaml import YAML
import numpy as np
import xarray as xr
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import seaborn as sns

# Cubehelix colormap built with seaborn (`as_cmap=True` returns a colormap
# object instead of a list of colors).
# NOTE(review): `strength_cmap` is not referenced in any of the visible cells —
# confirm it is still needed, and consider moving it below the import block.
strength_cmap = sns.cubehelix_palette(start=0.5, rot=-0.5, as_cmap=True)

from sdm_eurec4a.visulization import (
    set_custom_rcParams,
    set_paper_rcParams,
    label_from_attrs,
    adjust_lightness_array,
    plot_one_one,
    handler_map_alpha,
    save_figure,
    add_subplotlabel,
)
from sdm_eurec4a import RepositoryPath
from sdm_eurec4a import data_loading
from sdm_eurec4a.reductions import mean_and_stderror_of_mean
from sdm_eurec4a.conversions import (
    msd_from_psd_dataarray,
    potential_temperature_from_temperature_pressure,
    relative_humidity_from_tps,
    temperature_from_potential_temperature_pressure,
)
from sdm_eurec4a.input_processing import models as smodels
from sdm_eurec4a.identifications import match_clouds_and_cloudcomposite, match_clouds_and_dropsondes
from sdm_eurec4a.constants import TimeSlices


# --- Plot style -------------------------------------------------------------
# `set_paper_rcParams` is called for its return value, which is used as the
# color list below; a second list is derived from it with
# `adjust_lightness_array` (factor 0.75).
default_colors = set_paper_rcParams()
default_dark_colors = adjust_lightness_array(default_colors, 0.75)

# --- Data locations ---------------------------------------------------------
RepoPaths = RepositoryPath("levante")

OBS_data_dir = RepoPaths.data_dir
input_data_dir = OBS_data_dir / Path("model/input_v4.2")

# CLEO output directories of the different runs. Only `CLEO_data_dir` is read
# in the visible cells; the older versions are kept for reference.
_cleo_root = RepoPaths.CLEO_data_dir
CLEO_data_dir = _cleo_root / Path("output_v4.4-CLEO_v0.39.7-input_v4.2")
CLEO_data_dir_v43 = _cleo_root / Path("output_v4.3-CLEO_v0.39.7-input_v4.2")
CLEO_data_dir_v42 = _cleo_root / Path("output_v4.2")
CLEO_data_dir_v41 = _cleo_root / Path("output_v4.1")
CLEO_data_dir_v40 = _cleo_root / Path("output_v4.0")

# --- Figure output ----------------------------------------------------------
master_fig_dir = RepoPaths.fig_dir / "paper-v4.4"
fig_dir = master_fig_dir / "observations"
appendix_fig_dir = fig_dir / "appendix"

# Create the directories from the outermost to the innermost one.
# `parents=False` is kept on purpose: a missing `RepoPaths.fig_dir` raises
# instead of silently creating a directory tree in an unexpected place.
for _directory in (master_fig_dir, fig_dir, appendix_fig_dir):
    _directory.mkdir(parents=False, exist_ok=True)

<!-- ## Data loading -->

In [None]:
# Locations of the processed observation products below `OBS_data_dir`
cloud_composite_path = OBS_data_dir / Path(
    "observation/cloud_composite/processed/cloud_composite_SI_units_20241025.nc"
)
dropsonde_path = OBS_data_dir / Path("observation/dropsonde/processed/drop_sondes.nc")
distances_path = OBS_data_dir / Path(
    "observation/combined/distance/distance_dropsondes_identified_clusters_rain_mask_5.nc"
)
identified_clusters_path = OBS_data_dir / Path(
    "observation/cloud_composite/processed/identified_clusters/identified_clusters_rain_mask_5.nc"
)

cloud_composite = xr.open_dataset(cloud_composite_path)
dropsonde = xr.open_dataset(dropsonde_path)
ds_distances = xr.open_dataset(distances_path)

# The clusters are stored along "time"; switch the dimension to "cloud_id" so
# that individual clusters can be selected by their id later on.
identified_clusters = xr.open_dataset(identified_clusters_path).swap_dims({"time": "cloud_id"})

# Broadcast the radius coordinate along time ("radius2D") and move the radius
# dimension to the front of every variable.
cloud_composite["radius2D"] = cloud_composite["radius"].expand_dims(time=cloud_composite["time"])
cloud_composite = cloud_composite.transpose("radius", ...)

# Rescale the mass size distribution by 1e3 (presumably kg -> g, given the new
# unit string). The liquid water content is NOT rescaled here.
# The attributes are copied before and re-attached after the multiplication,
# as in the other rescaling steps of this notebook.
# The unit is written under both "unit" and "units": this notebook uses both
# spellings, and writing only one could leave a stale value under the other.
# NOTE(review): confirm which spelling the stored file uses and drop the other.
attrs = cloud_composite["mass_size_distribution"].attrs.copy()
attrs.update({"unit": "g m^{-3} m^{-1}", "units": "g m^{-3} m^{-1}"})
cloud_composite["mass_size_distribution"] = cloud_composite["mass_size_distribution"] * 1e3
cloud_composite["mass_size_distribution"].attrs = attrs

# Drop all radius bins below 10e-6 (radius is in metres; it is scaled by 1e6
# to micrometres for plotting further down).
cloud_composite = cloud_composite.sel(radius=slice(10e-6, None))

# Keep only clusters with a duration of at least 3 s whose altitude lies
# strictly between 500 and 1200 (presumably metres — TODO confirm).
# NOTE(review): `.dt.seconds` is the seconds *component* of the timedelta
# (always below one day), not the total duration in seconds.
_is_long_enough = identified_clusters.duration.dt.seconds >= 3
_is_in_altitude_band = (identified_clusters.altitude > 500) & (identified_clusters.altitude < 1200)

identified_clusters = identified_clusters.where(
    _is_long_enough & _is_in_altitude_band,
    drop=True,
)
# For quick tests, restrict to the first clusters:
# identified_clusters = identified_clusters.isel(cloud_id=slice(0, 20))

In [None]:
# Loader for the CLEO output of the "null_microphysics" setup
cleo_dataset = data_loading.CleoDataset(
    data_dir=CLEO_data_dir,
    microphysics=("null_microphysics",),
)
# Calling the loader returns two datasets: the CLEO output and a second one,
# presumably the standard error of the mean (judging by its name — TODO confirm).
ds_cleo, ds_cleo_sem = cleo_dataset()

# Rescale the liquid water content by 1e3 and relabel it in g m^-3.
# The attributes are built first and re-attached after the multiplication.
_cleo_lwc = ds_cleo["liquid_water_content"]
attrs = {
    **_cleo_lwc.attrs,
    "units": "g m^{-3}",
    "long_name": "Rain Water Content",
}

ds_cleo["liquid_water_content"] = _cleo_lwc * 1e3
ds_cleo["liquid_water_content"].attrs = attrs

null_microphysics


In [None]:
# load valid cloud ids
# Read the list of valid cloud ids that is stored next to the CLEO output.
# The "safe" loader is requested explicitly; when `typ` is not specified,
# ruamel uses the round-trip ('rt') loader.
yaml = YAML(typ="safe")
valid_cloud_ids_path = CLEO_data_dir / Path("valid_cloud_ids.yaml")
d = yaml.load(valid_cloud_ids_path)
valid_cloud_ids = d["valid_cloud_ids"]

# Restrict observations and model output to the same set of clouds
identified_clusters = identified_clusters.sel(cloud_id=valid_cloud_ids)
ds_cleo = ds_cleo.sel(cloud_id=valid_cloud_ids)

# Obtain Fit Parameters from files

In [None]:
# Stored particle size distribution parameters, in two variants
# (the second file is the "linear space" version).
psd_parameters_path = input_data_dir / "particle_size_distribution_parameters.nc"
psd_parameters_linear_path = input_data_dir / "particle_size_distribution_parameters_linear_space.nc"

ds_parameters = xr.open_dataset(psd_parameters_path)
ds_parameters_linear = xr.open_dataset(psd_parameters_linear_path)

In [None]:
# Geometrically spaced radius grid (100 points from 50e-6 to 3e-3; in metres,
# consistent with the 1e6 scaling to micrometres used for plotting).
radius = np.geomspace(50e-6, 3e-3, 100)

# Evaluation points, one copy of the radius grid per cloud
t_test = xr.DataArray(radius, dims="radius", coords={"radius": radius}).expand_dims(
    cloud_id=ds_cleo["cloud_id"]
)

# Bin widths from central differences of the radius coordinate. The shifts
# leave NaN at the first and last bin; these are filled by linear extrapolation.
_radius_coord = t_test["radius"]
_central_difference = (_radius_coord.shift(radius=-1) - _radius_coord.shift(radius=1)) / 2
w_test = _central_difference.interpolate_na("radius", method="linear", fill_value="extrapolate")

In [None]:
# fit the double log-normal distribution
# Evaluate the double ln-normal distribution on the test radius grid, using
# the parameters stored in `ds_parameters` (no fitting happens here).
# The keyword names of the model function match the variable names in the file.
_ln_normal_parameter_names = (
    "mu1",
    "mu2",
    "sigma1",
    "sigma2",
    "scale_factor1",
    "scale_factor2",
)
ds_fitted: xr.DataArray = smodels.double_ln_normal_distribution(
    t=t_test,
    **{name: ds_parameters[name] for name in _ln_normal_parameter_names},
)

# Multiply by the bin width (presumably turning the per-radius density into a
# per-bin quantity — TODO confirm) and derive the mass size distribution.
fitted_psd = ds_fitted * w_test
fitted_msd = msd_from_psd_dataarray(ds_fitted * w_test)

In [None]:
# fit the double log-normal distribution
# Evaluate the double log-normal distribution from the linear-space parameter
# file on the test radius grid (no fitting happens here).
# Maps each keyword of the model function to the variable that feeds it.
_geometric_parameter_map = {
    "mu1": "geometric_mean1",
    "mu2": "geometric_mean2",
    "sigma1": "geometric_std_dev1",
    "sigma2": "geometric_std_dev2",
    "scale1": "scale_factor1",
    "scale2": "scale_factor2",
}
ds_fitted_linear: xr.DataArray = smodels.double_log_normal_distribution_all(
    x=t_test,
    parameter_space="geometric",
    x_space="linear",
    **{
        keyword: ds_parameters_linear[variable]
        for keyword, variable in _geometric_parameter_map.items()
    },
)

# Same post-processing as for the ln-normal variant: scale by the bin width
# and derive the mass size distribution.
fitted_linear_psd = ds_fitted_linear * w_test
fitted_linear_msd = msd_from_psd_dataarray(ds_fitted_linear * w_test)

In [None]:
# Stored parameters of the thermodynamic profile functions
potential_temperature_parameters_path = input_data_dir / "potential_temperature_parameters.nc"
pressure_parameters_path = input_data_dir / "pressure_parameters.nc"
relative_humidity_parameters_path = input_data_dir / "relative_humidity_parameters.nc"

ds_potential_temperature_parameters = xr.open_dataset(potential_temperature_parameters_path)
ds_pressure_parameters = xr.open_dataset(pressure_parameters_path)
ds_relative_humidity_parameters = xr.open_dataset(relative_humidity_parameters_path)

In [None]:
# Evaluate the stored profile functions on the dropsonde altitude grid and
# keep only altitudes between 0 and 1200.
_profile_altitude = dropsonde["altitude"]
_profile_altitude_range = slice(0, 1200)

# Parameters of the split-linear function (two slopes joined at `x_split`);
# the keyword names match the variable names in the parameter files.
_split_linear_parameter_names = ("f_0", "slope_1", "slope_2", "x_split")

da_potential_temperature: xr.DataArray = smodels.split_linear_func(
    x=_profile_altitude,
    **{name: ds_potential_temperature_parameters[name] for name in _split_linear_parameter_names},
).sel(altitude=_profile_altitude_range)

da_relative_humidity: xr.DataArray = smodels.split_linear_func(
    x=_profile_altitude,
    **{name: ds_relative_humidity_parameters[name] for name in _split_linear_parameter_names},
).sel(altitude=_profile_altitude_range)

# Pressure uses a single linear function
da_pressure: xr.DataArray = smodels.linear_func(
    x=_profile_altitude,
    slope=ds_pressure_parameters["slope"],
    f_0=ds_pressure_parameters["f_0"],
).sel(altitude=_profile_altitude_range)

<!-- # Random cloud example -->

In [None]:
def _plot_profile_panel(
    ax, x_observed, y_observed, x_cleo, y_cleo, x_fitted, y_fitted, color_index
):
    """Draw the observed soundings, their mean, the CLEO and the fitted profile on ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    x_observed, y_observed : observed quantity (with a "time" dimension, which
        is averaged for the mean line) and the altitude it is plotted against.
    x_cleo, y_cleo : CLEO quantity and the matching vertical coordinate.
    x_fitted, y_fitted : fitted quantity and the matching altitude.
    color_index : index into ``default_colors`` / ``default_dark_colors``.
    """
    light_color = default_colors[color_index]
    dark_color = default_dark_colors[color_index]

    # individual soundings
    ax.plot(x_observed, y_observed, linestyle="-", color=light_color, alpha=0.3)
    # mean over all soundings
    ax.plot(
        x_observed.mean("time"),
        y_observed,
        linestyle="-",
        color=dark_color,
        alpha=0.75,
        lw=1,
        label="Mean. Obs.",
    )
    ax.plot(x_cleo.T, y_cleo.T, linestyle="--", color=dark_color, lw=2, label="CLEO")
    ax.plot(x_fitted.T, y_fitted.T, linestyle="-", color=dark_color, lw=2, label="Fit")


def plot_4_thermo(cloud_id, pressure_reference):
    """Compare observations, fitted input and CLEO output for a single cloud.

    Creates a figure with four panels: the particle size distribution, and the
    vertical profiles of air temperature, potential temperature and relative
    humidity. Each panel shows the individual observations, their mean, the
    CLEO result and the fitted function.

    The function reads the notebook globals ``identified_clusters``,
    ``cloud_composite``, ``dropsonde``, ``ds_distances``, ``ds_cleo``,
    ``ds_fitted_linear``, ``da_potential_temperature``, ``da_pressure``,
    ``da_relative_humidity``, ``ds_relative_humidity_parameters`` and the color
    lists ``default_colors`` / ``default_dark_colors``.

    Parameters
    ----------
    cloud_id
        Label along the ``cloud_id`` dimension of the datasets listed above.
    pressure_reference
        Reference pressure passed to the (potential) temperature conversions.
        It is divided by 100 and labelled hPa in the figure title, i.e. it is
        expected in Pa.

    Returns
    -------
    fig, axs
        The figure and the array of its four axes (PSD, air temperature,
        potential temperature, relative humidity).
    """
    fig, axs = plt.subplots(nrows=1, ncols=4, width_ratios=(1, 0.5, 0.5, 0.5), figsize=(12, 4))
    ax_psd, ax_at, ax_pt, ax_rh = axs

    # ----------------------
    # MATCH OBSERVATIONS TO THE CLOUD
    # ----------------------

    ds_cloud = identified_clusters.sel(cloud_id=cloud_id)

    ds_observed_cloud = match_clouds_and_cloudcomposite(
        ds_clouds=ds_cloud,
        ds_cloudcomposite=cloud_composite,
    )

    ds_dropsonde_cloud = match_clouds_and_dropsondes(
        ds_clouds=ds_cloud,
        ds_sonde=dropsonde,
        ds_distance=ds_distances,
        # integer value instead of the string "3": same timedelta, no reliance
        # on numpy parsing a numeric string
        max_temporal_distance=np.timedelta64(3, "h"),
        max_spatial_distance=100,  # NOTE(review): unit not visible here — confirm
    )
    ds_dropsonde_cloud = ds_dropsonde_cloud.sel(altitude=slice(0, 1200))

    # ----------------------
    # PARTICLE SIZE DISTRIBUTION
    # ----------------------

    x_observed = ds_observed_cloud["radius"] * 1e6  # m -> µm
    y_observed = ds_observed_cloud["particle_size_distribution"]

    # CLEO output in the gridbox given by "max_gridbox" for this cloud
    ds_cleo_cloud = ds_cleo.sel(gridbox=ds_cleo["max_gridbox"])
    ds_cleo_cloud = ds_cleo_cloud.sel(cloud_id=cloud_id)
    x_cleo = ds_cleo_cloud["radius_bins"]

    # Bin widths from central differences; the NaN left at both ends by the
    # shifts are filled by linear extrapolation.
    w_cleo = (x_cleo.shift(radius_bins=-1) - x_cleo.shift(radius_bins=1)) / 2
    w_cleo = w_cleo.interpolate_na("radius_bins", method="linear", fill_value="extrapolate")
    w_cleo = w_cleo * 1e-6  # convert µm to m

    # Normalise by bin width and gridbox volume
    y_cleo = ds_cleo_cloud["xi_temporal_mean"] / w_cleo / ds_cleo_cloud["gridbox_volume"]
    y_cleo = y_cleo.isel(microphysics=0)
    y_cleo.attrs.update(
        units="m^{-3} m^{-1}",
        long_name="Number concentration",
    )

    y_fitted = ds_fitted_linear.sel(cloud_id=cloud_id)
    x_fitted = y_fitted["radius"] * 1e6  # m -> µm

    psd_color = default_colors[2]
    psd_dark_color = default_dark_colors[2]

    # individual measurements
    ax_psd.plot(
        x_observed,
        y_observed,
        linestyle="",
        marker=".",
        markersize=2,
        color=psd_color,
        alpha=0.3,
    )
    # mean over all measurements
    ax_psd.plot(
        x_observed,
        y_observed.mean("time"),
        linestyle="",
        linewidth=1,
        marker="o",
        markersize=3,
        color=psd_dark_color,
        alpha=0.75,
        label="Mean. Obs.",
    )
    ax_psd.plot(x_cleo, y_cleo.T, linestyle="--", color=psd_dark_color, lw=2, label="CLEO")
    ax_psd.plot(x_fitted, y_fitted.T, linestyle="-", color=psd_dark_color, lw=2, label="Fit")

    ax_psd.set_xscale("log")
    # symlog keeps zero-valued bins visible on an otherwise logarithmic axis
    ax_psd.set_yscale("symlog", linthresh=0.1, linscale=0.3)
    ax_psd.set_ylim(-0.05, 1e10)
    ax_psd.set_xlabel(r"Radius $[\mu m]$")
    ax_psd.set_ylabel(r"PSD $[m^{-3} m^{-1}]$")

    # ----------------------
    # SHARED PROFILE INPUT
    # ----------------------

    observed_altitude = ds_dropsonde_cloud["altitude"]

    cleo_altitude = ds_cleo["gridbox_coord3"].sel(cloud_id=cloud_id)
    cleo_temperature = ds_cleo["air_temperature"].sel(cloud_id=cloud_id)
    cleo_pressure = ds_cleo["pressure"].sel(cloud_id=cloud_id)

    fitted_potential_temperature = da_potential_temperature.sel(cloud_id=cloud_id)

    # ----------------------
    # AIR TEMPERATURE
    # ----------------------

    _plot_profile_panel(
        ax_at,
        x_observed=ds_dropsonde_cloud["air_temperature"].transpose(..., "time"),
        y_observed=observed_altitude,
        x_cleo=cleo_temperature,
        y_cleo=cleo_altitude,
        # the fit is stored as potential temperature; convert it back
        x_fitted=temperature_from_potential_temperature_pressure(
            potential_temperature=fitted_potential_temperature,
            pressure=da_pressure.sel(cloud_id=cloud_id),
            pressure_reference=pressure_reference,
        ),
        y_fitted=da_pressure["altitude"],
        color_index=3,
    )
    ax_at.set_xlabel("Air. Temp. $[K]$")
    ax_at.set_ylabel("Altitude $[m]$")

    # ----------------------
    # POTENTIAL TEMPERATURE
    # ----------------------

    _plot_profile_panel(
        ax_pt,
        x_observed=ds_dropsonde_cloud["potential_temperature"].transpose(..., "time"),
        y_observed=observed_altitude,
        # CLEO provides air temperature and pressure; convert for comparison
        x_cleo=potential_temperature_from_temperature_pressure(
            air_temperature=cleo_temperature,
            pressure=cleo_pressure,
            pressure_reference=pressure_reference,
        ),
        y_cleo=cleo_altitude,
        x_fitted=fitted_potential_temperature,
        y_fitted=da_pressure["altitude"],
        color_index=0,
    )
    ax_pt.set_xlabel("Pot. Temp. $[K]$")
    ax_pt.set_ylabel("Altitude $[m]$")

    # ----------------------
    # RELATIVE HUMIDITY
    # ----------------------

    _plot_profile_panel(
        ax_rh,
        x_observed=ds_dropsonde_cloud["relative_humidity"].transpose(..., "time"),
        y_observed=observed_altitude,
        x_cleo=relative_humidity_from_tps(
            temperature=cleo_temperature,
            pressure=cleo_pressure,
            specific_humidity=ds_cleo["specific_mass_vapour"].sel(cloud_id=cloud_id),
        ),
        y_cleo=cleo_altitude,
        x_fitted=da_relative_humidity.sel(cloud_id=cloud_id),
        y_fitted=da_relative_humidity["altitude"],
        color_index=1,
    )
    ax_rh.set_xlabel(r"Rel. Hum. $[\%]$")

    # ----------------------
    # AXES COSMETICS
    # ----------------------

    # Reuse the altitude ticks of the first profile panel on the other two,
    # but without tick labels.
    yticks = ax_at.get_yticks()
    blank_labels = [""] * len(yticks)
    ax_pt.set_yticks(yticks, blank_labels)
    ax_rh.set_yticks(yticks, blank_labels)

    ax_at.set_xticks(np.arange(290, 303, 2))
    ax_pt.set_xticks(np.arange(296, 302, 2))

    for _ax in [ax_at, ax_pt, ax_rh]:
        # red: split level of the relative humidity fit
        _ax.axhline(
            ds_relative_humidity_parameters["x_split"].sel(cloud_id=cloud_id), color="red", linestyle=":"
        )
        # black: mean altitude of the cloud composite measurements
        _ax.axhline(ds_observed_cloud["altitude"].mean("time"), color="k", linestyle=":")
        _ax.set_ylim(0, 1200)
        _ax.tick_params(axis="x", rotation=45)
        _ax.legend()
    ax_psd.legend(loc="lower left")

    # Number of measurements that entered each panel
    ax_psd.set_title(f"Measurements {len(ds_observed_cloud['time'].data)}")
    ax_at.set_title(f"Measurements {len(ds_dropsonde_cloud['time'].data)}")
    ax_pt.set_title(f"Measurements {len(ds_dropsonde_cloud['time'].data)}")
    ax_rh.set_title(f"Measurements {len(ds_dropsonde_cloud['time'].data)}")

    fig.suptitle(f"Cloud ID {cloud_id} | Pressure Ref. {pressure_reference/100:.2f} hPa")
    fig.tight_layout()
    return fig, axs

# Schematic

In [None]:
# Cloud used for the schematic. Alternatives:
# cloud_id = 87
# cloud_id = np.random.choice(valid_cloud_ids, 1)[0]
cloud_id = 396
fig, axs = plot_4_thermo(cloud_id=cloud_id, pressure_reference=100000)
# Optional: linear y-axis for the PSD panel
# axs[0].set_yscale("linear")
# axs[0].set_ylim(0, 1e8)

# Strip every panel down to the plotted lines: no spines, ticks, labels,
# titles or legends.
for _ax in axs:
    for _side in ("top", "right", "left", "bottom"):
        _ax.spines[_side].set_visible(False)

    _ax.tick_params(
        axis="both",
        which="both",
        bottom=False,
        top=False,
        left=False,
        right=False,
        labelbottom=False,
        labelleft=False,
    )
    _ax.set(xlabel="", ylabel="", title="")
    _ax.legend().set_visible(False)

save_figure(fig, fig_dir / Path(f"SCHEMATIC-cloud_id_{cloud_id}_thermo"))

# Visualise all clouds: fit, CLEO, observations

In [None]:
# Full (undecorated) overview figure for one cloud, with a reference
# pressure of 100000 (shown as 1000.00 hPa in the figure title).
cloud_id = 396
fig, axs = plot_4_thermo(cloud_id, pressure_reference=100_000)

In [None]:
# Disabled batch export: one overview figure per cloud, saved to the appendix
# figure directory and closed right away to free memory.
# NOTE(review): `tqdm` is not imported in the visible import cell; add
# `from tqdm.auto import tqdm` there before re-enabling this loop.
# for cloud_id in tqdm(ds_cleo['cloud_id'].values):
#     # 1000 hPa reference pressure
#     fig, axs = plot_4_thermo(cloud_id = cloud_id, pressure_reference = 100000)
#     save_figure(fig, appendix_fig_dir / f"{cloud_id}_observations-model-psd-at-rh-1000hPa")
#     plt.close(fig)

In [None]:
# Sanity check: recompute the potential temperature from the dropsonde air
# temperature and pressure (reference pressure 100000) and compare it with the
# potential temperature stored in the dropsonde dataset.
theta_recomputed = potential_temperature_from_temperature_pressure(
    air_temperature=dropsonde["air_temperature"],
    pressure=dropsonde["pressure"],
    pressure_reference=100000,
)
theta_dropsonde = dropsonde["potential_temperature"]

# Separate names so that `fig` / `axs` of the overview figure stay untouched
fig_theta_check, (ax_theta_identity, ax_theta_deviation) = plt.subplots(ncols=2, figsize=(10, 4))

# Left: both estimates against each other (points on the diagonal agree)
ax_theta_identity.scatter(theta_recomputed, theta_dropsonde)
ax_theta_identity.set_xlabel("Recomputed pot. temp. $[K]$")
ax_theta_identity.set_ylabel("Dropsonde pot. temp. $[K]$")
ax_theta_identity.set_title("Recomputed vs. stored")

# Right: absolute difference against ratio (perfect agreement is at (0, 1))
ax_theta_deviation.scatter(theta_recomputed - theta_dropsonde, theta_recomputed / theta_dropsonde)
ax_theta_deviation.set_xlabel("Recomputed $-$ dropsonde $[K]$")
ax_theta_deviation.set_ylabel("Recomputed / dropsonde")
ax_theta_deviation.set_title("Deviation")

fig_theta_check.tight_layout()

<matplotlib.collections.PathCollection at 0x7ffb49879490>