In [None]:
from ruamel.yaml import YAML
import numpy as np
import xarray as xr
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import seaborn as sns

strength_cmap = sns.cubehelix_palette(start=0.5, rot=-0.5, as_cmap=True)

from sdm_eurec4a.visulization import (
    set_custom_rcParams,
    set_paper_rcParams,
    label_from_attrs,
    adjust_lightness_array,
    plot_one_one,
    handler_map_alpha,
    save_figure,
    add_subplotlabel,
)
from sdm_eurec4a import RepositoryPath
from sdm_eurec4a import data_loading
from sdm_eurec4a.reductions import mean_and_stderror_of_mean
from sdm_eurec4a.conversions import (
    msd_from_psd_dataarray,
    potential_temperature_from_temperature_pressure,
    relative_humidity_from_tps,
    temperature_from_potential_temperature_pressure,
)
from sdm_eurec4a.input_processing import models as smodels
from sdm_eurec4a.identifications import match_clouds_and_cloudcomposite, match_clouds_and_dropsondes
from sdm_eurec4a.constants import TimeSlices


# Apply the paper plotting style and keep the colours it returns, together
# with a lightness-adjusted (factor 0.75) copy of them.
default_colors = set_paper_rcParams()
default_dark_colors = adjust_lightness_array(default_colors, 0.75)

# Repository layout for the "levante" configuration.
RepoPaths = RepositoryPath("levante")

# --- input data ---------------------------------------------------------
OBS_data_dir = RepoPaths.data_dir
input_data_dir = OBS_data_dir / Path("model/input_v4.2")

# --- CLEO output, current version first, older versions kept for reference
CLEO_data_dir = RepoPaths.CLEO_data_dir / Path("output_v4.4-CLEO_v0.39.7-input_v4.2")
CLEO_data_dir_v43 = RepoPaths.CLEO_data_dir / Path("output_v4.3-CLEO_v0.39.7-input_v4.2")
CLEO_data_dir_v42 = RepoPaths.CLEO_data_dir / Path("output_v4.2")
CLEO_data_dir_v41 = RepoPaths.CLEO_data_dir / Path("output_v4.1")
CLEO_data_dir_v40 = RepoPaths.CLEO_data_dir / Path("output_v4.0")

# --- figure output ------------------------------------------------------
master_fig_dir = RepoPaths.fig_dir / "paper-v4.4"
fig_dir = RepoPaths.fig_dir / Path("paper-v4.4/observations")
appendix_fig_dir = fig_dir / Path("appendix")

# Create the directories from the outermost to the innermost one. With
# parents=False each call only succeeds if its parent already exists, so
# the order matters.
for directory in (master_fig_dir, fig_dir, appendix_fig_dir):
    directory.mkdir(exist_ok=True, parents=False)

<!-- ## Data loading -->

In [None]:
# Paths of the observational products used in this notebook.
cloud_composite_path = OBS_data_dir / Path(
    "observation/cloud_composite/processed/cloud_composite_SI_units_20241025.nc"
)
dropsonde_path = OBS_data_dir / Path("observation/dropsonde/processed/drop_sondes.nc")
distances_path = OBS_data_dir / Path(
    "observation/combined/distance/distance_dropsondes_identified_clusters_rain_mask_5.nc"
)
identified_clusters_path = OBS_data_dir / Path(
    "observation/cloud_composite/processed/identified_clusters/identified_clusters_rain_mask_5.nc"
)

cloud_composite = xr.open_dataset(cloud_composite_path)
dropsonde = xr.open_dataset(dropsonde_path)
ds_distances = xr.open_dataset(distances_path)

# Index the identified clusters by their cloud id instead of by time.
identified_clusters = xr.open_dataset(identified_clusters_path).swap_dims({"time": "cloud_id"})

# Radius broadcast along time, stored as an extra variable; afterwards
# "radius" is made the leading dimension of the whole dataset.
cloud_composite["radius2D"] = cloud_composite["radius"].expand_dims(time=cloud_composite["time"])
cloud_composite = cloud_composite.transpose("radius", ...)

# Rescale the mass size distribution by 1e3 (intended as a conversion to
# g m-3 per metre). "radius" and "liquid_water_content" are left exactly as
# stored in the file.
# NOTE(review): the key written here is "unit", whereas the CLEO cell writes
# "units" — confirm which key the plotting helpers read.
msd = cloud_composite["mass_size_distribution"]
attrs = {**msd.attrs, "unit": "g m^{-3} m^{-1}"}
cloud_composite["mass_size_distribution"] = msd * 1e3
cloud_composite["mass_size_distribution"].attrs = attrs

# Keep only the radius bins from 10e-6 upwards.
cloud_composite = cloud_composite.sel(radius=slice(10e-6, None))

# Keep clusters with a seconds component of at least 3 and an altitude
# strictly between 500 and 1200; everything else is dropped.
cluster_seconds = identified_clusters.duration.dt.seconds
cluster_altitude = identified_clusters.altitude
keep_cluster = (cluster_seconds >= 3) & (cluster_altitude < 1200) & (cluster_altitude > 500)
identified_clusters = identified_clusters.where(keep_cluster, drop=True)

In [None]:
# Loader for the CLEO output, restricted to the "null_microphysics" setup.
cleo_dataset = data_loading.CleoDataset(
    data_dir=CLEO_data_dir,
    microphysics=("null_microphysics",),
)
# Calling the loader yields two datasets; judging by the names, the values
# and their standard error of the mean.
ds_cleo, ds_cleo_sem = cleo_dataset()

# Rescale the liquid water content by 1e3 (intended as g m-3) and relabel it
# as rain water content. Arithmetic creates a new array, so the metadata is
# prepared first and re-attached afterwards.
# NOTE(review): only ds_cleo is rescaled here, ds_cleo_sem is not — confirm
# this is intended before using both together for this variable.
attrs = dict(
    ds_cleo["liquid_water_content"].attrs,
    units="g m^{-3}",
    long_name="Rain Water Content",
)
ds_cleo["liquid_water_content"] = ds_cleo["liquid_water_content"] * 1e3
ds_cleo["liquid_water_content"].attrs = attrs

null_microphysics


In [None]:
# Load the list of valid cloud ids that is stored next to the CLEO output.
yaml = YAML(typ="safe")  # default, if not specified, is 'rt' (round-trip)
d = yaml.load(CLEO_data_dir / Path("valid_cloud_ids.yaml"))
valid_cloud_ids = d["valid_cloud_ids"]

# Restrict observations and model output to the same clouds, in the same
# order. The SEM dataset must be restricted as well: it provides the error
# bars that are plotted position-by-position against values from ds_cleo, so
# a differing length or order would break or misalign the error bars.
identified_clusters = identified_clusters.sel(cloud_id=valid_cloud_ids)
ds_cleo = ds_cleo.sel(cloud_id=valid_cloud_ids)
ds_cleo_sem = ds_cleo_sem.sel(cloud_id=valid_cloud_ids)

# Obtain observed RWC, LWC and NBC (rain water content, liquid water content, number concentration)

In [None]:
def _stack_clouds(per_cloud, long_name, units):
    """Concatenate per-cloud arrays along ``cloud_id`` and set their metadata.

    Parameters
    ----------
    per_cloud : list
        One array per cloud, each carrying a length-one ``cloud_id`` dimension.
    long_name, units : str
        Values stored as the only two attributes of the result.

    Returns
    -------
    The concatenated array with ``attrs`` replaced by ``long_name``/``units``.
    """
    stacked = xr.concat(per_cloud, dim="cloud_id")
    stacked.attrs = dict(long_name=long_name, units=units)
    return stacked


# Per-cloud time means and their standard errors, collected cloud by cloud.
list_lwc, list_lwc_sem = [], []
list_lwc_50um, list_lwc_50um_sem = [], []

list_nbc, list_nbc_sem = [], []
list_nbc_50um, list_nbc_50um_sem = [], []

# Counter and total; neither is used by the loop below.
i = 0
N = len(identified_clusters["cloud_id"])

for cloud_id in identified_clusters["cloud_id"]:
    # Cloud-composite samples matched to this cloud, tagged with its id so
    # the per-cloud results can be concatenated afterwards.
    cc = match_clouds_and_cloudcomposite(
        ds_clouds=identified_clusters.sel(cloud_id=cloud_id),
        ds_cloudcomposite=cloud_composite,
    ).expand_dims(cloud_id=[int(cloud_id.values)])
    bin_width = cc["bin_width"]

    # Liquid water content as provided by the composite.
    lwc = cc["liquid_water_content"]
    lwc_mean, lwc_sem = mean_and_stderror_of_mean(lwc, dims=("time",))
    list_lwc.append(lwc_mean)
    list_lwc_sem.append(lwc_sem)

    # Mass integrated over the radius bins from 50e-6 upwards.
    mass_per_bin = cc["mass_size_distribution"] * bin_width
    lwc_above_50um = mass_per_bin.sel(radius=slice(50e-6, None)).sum("radius")
    lwc_above_50um_mean, lwc_above_50um_sem = mean_and_stderror_of_mean(lwc_above_50um, dims=("time",))
    list_lwc_50um.append(lwc_above_50um_mean)
    list_lwc_50um_sem.append(lwc_above_50um_sem)

    # Number per bin. This is NOT summed over radius, so the resulting
    # "number_concentration" variable keeps the radius dimension.
    nbc = cc["particle_size_distribution"] * bin_width
    nbc_mean, nbc_sem = mean_and_stderror_of_mean(nbc, dims=("time",))
    list_nbc.append(nbc_mean)
    list_nbc_sem.append(nbc_sem)

    # Number integrated over the radius bins from 50e-6 upwards.
    nbc_above_50um = nbc.sel(radius=slice(50e-6, None)).sum("radius")
    nbc_above_50um_mean, nbc_above_50um_sem = mean_and_stderror_of_mean(nbc_above_50um, dims=("time",))
    list_nbc_50um.append(nbc_above_50um_mean)
    list_nbc_50um_sem.append(nbc_above_50um_sem)


# NOTE(review): "liquid_water_content" is labelled g m^{-3} here although it is
# not rescaled in this notebook — confirm the unit stored in the file.
da_lwc = _stack_clouds(list_lwc, "Liquid water content", "g m^{-3}")
da_lwc_sem = _stack_clouds(
    list_lwc_sem,
    "Standard error of the mean of the liquid water content",
    "g m^{-3}",
)

da_lwc_50um = _stack_clouds(list_lwc_50um, "Rain Water Content", "g m^{-3}")
da_lwc_50um_sem = _stack_clouds(
    list_lwc_50um_sem,
    "Standard error of the mean of the Rain Water Content",
    "g m^{-3}",
)

da_nbc = _stack_clouds(list_nbc, "Number concentration", "m^{-3}")
da_nbc_sem = _stack_clouds(
    list_nbc_sem,
    "Standard error of the mean of the number concentration",
    "m^{-3}",
)

da_nbc_50um = _stack_clouds(list_nbc_50um, "Number concentration above 50 µm", "m^{-3}")
da_nbc_50um_sem = _stack_clouds(
    list_nbc_50um_sem,
    "Standard error of the mean of the number concentration above 50 µm",
    "m^{-3}",
)

# All per-cloud observation statistics in one dataset.
ds_observations_backup = xr.Dataset(
    {
        "liquid_water_content": da_lwc,
        "liquid_water_content_sem": da_lwc_sem,
        "liquid_water_content_50um": da_lwc_50um,
        "liquid_water_content_50um_sem": da_lwc_50um_sem,
        "number_concentration": da_nbc,
        "number_concentration_sem": da_nbc_sem,
        "number_concentration_50um": da_nbc_50um,
        "number_concentration_50um_sem": da_nbc_50um_sem,
    }
)

In [None]:
# Working copy of the observation statistics, restricted to (and ordered by)
# the valid cloud ids; the backup dataset itself is left untouched.
ds_observations = ds_observations_backup.sel(cloud_id=valid_cloud_ids)

In [None]:
# ds_observations_backup["liquid_water_content"].plot()
# ds_observations_backup["liquid_water_content_50um"].plot()

# Scatter plots and correlations

In [None]:
# Observed rain water content (x) against the CLEO cloud liquid water content
# of the null-microphysics run (y), on linear axes.
x = ds_observations["liquid_water_content_50um"]
xerr = ds_observations["liquid_water_content_50um_sem"]
y = ds_cleo["cloud_liquid_water_content"].sel(microphysics="null_microphysics")
yerr = ds_cleo_sem["cloud_liquid_water_content"].sel(microphysics="null_microphysics")

# Mark the y label as a model quantity.
y.attrs.update(long_name="Model " + y.attrs["long_name"])

# Pearson correlation across clouds, on the raw values and on their logarithms.
# The 1e-28 offset keeps log() finite for zero values.
corr = xr.corr(x, y, dim="cloud_id")
corr_loglog = xr.corr(np.log(x + 1e-28), np.log(y + 1e-28), dim="cloud_id")

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 4))

# Solid markers for the values, faint error bars for the standard errors.
ax.scatter(
    x=x,
    y=y,
    marker=".",
    color="k",
    alpha=0.75,
)

ax.errorbar(
    x=x,
    y=y,
    xerr=xerr,
    yerr=yerr,
    fmt=".",
    color="k",
    alpha=0.1,
)

ax.set_xlim(0, 1)
ax.set_ylim(0, 1)

plot_one_one(ax, color="grey", linestyle="--")
ax.set_xlabel(label_from_attrs(x, name_width=25))
ax.set_ylabel(label_from_attrs(y, name_width=20))
ax.set_title(f"Correlation: {corr.values:.2f}\nCorrelation log-log: {corr_loglog.values:.2f}")
fig.tight_layout()

# The symlog version of this comparison is saved as f"{x.name}-{y.name}".
# Without the suffix this linear figure would be written to the same path and
# overwritten by it.
save_figure(fig, fig_dir / f"{x.name}-{y.name}-linear")

In [None]:
# Observed rain water content (x) against the CLEO cloud liquid water content
# of the null-microphysics run (y), on symmetric-log axes.
model_lwc = ds_cleo["cloud_liquid_water_content"]
model_lwc_sem = ds_cleo_sem["cloud_liquid_water_content"]

x = ds_observations["liquid_water_content_50um"]
xerr = ds_observations["liquid_water_content_50um_sem"]
y = model_lwc.sel(microphysics="null_microphysics")
yerr = model_lwc_sem.sel(microphysics="null_microphysics")

# Mark the y label as a model quantity.
y.attrs["long_name"] = "Model " + y.attrs["long_name"]

# Pearson correlation across clouds, raw and in log space (the tiny offset
# keeps log() finite for zero values).
corr = xr.corr(x, y, dim="cloud_id")
corr_loglog = xr.corr(np.log(x + 1e-28), np.log(y + 1e-28), dim="cloud_id")

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 4))

# Solid markers for the values, faint error bars for the standard errors.
ax.scatter(x, y, marker=".", color="k", alpha=0.75)
ax.errorbar(x, y, xerr=xerr, yerr=yerr, fmt=".", color="k", alpha=0.1)

# Symlog keeps zero on the axis: linear below 1e-4, logarithmic above.
linear_threshold = 1e-4
ax.set_xscale("symlog", linthresh=linear_threshold)
ax.set_yscale("symlog", linthresh=linear_threshold)
ax.set_xlim(0, 5)
ax.set_ylim(0, 5)

plot_one_one(ax, color="grey", linestyle="--")
ax.set_xlabel(label_from_attrs(x, name_width=25))
ax.set_ylabel(label_from_attrs(y, name_width=20))

title = f"Correlation: {corr.values:.2f}\nCorrelation log-log: {corr_loglog.values:.2f}"
ax.set_title(title)
fig.tight_layout()

save_figure(fig, fig_dir / f"{x.name}-{y.name}")

In [None]:
# Observed liquid water content (x) against the CLEO cloud liquid water
# content of the null-microphysics run (y), on symmetric-log axes.
model_lwc = ds_cleo["cloud_liquid_water_content"]
model_lwc_sem = ds_cleo_sem["cloud_liquid_water_content"]

x = ds_observations["liquid_water_content"]
xerr = ds_observations["liquid_water_content_sem"]
y = model_lwc.sel(microphysics="null_microphysics")
# Mark the y label as a model quantity.
y.attrs["long_name"] = "Model " + y.attrs["long_name"]
yerr = model_lwc_sem.sel(microphysics="null_microphysics")

# Pearson correlation across clouds, raw and in log space (the tiny offset
# keeps log() finite for zero values).
corr = xr.corr(x, y, dim="cloud_id")
corr_loglog = xr.corr(np.log(x + 1e-28), np.log(y + 1e-28), dim="cloud_id")

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 4))

# Solid markers for the values, faint error bars for the standard errors.
ax.scatter(x, y, marker=".", color="k", alpha=0.75)
ax.errorbar(x, y, xerr=xerr, yerr=yerr, fmt=".", color="k", alpha=0.1)

# Symlog keeps zero on the axis: linear below 1e-4, logarithmic above.
linear_threshold = 1e-4
ax.set_xscale("symlog", linthresh=linear_threshold)
ax.set_yscale("symlog", linthresh=linear_threshold)
ax.set_xlim(0, 5)
ax.set_ylim(0, 5)

plot_one_one(ax, color="grey", linestyle="--")
ax.set_xlabel(label_from_attrs(x, name_width=25))
ax.set_ylabel(label_from_attrs(y, name_width=20))

title = f"Correlation: {corr.values:.2f}\nCorrelation log-log: {corr_loglog.values:.2f}"
ax.set_title(title)
fig.tight_layout()

save_figure(fig, fig_dir / f"{x.name}-{y.name}")

In [None]:
# Observation-only comparison: liquid water content (x) against the rain
# water content (y), on symmetric-log axes.
x = ds_observations["liquid_water_content"]
y = ds_observations["liquid_water_content_50um"]
xerr = ds_observations["liquid_water_content_sem"]
yerr = ds_observations["liquid_water_content_50um_sem"]

# Pearson correlation across clouds, raw and in log space (the tiny offset
# keeps log() finite for zero values).
corr = xr.corr(x, y, dim="cloud_id")
corr_loglog = xr.corr(np.log(x + 1e-28), np.log(y + 1e-28), dim="cloud_id")

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 4))

# Solid markers for the values, faint error bars for the standard errors.
ax.scatter(x, y, marker=".", color="k", alpha=0.75)
ax.errorbar(x, y, xerr=xerr, yerr=yerr, fmt=".", color="k", alpha=0.1)

# Symlog keeps zero on the axis: linear below 1e-4, logarithmic above.
linear_threshold = 1e-4
ax.set_xscale("symlog", linthresh=linear_threshold)
ax.set_yscale("symlog", linthresh=linear_threshold)
ax.set_xlim(0, 5)
ax.set_ylim(0, 5)

plot_one_one(ax, color="grey", linestyle="--")
ax.set_xlabel(label_from_attrs(x, name_width=25))
ax.set_ylabel(label_from_attrs(y))

title = f"Correlation: {corr.values:.2f}\nCorrelation log-log: {corr_loglog.values:.2f}"
ax.set_title(title)
fig.tight_layout()

save_figure(fig, fig_dir / f"{x.name}-{y.name}")

In [None]:
# Observed number concentration above 50 µm (x) against the CLEO number
# concentration of the null-microphysics run (y), on linear axes.
x = ds_observations["number_concentration_50um"]
xerr = ds_observations["number_concentration_50um_sem"]

# Model number concentration: xi per gridbox volume, taken at the gridbox
# given by "max_gridbox" and summed over all radius bins.
# Dividing two differently named arrays yields an unnamed result, so the
# array is named explicitly; otherwise the file name below would contain
# the literal text "None".
y = (
    (ds_cleo["xi_temporal_mean"] / ds_cleo["gridbox_volume"])
    .sel(gridbox=ds_cleo["max_gridbox"])
    .sum("radius_bins")
).sel(microphysics="null_microphysics").rename("number_concentration")
y.attrs.update(units="m^{-3}", long_name="Number concentration")

# NOTE(review): this divides by ds_cleo_sem["gridbox_volume"] and indexes with
# ds_cleo_sem["max_gridbox"] — confirm the SEM dataset holds the actual
# volume/index there rather than their standard errors. The per-bin errors
# are also summed linearly over radius_bins; confirm that is intended.
yerr = (
    (ds_cleo_sem["xi_temporal_mean"] / ds_cleo_sem["gridbox_volume"])
    .sel(gridbox=ds_cleo_sem["max_gridbox"])
    .sum("radius_bins")
).sel(microphysics="null_microphysics")

# Pearson correlation across clouds, raw and in log space (the tiny offset
# keeps log() finite for zero values).
corr = xr.corr(x + 1e-28, y + 1e-28, dim="cloud_id")
corr_loglog = xr.corr(np.log(x + 1e-28), np.log(y + 1e-28), dim="cloud_id")

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 4))

# Solid markers for the values, faint error bars for the standard errors.
ax.scatter(
    x=x,
    y=y,
    marker=".",
    color="k",
    alpha=0.75,
)

ax.errorbar(
    x=x,
    y=y,
    xerr=xerr,
    yerr=yerr,
    fmt=".",
    color="k",
    alpha=0.1,
)

ax.set_xlim(0, 2e4)
ax.set_ylim(0, 2e4)
plot_one_one(ax, color="grey", linestyle="--")
ax.set_xlabel(label_from_attrs(x, name_width=25))
ax.set_ylabel(label_from_attrs(y, name_width=20))
ax.set_title(f"Correlation: {corr.values:.2f}\nCorrelation log-log: {corr_loglog.values:.2f}")
fig.tight_layout()

# The log-log version of this comparison is saved as f"{x.name}-{y.name}" too.
# The suffix gives this linear figure its own file so it is not overwritten.
save_figure(fig, fig_dir / f"{x.name}-{y.name}-linear")

In [None]:
# Observed number concentration above 50 µm (x) against the CLEO number
# concentration of the null-microphysics run (y), on log-log axes.
x = ds_observations["number_concentration_50um"]
xerr = ds_observations["number_concentration_50um_sem"]

attrs = ds_cleo["xi_temporal_mean"].attrs.copy()  # not used further in this cell

# Model number concentration: xi per gridbox volume, taken at the gridbox
# given by "max_gridbox" and summed over all radius bins.
xi_per_volume = ds_cleo["xi_temporal_mean"] / ds_cleo["gridbox_volume"]
y = (
    xi_per_volume.sel(gridbox=ds_cleo["max_gridbox"])
    .sum("radius_bins")
    .sel(microphysics="null_microphysics")
)
y.attrs.update(units="m^{-3}", long_name="Number concentration")

# NOTE(review): this divides by ds_cleo_sem["gridbox_volume"] and indexes with
# ds_cleo_sem["max_gridbox"] — confirm the SEM dataset holds the actual
# volume/index there rather than their standard errors.
xi_per_volume_sem = ds_cleo_sem["xi_temporal_mean"] / ds_cleo_sem["gridbox_volume"]
yerr = (
    xi_per_volume_sem.sel(gridbox=ds_cleo_sem["max_gridbox"])
    .sum("radius_bins")
    .sel(microphysics="null_microphysics")
)

# Pearson correlation across clouds, raw and in log space (the tiny offset
# keeps log() finite for zero values).
corr = xr.corr(x + 1e-28, y + 1e-28, dim="cloud_id")
corr_loglog = xr.corr(np.log(x + 1e-28), np.log(y + 1e-28), dim="cloud_id")

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 4))

# Solid markers for the values, faint error bars for the standard errors.
ax.scatter(x, y, marker=".", color="k", alpha=0.75)
ax.errorbar(x, y, xerr=xerr, yerr=yerr, fmt=".", color="k", alpha=0.1)

# Logarithmic axes; zero values fall outside the plotted range.
ax.loglog()
ax.set_xlim(1e-1, 2e4)
ax.set_ylim(1e-1, 2e4)

plot_one_one(ax, color="grey", linestyle="--")
ax.set_xlabel(label_from_attrs(x, name_width=25))
ax.set_ylabel(label_from_attrs(y, name_width=20))

title = f"Correlation: {corr.values:.2f}\nCorrelation log-log: {corr_loglog.values:.2f}"
ax.set_title(title)
fig.tight_layout()

# NOTE(review): y is the result of arithmetic between differently named
# arrays, so y.name is presumably None and ends up verbatim in the file name.
save_figure(fig, fig_dir / f"{x.name}-{y.name}")