In [None]:
import numpy as np
import xarray as xr
from pathlib import Path

from sdm_eurec4a.visulization import set_custom_rcParams, label_from_attrs
from sdm_eurec4a.reductions import mean_and_stderror_of_mean

set_custom_rcParams()

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import seaborn as sns

from sdm_eurec4a import RepositoryPath

repo_dir = RepositoryPath("levante").repo_dir

In [None]:
def plot_one_one(ax, **kwargs):
    """
    Draw the one-to-one (``y = x``) reference line on ``ax``.

    The line runs from the smallest to the largest value found among the
    x- and y-axis limits of ``ax`` as they are at call time.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw on; only ``get_xlim``, ``get_ylim`` and ``plot`` are used.
    **kwargs
        Passed on unchanged to ``ax.plot`` (e.g. ``color``, ``linestyle``).
    """
    # collect both axis ranges so one min / max call covers x and y together
    axis_limits = [ax.get_xlim(), ax.get_ylim()]
    lower = np.min(axis_limits)
    upper = np.max(axis_limits)
    # using the same coordinates for x and y gives the diagonal
    diagonal = [lower, upper]
    ax.plot(diagonal, diagonal, **kwargs)

In [None]:
# Microphysics setup to analyse: exactly one assignment must be active.
# The name is used for the input dataset path, the figure directory,
# the figure file names and (via `microphysics_title`) the figure titles.
# microphysics = 'null_microphysics'
# microphysics = 'condensation'
# microphysics = 'collision_condensation'
# microphysics = 'coalbure_condensation_small'
microphysics = "coalbure_condensation_large"
fig_dir = repo_dir / Path(f"results/notebooks/paper/{microphysics}")
# parents=True also creates missing intermediate directories, so a checkout
# without the results tree no longer fails with FileNotFoundError.
fig_dir.mkdir(parents=True, exist_ok=True)
# Human-readable names of the setups, used in the figure titles.
microphysics_title = dict(
    null_microphysics="Null Microphysics",
    condensation="Condensation",
    collision_condensation="Collision and Condensation",
    coalbure_condensation_small="Coalbure and Condensation (few particles)",
    coalbure_condensation_large="Coalbure and Condensation (many particles)",
)

In [None]:
# Open the combined Eulerian dataset of the selected microphysics setup.
# NOTE(review): absolute, user-specific path — the notebook only runs where this file is readable.
# `data_dir` holds the path of a single NetCDF file here, not of a directory.
data_dir = Path(
    f"/home/m/m301096/CLEO/data/output_v4.0/{microphysics}/combined/eulerian_dataset_combined.nc"
)
full_ds = xr.open_dataset(data_dir)
full_ds["radius_bins"].attrs.update(long_name="Radius", units="µm")
# restrict the analysis to time coordinate values 2000 to 3500 (presumably seconds — TODO confirm)
ds = full_ds.sel(time=slice(2000, 3500))
# every radius bin value that is not > 0 (this includes NaN) is replaced by 0
ds["radius_bins"] = ds["radius_bins"].where(ds["radius_bins"] > 0, 0)

# missing mass values are treated as zero mass
ds["mass_represented"] = ds["mass_represented"].fillna(0)
# NOTE(review): labelled "kg m-3" here, yet later cells divide this variable by
# `gridbox_volume` — confirm which unit is correct
ds["mass_represented"].attrs.update(long_name="Mass represented", units="kg m-3")

### Mass profile

In [None]:
# Time-mean of `mass_represented`; the factor 1e3 goes with the unit label
# switching from kg to g.
_mass_time_mean = ds["mass_represented"].mean(dim="time", keep_attrs=True, skipna=True)
mass_profile = 1e3 * _mass_time_mean.compute()
mass_profile.attrs.update(long_name="Mass", units="g m^{-3}")

# Same quantity, but summed over all radius bins before averaging in time.
_mass_all_radii = ds["mass_represented"].sum(dim="radius_bins", keep_attrs=True, skipna=True)
_total_mass_time_mean = _mass_all_radii.mean(dim="time", keep_attrs=True, skipna=True)
total_mass_profile = 1e3 * _total_mass_time_mean.compute()
total_mass_profile.attrs.update(long_name="Total mass per gridbox", units="g m^{-3}")

In [None]:
# Cloud ids ordered by ascending time-mean mass in gridbox 0, summed over all radius bins.
# The result is not referenced by any other cell visible in this notebook.
sorted_cloud_ids = mass_profile["cloud_id"].sortby(mass_profile.sel(gridbox=0).sum("radius_bins"))

In [None]:
# Clouds shown in the figure, one panel per cloud.
selected_clouds = [409, 95, 384, 385]
n_panels = len(selected_clouds)

fig, axs = plt.subplots(ncols=n_panels, figsize=(4 * n_panels, 5), sharex=True, sharey=True)

for i, cloud_id in enumerate(selected_clouds):
    panel = axs[i]

    # keep only the gridboxes from 0 up to this cloud's `max_gridbox`
    max_gridbox_cloud = ds["max_gridbox"].sel(cloud_id=cloud_id)
    gridbox_range = slice(0, max_gridbox_cloud)
    ds_selected_cloud = ds.sel(cloud_id=cloud_id).sel(gridbox=gridbox_range)
    mass_profile_cloud = mass_profile.sel(cloud_id=cloud_id).sel(gridbox=gridbox_range)
    coord3 = ds_selected_cloud["gridbox_coord3"]
    volume = ds_selected_cloud["gridbox_volume"]

    # colour mesh of mass per gridbox volume: radius bins on x, gridbox_coord3 on y
    mappable = panel.pcolormesh(
        ds_selected_cloud["radius_bins"],
        coord3,
        mass_profile_cloud / volume,
        shading="auto",
        # norm = mcolors.LogNorm(),
        cmap="Blues",
    )
    fig.colorbar(
        mappable=mappable,
        alpha=1,
        ax=panel,
        label=label_from_attrs(mass_profile),
        orientation="horizontal",
    )

    # second x-axis on top of the panel for the total (all radius bins) profile
    _ax_twiny = panel.twiny()
    _ax_twiny.plot(total_mass_profile.sel(cloud_id=cloud_id) / volume, coord3, color="k", lw=3)
    _ax_twiny.tick_params(axis="x", labelrotation=45)
    _ax_twiny.set_xlabel(label_from_attrs(total_mass_profile) + "\n")
    _ax_twiny.set_title(f"Cloud ID: {cloud_id}")


# logarithmic radius axis with a fixed range on every panel
for _ax in axs.ravel():
    _ax.set_xscale("log")
    _ax.set_xlim(1e0, 3e3)
    # _ax.set_xlabel(label_from_attrs(ds['radius_bins']))

# shared title and axis labels, then write the figure to `fig_dir`
fig.suptitle(f"Mass distribution of selected clouds\n{microphysics_title[microphysics]}")
fig.supylabel(label_from_attrs(ds["gridbox_coord3"]))
fig.supxlabel(label_from_attrs(ds["radius_bins"]))
fig.tight_layout()
fig.savefig(f"{fig_dir}/mass_distribution_selected_clouds_{microphysics}.png", dpi=300)

### Mass difference profiles

In [None]:
# Time-mean of `mass_difference_per_volume`, scaled by 1e6; the attributes
# set below label the result in mg m^{-3} s^{-1}.
_mass_diff_time_mean = ds["mass_difference_per_volume"].mean(dim="time", keep_attrs=True, skipna=True)
mass_diff_profile = 1e6 * _mass_diff_time_mean.compute()
mass_diff_profile.attrs.update(long_name="Mass change", units="mg m^{-3} s^{-1}")

# Same quantity, but summed over all radius bins before averaging in time.
_mass_diff_all_radii = ds["mass_difference_per_volume"].sum(
    dim="radius_bins", keep_attrs=True, skipna=True
)
_total_mass_diff_time_mean = _mass_diff_all_radii.mean(dim="time", keep_attrs=True, skipna=True)
total_mass_diff_profile = 1e6 * _total_mass_diff_time_mean.compute()
total_mass_diff_profile.attrs.update(long_name="Total mass change per gridbox", units="mg m^{-3} s^{-1}")

In [None]:
# Identical to the `sorted_cloud_ids` assignment in the mass profile section:
# the ordering is based on `mass_profile`, not on `mass_diff_profile`.
# NOTE(review): not referenced by any other visible cell — confirm whether it is still needed.
sorted_cloud_ids = mass_profile["cloud_id"].sortby(mass_profile.sel(gridbox=0).sum("radius_bins"))

In [None]:
# Clouds shown in the figure, one panel per cloud.
selected_clouds = [409, 95, 384, 385]

fig, axs = plt.subplots(
    ncols=len(selected_clouds), figsize=(4 * len(selected_clouds), 5), sharex=True, sharey=True
)

for i, cloud_id in enumerate(selected_clouds):

    # keep only the gridboxes from 0 up to this cloud's `max_gridbox`
    max_gridbox_cloud = ds["max_gridbox"].sel(cloud_id=cloud_id)
    mass_diff_profile_cloud = mass_diff_profile.sel(cloud_id=cloud_id).sel(
        gridbox=slice(0, max_gridbox_cloud)
    )
    ds_selected_cloud = ds.sel(cloud_id=cloud_id).sel(gridbox=slice(0, max_gridbox_cloud))

    # colour mesh of the time-mean mass change: radius bins on x, gridbox_coord3 on y;
    # the diverging colour map is centred on zero so the sign of the change is visible
    mappable = axs[i].pcolormesh(
        ds_selected_cloud["radius_bins"],
        ds_selected_cloud["gridbox_coord3"],
        mass_diff_profile_cloud,
        shading="auto",
        norm=mcolors.CenteredNorm(vcenter=0),
        cmap="PuOr",
    )
    fig.colorbar(
        mappable=mappable,
        alpha=1,
        ax=axs[i],
        label=label_from_attrs(mass_diff_profile),
        orientation="horizontal",
    )

    # second x-axis on top of the panel for the total (all radius bins) mass change
    _ax_twiny = axs[i].twiny()
    _ax_twiny.plot(
        total_mass_diff_profile.sel(cloud_id=cloud_id).sel(gridbox=slice(0, max_gridbox_cloud)),
        ds_selected_cloud["gridbox_coord3"],
        color="k",
        lw=3,
    )
    _ax_twiny.tick_params(axis="x", labelrotation=45)
    _ax_twiny.set_xlabel(label_from_attrs(total_mass_diff_profile) + "\n")
    _ax_twiny.set_title(f"Cloud ID: {cloud_id}")


# logarithmic radius axis with a fixed range on every panel
for _ax in axs.ravel():
    _ax.set_xscale("log")
    _ax.set_xlim(1e0, 3e3)
    # _ax.set_xlabel(label_from_attrs(ds['radius_bins']))

# The title names the plotted quantity (mass change); the previous text was
# copied from the mass distribution figure and mislabelled this one.
fig.suptitle(f"Mass change distribution of selected clouds\n{microphysics_title[microphysics]}")
fig.supylabel(label_from_attrs(ds["gridbox_coord3"]))
fig.supxlabel(label_from_attrs(ds["radius_bins"]))
fig.tight_layout()
fig.savefig(f"{fig_dir}/mass_diff_distribution_selected_clouds_{microphysics}.png", dpi=300)

### Profiles of all clouds

In [None]:
fig, axs = plt.subplots(nrows=2, figsize=(8, 6))

# The same profiles are drawn in both panels; only the x-scale differs.
lwc_profiles = (total_mass_profile / ds["gridbox_volume"]).T
coord3_profiles = ds["gridbox_coord3"].T

for _ax in axs:
    _ax.plot(lwc_profiles, coord3_profiles)
    _ax.set_ylabel("Height [m]")
    _ax.set_xlabel("Mass concentration [g/m³]")

# lower panel: logarithmic mass concentration axis
axs[1].set_xscale("log")

fig.suptitle(f"LWC profiles\n{microphysics_title[microphysics]}")
fig.tight_layout()
fig.savefig(f"{fig_dir}/lwc_profiles.png")

In [None]:
# Mass per volume in the gridbox given by `max_gridbox` (initial) and in
# gridbox 1 (end), both taken from the time-mean total mass profile.
top_gridbox = ds["max_gridbox"]
init_mass = total_mass_profile.sel(gridbox=top_gridbox) / ds["gridbox_volume"].sel(gridbox=top_gridbox)
end_mass = total_mass_profile.sel(gridbox=1) / ds["gridbox_volume"].sel(gridbox=1)
# percentage of the initial mass that is no longer present in gridbox 1
ef = 100 - 1e2 * (end_mass / init_mass)
ef.attrs.update(long_name="Evaporation fraction", units=r"\%")

fig, ax = plt.subplots(figsize=(8, 3))

# histogram in 5 % wide bins; the vertical line at 0 separates increase from decrease
ax.hist(ef, bins=np.arange(-75, 120, 5))
ax.axvline(0, color="k", lw=1)
for note, position in (
    ("LWC increase\n(unrealistic)", (-50, 20)),
    ("LWC decrease\n(evaporation)", (40, 20)),
):
    ax.annotate(note, xy=position)
ax.set_xlabel(label_from_attrs(ef))
ax.set_ylabel("Count")

fig.suptitle(f"Evaporation fraction\n{microphysics_title[microphysics]}")
fig.tight_layout()
fig.savefig(f"{fig_dir}/evaporation_fraction.png")

In [None]:
# Cloud ids ordered by ascending difference `init_mass - end_mass`;
# as the last expression of the cell, the result is shown as cell output.
ds["cloud_id"].sortby((init_mass - end_mass))

# PSD and MSD distributions

Plot the PSD and MSD of the cloud layer, the sub-cloud layer and the surface.

In [None]:
# `xi` divided by the gridbox volume, labelled as particle size distribution
psd_cleo = ds["xi"] / ds["gridbox_volume"]
psd_cleo.attrs.update(
    long_name="Particle size distribution",
    units="m^{-3}",
    standard_name="particle_size_distribution",
)

# represented mass per gridbox volume; the factor 1e3 goes with the unit label g m^{-3}
_mass_per_volume = ds["mass_represented"] / ds["gridbox_volume"]
msd_cleo = 1e3 * _mass_per_volume
msd_cleo.attrs.update(
    long_name="Mass size distribution",
    units="g m^{-3}",
    standard_name="mass_size_distribution",
)

In [None]:
def _time_mean_at(gridbox):
    """Return the time-mean ``(PSD, MSD)`` pair of the CLEO data selected at ``gridbox``."""
    return tuple(
        distribution.sel(gridbox=gridbox).mean("time", keep_attrs=True)
        for distribution in (psd_cleo, msd_cleo)
    )


# cloud layer: the gridbox given by `max_gridbox` of each cloud
psd_cloud, msd_cloud = _time_mean_at(ds["max_gridbox"])

# sub-cloud layer: the gridbox directly below it
psd_sub_cloud, msd_sub_cloud = _time_mean_at(ds["max_gridbox"] - 1)

# surface: gridbox 1
psd_surface, msd_surface = _time_mean_at(1)

Cloud Layer

In [None]:
# NOTE: this cell is fully commented out, so no cloud-layer figure is produced.
# The disabled code follows the layout of the sub-cloud and surface figures
# (PSD on a log y-axis on the left, MSD on the right).
# fig, axs = plt.subplots(ncols=2, figsize=(8, 4))

# # psd_m, psd_sem = mean_and_stderror_of_mean(psd, dims=('time',))

# axs[0].plot(
#     psd_cloud['radius_bins'],
#     psd_cloud.T,
#     alpha = 0.5,
# );
# axs[0].set_yscale('log')
# axs[0].set_ylabel(label_from_attrs(psd_cloud))

# axs[1].plot(
#     msd_cloud['radius_bins'],
#     msd_cloud.T
# );
# axs[1].set_ylabel(label_from_attrs(msd_cloud))

# for _ax in axs:
#     _ax.set_xlabel(label_from_attrs(ds['radius_bins']))
#     _ax.set_xscale('log')

# fig.tight_layout()

Sub Cloud layer

In [None]:
fig, axs = plt.subplots(ncols=2, figsize=(8, 4))
psd_ax, msd_ax = axs

# left panel: time-mean PSD of the sub-cloud gridbox, logarithmic y-axis
psd_ax.plot(psd_sub_cloud["radius_bins"], psd_sub_cloud.T, alpha=0.5)
psd_ax.set_yscale("log")
psd_ax.set_ylabel(label_from_attrs(psd_sub_cloud))

# right panel: matching MSD, linear y-axis
msd_ax.plot(msd_sub_cloud["radius_bins"], msd_sub_cloud.T)
msd_ax.set_ylabel(label_from_attrs(msd_sub_cloud))

# both panels share the radius label and a logarithmic x-axis
radius_label = label_from_attrs(ds["radius_bins"])
for _ax in axs:
    _ax.set_xlabel(radius_label)
    _ax.set_xscale("log")

fig.suptitle(f"Sub cloud PSD and MSD\n{microphysics_title[microphysics]}")
fig.tight_layout()

fig.savefig(f"{fig_dir}/psd_msd_subcloud.png")

Surface

In [None]:
fig, axs = plt.subplots(ncols=2, figsize=(8, 4))
psd_ax, msd_ax = axs

# left panel: time-mean PSD of gridbox 1, logarithmic y-axis
psd_ax.plot(psd_surface["radius_bins"], psd_surface.T, alpha=0.5)
psd_ax.set_yscale("log")
psd_ax.set_ylabel(label_from_attrs(psd_surface))

# right panel: matching MSD, linear y-axis
msd_ax.plot(msd_surface["radius_bins"], msd_surface.T)
msd_ax.set_ylabel(label_from_attrs(msd_surface))

# both panels share the radius label and a logarithmic x-axis
radius_label = label_from_attrs(ds["radius_bins"])
for _ax in axs:
    _ax.set_xlabel(radius_label)
    _ax.set_xscale("log")

fig.suptitle(f"Surface PSD and MSD\n{microphysics_title[microphysics]}")
fig.tight_layout()

fig.savefig(f"{fig_dir}/psd_msd_surface.png")

### Comparison of PSD and MSD at cloud base and surface

Compare the PSD and MSD of the cloud layer with those of the top sub-cloud layer and of the surface gridbox.

In [None]:
# Scatter of the time-mean distributions: cloud layer on x, top sub-cloud layer on y;
# PSD in the left panel, MSD in the right panel.
fig, axs = plt.subplots(ncols=2, figsize=(8, 4))

# psd_m, psd_sem = mean_and_stderror_of_mean(psd, dims=('time',))

axs[0].plot(psd_cloud.T, psd_sub_cloud.T, alpha=0.5, marker=".", linestyle="None")
# plot_one_one reads the axis limits at call time, i.e. after the data are drawn and
# before the axes are switched to log scale; reordering changes the span of the line
plot_one_one(axs[0], color="k")
axs[0].set_yscale("log")
axs[0].set_xscale("log")
axs[0].set_ylabel("Top sub cloud layer")
axs[0].set_xlabel("Cloud layer")
axs[0].set_title(label_from_attrs(psd_cloud))


axs[1].plot(msd_cloud.T, msd_sub_cloud.T, alpha=0.5, marker=".", linestyle="None")
plot_one_one(axs[1], color="k")
axs[1].set_yscale("log")
axs[1].set_xscale("log")
axs[1].set_ylabel("Top sub cloud layer")
axs[1].set_xlabel("Cloud layer")
axs[1].set_title(label_from_attrs(msd_cloud))

# NOTE(review): the title only mentions the particle size distribution although the
# right panel shows the mass size distribution
fig.suptitle(f"Particle size distribution comparison\n{microphysics_title[microphysics]}")
fig.tight_layout()

fig.savefig(f"{fig_dir}/psd_comparison_cloud_subcloud_{microphysics}.png", dpi=300)

In [None]:
# Scatter of the time-mean distributions: cloud layer on x, gridbox 1 ("surface") on y;
# PSD in the left panel, MSD in the right panel.
fig, axs = plt.subplots(ncols=2, figsize=(8, 4))

# psd_m, psd_sem = mean_and_stderror_of_mean(psd, dims=('time',))

axs[0].plot(psd_cloud.T, psd_surface.T, alpha=0.5, marker=".", linestyle="None")
# plot_one_one reads the axis limits at call time, i.e. after the data are drawn and
# before the axes are switched to log scale; reordering changes the span of the line
plot_one_one(axs[0], color="k")
axs[0].set_yscale("log")
axs[0].set_xscale("log")
axs[0].set_ylabel("Surface Gridbox")
axs[0].set_xlabel("Cloud layer")
axs[0].set_title(label_from_attrs(psd_cloud))


axs[1].plot(msd_cloud.T, msd_surface.T, alpha=0.5, marker=".", linestyle="None")
plot_one_one(axs[1], color="k")
axs[1].set_yscale("log")
axs[1].set_xscale("log")
axs[1].set_ylabel("Surface Gridbox")
axs[1].set_xlabel("Cloud layer")
axs[1].set_title(label_from_attrs(msd_cloud))

# NOTE(review): the title only mentions the particle size distribution although the
# right panel shows the mass size distribution
fig.suptitle(f"Particle size distribution comparison\n{microphysics_title[microphysics]}")
fig.tight_layout()
fig.savefig(f"{fig_dir}/psd_comparison_cloud_surface_{microphysics}.png", dpi=300)

### Load observation dataset

In [None]:
from sdm_eurec4a.identifications import match_clouds_and_cloudcomposite
from sdm_eurec4a import RepositoryPath
from tqdm import tqdm

# Repository paths for the "levante" setup.
RP = RepositoryPath("levante")
repo_dir = RP.repo_dir
# NOTE: this rebinds `data_dir`, which an earlier cell used for the path of the CLEO
# dataset file; it is not referenced again in the cells visible here.
data_dir = RP.data_dir

# Identified cloud clusters; the dimension "time" is swapped for "cloud_id" so that
# single clouds can be selected with `.sel(cloud_id=...)`.
ds_clouds = xr.open_dataset(
    repo_dir
    / "data/observation/cloud_composite/processed/identified_clusters/identified_clusters_rain_mask_5.nc"
)
ds_clouds = ds_clouds.swap_dims({"time": "cloud_id"})
# Cloud composite observation dataset (the file name indicates SI units — TODO confirm).
ds_cloud_composite = xr.open_dataset(
    repo_dir / "data/observation/cloud_composite/processed/cloud_composite_SI_units_20241025.nc"
)

In [None]:
# Per-cloud results are collected in lists and concatenated along "cloud_id" at the end.
lwc_obs_mean_list = []
lwc_obs_sem_list = []
nbc_obs_mean_list = []
nbc_obs_sem_list = []
psd_obs_mean_list = []
psd_obs_sem_list = []


def _mean_and_sem_for_cloud(data, cloud_id):
    """
    Return the mean and the standard error of the mean of ``data`` over "time".

    Both results get a new leading "cloud_id" dimension of length one, so the
    per-cloud results can be concatenated afterwards.
    """
    mean, sem = mean_and_stderror_of_mean(data, dims=("time",))
    return (
        mean.expand_dims(dim=dict(cloud_id=[cloud_id])),
        sem.expand_dims(dim=dict(cloud_id=[cloud_id])),
    )


for cloud_id in tqdm(ds["cloud_id"]):
    # observations matched to this cloud, restricted to radii between 50e-6 and 3e-3
    ds_select = match_clouds_and_cloudcomposite(
        ds_clouds=ds_clouds.sel(cloud_id=cloud_id),
        ds_cloudcomposite=ds_cloud_composite.sel(radius=slice(50e-6, 3e-3)),
    )

    # mass size distribution times bin width, summed over radius
    lwc = (ds_select["mass_size_distribution"] * ds_select["bin_width"]).sum("radius")
    # non-normalised particle size distribution and its sum over radius
    psd = ds_select["particle_size_distribution_non_normalized"]
    nbc = psd.sum("radius", keep_attrs=True)

    for data, mean_list, sem_list in (
        (lwc, lwc_obs_mean_list, lwc_obs_sem_list),
        (nbc, nbc_obs_mean_list, nbc_obs_sem_list),
        (psd, psd_obs_mean_list, psd_obs_sem_list),
    ):
        m, s = _mean_and_sem_for_cloud(data, cloud_id)
        mean_list.append(m)
        sem_list.append(s)


(
    lwc_obs_mean,
    lwc_obs_sem,
    nbc_obs_mean,
    nbc_obs_sem,
    psd_obs_mean,
    psd_obs_sem,
) = (
    xr.concat(per_cloud, dim="cloud_id")
    for per_cloud in (
        lwc_obs_mean_list,
        lwc_obs_sem_list,
        nbc_obs_mean_list,
        nbc_obs_sem_list,
        psd_obs_mean_list,
        psd_obs_sem_list,
    )
)

  0%|          | 0/123 [00:00<?, ?it/s]
  """


In [None]:
# Sum the CLEO distributions over all radius bins in the gridbox given by `max_gridbox`.
cloud_layer_gridbox = ds["max_gridbox"]
lwc_cleo = msd_cleo.sel(gridbox=cloud_layer_gridbox).sum("radius_bins")
nbc_cleo = psd_cleo.sel(gridbox=cloud_layer_gridbox).sum("radius_bins")

### Comparison of observed and CLEO liquid water content (LWC) and total number concentration (NBC)

In [None]:
# Observations (x) against CLEO (y), one point per cloud.
# Error bars in both panels: two standard errors of the mean for the observations,
# two standard deviations over time for CLEO.
fig, axs = plt.subplots(ncols=2, figsize=(8, 4))

# The CLEO values are taken at `max_gridbox`, which the rest of the notebook calls the
# cloud layer (the sub-cloud layer is `max_gridbox - 1`), hence the legend label.
axs[0].errorbar(
    x=nbc_obs_mean,
    xerr=2 * nbc_obs_sem,
    y=nbc_cleo.mean("time"),
    yerr=2 * nbc_cleo.std("time"),
    linestyle="",
    marker=".",
    color="b",
    label="Cloud Layer",
)
axs[0].set_xlabel("Observations")
axs[0].set_ylabel("CLEO")
axs[0].set_title("Total number concentration in [m$^{-3}$]")


# The observed LWC is scaled by 1e3 to match the g m^{-3} of `lwc_cleo`.
# yerr uses the same factor of two as the number concentration panel; it was a
# single standard deviation before, which made the two panels inconsistent.
axs[1].errorbar(
    x=1e3 * lwc_obs_mean,
    xerr=1e3 * 2 * lwc_obs_sem,
    y=lwc_cleo.mean("time"),
    yerr=2 * lwc_cleo.std("time"),
    linestyle="",
    marker=".",
    color="b",
    label="Cloud Layer",
)
axs[1].set_xlabel("Observations")
axs[1].set_ylabel("CLEO")
axs[1].set_title("Liquid water content in [g m$^{-3}$]")
# axs[1].set_xlim(0, 1)
# axs[1].set_ylim(0, 1)
for _ax in axs.ravel():
    _ax.legend()

    # one-to-one line across the axis limits as they are after plotting the data
    plot_one_one(_ax, color="k", linestyle="--")

fig.suptitle(f"Observation vs. CLEO comparison\n{microphysics_title[microphysics]}")

fig.tight_layout()

fig.savefig(f"{fig_dir}/obs_vs_cleo_comparison_{microphysics}.png", dpi=300)