In [None]:
import numpy as np
import xarray as xr
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import seaborn as sns

# Cubehelix palette requested as a continuous matplotlib colormap (as_cmap=True)
# rather than as a discrete list of colours.
strength_cmap = sns.cubehelix_palette(
    start=0.5,
    rot=-0.5,
    as_cmap=True,
)

from sdm_eurec4a.visulization import (
    set_custom_rcParams,
    label_from_attrs,
    adjust_lightness_array,
    plot_one_one,
    handler_map_alpha,
    save_figure,
)
from sdm_eurec4a.reductions import mean_and_stderror_of_mean
from sdm_eurec4a.conversions import msd_from_psd_dataarray
from sdm_eurec4a import RepositoryPath
from sdm_eurec4a.input_processing import models as smodels

# set_custom_rcParams() returns what is kept here as the default colours
# (presumably it also installs the project's matplotlib rcParams — confirm).
# A second colour set is derived with a lightness argument of 0.75.
default_colors = set_custom_rcParams()
default_dark_colors = adjust_lightness_array(default_colors, 0.75)

# Repository path helper, configured for "levante".
RepoPaths = RepositoryPath("levante")

# Output directory for the figures saved by this notebook.
# parents=False: only the last path component is created; if the parent
# directory is missing, mkdir raises FileNotFoundError instead of creating it.
fig_dir = RepoPaths.fig_dir / "paper-v4.1" / "observations"
fig_dir.mkdir(exist_ok=True, parents=False)

from sdm_eurec4a import slurm_cluster as scluster

  """
  """


In [None]:
# Options handed unchanged to the project's dask/SLURM helper, which returns
# the (client, cluster) pair used by the rest of the notebook.
_cluster_options = dict(
    scale=1,
    processes=16,
    walltime="00:35:00",
    memory="16GB",
    scheduler_options={"dashboard_address": ":8686"},
)
client, cluster = scluster.init_dask_slurm_cluster(**_cluster_options)

#!/usr/bin/env bash

#SBATCH -J dask-worker
#SBATCH -e /scratch/m/m301096/dask_logs/dask-worker-%J.err
#SBATCH -o /scratch/m/m301096/dask_logs/dask-worker-%J.out
#SBATCH -p compute
#SBATCH -A um1487
#SBATCH -n 1
#SBATCH --cpus-per-task=16
#SBATCH --mem=15G
#SBATCH -t 00:35:00

/work/um1487/m301096/conda/envs/sdm_pysd_python312/bin/python -m distributed.cli.dask_worker tcp://10.128.2.29:36117 --name dummy-name --nthreads 1 --memory-limit 0.93GiB --nworkers 16 --nanny --death-timeout 60 --local-directory /scratch/m/m301096/dask_temp --interface ib0



## Data loading

In [None]:
# Root of the observational data; the model input lives below it, the CLEO
# model output below its own root.
OBS_data_dir = RepoPaths.data_dir
input_data_dir = Path(OBS_data_dir, "model/input_v4.1")
CLEO_data_dir = Path(RepoPaths.CLEO_data_dir, "output_v4.1")

# Figure output directory. parents=False means the parent directory must
# already exist; an existing target directory is accepted (exist_ok=True).
fig_dir = Path(RepoPaths.fig_dir, "paper-v4.1/observations")
fig_dir.mkdir(parents=False, exist_ok=True)

In [None]:
from sdm_eurec4a.identifications import match_clouds_and_cloudcomposite


# Open the processed cloud composite and the identified cloud clusters.
cloud_composite = xr.open_dataset(
    OBS_data_dir / Path("observation/cloud_composite/processed/cloud_composite_SI_units_20241025.nc"),
)
identified_clusters = xr.open_dataset(
    OBS_data_dir
    / Path(
        "observation/cloud_composite/processed/identified_clusters/identified_clusters_rain_mask_5.nc"
    )
)
# Index the clusters by cloud_id instead of time so a single cluster can be
# picked with .sel(cloud_id=...).
identified_clusters = identified_clusters.swap_dims({"time": "cloud_id"})

# Store the radius coordinate expanded along time as an extra variable and
# make "radius" the leading dimension of the dataset.
cloud_composite["radius2D"] = cloud_composite["radius"].expand_dims(time=cloud_composite["time"])
cloud_composite = cloud_composite.transpose("radius", ...)

# Rescale the mass size distribution by 1e3 and label it as g m^{-3} m^{-1}.
# The attrs are copied before the multiplication and re-attached afterwards.
# Both spellings ("units", used by every other attrs dict in this notebook,
# and "unit", used here originally) are overwritten so that no entry with the
# pre-conversion unit can survive next to the new one.
# NOTE(review): liquid_water_content is NOT rescaled in this cell although it
# is later labelled "g m^{-3}" — confirm the stored variable already uses that unit.
attrs = cloud_composite["mass_size_distribution"].attrs.copy()
attrs.update({"unit": "g m^{-3} m^{-1}", "units": "g m^{-3} m^{-1}"})
cloud_composite["mass_size_distribution"] = cloud_composite["mass_size_distribution"] * 1e3
cloud_composite["mass_size_distribution"].attrs = attrs

# Keep only radius labels from 10e-6 upwards (label-based slice).
cloud_composite = cloud_composite.sel(radius=slice(10e-6, None))

# Keep clusters with a duration of at least 3 s and an altitude strictly
# between 500 and 1200; everything else is dropped.
# NOTE(review): .dt.seconds is the seconds *component* of the timedelta; it
# equals the full duration only for durations shorter than one day.
identified_clusters = identified_clusters.where(
    (
        (identified_clusters.duration.dt.seconds >= 3)
        & (identified_clusters.altitude < 1200)
        & (identified_clusters.altitude > 500)
    ),
    drop=True,
)

In [None]:
def _append_time_mean_and_sem(da, means, sems):
    """Reduce ``da`` over "time" and append the two results to the given lists.

    ``mean_and_stderror_of_mean`` returns a pair; its first element is appended
    to ``means`` and its second to ``sems``.
    """
    mean, sem = mean_and_stderror_of_mean(da, dims=("time",))
    means.append(mean)
    sems.append(sem)


def _concat_clouds(arrays, long_name, units):
    """Concatenate per-cloud arrays along "cloud_id" and set their plot metadata.

    The returned DataArray carries exactly the two attributes ``long_name`` and
    ``units``.
    """
    combined = xr.concat(arrays, dim="cloud_id")
    combined.attrs = dict(long_name=long_name, units=units)
    return combined


# Per-cloud results, one entry per cloud_id, in loop order.
list_lwc, list_lwc_sem = [], []
list_lwc_50um, list_lwc_50um_sem = [], []
list_nbc, list_nbc_sem = [], []
list_nbc_50um, list_nbc_50um_sem = [], []

for cloud_id in identified_clusters["cloud_id"]:
    # Cloud-composite data matched to this cluster, tagged with its id so the
    # per-cloud results can be concatenated along cloud_id afterwards.
    cc = match_clouds_and_cloudcomposite(
        ds_clouds=identified_clusters.sel(cloud_id=cloud_id),
        ds_cloudcomposite=cloud_composite,
    )
    cc = cc.expand_dims(cloud_id=[int(cloud_id.values)])

    _append_time_mean_and_sem(cc["liquid_water_content"], list_lwc, list_lwc_sem)

    # Size distributions times bin width give per-bin values; each product is
    # needed for more than one statistic, so it is computed once.
    mass_per_bin = cc["mass_size_distribution"] * cc["bin_width"]
    number_per_bin = cc["particle_size_distribution"] * cc["bin_width"]

    # Mass in the bins with radius label >= 50e-6, summed over radius.
    _append_time_mean_and_sem(
        mass_per_bin.sel(radius=slice(50e-6, None)).sum("radius"),
        list_lwc_50um,
        list_lwc_50um_sem,
    )

    # The number concentration is kept per radius bin here (no sum over radius).
    _append_time_mean_and_sem(number_per_bin, list_nbc, list_nbc_sem)

    # Number in the bins with radius label >= 50e-6, summed over radius.
    _append_time_mean_and_sem(
        number_per_bin.sel(radius=slice(50e-6, None)).sum("radius"),
        list_nbc_50um,
        list_nbc_50um_sem,
    )


# NOTE(review): liquid_water_content is labelled g m^{-3} below, but the visible
# loading code only rescales mass_size_distribution by 1e3 — confirm the unit.
da_lwc = _concat_clouds(list_lwc, long_name="Liquid water content", units="g m^{-3}")
da_lwc_sem = _concat_clouds(
    list_lwc_sem,
    long_name="Standard error of the mean of the liquid water content",
    units="g m^{-3}",
)
da_lwc_50um = _concat_clouds(list_lwc_50um, long_name="Rain Water Content", units="g m^{-3}")
da_lwc_50um_sem = _concat_clouds(
    list_lwc_50um_sem,
    long_name="Standard error of the mean of the Rain Water Content",
    units="g m^{-3}",
)
da_nbc = _concat_clouds(list_nbc, long_name="Number concentration", units="m^{-3}")
da_nbc_sem = _concat_clouds(
    list_nbc_sem,
    long_name="Standard error of the mean of the number concentration",
    units="m^{-3}",
)
da_nbc_50um = _concat_clouds(
    list_nbc_50um,
    long_name="Number concentration above 50 µm",
    units="m^{-3}",
)
da_nbc_50um_sem = _concat_clouds(
    list_nbc_50um_sem,
    long_name="Standard error of the mean of the number concentration above 50 µm",
    units="m^{-3}",
)

# Untouched copy of the per-cloud observation statistics; later cells derive
# their working dataset from it.
ds_observations_backup = xr.Dataset(
    dict(
        liquid_water_content=da_lwc,
        liquid_water_content_sem=da_lwc_sem,
        liquid_water_content_50um=da_lwc_50um,
        liquid_water_content_50um_sem=da_lwc_50um_sem,
        number_concentration=da_nbc,
        number_concentration_sem=da_nbc_sem,
        number_concentration_50um=da_nbc_50um,
        number_concentration_50um_sem=da_nbc_50um_sem,
    )
)

In [None]:
# Quick look: both per-cloud water-content series on one explicit axes.
# Each line is labelled with its long_name so the two curves can be told
# apart; the shared y-label replaces the automatic one, which would describe
# only the series plotted last.
fig, ax = plt.subplots()
ds_observations_backup["liquid_water_content"].plot(
    ax=ax,
    label=ds_observations_backup["liquid_water_content"].attrs["long_name"],
)
ds_observations_backup["liquid_water_content_50um"].plot(
    ax=ax,
    label=ds_observations_backup["liquid_water_content_50um"].attrs["long_name"],
)
ax.set_ylabel(r"Water content [g m$^{-3}$]")
ax.legend();

[<matplotlib.lines.Line2D at 0x7ffb1d990f50>]

In [None]:
# Working dataset of the observations (same object as the backup, no copy).
ds_observations = ds_observations_backup

# Combined Eulerian model output, restricted to time labels 1500..3590
# (label-based slice, both ends included).
ds_cleo: xr.Dataset = xr.open_dataset(
    CLEO_data_dir / Path("null_microphysics/combined/eulerian_dataset_combined.nc")
).sel(time=slice(1500, 3590))

# Rescale the model liquid water content by 1e3 and relabel it; the attrs are
# assembled first and attached after the multiplication.
attrs = {
    **ds_cleo["liquid_water_content"].attrs,
    "units": "g m^{-3}",
    "long_name": "Rain Water Content",
}
ds_cleo["liquid_water_content"] = ds_cleo["liquid_water_content"] * 1e3
ds_cleo["liquid_water_content"].attrs = attrs

# Restrict both datasets to the cloud ids present in each of them.
ids = np.intersect1d(ds_observations["cloud_id"].values, ds_cleo["cloud_id"].values)
ds_observations = ds_observations.sel(cloud_id=ids)
ds_cleo = ds_cleo.sel(cloud_id=ids)

In [None]:
# Scatter of observed water content above 50 µm (x) against the modelled
# water content (y), one point per cloud, on log-log axes with a 1:1 line.
x = ds_observations["liquid_water_content_50um"]
xerr = ds_observations["liquid_water_content_50um_sem"]

# Model series at the gridbox selected through ds_cleo["max_gridbox"]:
# the time mean is the plotted value, the time standard deviation its error bar.
lwc_at_max_gridbox = ds_cleo["liquid_water_content"].sel(gridbox=ds_cleo["max_gridbox"])
y = lwc_at_max_gridbox.mean("time", keep_attrs=True)
y.attrs.update({"units": "g m^{-3}"})
yerr = lwc_at_max_gridbox.std("time", keep_attrs=True)

# Drop clouds where either value is exactly zero (they cannot be log-transformed).
indices = (x != 0) & (y != 0)
x, xerr, y, yerr = (da[indices] for da in (x, xerr, y, yerr))

# Pearson correlation in linear space and of the log-transformed values.
corr = xr.corr(x, y, dim="cloud_id")
corr_loglog = xr.corr(np.log(x), np.log(y), dim="cloud_id")

fig, ax = plt.subplots(figsize=(4, 4))
ax.scatter(x, y, marker=".", color="purple", alpha=0.75)
# Faint error bars underneath the markers.
ax.errorbar(x, y, xerr=xerr, yerr=yerr, fmt=".", color="purple", alpha=0.1)

ax.loglog()
ax.set_xlim(3e-4, 5)
ax.set_ylim(3e-4, 5)
plot_one_one(ax, color="grey", linestyle="--")
ax.set_xlabel("Observations\n" + label_from_attrs(x))
ax.set_ylabel("Model\n" + label_from_attrs(y))
ax.set_title(f"Correlation: {corr.values:.2f} | log-log Correlation: {corr_loglog.values:.2f}")
save_figure(fig, fig_dir / f"{x.name}-{y.name}")

In [None]:
# Scatter of the observed liquid water content (x) against the modelled
# water content (y), one point per cloud, on log-log axes with a 1:1 line.
x = ds_observations["liquid_water_content"]
xerr = ds_observations["liquid_water_content_sem"]

# Model series at the gridbox selected through ds_cleo["max_gridbox"]:
# the time mean is the plotted value, the time standard deviation its error bar.
lwc_at_max_gridbox = ds_cleo["liquid_water_content"].sel(gridbox=ds_cleo["max_gridbox"])
y = lwc_at_max_gridbox.mean("time", keep_attrs=True)
y.attrs.update({"units": "g m^{-3}"})
yerr = lwc_at_max_gridbox.std("time", keep_attrs=True)

# Drop clouds where either value is exactly zero (they cannot be log-transformed).
indices = (x != 0) & (y != 0)
x, xerr, y, yerr = (da[indices] for da in (x, xerr, y, yerr))

# Pearson correlation in linear space and of the log-transformed values.
corr = xr.corr(x, y, dim="cloud_id")
corr_loglog = xr.corr(np.log(x), np.log(y), dim="cloud_id")

fig, ax = plt.subplots(figsize=(4, 4))
ax.scatter(x, y, marker=".", color="purple", alpha=0.75)
# Faint error bars underneath the markers.
ax.errorbar(x, y, xerr=xerr, yerr=yerr, fmt=".", color="purple", alpha=0.1)

ax.loglog()
ax.set_xlim(3e-4, 5)
ax.set_ylim(3e-4, 5)
plot_one_one(ax, color="grey", linestyle="--")
ax.set_xlabel("Observations\n" + label_from_attrs(x))
ax.set_ylabel("Model\n" + label_from_attrs(y))
ax.set_title(f"Correlation: {corr.values:.2f} | log-log Correlation: {corr_loglog.values:.2f}")
save_figure(fig, fig_dir / f"{x.name}-{y.name}")

In [None]:
# Observations only: liquid water content (x) against the water content above
# 50 µm (y), one point per cloud, on log-log axes with a 1:1 line.
x = ds_observations["liquid_water_content"]
xerr = ds_observations["liquid_water_content_sem"]
y = ds_observations["liquid_water_content_50um"]
yerr = ds_observations["liquid_water_content_50um_sem"]

# Drop clouds where either value is exactly zero (they cannot be log-transformed).
indices = (x != 0) & (y != 0)

x, xerr, y, yerr = x[indices], xerr[indices], y[indices], yerr[indices]

# Pearson correlation in linear space and of the log-transformed values.
corr = xr.corr(x, y, dim="cloud_id")
corr_loglog = xr.corr(np.log(x), np.log(y), dim="cloud_id")

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 4))

ax.scatter(
    x=x,
    y=y,
    marker=".",
    color="purple",
    alpha=0.75,
)

# Faint error bars underneath the markers.
ax.errorbar(
    x=x,
    y=y,
    xerr=xerr,
    yerr=yerr,
    fmt=".",
    color="purple",
    alpha=0.1,
)

ax.loglog()
ax.set_xlim(3e-4, 5)
ax.set_ylim(3e-4, 5)
plot_one_one(ax, color="grey", linestyle="--")
ax.set_xlabel("Observations\n" + label_from_attrs(x))
# y is taken from ds_observations as well, so it is labelled as observations
# (the label previously read "Model", copied from the model-comparison cells).
ax.set_ylabel("Observations\n" + label_from_attrs(y))
ax.set_title(f"Correlation: {corr.values:.2f} | log-log Correlation: {corr_loglog.values:.2f}")
save_figure(fig, fig_dir / f"{x.name}-{y.name}")

In [None]:
# Observed number concentration above 50 µm (x) against the modelled number
# concentration (y), one point per cloud, on log-log axes with a 1:1 line.
x = ds_observations["number_concentration_50um"]
xerr = ds_observations["number_concentration_50um_sem"]

# Model: xi_temporal_mean divided by the gridbox volume, taken at the gridbox
# selected through ds_cleo["max_gridbox"] and summed over all radius bins.
# The quotient of two differently named arrays has no name, so one is set
# explicitly; otherwise the figure file name below would end in "-None".
y = (
    (ds_cleo["xi_temporal_mean"] / ds_cleo["gridbox_volume"])
    .sel(gridbox=ds_cleo["max_gridbox"])
    .sum("radius_bins")
    .rename("number_concentration")
)
y.attrs.update(units="m^{-3}", long_name="Number concentration")
# No error estimate is computed for the model value; zero-length error bars are drawn.
yerr = y * 0

# Drop clouds where either value is exactly zero (they cannot be log-transformed).
indices = (x != 0) & (y != 0)

x, xerr, y, yerr = x[indices], xerr[indices], y[indices], yerr[indices]

# Pearson correlation in linear space and of the log-transformed values.
corr = xr.corr(x, y, dim="cloud_id")
corr_loglog = xr.corr(np.log(x), np.log(y), dim="cloud_id")

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 4))

ax.scatter(
    x=x,
    y=y,
    marker=".",
    color="orange",
    alpha=0.75,
)

# Faint error bars underneath the markers.
ax.errorbar(
    x=x,
    y=y,
    xerr=xerr,
    yerr=yerr,
    fmt=".",
    color="orange",
    alpha=0.1,
)

ax.loglog()
ax.set_xlim(1e-1, 2e4)
ax.set_ylim(1e-1, 2e4)
plot_one_one(ax, color="grey", linestyle="--")
ax.set_xlabel("Observations\n" + label_from_attrs(x))
ax.set_ylabel("Model\n" + label_from_attrs(y))
ax.set_title(f"Correlation: {corr.values:.2f} | log-log Correlation: {corr_loglog.values:.2f}")
save_figure(fig, fig_dir / f"{x.name}-{y.name}")

In [None]:
# Stored particle-size-distribution parameters, restricted to the cloud ids
# shared by observations and model (ids).
ds_parameters = xr.open_dataset(
    input_data_dir / "particle_size_distribution_parameters.nc"
).sel(cloud_id=ids)

# Same selection for the parameter file with the "linear_space" suffix.
ds_parameters_linear = xr.open_dataset(
    input_data_dir / "particle_size_distribution_parameters_linear_space.nc"
).sel(cloud_id=ids)

In [None]:
# NOTE(review): this cell repeats the preceding cell verbatim — it re-opens
# both parameter files and applies the same cloud_id selection again.
_parameter_file = input_data_dir / "particle_size_distribution_parameters.nc"
ds_parameters = xr.open_dataset(_parameter_file)
ds_parameters = ds_parameters.sel(cloud_id=ids)

_parameter_file_linear = input_data_dir / "particle_size_distribution_parameters_linear_space.nc"
ds_parameters_linear = xr.open_dataset(_parameter_file_linear)
ds_parameters_linear = ds_parameters_linear.sel(cloud_id=ids)

In [None]:
# Evaluation grid: 100 geometrically spaced radii from 50e-6 to 3e-3.
radius = np.geomspace(50e-6, 3e-3, 100)

# The grid as a DataArray whose values equal its own "radius" coordinate,
# with a cloud_id dimension added from the model dataset.
t_test = xr.DataArray(radius, dims="radius", coords={"radius": radius}).expand_dims(
    cloud_id=ds_cleo["cloud_id"]
)

# Bin widths as centred differences, (r[i+1] - r[i-1]) / 2. The two shifts
# leave NaN at both ends of the grid, which are filled by linear extrapolation.
radius_coord = t_test["radius"]
centred_width = ((radius_coord - radius_coord.shift(radius=2)) / 2).shift(radius=-1)
w_test = centred_width.interpolate_na("radius", method="linear", fill_value="extrapolate")

In [None]:
# Double log-normal model called on the radius grid with the stored parameters
# of each cloud (the parameters are read from ds_parameters, not fitted here).
_ln_parameter_names = ("mu1", "mu2", "sigma1", "sigma2", "scale_factor1", "scale_factor2")
fitted = smodels.double_ln_normal_distribution(
    t=t_test,
    **{name: ds_parameters[name] for name in _ln_parameter_names},
)

# Model values times bin width, and the result of the project's PSD -> MSD
# conversion applied to the same product.
fitted_msd = msd_from_psd_dataarray(fitted * w_test)
fitted_psd = fitted * w_test

In [None]:
# Double log-normal model called on the radius grid with the stored
# "geometric" parameters of each cloud (read from ds_parameters_linear, not
# fitted here); x is declared to be in linear space.
_geometric_parameters = dict(
    mu1=ds_parameters_linear["geometric_mean1"],
    mu2=ds_parameters_linear["geometric_mean2"],
    sigma1=ds_parameters_linear["geometric_std_dev1"],
    sigma2=ds_parameters_linear["geometric_std_dev2"],
    scale1=ds_parameters_linear["scale_factor1"],
    scale2=ds_parameters_linear["scale_factor2"],
)
fitted_linear = smodels.double_log_normal_distribution_all(
    x=t_test,
    **_geometric_parameters,
    parameter_space="geometric",
    x_space="linear",
)

# Model values times bin width, and the result of the project's PSD -> MSD
# conversion applied to the same product.
fitted_linear_msd = msd_from_psd_dataarray(fitted_linear * w_test)
fitted_linear_psd = fitted_linear * w_test