# Notebook to share our initial data with Mampi

This notebook prepares and exports our initial data so that it can be shared with Mampi.

In [None]:
import numpy as np
import scipy as sp
import pandas as pd
import xarray as xr
import textwrap

from pathlib import Path
import matplotlib.pyplot as plt

from sdm_eurec4a import RepositoryPath

from sdm_eurec4a.visulization import label_from_attrs, save_figure, set_custom_rcParams, plot_one_one

from sdm_eurec4a.reductions import mean_and_stderror_of_mean

# Apply the project's custom plotting defaults
# (presumably matplotlib rcParams — confirm in sdm_eurec4a.visulization).
set_custom_rcParams()

# Repository path helper configured with the "levante" key; its `data_dir` and
# `CLEO_data_dir` attributes are used below to locate inputs and outputs.
RepoPaths = RepositoryPath("levante")

## Share data with Mampi

In [None]:
# Input locations: observational data, the model input fits and the CLEO output.
OBS_data_dir = RepoPaths.data_dir
input_data_dir = OBS_data_dir / Path("model/input_v4.1")
CLEO_data_dir = RepoPaths.CLEO_data_dir / Path("output_v4.1")

# Output locations for everything that is shared.
# `parents=False` is kept on purpose: a missing parent directory should fail
# loudly instead of silently creating a tree in the wrong place.
sharing_dir = RepoPaths.data_dir / Path("sharing/")
sharing_dir.mkdir(exist_ok=True, parents=False)

fig_dir = sharing_dir / Path("figures")
fig_dir.mkdir(exist_ok=True, parents=False)

# The export cells write into `sharing_dir / "datasets"`. Neither `to_netcdf`
# nor `to_csv` creates missing directories, so create it up front.
datasets_dir = sharing_dir / "datasets"
datasets_dir.mkdir(exist_ok=True, parents=False)

In [None]:
# Observed cloud clusters, indexed by `cloud_id` instead of `time`.
clusters_file = OBS_data_dir / Path(
    "observation/cloud_composite/processed/identified_clusters/identified_clusters_rain_mask_5.nc"
)
identified_clusters = xr.open_dataset(clusters_file).swap_dims({"time": "cloud_id"})

# CLEO output: eulerian view without microphysics, eulerian view with
# condensation and the conservation dataset of the condensation setup.
eulerian_null_file = CLEO_data_dir / Path("null_microphysics/combined/eulerian_dataset_combined.nc")
eulerian_condensation_file = CLEO_data_dir / Path("condensation/combined/eulerian_dataset_combined.nc")
conservation_file = CLEO_data_dir / Path("condensation/combined/conservation_dataset_combined.nc")

ds_cleo = xr.open_dataset(eulerian_null_file)
ds_cleo_condensation = xr.open_dataset(eulerian_condensation_file)
ds_box_model = xr.open_dataset(conservation_file)

# Restrict everything to the clouds present in both observations and simulations.
cloud_ids = np.intersect1d(identified_clusters.cloud_id, ds_cleo.cloud_id)

identified_clusters = identified_clusters.sel(cloud_id=cloud_ids)
ds_cleo_condensation = ds_cleo_condensation.sel(cloud_id=cloud_ids)
ds_cleo = ds_cleo.sel(cloud_id=cloud_ids)

# Per cloud, pick the gridbox given by `max_gridbox`.
ds_cloud = ds_cleo.sel(gridbox=ds_cleo["max_gridbox"])

composite_file = OBS_data_dir / Path(
    "observation/cloud_composite/processed/cloud_composite_SI_units_20241025.nc"
)
cloud_composite = xr.open_dataset(composite_file)

In [None]:
# store information in README.md
def ds_to_readme(ds: "xr.Dataset", filepath: Path) -> None:
    """
    Write the attributes of a xarray.Dataset to a README.md file.

    The file has three sections: the dataset-level attributes, the attributes
    of every entry in ``ds.variables`` and the text representation ``str(ds)``.
    Here an example of the variable section:
    - var1
      - attr1: value1
      - attr2: value2
    - var2
      - attr1: value1
      - attr2: value2

    Attribute values are converted with ``str`` and line breaks inside them are
    replaced by spaces, so non-string attributes (numbers, arrays) are written
    instead of raising an ``AttributeError``.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset whose attributes are documented.
    filepath : Path
        Target file. It is created or overwritten and written as UTF-8.
    """

    def _attr_line(indent: str, name: str, value) -> str:
        # Build the value outside the f-string: a backslash inside a
        # replacement field is a SyntaxError before Python 3.12.
        text = str(value).replace("\n", " ")
        return f"{indent}- {(name + ':').ljust(15)} {text}\n"

    separator = "\n===============================\n\n"

    # Explicit encoding: attributes contain non-ASCII characters such as "µ".
    with open(filepath, "w", encoding="utf-8") as f:

        f.write("### Description of the dataset:\n")
        for k, v in ds.attrs.items():
            f.write(_attr_line("", k, v))

        f.write(separator)
        f.write("### More information about the variables in the dataset:\n")
        for key in ds.variables:
            f.write(f"- {key}\n")
            for k, v in ds[key].attrs.items():
                f.write(_attr_line("  ", k, v))

        f.write(separator)
        f.write("### Xarray NETCDF dataset information:\n")

        f.write("````python\n" + str(ds) + "\n````\n")

In [None]:
# Compute, per cloud, the temporal mean and the standard error of the mean (SEM)
# of the observed liquid water content (all droplets) and rain water content
# (radius >= 50e-6 in the units of the `radius` coordinate), then export them.

# Mass per bin: the distribution multiplied by the bin width and by 1e3.
# The result is labelled "g m^{-3}" (presumably the factor converts kg to g — confirm
# against the units of `mass_size_distribution`).
cloud_composite["mass_size_distribution_non_normalized"] = (
    1e3 * cloud_composite["mass_size_distribution"] * cloud_composite["bin_width"]
)
cloud_composite["mass_size_distribution_non_normalized"].attrs.update(
    long_name="mass size distribution non normalized", units="g m^{-3}"
)


# One single-cloud DataArray per entry; concatenated along `cloud_id` after the loop.
list_liquid_water_content_mean = list()
list_liquid_water_content_sem = list()
list_rwc_50µm_mean = list()
list_rwc_50µm_sem = list()

for cloud_id in identified_clusters["cloud_id"]:
    # Time window of this cloud in the composite dataset.
    start = identified_clusters["start"].sel(cloud_id=cloud_id).values
    stop = identified_clusters["end"].sel(cloud_id=cloud_id).values

    _ds = cloud_composite.sel(time=slice(start, stop))
    # Subset with only the radius bins from 50e-6 upwards (rain drops).
    _ds_50µm = _ds.sel(radius=slice(50e-6, None))

    # NOTE: despite its name, `rwc` holds the content summed over ALL radius bins,
    # i.e. the liquid water content (see the long_name set below).
    rwc = _ds["mass_size_distribution_non_normalized"].sum("radius", keep_attrs=True)
    rwc.attrs.update(
        long_name="Liquid Water Content\n(All water droplets)",
    )
    # Remember the attributes so they can be re-applied to mean and SEM.
    attrs = rwc.attrs.copy()

    lwc_mean, lwc_sem = mean_and_stderror_of_mean(rwc, dims="time")
    # Add a length-1 `cloud_id` dimension so the results can be concatenated later.
    lwc_mean = lwc_mean.expand_dims(cloud_id=[cloud_id])
    lwc_mean.attrs.update(**attrs)
    lwc_mean.attrs.update(
        description=textwrap.dedent(
            """
            The liquid water content (LWC) is calculated by summing the mass of all water droplets in the cloud.
            The values are the temporal means of all measurements within an individual cloud.
            """
        )
    )

    lwc_sem = lwc_sem.expand_dims(cloud_id=[cloud_id])
    lwc_sem.attrs.update(**attrs)
    lwc_sem.attrs.update(
        description=textwrap.dedent(
            """
            This standard error of the mean correponds to the temporal mean of all individual measurements within an individual cloud.
            Which is given as variable `liquid_water_content_mean`.
            """
        )
    )

    list_liquid_water_content_mean.append(lwc_mean)
    list_liquid_water_content_sem.append(lwc_sem)

    # Same procedure for the rain drop subset.
    rwc_50µm = _ds_50µm["mass_size_distribution_non_normalized"].sum("radius", keep_attrs=True)
    rwc_50µm.attrs.update(
        long_name="Rain Water Content\n(Drops radii > 50µm)",
    )
    attrs = rwc_50µm.attrs.copy()

    rwc_50µm_mean, rwc_50µm_sem = mean_and_stderror_of_mean(rwc_50µm, dims="time")
    rwc_50µm_mean = rwc_50µm_mean.expand_dims(cloud_id=[cloud_id])
    rwc_50µm_mean.attrs.update(**attrs)
    rwc_50µm_mean.attrs.update(
        description=textwrap.dedent(
            """
            The rain water content (RWC) is calculated by summing the mass of all rain droplets in the cloud.
            Rain drops have radius above 50µm.
            The values are the temporal means of all measurements within an individual cloud.
            """
        )
    )
    rwc_50µm_sem = rwc_50µm_sem.expand_dims(cloud_id=[cloud_id])
    rwc_50µm_sem.attrs.update(**attrs)
    rwc_50µm_sem.attrs.update(
        description=textwrap.dedent(
            """
            This standard error of the mean correponds to the temporal mean of all individual measurements within an individual cloud.
            Which is given as variable `rain_water_content_mean`.
            """
        )
    )

    list_rwc_50µm_mean.append(rwc_50µm_mean)
    list_rwc_50µm_sem.append(rwc_50µm_sem)

# Assemble the per-cloud results into one dataset along `cloud_id`.
ds_observations = xr.Dataset()
ds_observations["liquid_water_content_mean"] = xr.concat(list_liquid_water_content_mean, dim="cloud_id")
ds_observations["liquid_water_content_sem"] = xr.concat(list_liquid_water_content_sem, dim="cloud_id")
ds_observations["rain_water_content_mean"] = xr.concat(list_rwc_50µm_mean, dim="cloud_id")
ds_observations["rain_water_content_sem"] = xr.concat(list_rwc_50µm_sem, dim="cloud_id")
ds_observations.attrs.update(
    creator="Nils Niebaum",
    description=textwrap.dedent(
        """
        This dataset provides the in situ RWC for all clouds used for the CLEO analysis.
        The liquid water content (LWC) and the rain water content (RWC) for an individual cloud is calculated as the mean over all measurements within the cloud.
        For the LWC all droplets are considered.
        For the RWC only droplets with a radius above 50µm are considered.
    """
    ),
)
# Export as netCDF, CSV and README.
# The directory `sharing_dir / "datasets"` has to exist at this point; the writers do not create it.
# NOTE(review): this CSV uses the default separator, whereas the other exports in this notebook
# pass sep=";", and the README here is named "*_README.md" instead of "*.md" — confirm which is intended.
ds_observations.to_netcdf(sharing_dir / "datasets" / "observations-clouds-lwc-rwc.nc")
ds_observations.to_dataframe().to_csv(sharing_dir / "datasets" / "observations-clouds-lwc-rwc.csv")
ds_to_readme(ds_observations, sharing_dir / "datasets" / "observations-clouds-lwc-rwc_README.md")

In [None]:
# Observed LWC (x) against observed RWC (y) per cloud, with SEM error bars.
obs_lwc = ds_observations["liquid_water_content_mean"]
obs_rwc = ds_observations["rain_water_content_mean"]

plt.errorbar(
    x=obs_lwc,
    y=obs_rwc,
    xerr=ds_observations["liquid_water_content_sem"],
    yerr=ds_observations["rain_water_content_sem"],
    fmt=".",
    alpha=0.7,
)

# Log-log axes with a one-to-one reference line.
plt.xscale("log")
plt.yscale("log")
plot_one_one(plt.gca(), N=100, color="k")

axis_limits = (1e-3, 1e1)
plt.xlim(*axis_limits)
plt.ylim(*axis_limits)

plt.xlabel(label_from_attrs(obs_lwc))
plt.ylabel(label_from_attrs(obs_rwc))

Text(0, 0.5, 'Rain Water Content\n(Drops radii > 50µm) $\\left[  g m^{-3}  \\right]$')

In [None]:
# Export the selected clusters as netCDF, ";"-separated CSV and README,
# all sharing the same file stem.
filepath = sharing_dir / "datasets/identified-clusters-simulated"
netcdf_target = filepath.with_suffix(".nc")
csv_target = filepath.with_suffix(".csv")
readme_target = filepath.with_suffix(".md")

identified_clusters.to_netcdf(netcdf_target)
identified_clusters.to_dataframe().to_csv(csv_target, sep=";")
ds_to_readme(identified_clusters, readme_target)

Store all parameter files as CSV as well, and write a README for each of them.

In [None]:
# Copy every parameter file of the model input to the sharing directory and
# store it additionally as ";"-separated CSV together with a README.
parent_dir = sharing_dir / "distribution-and-thermodynamic-fits"
parent_dir.mkdir(exist_ok=True, parents=False)

for filepath in (RepoPaths.data_dir / Path("model/input_v4.1/")).glob("*.nc"):
    new_filepath = parent_dir / filepath.name

    # The context manager closes the source file on every path, including the
    # `continue` below, and keeps it open while the CSV and README are written.
    with xr.open_dataset(filepath) as ds:
        try:
            ds.to_netcdf(new_filepath)
        except PermissionError:
            # Best effort: report the file that could not be written and skip
            # its CSV/README as well.
            print(f"PermissionError: {new_filepath}")
            continue

        ds.to_dataframe().to_csv(new_filepath.with_suffix(".csv"), sep=";")

        ds_to_readme(ds, new_filepath.with_suffix(".md"))

In [None]:
# Fit parameters of the particle size distribution, restricted to the analysed clouds.
psd_parameter_file = parent_dir / "particle_size_distribution_parameters_linear_space.nc"
ds_psd = xr.open_dataset(psd_parameter_file).sel(cloud_id=cloud_ids)

In [None]:
# open the file which contains the DSD and the relative humidity

from pySD.initsuperdropsbinary_src.probdists import DoubleLogNormal
from pySD.initsuperdropsbinary_src.attrsgen import AttrsGeneratorBinWidth
from pySD.initsuperdropsbinary_src.rgens import SampleLog10RadiiWithBinWidth
from pySD.initsuperdropsbinary_src.rgens import MonoAttrGen

# Build the fitted distribution from the dataset: every annotated argument of
# `DoubleLogNormal.__init__` is looked up by name in `ds_psd`.
# "return" is skipped because `__annotations__` also lists the return
# annotation, which is not a constructor argument.
keys = [name for name in DoubleLogNormal.__init__.__annotations__ if name != "return"]
mapping = {k: ds_psd[k] for k in keys}
xiprobdist = DoubleLogNormal(**mapping)


# attrsgen = AttrsGeneratorBinWidth(
#     radiigen=SampleLog10RadiiWithBinWidth(rspan=(0.5, 3.5), n=100),
#     dryradiigen=MonoAttrGen(1e-9),
#     xiprobdist=DoubleLogNormal(

In [None]:
# Assemble the dataset that is shared: radius grid, fitted and simulated particle
# size distribution, thermodynamic profiles, simulated water content and the
# evaporation fraction.
ds_niebaum = xr.Dataset()

# Radius grid, scaled by 1e-3 and labelled in mm
# (assumes `ds_cleo["radius_bins"]` is given in µm — TODO confirm).
ds_niebaum["radius"] = ds_cleo["radius_bins"] * 1e-3  # mm
ds_niebaum["radius"].attrs.update(
    {
        "long_name": "radius",
        "units": "mm",
    }
)
ds_niebaum["radius_SI"] = ds_niebaum["radius"] * 1e-3  # m
ds_niebaum["radius_SI"].attrs.update(
    {
        "long_name": "radius in SI",
        "units": "m",
    }
)

# Centred difference of the neighbouring radii. `shift` fills with NaN, so the
# first and the last bin have no width.
ds_niebaum["radius_bin_width"] = 0.5 * (
    ds_niebaum["radius"].shift(radius_bins=-1) - ds_niebaum["radius"].shift(radius_bins=1)
)
ds_niebaum["radius_bin_width"].attrs.update(
    {
        "long_name": "width of each radius bin",
        "units": "mm",
    }
)

# print(ds_niebaum.cloud_id.size)
# create the theoretical psd from the xiprobdist
ds_niebaum["psd"] = xiprobdist(ds_niebaum["radius_SI"]).T
# now psd is given in m^-3 m^-1
# to get it in m^-3 mm^-1 we need to multiply by 1e-3
ds_niebaum["psd"] = ds_niebaum["psd"] * 1e-3
ds_niebaum["psd"].attrs.update(
    long_name="N",
    units="m^{-3} mm^{-1}",
    description=textwrap.dedent(
        """
            Theoretical particle size distribution (PSD) calculated from the DoubleLogNormal distribution.
            This is the fitted distribution to the observational data.
            """
    ),
)

# Simulated counts per bin and volume. The description names the variable that
# actually holds the bin width (`radius_bin_width`), which is exported below.
ds_niebaum["psd_cleo"] = ds_cloud["xi_temporal_mean"] / ds_cloud["gridbox_volume"]
ds_niebaum["psd_cleo"].attrs.update(
    long_name="N in CLEO",
    units="m^{-3}",
    description=textwrap.dedent(
        """
        The particle size distribution (PSD) which is used in the CLEO model.
        Please note, that this is not a normalized distribution.
        It counts the multiplicities of all superdroplets within a radius bin.
        To normalize the distribution, please use the `radius_bin_width` variable.
        """
    ),
)


ds_niebaum["air_temperature"] = ds_cleo["air_temperature"]
ds_niebaum["relative_humidity"] = ds_cleo["relative_humidity"]

ds_niebaum["altitude"] = ds_cleo["gridbox_coord3"]

ds_niebaum = ds_niebaum.transpose("radius_bins", "gridbox", "cloud_id", ...)


# Mass per volume summed over all radius bins, scaled by 1e3 and labelled in g m^{-3}.
# NOTE(review): the local name says LWC while the attributes say rain water content — confirm.
lwc_cleo = (1e3 * ds_cloud["mass_represented_temporal_mean"] / ds_cloud["gridbox_volume"]).sum(
    "radius_bins"
)
lwc_cleo.attrs.update(
    long_name="Rain water content in CLEO",
    units="g m^{-3}",
    description="Rain water content, which CLEO simulates for the cloud layer. It is the sum of mass represented by all superdroplets in the cloud.",
)
ds_niebaum["rain_water_content_cleo"] = lwc_cleo


# Time window used for the averages (described below as the stationary state).
time_slice = slice(1500, 3490)
# add the evaporation fraction
ds_niebaum["evaporation_fraction"] = -ds_box_model["source"].sel(time=time_slice).mean(
    "time", keep_attrs=True
) / ds_box_model["inflow"].sel(time=time_slice).mean("time", keep_attrs=True)
# ds_niebaum['evaporation_fraction_median'] = - ds_box_model['source'].sel(time = time_slice).median('time', keep_attrs = True) / ds_box_model['inflow'].sel(time = time_slice).median('time', keep_attrs = True)
ds_niebaum["evaporation_fraction"].attrs.update(
    long_name="Evaporation fraction",
    units="1",
    description=textwrap.dedent(
        """
        The evaporation fraction is calculated as the ratio of the evaporated mass (E) within the whole domain to the inflow into the domain from the cloud (I).
        Both E and I are averaged over the stationary state the their ratio is calculated.
        """
    ),
)


ds_niebaum.attrs.update(
    creator="Nils Niebaum",
    description=textwrap.dedent(
        """
        This dataset provides the theoretical particle size distribution (PSD) for the CLEO analysis.
        The PSD is calculated from the DoubleLogNormal distribution which is fitted to the observational data.
        The dataset also contains the air temperature, the relative humidity, the altitude and the rain water content (RWC) for each gridbox and each cloud.
        
        Most of the variables are given for the setup with only evaporation enabled. No collisional effects are considered.
        """
    ),
)


# ds_niebaum.to_netcdf(sharing_dir / "niebaum-CLEO.nc")
# ds_to_readme(ds_niebaum, sharing_dir / "niebaum-CLEO.md")

# Export the cloud-level subset. `radius_bin_width` is included because the
# description of `psd_cleo` tells the reader to normalise with it.
ds_niebaum_cloud = ds_niebaum[
    [
        "radius",
        "radius_bin_width",
        "psd",
        "psd_cleo",
        "rain_water_content_cleo",
        "evaporation_fraction",
    ]
]
ds_niebaum_cloud.to_netcdf(sharing_dir / "datasets" / "niebaum-simulations.nc")
ds_niebaum_cloud.to_dataframe().to_csv(sharing_dir / "datasets" / "niebaum-simulations.csv")
ds_to_readme(ds_niebaum_cloud, sharing_dir / "datasets" / "niebaum-simulations.md")

In [None]:
# Quick check: fitted PSD against the CLEO PSD normalised by the bin width.
psd_fitted = ds_niebaum["psd"]
psd_cleo_normalized = ds_niebaum["psd_cleo"] / ds_niebaum["radius_bin_width"]

plt.plot(psd_fitted, psd_cleo_normalized, marker="x")

for set_scale in (plt.xscale, plt.yscale):
    set_scale("log")
# plt.xlim(1e-3, None)
# plt.ylim(1e-3, None)

In [None]:
def _save_png(figure, stem):
    """Save ``figure`` as PNG below ``fig_dir`` using the given file stem."""
    save_figure(figure, fig_dir / stem, formats=[".png"])


def _plot_size_distribution(values, stem, zoom_stem):
    """Plot ``values`` against the radius on log-log axes.

    The figure is saved twice: first with the full y-range, then zoomed in
    on the larger radii.
    """
    figure, axes = plt.subplots()

    axes.plot(ds_niebaum["radius"], values)
    axes.set_xscale("log")
    axes.set_yscale("log")

    axes.set_xlabel(label_from_attrs(ds_niebaum["radius"]))
    axes.set_ylabel(label_from_attrs(ds_niebaum["psd"]))

    axes.set_ylim(1e-4, 1e8)

    figure.tight_layout()
    _save_png(figure, stem)

    # Zoomed view of the same figure.
    axes.set_xlim(1e-1, 3e0)
    axes.set_yticks([1e0, 1e5])
    axes.set_ylim(1e-4, 1e6)
    figure.tight_layout()
    _save_png(figure, zoom_stem)

    return figure, axes


def _plot_profile(values, heights, stem):
    """Plot ``values`` against ``heights``, label both axes from their attributes and save."""
    figure, axes = plt.subplots()
    axes.plot(values, heights)
    axes.set_xlabel(label_from_attrs(values))
    axes.set_ylabel(label_from_attrs(heights))

    figure.tight_layout()
    _save_png(figure, stem)

    return figure, axes


# Fitted particle size distribution.
fig, ax = _plot_size_distribution(
    ds_niebaum["psd"],
    "niebaum_raindrop_size_distribution",
    "niebaum_raindrop_size_distribution-zoom",
)

# CLEO particle size distribution, normalised by the bin width.
fig, ax = _plot_size_distribution(
    ds_niebaum["psd_cleo"] / ds_niebaum["radius_bin_width"],
    "niebaum_raindrop_size_distribution-cleo",
    "niebaum_raindrop_size_distribution-cleo-zoom",
)

# Vertical profiles of relative humidity and air temperature.
x = ds_niebaum["relative_humidity"]
y = ds_niebaum["altitude"]
fig, ax = _plot_profile(x, y, "niebaum_relative_humidity")

x = ds_niebaum["air_temperature"]
y = ds_niebaum["altitude"]
fig, ax = _plot_profile(x, y, "niebaum_air_temperature")



In [None]:
plt.figure(figsize=(8, 6))

# Simulated water content on the x-axis (no error bar), observed means with SEM
# on the y-axis; one series for the LWC and one for the RWC.
cleo_water_content = ds_niebaum["rain_water_content_cleo"]
observed_series = (
    ("liquid_water_content_mean", "liquid_water_content_sem"),
    ("rain_water_content_mean", "rain_water_content_sem"),
)
for mean_name, sem_name in observed_series:
    observed_mean = ds_observations[mean_name]
    plt.errorbar(
        x=cleo_water_content,
        xerr=0,
        y=observed_mean,
        yerr=ds_observations[sem_name],
        fmt=".",
        alpha=0.7,
        label=label_from_attrs(observed_mean, return_units=False),
    )

# Axis labels: fixed name plus the units taken from the attributes.
cleo_units = label_from_attrs(cleo_water_content, return_name=False)
in_situ_units = label_from_attrs(ds_observations["liquid_water_content_mean"], return_name=False)
plt.xlabel("RWC (in CLEO) " + cleo_units)
plt.ylabel("RWC (in-situ) " + in_situ_units)

plt.xscale("log")
plt.yscale("log")
plt.legend()
plt.xlim(4e-4, 1e2)
plt.xticks([1e-2, 1e0])
plt.ylim(2e-5, 7e1)
plt.yticks([1e-4, 1e-2, 1e0])

plot_one_one(plt.gca(), N=100, color="k", linewidth=0.5)

explanation = textwrap.fill(
    "For in-situ: dots are the temporal mean for each individual cloud and errorbars are SEM",
    width=60,
)
title = "RWC comparsion between CLEO and in-situ values.\n" + explanation

plt.title(title)
plt.tight_layout()

save_figure(plt.gcf(), fig_dir / "RWC_comparison", formats=[".png"])

In [None]:
# Histogram of the evaporation fraction over all clouds, in bins of width 0.1
# between 0 and 1.
for var in [
    "evaporation_fraction",
]:
    plt.hist(
        ds_niebaum[var],
        bins=np.arange(0, 1.1, 0.1),
        edgecolor="k",
        alpha=0.5,
        # label = var
    )
# No `plt.legend()` here: the histogram carries no label, so the call only
# produced a "no artists with labels" warning.
plt.xlabel("REF (fraction)")
plt.ylabel("Occurence")
plt.title("Evaporation fraction")
plt.tight_layout()
save_figure(
    plt.gcf(),
    fig_dir / "evaporation_fraction",
    formats=[
        ".png",
    ],
)

  plt.legend()


In [None]:
# Cloud ids ordered by their evaporation fraction. Both operands come from
# `ds_niebaum`; the generic name `ds` is not bound to it at this point when the
# notebook is run top to bottom.
ef_ids = ds_niebaum["cloud_id"].sortby(ds_niebaum["evaporation_fraction"])

In [None]:
fig, ax = plt.subplots()

ds = ds_niebaum

# Bring radius, PSD and evaporation fraction onto the same (cloud_id, radius_bins) layout.
dim_order = ("cloud_id", "radius_bins")
y = ds["psd"].transpose(*dim_order)
x = ds["radius"].expand_dims(cloud_id=ds["cloud_id"]).transpose(*dim_order)
c = ds["evaporation_fraction"].expand_dims(radius_bins=ds["radius_bins"]).transpose(*dim_order)


cmap = plt.get_cmap("plasma_r")

# One line per cloud, coloured by its evaporation fraction.
for cloud_id in ds["cloud_id"]:
    line_color = cmap(ds["evaporation_fraction"].sel(cloud_id=cloud_id))
    plt.plot(
        x.sel(cloud_id=cloud_id),
        y.sel(cloud_id=cloud_id),
        c=line_color,
        alpha=0.7,
    )

# Zero-size scatter: draws nothing visible but provides the mappable for the colorbar.
plt.scatter(x, y, c=c, cmap=cmap, s=0)

plt.colorbar(label=label_from_attrs(ds["evaporation_fraction"]))
plt.xscale("log")
plt.yscale("log")

plt.xlabel(label_from_attrs(x))
plt.ylabel(label_from_attrs(y))

fig.tight_layout()
save_figure(fig, fig_dir / "niebaum_psd_evaporation_fraction", formats=[".png"])

## Look into the Sarkar distributions

In [None]:
# Load the MATLAB file with the Sarkar (2023) rain retrievals from the sharing directory.
# NOTE(review): `sp.io` relies on the `scipy.io` submodule being reachable via
# `import scipy as sp` — confirm for the SciPy version in use.
mat = sp.io.loadmat(sharing_dir / "Sarkar2023-Rain_retrievals_9to13Feb2020.mat")
# Show the available keys as cell output.
mat.keys()

dict_keys(['__header__', '__version__', '__globals__', 'Dcb', 'Ncb', 'RH', 'R_radarret', 'cbh', 'sigcb', 'time', 'z_norm'])

In [None]:
# List the distinct values stored under "cbh" (mapped to `cloud_base_height` further below).
np.unique(mat["cbh"])

array([ 540.,  560.,  575.,  620.,  650.,  665.,  680.,  740.,  755.,
        800.,  815.,  845.,  860.,  890.,  905.,  920.,  935.,  950.,
        965., 1010., 1025., 1070., 1085., 1130.,   nan])

In [None]:
# Translate the MATLAB variables into an xarray.Dataset with descriptive names.
# NOTE(review): the key "Dcb" may denote a diameter rather than a radius — confirm
# before comparing the resulting distribution against radius-based ones.
key_mapping = {
    "Dcb": "geometric_mean",
    "Ncb": "number_concentration",
    "sigcb": "geometric_standard_deviation",
    "time": "time",
    "RH": "surface_relative_humidity",
    "cbh": "cloud_base_height",
}

ds_sarkar = xr.Dataset()

for old_key, new_key in key_mapping.items():
    # print(old_key, new_key)
    v = mat[old_key]

    # Every variable has to be effectively one-dimensional.
    assert np.max(v.shape) == np.size(v.squeeze())

    if new_key == "normalized_height":
        v = v.squeeze()
        ds_sarkar[new_key] = xr.DataArray(v, dims="normalized_height")
    elif new_key == "time":
        # Times are stored in days; they are expressed relative to the first
        # sample and anchored at the first day covered by the file
        # (9 to 13 Feb 2020, see the file name).
        v = mat["time"].squeeze()
        v = v - v.min()
        v = pd.to_datetime(v.squeeze(), unit="d", origin=pd.Timestamp("2020-02-09"))
        ds_sarkar[new_key] = xr.DataArray(v, dims="time")
    elif v.shape[0] == 1:
        # Row vectors of shape (1, n) become variables along `time`.
        v = v.squeeze()
        ds_sarkar[new_key] = xr.DataArray(v, dims="time")


# Keep only the time steps at which every variable is valid.
ds_sarkar_null = ~ds_sarkar.isnull()

keys = list(ds_sarkar_null.keys())

mask = ds_sarkar_null[keys[0]]
for key in keys[1:]:
    mask = mask & ds_sarkar_null[key]

ds_sarkar = ds_sarkar.where(mask, drop=True)

# Convert geometric mean from mm to µm

ds_sarkar["geometric_mean"] = ds_sarkar["geometric_mean"] * 1e3  # Convert from mm to µm
# The unit label has to match the conversion above and the `radius` coordinate (µm).
ds_sarkar["geometric_mean"].attrs.update(
    units="µm", long_name="Geometric Mean of Raindrop Size Distribution"
)
ds_sarkar["number_concentration"].attrs.update(
    units="m^{-3}", long_name="Number Concentration of Raindrops"
)
ds_sarkar["geometric_standard_deviation"].attrs.update(
    units="1", long_name="Geometric Standard Deviation of Raindrop Size Distribution"
)

ds_sarkar["surface_relative_humidity"].attrs.update(units="%", long_name="Surface Relative Humidity")

# Constant cloud base temperature of 292 K with the shape of the time series.
ds_sarkar["cloud_base_temperature"] = ds_sarkar["surface_relative_humidity"] * 0 + 292  # K

ds_sarkar["cloud_base_temperature"].attrs.update(units="K", long_name="Cloud Base Temperature")


# add altitude, air temperature and relative humidity fields_sarkar

ds_sarkar["altitude"] = xr.DataArray(np.linspace(0, 1300, 100), dims="altitude")
ds_sarkar["altitude"].attrs.update(units="m", long_name="Altitude")


# Temperature profile with a constant gradient of 9.8e-3 K per metre between
# the surface and the cloud base; values above the cloud base are masked.
ds_sarkar["surface_air_temperature"] = (
    9.8e-3 * ds_sarkar["cloud_base_height"] + ds_sarkar["cloud_base_temperature"]
)
ds_sarkar["surface_air_temperature"].attrs.update(units="K", long_name="Surface Air Temperature")

ds_sarkar["air_temperature"] = ds_sarkar["surface_air_temperature"] - 9.8e-3 * ds_sarkar["altitude"]
ds_sarkar["air_temperature"] = ds_sarkar["air_temperature"].where(
    ds_sarkar["altitude"] <= ds_sarkar["cloud_base_height"]
)
ds_sarkar["air_temperature"].attrs.update(units="K", long_name="Air Temperature")

# Relative humidity rises linearly from the surface value to 100 at the cloud base.
slope = (100 - ds_sarkar["surface_relative_humidity"]) / ds_sarkar["cloud_base_height"]
ds_sarkar["relative_humidity"] = ds_sarkar["surface_relative_humidity"] + slope * ds_sarkar["altitude"]
ds_sarkar["relative_humidity"] = ds_sarkar["relative_humidity"].where(
    ds_sarkar["altitude"] <= ds_sarkar["cloud_base_height"]
)
ds_sarkar["relative_humidity"].attrs.update(units=r"\%", long_name="Relative Humidity")


# add particle size distribution

# 1000 logarithmically spaced radii between 100 and 3000 (labelled in µm).
radius = 1e3 * np.geomspace(0.1, 3, int(1e3))

ds_sarkar["radius"] = xr.DataArray(radius, dims="radius", attrs={"units": "µm", "long_name": "Radius"})


def log_normal(r, mu, sigma, N) -> np.ndarray:
    """
    Evaluate a log-normal size distribution dN/dr.

    The returned density is taken with respect to ``r``, so integrating it
    over all ``r`` yields ``N`` and its units are those of ``N`` per unit of
    ``r``:

        dN/dr = N / (r * ln(sigma) * sqrt(2 pi))
                * exp(-0.5 * ((ln(r) - ln(mu)) / ln(sigma)) ** 2)

    Parameters
    ----------
    r : array-like
        Sizes at which the distribution is evaluated (must be positive).
    mu : array-like
        Geometric mean, in the same units as ``r``.
    sigma : array-like
        Geometric standard deviation (dimensionless, greater than 1).
    N : array-like
        Total number concentration.

    Returns
    -------
    Array-like of the broadcast shape of the inputs.
    """
    ln_sigma = np.log(sigma)
    ln_mu = np.log(mu)
    # Normal distribution in ln(r) with standard deviation ln(sigma); the
    # additional 1 / r converts the density per ln(r) into a density per r.
    factor = N / (r * ln_sigma * np.sqrt(2 * np.pi))
    exponent = -0.5 * ((np.log(r) - ln_mu) / ln_sigma) ** 2

    return factor * np.exp(exponent)


# Evaluate the log-normal distribution on the radius grid for every time step.
lognormal_arguments = dict(
    r=ds_sarkar["radius"],
    mu=ds_sarkar["geometric_mean"],
    sigma=ds_sarkar["geometric_standard_deviation"],
    N=ds_sarkar["number_concentration"],
)
ds_sarkar["psd"] = log_normal(**lognormal_arguments)
ds_sarkar["psd"].attrs.update(
    units="m^{-3} µm^{-1}",
    long_name="Raindrop Size Distribution\n Normalized by bin width",
)

# Display the dataset as cell output.
ds_sarkar

In [None]:
# Time-mean liquid water content in the lowest gridbox and in the gridbox just
# below `max_gridbox`, averaged over `time_slice`.
lwc_condensation = ds_cleo_condensation["liquid_water_content"]
top_gridbox = ds_cleo_condensation["max_gridbox"] - 1

surface = lwc_condensation.sel(gridbox=0).sel(time=time_slice).mean("time")
top = lwc_condensation.sel(gridbox=top_gridbox).sel(time=time_slice).mean("time")

# Histogram of the relative difference between top and surface.
plt.hist((top - surface) / top)

(array([53., 34., 23.,  6.,  1.,  2.,  0.,  0.,  0.,  7.]),
 array([0.00124258, 0.09821865, 0.19519472, 0.29217079, 0.38914686,
        0.48612293, 0.583099  , 0.68007507, 0.77705114, 0.87402721,
        0.97100328]),
 <BarContainer object of 10 artists>)

In [None]:
# fig, ax = plt.subplots()

# ax.plot(
#     ds_sarkar['radius'],
#     ds_sarkar['psd'].T,
# )
# ax.set_xscale('log')
# ax.set_yscale('log')
# ax.set_ylim(1e-2, 1e4)

# ax.set_xlabel(label_from_attrs(ds_sarkar['radius']))
# ax.set_ylabel(label_from_attrs(ds_sarkar['psd']))
# save_figure(fig, fig_dir / "raindrop_size_distribution", formats = ['.png',])


# fig, ax = plt.subplots()
# x = ds_sarkar['relative_humidity'].T
# y = ds_sarkar['altitude']
# ax.plot(
#     x, y,
# )
# ax.set_xlabel(label_from_attrs(x))
# ax.set_ylabel(label_from_attrs(y))
# save_figure(fig, fig_dir / "relative_humidity", formats = ['.png',])


# fig, ax = plt.subplots()
# x = ds_sarkar['air_temperature'].T
# y = ds_sarkar['altitude']
# ax.plot(
#     x, y,
# )
# ax.set_xlabel(label_from_attrs(x))
# ax.set_ylabel(label_from_attrs(y))
# save_figure(fig, fig_dir / "air_temperature", formats = ['.png',])