This notebook compares the datasets created by CLEO for all clusters that were identified using the rain mask with 5-timestep holes removed.


In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.animation as animation
import numpy as np
import xarray as xr
import yaml

from sdm_eurec4a.visulization import set_custom_rcParams, adjust_lightness_array, handler_map_alpha

from sdm_eurec4a.identifications import (
    match_clouds_and_cloudcomposite,
    match_clouds_and_dropsondes,
    select_individual_cloud_by_id,
)

from sdm_eurec4a import RepositoryPath
from sdm_eurec4a.input_processing import transfer
from sdm_eurec4a.reductions import shape_dim_as_dataarray
from sdm_eurec4a.conversions import msd_from_psd

In [None]:
def adjust_spines(ax, visible_spines, position=("outward", 5)):
    """Offset the requested spines of ``ax`` and hide all remaining ones.

    Parameters
    ----------
    ax
        Axes-like object providing ``label_outer`` and a ``spines`` mapping.
    visible_spines
        Container with the names of the spines to keep. Every kept spine is
        moved to ``position``.
    position
        Value forwarded unchanged to ``set_position`` of each kept spine.
    """
    ax.label_outer(remove_inner_ticks=False)

    for name, spine in ax.spines.items():
        if name not in visible_spines:
            spine.set_visible(False)
            continue
        spine.set_position(position)

In [None]:
# Reset matplotlib to its default style before the project-specific settings are applied.
plt.style.use("default")
# set_custom_rcParams returns a value that is kept as `default_colors`
# (presumably the colour cycle it configures - TODO confirm against sdm_eurec4a).
default_colors = set_custom_rcParams()
# Second palette derived from the first one with a lightness factor of 0.75.
darker_colors = adjust_lightness_array(default_colors, 0.75)

# Root directory of the repository for the "levante" setup; all data paths below are relative to it.
REPOSITORY_ROOT = RepositoryPath("levante").get_repo_dir()

output_dir = REPOSITORY_ROOT / Path("data/model/input_examples/")
# The directory is not created here; enable the next line if it does not exist yet.
# output_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Read the YAML configuration which describes the selected cluster.
config_yaml_filepath = REPOSITORY_ROOT / Path("data/model/input/new/clusters_18.yaml")

with config_yaml_filepath.open("r") as file:
    config_yaml = yaml.safe_load(file)

cloud_config = config_yaml["cloud"]
identification_type = cloud_config["identification_type"]
cloud_id = cloud_config["cloud_id"]

# Location of the processed CLEO output belonging to this cluster.
path2CLEO = Path("/home/m/m301096/CLEO")
cleo_data_dir = path2CLEO / "data/output"
cleo_dataset_dir = cleo_data_dir / f"processed/rain/{identification_type}_{cloud_id}"
cleo_output_path = cleo_dataset_dir / "full_dataset.nc"

# Figures go to a directory named after the cluster; it is created when missing.
fig_path = REPOSITORY_ROOT / Path(f"results/CLEO_output/no_aerosols/{identification_type}_{cloud_id}")
fig_path.mkdir(parents=True, exist_ok=True)

# Identified clusters, reduced to the single cluster selected by `cloud_id`.
clusters = xr.open_dataset(
    REPOSITORY_ROOT
    / Path("data/observation/cloud_composite/processed/identified_clouds/identified_clusters_rain_mask_5.nc")
)
cluster = select_individual_cloud_by_id(clusters, cloud_id)

# Distances between dropsondes and the identified clusters.
distance_clusters = xr.open_dataset(
    REPOSITORY_ROOT
    / Path("data/observation/combined/distance/distance_dropsondes_identified_clusters_rain_mask_5.nc")
)

# Cloud composite, opened in chunks of 1000 along time and matched to the cluster.
cloud_composite = xr.open_dataset(
    REPOSITORY_ROOT / Path("data/observation/cloud_composite/processed/cloud_composite_si_units.nc"),
    chunks={"time": 1000},
)
cloud_composite = match_clouds_and_cloudcomposite(
    ds_clouds=cluster,
    ds_cloudcomposite=cloud_composite,
    dim="time",
)

drop_sondes = xr.open_dataset(
    REPOSITORY_ROOT / Path("data/observation/dropsonde/processed/drop_sondes.nc")
)

# The temporal limit is stored as "<number> <unit word>". Only the first character of
# the unit word is handed to np.timedelta64 as the unit code.
dropsonde_distance = cloud_config["dropsonde_distance"]
dt = dropsonde_distance["max_temporal_distance"].split(" ")
max_temporal_distance = np.timedelta64(int(dt[0]), dt[1][0])
max_spatial_distance = dropsonde_distance["max_spatial_distance"]

# Dropsondes within the configured temporal and spatial distance of the cluster.
drop_sondes = match_clouds_and_dropsondes(
    ds_clouds=cluster,
    ds_sonde=drop_sondes,
    ds_distance=distance_clusters,
    max_temporal_distance=max_temporal_distance,
    max_spatial_distance=max_spatial_distance,
)


### Load CLEO output and preprocess

- Convert the multiplicity $\xi$ from #/gridbox to #/m^3
- Calculate the mass of each superdroplet (SD) and the total mass represented by each SD

In [None]:
# Open the processed CLEO output of the selected cluster.
ds_cleo = xr.open_dataset(cleo_output_path)
# Rescale the radius by 1e-6 (presumably micrometres -> metres; confirm against the CLEO output units).
ds_cleo["radius"] = ds_cleo["radius"] * 1e-6
# Mass of a sphere with this radius: volume 4/3 * pi * r^3 times a density factor of 1000.
ds_cleo["mass"] = 4 / 3 * np.pi * ds_cleo["radius"] ** 3 * 1000  # 1000 is the density factor (presumably water in kg/m^3), so the result is a mass

# Keep the original multiplicity, then divide it by 20**3
# (presumably the volume of a 20 m x 20 m x 20 m gridbox - TODO confirm) to obtain #/m^3.
ds_cleo["xi_per_gridbox"] = ds_cleo["xi"]
ds_cleo["xi"] = ds_cleo["xi_per_gridbox"] / 20**3
# create total represented mass (single-droplet mass times rescaled multiplicity)
ds_cleo["mass_represented"] = ds_cleo["mass"] * ds_cleo["xi"]

Reconstruct the fitted distribution

In [None]:
# Parameters of the fitted particle size distribution as stored in the configuration.
parameters = config_yaml["particle_size_distribution"]["parameters"]

# Rebuild the log-normal PSD object from the three stored parameter sets.
psd = transfer.PSD_LnNormal(
    **{
        name: parameters[name]
        for name in ("geometric_means", "geometric_sigmas", "scale_factors")
    }
)

# First split position of the air temperature fit, used as cloud base in this notebook.
cloud_base = config_yaml["thermodynamics"]["air_temperature"]["parameters"]["x_split"][0]

Calculate the mass size distribution

In [None]:
# Mass size distribution derived from the cloud composite with msd_from_psd's default PSD variable.
cloud_composite["mass_size_distribution"] = msd_from_psd(cloud_composite)
# Evaluate the fitted PSD object on the radius coordinate of the cloud composite.
# NOTE: `psd` is rebound to a NumPy array in the 2D binning cell further down this file,
# so this cell has to run before that one.
cloud_composite["particle_size_distribution_fit"] = psd.eval_func(cloud_composite.radius)
# Same conversion as above, but based on the fitted PSD variable.
cloud_composite["mass_size_distribution_fit"] = msd_from_psd(
    cloud_composite, psd_name="particle_size_distribution_fit"
)
# get the 2D radius
# (presumably the radius expanded to the shape of the PSD variable - TODO confirm in sdm_eurec4a.reductions)
cloud_composite["radius_2D"] = shape_dim_as_dataarray(
    cloud_composite["particle_size_distribution"], output_dim="radius"
)

### Create variables for $r_{init}$, $r_{max}$, $r_{final}$ and the corresponding $m$ and $t$ values

In [None]:
# Mass represented by each superdroplet: single-droplet mass times multiplicity.
# The multiplicity `xi` was rescaled from "per gridbox" to "per m^3" when the CLEO
# output was loaded, so the product is a mass concentration (kg/m^3), not a mass.
ds_cleo["mass_represented"] = (ds_cleo["mass"] * ds_cleo["xi"]).assign_attrs(
    units="kg/m^3",
    long_name="Mass represented by the superdroplet",
)

In [None]:
# Per-superdroplet diagnostics: times of key events (leaving the domain, reaching cloud
# base, reaching the maximum radius) and radius / mass at those events.
#
# Only droplets which reach the lowest gridbox (minimum of coord3 below 20 m) are of
# interest. The filter is currently disabled, so all superdroplets are kept:
# ds_cleo = ds_cleo.where(ds_cleo["coord3"].min("time") <= 20, drop=True)
# ds_cleo = ds_cleo.sortby(ds_cleo["radius"].isel(time = 0))

# assign_attrs returns a new object, so its result has to be the value that is stored.
ds_cleo["minimum_coord3"] = ds_cleo["coord3"].min("time").assign_attrs(
    units="m",
    long_name="Minimum height of the droplet in the domain",
)

# --- event times (one value per superdroplet) --------------------------------------
# Time at which each droplet is at its lowest position. Indexing only the time
# coordinate avoids indexing every variable of the dataset.
ds_cleo["time_domain_leave"] = (
    ds_cleo["time"]
    .isel(time=ds_cleo["coord3"].argmin("time"))
    .assign_attrs(
        units="s",
        long_name="Time when droplet leaves the domain",
    )
)

# First time at which the droplet is at or below cloud base.
ds_cleo["time_at_cloud_base"] = (
    ds_cleo["time"]
    .where(ds_cleo["coord3"] <= cloud_base)
    .min("time")
    .assign_attrs(
        units="s",
        long_name="Time when droplet reaches cloud base",
    )
)

# argmax returns positions along "time", not time values, so it is used as an
# indexer into the time coordinate instead of being compared with it.
ds_cleo["time_at_maximum_radius"] = (
    ds_cleo["time"]
    .isel(time=ds_cleo["radius"].argmax("time"))
    .assign_attrs(
        units="s",
        long_name="Time when droplet reaches maximum radius",
    )
)

# --- radius at the events ----------------------------------------------------------
# NOTE(review): apart from the "init" values, the fields below keep the full time
# dimension and are NaN everywhere except at the time step of the respective event.
# Reduce over "time" first if one value per droplet is needed.
ds_cleo["radius_at_init"] = ds_cleo["radius"].isel(time=0).assign_attrs(
    units="m",
    long_name="Initial radius of the droplet",
)

ds_cleo["radius_at_cloud_base"] = (
    ds_cleo["radius"]
    .where(ds_cleo["time"] == ds_cleo["time_at_cloud_base"])
    .assign_attrs(
        units="m",
        long_name="Radius of the droplet at cloud base",
    )
)

ds_cleo["radius_at_domain_leave"] = (
    ds_cleo["radius"]
    .where(ds_cleo["time"] == ds_cleo["time_domain_leave"])
    .assign_attrs(
        units="m",
        long_name="Radius of the droplet when it leaves the domain",
    )
)

ds_cleo["radius_maximum"] = (
    ds_cleo["radius"]
    .where(ds_cleo["time"] == ds_cleo["time_at_maximum_radius"])
    .assign_attrs(
        units="m",
        long_name="Maximum radius of the droplet",
    )
)

# --- mass at the events --------------------------------------------------------------
ds_cleo["mass_init"] = ds_cleo["mass"].isel(time=0).assign_attrs(
    units="kg",
    long_name="Initial mass of the droplet",
)

ds_cleo["mass_at_cloud_base"] = (
    ds_cleo["mass"]
    .where(ds_cleo["time"] == ds_cleo["time_at_cloud_base"])
    .assign_attrs(
        units="kg",
        long_name="Mass of the droplet at cloud base",
    )
)
# Backward-compatible alias: this quantity used to be stored as "mass_cloud_base",
# while later cells read "mass_at_cloud_base".
ds_cleo["mass_cloud_base"] = ds_cleo["mass_at_cloud_base"]

ds_cleo["mass_at_domain_leave"] = (
    ds_cleo["mass"]
    .where(ds_cleo["time"] == ds_cleo["time_domain_leave"])
    .assign_attrs(
        units="kg",
        long_name="Mass of the droplet when it leaves the domain",
    )
)

ds_cleo["mass_maximum"] = (
    ds_cleo["mass"]
    .where(ds_cleo["time"] == ds_cleo["time_at_maximum_radius"])
    .assign_attrs(
        units="kg",
        long_name="Maximum mass of the droplet",
    )
)
# ds_cleo["droplet_growth"] = ds_cleo["radius_at_domain_leave"] - ds_cleo["radius_at_init"]
# ds_cleo["droplet_growth_above_cloud"] = ds_cleo["radius_at_cloud_base"] - ds_cleo["radius_at_init"]
# ds_cleo["droplet_growth_below_cloud"] = ds_cleo["radius_at_domain_leave"] - ds_cleo["radius_at_cloud_base"]

# Eulerian view on PSD and MSD

To change from a Lagrangian point of view 
````
dimensions = ("time", "sd_id")
````
to a (more or less) Eulerian point of view makes it possible to plot e.g. the PSD and MSD at specific time steps and heights.
The PSD and MSD can be retrieved from the multiplicity $\xi$, radius $r$ and spherical mass $m$ of the $N$ SDs.
For a given time step $\tilde{t}$ we can bin by radius and altitude, giving the new dimensions:
````
dimensions = ("time", "radius_bins", "altitude_bins")
````
- PSD : $\sum_{id = 0}^{N} \xi_{id}^{binned}$
- MSD : $\sum_{id = 0}^{N} \xi_{id}^{binned} \cdot m_{id}^{binned}$

#### Using xarray groupby which is super slow.

In [None]:
def calculate_psd_cleo(ds, r_bins=None):
    """Sum the multiplicity ``xi`` per radius bin, separately for every gridbox.

    Parameters
    ----------
    ds
        Dataset providing the variables ``xi``, ``radius`` and ``sdgbxindex``.
    r_bins
        Radius bin edges. Defaults to 100 logarithmically spaced edges
        between 1e-7 and 1e-3.

    Returns
    -------
    DataArray of the summed ``xi`` with the dimensions ``sdgbxindex`` and
    ``radius_bins_mid``. ``sdgbxindex`` is labelled with the gridbox index of
    each group, ``radius_bins_mid`` with the mid point of each radius bin.
    """
    if r_bins is None:
        r_bins = np.logspace(-7, -3, 100)

    # `otypes` keeps the vectorised function usable when there is nothing to convert.
    interval_mid = np.vectorize(lambda interval: interval.mid, otypes=[float])

    gridbox_ids = []
    binned = []
    for gridbox_id, ds_gridbox in ds.groupby(ds["sdgbxindex"]):
        summed = ds_gridbox["xi"].groupby_bins(ds_gridbox["radius"], bins=r_bins).sum()
        # Replace the interval labels by the bin mid points.
        summed["radius_bins_mid"] = ("radius_bins", interval_mid(summed["radius_bins"].values))
        summed = summed.swap_dims({"radius_bins": "radius_bins_mid"})
        gridbox_ids.append(gridbox_id)
        binned.append(summed)

    # Concatenating along a named DataArray labels the new dimension with the real
    # gridbox indices rather than with the position of each group in the list.
    gridbox_dim = xr.DataArray(gridbox_ids, dims="sdgbxindex", name="sdgbxindex")
    return xr.concat(objs=binned, dim=gridbox_dim)

### Use 2D binning with two nested for loops.

In [None]:
# Define the radius and altitude bins
r_bins = np.logspace(-7, -3, 100)
h_bins = np.arange(0, 1200, 20)

# Find the bin indices for r and h
# In other words digitize the continous values of r and h into the bins
r_bin_indices = np.digitize(ds_cleo["radius"], r_bins) - 1
h_bin_indices = np.digitize(ds_cleo["coord3"], h_bins) - 1

# Initialize the arrays to store the results
m, i, j = len(ds_cleo["time"]), len(r_bins - 1), len(h_bins - 1)
psd = np.zeros((m, i, j))
msd = np.zeros((m, i, j))
xi = np.zeros((m, i, j))

# Loop over the unique bin combinations
for r_bin in range(i):
    for h_bin in range(j):
        # Find the indices where r and h fall into this bin
        indices = (r_bin_indices == r_bin) & (h_bin_indices == h_bin)
        # Sum the corresponding values xi along the sd_id dimension and store the result of the
        # Particle size distribution
        res = np.where(indices, ds_cleo["xi"], np.nan)
        psd[:, r_bin, h_bin] = np.nansum(res, axis=1)
        # Sum the corresponding values the represented mass along the sd_id dimension and store the result of the
        # Particle size distribution
        res = np.where(indices, ds_cleo["mass_represented"], np.nan)
        msd[:, r_bin, h_bin] = np.nansum(res, axis=1)

        xi = np.where(indices, ds_cleo["xi"], np.nan)
         = np.nansum(xi, axis=1)

Create the Eulerian Dataset

In [None]:
# Wrap the binned NumPy arrays into a Dataset. Both fields share the layout
# (time, radius_bins, height_bins) and are labelled with the bin edge arrays.
ds_euler = xr.Dataset(
    data_vars={
        "particle_size_distribution": (
            ("time", "radius_bins", "height_bins"),
            psd,
            dict(long_name="Particle size distribution", units="1/m^3"),
        ),
        "mass_size_distribution": (
            ("time", "radius_bins", "height_bins"),
            msd,
            dict(long_name="Mass size distribution", units="kg/m^3"),
        ),
    },
    coords=dict(
        time=ds_cleo["time"],
        radius_bins=r_bins,
        height_bins=h_bins,
    ),
)

Animate the evolution

In [None]:
# Display the time steps selected for the animation.
# NOTE(review): within the visible notebook `t_select` is first assigned in the
# animation cell below, so this cell only works after that cell has been run.
t_select

In [None]:
# Animate the binned PSD (left) and MSD (right) using every third output time step.
t_select = ds_cleo["time"][::3].data
t = t_select[0]
fig, axs = plt.subplots(ncols=2, figsize=(16, 9))

# Draw the first frame; the meshes are updated in place while animating.
quad_psd = axs[0].pcolormesh(
    ds_euler["radius_bins"],
    ds_euler["height_bins"],
    ds_euler["particle_size_distribution"].sel(time=t, method="nearest").T,
    shading="nearest",
    cmap="Reds",
    norm=mcolors.LogNorm(vmin=1e-4, vmax=1e4),
)
quad_msd = axs[1].pcolormesh(
    ds_euler["radius_bins"],
    ds_euler["height_bins"],
    ds_euler["mass_size_distribution"].sel(time=t, method="nearest").T,
    shading="nearest",
    cmap="Reds",
    norm=mcolors.LogNorm(vmin=1e-18, vmax=1e0),
)

axs[0].set_title("Particle size distribution")
axs[1].set_title("Mass size distribution")


cbar_psd = fig.colorbar(quad_psd, ax=axs[0], orientation="vertical")
cbar_msd = fig.colorbar(quad_msd, ax=axs[1], orientation="vertical")

cbar_psd.set_label("PSD $\\#/m^3$")
cbar_msd.set_label("MSD $kg/m^3$")

for ax in axs.flatten():
    ax.set_ylabel("Altitude $m$")
    ax.set_xlabel("Radius $m$")
    # ax.set_title(f"{t}")
    ax.set_xscale("log")
    ax.set_xlim((6e-08, 2e-03))
title = fig.suptitle("Time: ")


def draw(t):
    """Show the PSD and MSD of time ``t`` (in seconds) and update the figure title."""
    z = ds_euler["particle_size_distribution"].sel(time=t).T.data
    quad_psd.set_array(z.ravel())
    z = ds_euler["mass_size_distribution"].sel(time=t).T.data
    quad_msd.set_array(z.ravel())
    # Round to whole seconds first and split afterwards, so that the minutes are
    # floored (50 s is "00 min 50 s") and the seconds never show up as 60.
    minutes, seconds = divmod(int(round(float(t))), 60)
    title.set_text(f"Time : {minutes:02d} min {seconds:02d} s")
    return quad_psd, quad_msd, title


def init():
    """Draw the second selected time step (not passed to FuncAnimation below)."""
    return draw(t_select[1])


def animate(iter):
    """Frame callback of FuncAnimation: draw the ``iter``-th selected time step."""
    t = t_select[iter]
    return draw(t)


anim = animation.FuncAnimation(fig, animate, frames=len(t_select), interval=50, blit=False, repeat=False)
anim.save(fig_path / "animation.gif", writer="imagemagick", fps=10)

MovieWriter imagemagick unavailable; using Pillow instead.


## Looking at the evaporating droplets

In [None]:
# Keep only entries whose mass when leaving the domain does not exceed the mass at
# cloud base; all other entries are masked by `where`.
# NOTE(review): this reads ds_cleo["mass_at_cloud_base"], so a variable with exactly
# this name has to exist before the cell is run.
# NOTE(review): both operands are built with `where(time == <event time>)`, so they
# appear to be non-NaN only at their own event time step - confirm that the comparison
# selects what is intended.
ds_cleo.where(ds_cleo["mass_at_domain_leave"] <= ds_cleo["mass_at_cloud_base"])

In [None]:
# Quick look at the first time at which each superdroplet is at or below cloud base.
ds_cleo["time_at_cloud_base"].plot()

[<matplotlib.lines.Line2D at 0x7fff29598230>]

In [None]:
# %%
# Altitude of every superdroplet over time, drawn as plain lines without markers.
style = dict(
    marker="None",
    linestyle="-",
    # markersize = 1,
    alpha=0.8,
)

fig, ax = plt.subplots(ncols=1, figsize=(16, 9))
ax.plot(ds_cleo.time, ds_cleo.coord3, **style)
# Time is the quantity on the x axis, so it is set as the axis label, not as the title.
ax.set_xlabel("Time $[s]$")
ax.set_ylabel("Altitude $[m]$")
ax.axhline(cloud_base, color="k", linestyle="--", label="cloud base")
ax.legend(loc="lower left")
# ax.grid(True)

# NOTE(review): the title mentions a random sample and top/bottom panels, but this
# figure has a single panel showing all superdroplets - confirm the intended wording.
fig.suptitle(
    f"Cloud {cloud_id} at {cluster.time.dt.date.astype(str).values[0]} - Random sample of SD\nTop: Whole CLEO output.   Bottom: SDs which show decrease in $r$  "
)

Text(0.5, 0.98, 'Cloud 18 at 2020-01-26 - Random sample of SD\nTop: Whole CLEO output.   Bottom: SDs which show decrease in $r$  ')