In [None]:
import numpy as np
import xarray as xr
from pathlib import Path
import awkward as ak

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import seaborn as sns
from sdm_eurec4a.visulization import set_custom_rcParams


from pySD.sdmout_src import sdtracing
from pySD.sdmout_src import supersdata
from pySD.sdmout_src import pygbxsdat, pysetuptxt, supersdata

# Apply the plotting defaults provided by sdm_eurec4a (presumably matplotlib rcParams -- confirm).
set_custom_rcParams()
# Cubehelix colormap shared by the 2D plots of this notebook.
strength_cmap = sns.cubehelix_palette(start=0.5, rot=-0.5, as_cmap=True)

In [None]:
# Name of the microphysics setup whose output is analysed; it is inserted into
# the data directory path in the loading cell.
# Exactly one value is active. The alternatives stay as comments so that switching
# setups is a one-line change instead of relying on "the last assignment wins".
# microphysics = "condensation"
# microphysics = "collision_condensation"
microphysics = "coalbure_condensation_large"
# microphysics = "coalbure_condensation_small"

In [None]:
# Output directory of one run. The cloud id is parsed from the directory name,
# which is expected to look like "cluster_<id>".
# NOTE(review): absolute, user-specific path -- consider a configurable base directory.
data_dir = Path(f"/home/m/m301096/CLEO/data/output_v4.0/{microphysics}/cluster_384")
cloud_id = int(data_dir.name.split("_")[1])

# output_dir = data_dir / "processed"
# output_dir.mkdir(exist_ok=True, parents=False)

# output_path = output_dir / "eulerian_dataset.nc"
# output_path.parent.mkdir(exist_ok=True)

# Input files of this run, all located below data_dir.
setupfile_path = data_dir / "config" / "eurec4a1d_setup.txt"
statsfile_path = data_dir / "config" / "eurec4a1d_stats.txt"
zarr_path = data_dir / "eurec4a1d_sol.zarr"
gridfile_path = data_dir / "share/eurec4a1d_ddimlessGBxboundaries.dat"


# read in constants and initial setup from setup .txt file
config = pysetuptxt.get_config(str(setupfile_path), nattrs=3, isprint=False)
consts = pysetuptxt.get_consts(str(setupfile_path), isprint=False)
gridbox_dict = pygbxsdat.get_gridboxes(str(gridfile_path), consts["COORD0"], isprint=False)

# Raw model output: rename "gbxindex" to "gridbox", round the time coordinate to
# one decimal and load the whole dataset into memory.
ds_zarr = xr.open_zarr(zarr_path, consolidated=False)
ds_zarr = ds_zarr.rename({"gbxindex": "gridbox"})
ds_zarr["time"] = np.round(ds_zarr["time"], 1)
ds_zarr = ds_zarr.compute()


# Pre-processed Eulerian dataset of the same run, with the same time rounding.
# Non-positive radius bin values are replaced by 1e-3, presumably so that the
# bins can be drawn on logarithmic axes -- TODO confirm.
ds_eulerian = xr.open_dataset(data_dir / "processed/eulerian_dataset.nc")
ds_eulerian["time"] = np.round(ds_eulerian["time"], 1)
ds_eulerian["radius_bins"] = ds_eulerian["radius_bins"].where(ds_eulerian["radius_bins"] > 0, 1e-3)

Reading binary file:
 /home/m/m301096/CLEO/data/output_v4.0/coalbure_condensation_large/cluster_384/share/eurec4a1d_ddimlessGBxboundaries.dat


In [None]:
# Profiles of relative humidity (left) and air temperature (right) against gridbox,
# sharing the y-axis.
fig, axs = plt.subplots(ncols=2, figsize=(6, 3), sharey=True)
ax_humidity, ax_temperature = axs
ds_eulerian["relative_humidity"].plot(ax=ax_humidity, y="gridbox")
ds_eulerian["air_temperature"].plot(ax=ax_temperature, y="gridbox")

[<matplotlib.lines.Line2D at 0x7fff5b389be0>]

In [None]:
# Time mean of `massdelta_condensation` over the time-coordinate range 2000 to 3000,
# drawn as a profile against gridbox.
ds_eulerian["massdelta_condensation"].sel(time=slice(2000, 3000)).mean("time").plot(y="gridbox")

[<matplotlib.lines.Line2D at 0x7fff5aff1820>]

In [None]:
# Range of the time coordinate that is treated as the stationary phase.
stationary_phase = slice(1500, 3500)

fig, axs = plt.subplots(ncols=2, figsize=(8, 4), sharey=True, width_ratios=[1, 0.5])
ax_evolution, ax_mean = axs

# `massmom1` for gridbox labels 0 to 37, scaled by a constant factor.
# NOTE(review): 100**2 * 20 looks like a gridbox volume -- confirm and name it.
scaling = 1 / (100**2 * 20)
data = scaling * ds_zarr["massmom1"].sel(gridbox=slice(0, 37))

# Left: full time evolution. Right: mean over the stationary phase.
data.plot(ax=ax_evolution, x="time", y="gridbox", cmap="plasma_r")
data.sel(time=stationary_phase).mean("time").plot(ax=ax_mean, y="gridbox")

# Mark both edges of the stationary phase on the time axis.
for edge in (stationary_phase.start, stationary_phase.stop):
    ax_evolution.axvline(edge, color="k")

ax_evolution.set_title("Temporal evolution with stationary phase in black")
ax_mean.set_title("Mean over stationary phase")

fig.tight_layout()

In [None]:
# Range of the time coordinate that is treated as the stationary phase.
stationary_phase = slice(1500, 3500)

gridboxes = [30, 25, 20, 15]

# One figure per gridbox: time evolution per radius bin (left) and the mean over
# the stationary phase (right).
for gbx in gridboxes:
    fig, axs = plt.subplots(ncols=2, figsize=(8, 4), sharey=True, width_ratios=[1, 0.5])
    ax_evolution, ax_mean = axs

    # "xi" divided by the gridbox volume, for the current gridbox only.
    data = (ds_eulerian["xi"] / ds_eulerian["gridbox_volume"]).sel(gridbox=gbx)

    data.plot(ax=ax_evolution, x="time", y="radius_bins", cmap=strength_cmap, vmin=0)
    data.sel(time=stationary_phase).mean("time").plot(ax=ax_mean, y="radius_bins")

    # Logarithmic radius axis, cut off below 50.
    ax_evolution.set_yscale("log")
    ax_evolution.set_ylim(50, None)

    for edge in (stationary_phase.start, stationary_phase.stop):
        ax_evolution.axvline(edge, color="k")

    ax_evolution.set_title("Temporal evolution with stationary phase in black")
    ax_mean.set_title("Mean over stationary phase")
    # NOTE(review): the title speaks of superdroplet numbers, but the plotted
    # quantity is "xi" per volume -- confirm that the title is intended.
    fig.suptitle(f"Number of superdroplets within gridbox {gbx}")
    fig.tight_layout()

In [None]:
# Range of the time coordinate that is treated as the stationary phase.
stationary_phase = slice(1500, 3500)

# Radius ranges (bounds in the units of `radius_bins`) that are summed up
# separately; the first entry covers all radii.
number_slices = (
    slice(0, np.inf),
    slice(1000, 3000),
    slice(400, 1000),
    slice(200, 400),
    slice(105, 200),
    slice(75, 105),
    slice(5, 75),
    slice(0, 5),
)
# One figure per radius range: time evolution per gridbox (left) and the mean
# over the stationary phase (right).
for nslice in number_slices:
    fig, axs = plt.subplots(ncols=2, figsize=(8, 4), sharey=True, width_ratios=[1, 0.5])

    # number_superdroplets per volume, rescaled by the volume of gridbox 0,
    # restricted to gridbox labels 0 to 37 and summed over the radius range.
    data = ds_eulerian["gridbox_volume"].sel(gridbox=0) * (
        ds_eulerian["number_superdroplets"] / ds_eulerian["gridbox_volume"]
    ).sel(gridbox=slice(0, 37)).sel(radius_bins=nslice).sum("radius_bins")

    data.plot(ax=axs[0], x="time", y="gridbox", cmap=strength_cmap)
    data.sel(time=stationary_phase).mean("time").plot(ax=axs[1], y="gridbox")
    # data.sel(time = stationary_phase).median("time").plot(ax = axs[1], y="gridbox")

    # Mark both edges of the stationary phase on the time axis.
    axs[0].axvline(stationary_phase.start, color="k")
    axs[0].axvline(stationary_phase.stop, color="k")

    axs[0].set_title("Temporal evolution with stationary phase in black")
    axs[1].set_title("Mean over stationary phase")
    # The title names the radius range; the word "between" was missing before.
    fig.suptitle(f"Number of superdroplets with radius between {nslice.start} and {nslice.stop} µm")
    fig.tight_layout()

In [None]:
# Range of the time coordinate that is treated as the stationary phase.
stationary_phase = slice(1500, 3500)

gridboxes = [30, 25, 20, 15]

# One figure per gridbox: number_superdroplets per radius bin over time (left)
# and its mean over the stationary phase (right).
for gbx in gridboxes:
    fig, axs = plt.subplots(ncols=2, figsize=(8, 4), sharey=True, width_ratios=[1, 0.5])
    ax_evolution, ax_mean = axs

    data = ds_eulerian["number_superdroplets"].sel(gridbox=gbx)

    data.plot(ax=ax_evolution, x="time", y="radius_bins", cmap=strength_cmap)
    data.sel(time=stationary_phase).mean("time").plot(ax=ax_mean, y="radius_bins")

    # Logarithmic radius axis, cut off below 50.
    ax_evolution.set_yscale("log")
    ax_evolution.set_ylim(50, None)

    for edge in (stationary_phase.start, stationary_phase.stop):
        ax_evolution.axvline(edge, color="k")

    ax_evolution.set_title("Temporal evolution with stationary phase in black")
    ax_mean.set_title("Mean over stationary phase")
    fig.suptitle(f"Number of superdroplets within gridbox {gbx}")
    fig.tight_layout()

In [None]:
# Display whatever `data` currently holds.
# NOTE(review): `data` is reassigned by many cells, so this output depends on which
# cell ran last -- consider removing this inspection cell.
data

In [None]:
# Kept so that the name stays defined as before. It is deliberately NOT drawn in
# this cell: the x-axis of these figures is radius, so vertical lines at the
# time values 1500 / 3500 would be meaningless and would stretch the axis.
stationary_phase = slice(1500, 3500)

# Time windows (bounds in units of the time coordinate) that are averaged separately.
time_slices = (
    slice(0, 50),
    slice(50, 200),
    slice(200, 1000),
    slice(1000, 2000),
    slice(2000, 3000),
    slice(3000, 3500),
)

# One figure per time window: mean distribution over radius bin and gridbox
# (left) and its sum over all radius bins (right).
for tslice in time_slices:
    fig, axs = plt.subplots(ncols=2, figsize=(8, 4), sharey=True, width_ratios=[1, 0.5])

    # number_superdroplets per volume, rescaled by the volume of gridbox 0,
    # for radius bins >= 1 and averaged over the time window.
    data = ds_eulerian["gridbox_volume"].sel(gridbox=0) * (
        ds_eulerian["number_superdroplets"] / ds_eulerian["gridbox_volume"]
    ).sel(time=tslice).sel(radius_bins=slice(1, None)).mean("time")

    data.plot(ax=axs[0], x="radius_bins", y="gridbox", cmap=strength_cmap)
    data.sum("radius_bins").plot(ax=axs[1], y="gridbox")

    axs[0].set_xscale("log")
    axs[0].set_xlim(1, None)

    # Titles describe what is actually shown; the previous ones were copied from
    # the time-evolution figures.
    axs[0].set_title("Mean over time slice per radius bin")
    axs[1].set_title("Sum over radius bins")

    fig.suptitle(f"Time slice {tslice.start} to {tslice.stop} s")

    fig.tight_layout()

In [None]:
strength_cmap = sns.cubehelix_palette(start=0.5, rot=-0.5, as_cmap=True)
# Kept so that the name stays defined as before. It is deliberately NOT drawn in
# this cell: the x-axis of these figures is radius, so vertical lines at the
# time values 1500 / 3500 would be meaningless and would stretch the axis.
stationary_phase = slice(1500, 3500)

# Time windows (bounds in units of the time coordinate) that are averaged separately.
time_slices = (
    slice(0, 50),
    slice(50, 200),
    slice(200, 1000),
    slice(1000, 2000),
    slice(2000, 3000),
)

# One figure per time window: mean distribution over radius bin and gridbox
# (left) and its sum over all radius bins (right).
for tslice in time_slices:
    fig, axs = plt.subplots(ncols=2, figsize=(8, 4), sharey=True, width_ratios=[1, 0.5])

    # mass_represented per gridbox volume, scaled by 1e3 (presumably kg -> g,
    # confirm), for radius bins >= 1 and averaged over the time window.
    data = 1e3 * (ds_eulerian["mass_represented"] / ds_eulerian["gridbox_volume"]).sel(time=tslice).sel(
        radius_bins=slice(1, None)
    ).mean("time")

    data.plot(ax=axs[0], x="radius_bins", y="gridbox", cmap=strength_cmap)
    data.sum("radius_bins").plot(ax=axs[1], y="gridbox")

    axs[0].set_xscale("log")
    axs[0].set_xlim(1, None)

    # Titles describe what is actually shown; the previous ones were copied from
    # the time-evolution figures.
    axs[0].set_title("Mean over time slice per radius bin")
    axs[1].set_title("Sum over radius bins")

    fig.suptitle(f"Time slice {tslice.start} to {tslice.stop} s")

    fig.tight_layout()

### Index by Superdroplet ID


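Analyse the temporal evolution of individual superdroplets by indexing the dataset by superdroplet ID (`sdId`).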

In [None]:

# Use the SupersDataNew class to read the dataset
dataset = supersdata.SupersDataNew(dataset=ds_zarr, consts=consts)

# Build an indexer from the unique sdId values, store it on the dataset and
# re-index the dataset by it. Both steps are timed with %time because each
# takes several seconds here.
%time dataset.set_attribute(dataset["sdId"].attribute_to_indexer_unique())
%time dataset.index_by_indexer(dataset["sdId"])
# print(dataset)
# print(dataset)

---- Superdrop Properties -----
RHO_L = 998.203 Kg/m^3
RHO_SOL = 2016.5 Kg/m^3
MR_SOL = 0.05844277 Kg/mol
IONIC = 2.0
-------------------------------
Attribute coord1 not found in dataset
Attribute coord2 not found in dataset
CPU times: user 8.15 s, sys: 210 ms, total: 8.36 s
Wall time: 8.38 s
CPU times: user 8.66 s, sys: 1.15 s, total: 9.81 s
Wall time: 9.85 s


In [None]:
# Sanity check of "xi": count how many entries are positive, zero and negative.
xi_values = ds_zarr["xi"]
number_above_zero = (xi_values > 0).sum().values
number_equal_zero = (xi_values == 0).sum().values
number_below_zero = (xi_values < 0).sum().values

print(
    f"Number of xi above zero: {number_above_zero}",
    f"Number of xi equal to zero: {number_equal_zero}",
    f"Number of xi below zero: {number_below_zero}",
    sep="\n",
)

Number of xi above zero: 34073370
Number of xi equal to zero: 0
Number of xi below zero: 0


In [None]:
# number of droplets to plot
N = 5

data = dataset["sdId"].data

# NOTE(review): max_sdid is not used by any cell visible in this notebook.
max_sdid = ak.max(data)
# minimum number of timesteps a droplet has to exist
min_existance_timesteps = 35
# Boolean mask over the entries of `data`: True where an entry holds at least
# `min_existance_timesteps` records.
mask = ak.num(data) >= min_existance_timesteps

# Fixed seed so that the same droplets are drawn on every run.
np.random.seed(45)
# NOTE(review): despite the name, these are positions into `data[mask]`, not sdId
# values; randint draws with replacement, so duplicates are possible.
selected_sdids = np.random.randint(0, sdtracing.get_awkward_shape(data[mask])[0], N)
selected_sdids

array([137630, 137084,  60960,  81853,  49315])

In [None]:
N = len(selected_sdids)

# Per-droplet series of the selected droplets: time on x, mass on y, xi as colour.
x_data = dataset["time"].data[mask][selected_sdids]
# Shift every series so that it starts at zero.
x_data = x_data - x_data[:, 0]
y_data = dataset["mass"].data[mask][selected_sdids]
color_data = dataset["xi"].data[mask][selected_sdids]

# Shared logarithmic colour scale over all selected droplets.
norm = mcolors.LogNorm(ak.min(color_data), ak.max(color_data))
cmap = plt.cm.plasma.resampled(20)

line_style = dict(alpha=0.5, color="grey", linewidth=0.5)
marker_style = dict(s=2, cmap=cmap, norm=norm, alpha=1, marker=".")

for i in range(N):
    # Thin grey line for the trajectory, coloured markers for the individual records.
    plt.plot(x_data[i], y_data[i], **line_style)
    plt.scatter(x_data[i], y_data[i], c=color_data[i], **marker_style)

# Empty scatter carrying the shared cmap/norm; it is the most recent mappable
# when plt.colorbar() is called.
plt.scatter([], [], c=[], cmap=cmap, norm=norm)
plt.colorbar(label="Multiplicity $\\xi$")
plt.xlabel("Time since spawning in s")
plt.ylabel("Droplet mass in kg")
plt.yscale("log")

plt.title(f"Cloud {cloud_id} Temporal evolution of {N} random droplets")

Text(0.5, 1.0, 'Cloud 384 Temporal evolution of 5 random droplets')

In [None]:
N = len(selected_sdids)

# Per-droplet series of the selected droplets: time on x, xi on y, mass as colour.
x_data = dataset["time"].data[mask][selected_sdids]
# Shift every series so that it starts at zero.
x_data = x_data - x_data[:, 0]
y_data = dataset["xi"].data[mask][selected_sdids]
color_data = dataset["mass"].data[mask][selected_sdids]

# Shared logarithmic colour scale over all selected droplets.
norm = mcolors.LogNorm(ak.min(color_data), ak.max(color_data))
cmap = plt.cm.plasma.resampled(20)

line_style = dict(alpha=0.5, color="grey", linewidth=0.5)
marker_style = dict(s=2, cmap=cmap, norm=norm, alpha=1, marker=".")

for i in range(N):
    # Thin grey line for the trajectory, coloured markers for the individual records.
    plt.plot(x_data[i], y_data[i], **line_style)
    plt.scatter(x_data[i], y_data[i], c=color_data[i], **marker_style)

# Empty scatter carrying the shared cmap/norm; it is the most recent mappable
# when plt.colorbar() is called.
plt.scatter([], [], c=[], cmap=cmap, norm=norm)
plt.colorbar(label="Mass in kg")
plt.xlabel("Time since spawning in s")
plt.ylabel("Multiplicity $\\xi$")

plt.yscale("log")

plt.title(f"Cloud {cloud_id} Temporal evolution of {N} random droplets")

Text(0.5, 1.0, 'Cloud 384 Temporal evolution of 5 random droplets')

In [None]:
# Display the radius series of the selected droplets (same mask and positions as
# in the plots of this section).
dataset["radius"].data[mask][selected_sdids]

In [None]:
N = len(selected_sdids)
# Only the first `max_timesteps` records of every droplet are shown.
max_timesteps = 100
x_data = dataset["radius"].data[mask][selected_sdids][:, :max_timesteps]
y_data = dataset["xi"].data[mask][selected_sdids][:, :max_timesteps]
color_data = dataset["time"].data[mask][selected_sdids][:, :max_timesteps]
# sdId values stored with the selected droplets. They are used for the legend,
# because `selected_sdids` only holds positions into the masked array.
# NOTE(review): assumes every record of one row carries that row's sdId -- confirm.
color_data2 = dataset["sdId"].data[mask][selected_sdids][:, :max_timesteps]

# Time relative to the first record of each droplet, on one shared linear scale.
color_data = color_data - color_data[:, 0]
norm = mcolors.Normalize(ak.min(color_data), ak.max(color_data))
time_colors = plt.cm.plasma

# One distinct line colour per droplet.
id_colors = plt.cm.Set1.resampled(N)(np.linspace(0, 1, N))

fig, axs = plt.subplots(ncols=2, figsize=(15, 4), sharey=True, sharex=True)

for i in range(N):
    # First stored sdId of this droplet; every selected droplet has at least
    # `min_existance_timesteps` records, so element 0 exists.
    droplet_id = int(color_data2[i][0])

    # Same trajectory line on both panels.
    for _ax in axs:
        _ax.plot(
            x_data[i],
            y_data[i],
            color=id_colors[i],
            alpha=0.5,
            linewidth=1,
            markersize=1,
            marker=".",
            label=f"sdId: {droplet_id}",
        )

    # Right panel additionally colours each record by its time since spawning.
    axs[1].scatter(
        x_data[i],
        y_data[i],
        c=color_data[i],
        s=2,
        cmap=time_colors,
        norm=norm,
        zorder=10,
    )

fig.colorbar(
    plt.cm.ScalarMappable(norm=norm, cmap=time_colors), ax=axs[1], label="Time since spawning in s"
)
for _ax in axs:
    _ax.set_xlabel("Radius in µm")
    # Same wording as the y labels of the other multiplicity plots.
    _ax.set_ylabel("Multiplicity $\\xi$")
    _ax.set_xscale("log")
    _ax.set_yscale("log")
axs[0].legend(loc="lower left")
fig.suptitle(f"Cloud {cloud_id} Temporal evolution of {N} random droplets")

Text(0.5, 0.98, 'Cloud 384 Temporal evolution of 5 random droplets')

### Time dataset

In [None]:
# Use the SupersDataNew class to read the dataset
dataset_time = supersdata.SupersDataNew(dataset=ds_zarr, consts=consts)

# Create indexers from the unique values of "time" and "sdgbxindex" and store
# them on the dataset.
dataset_time.set_attribute(dataset_time["time"].attribute_to_indexer_unique())
dataset_time.set_attribute(dataset_time["sdgbxindex"].attribute_to_indexer_unique())

# Index by time first, then by sdgbxindex.
dataset_time.index_by_indexer(dataset_time["time"])
dataset_time.index_by_indexer(dataset_time["sdgbxindex"])

---- Superdrop Properties -----
RHO_L = 998.203 Kg/m^3
RHO_SOL = 2016.5 Kg/m^3
MR_SOL = 0.05844277 Kg/mol
IONIC = 2.0
-------------------------------
Attribute coord1 not found in dataset
Attribute coord2 not found in dataset


In [None]:
# extract the awkward array from the dataset
x_data = 1e3 * dataset_time["mass"].data[1200]
y_data = dataset_time["xi"].data[1200]
c_data = 1e3 * dataset_time["mass_represented"].data[1200]

In [None]:
# NOTE(review): N and norm are not used by the plot of this cell; they are kept
# so that the names stay defined exactly as before.
N = 35
norm = mcolors.Normalize(ak.min(c_data[:N]), ak.max(c_data[:N]))
# Positions along the leading axis of x_data / y_data / c_data that are compared,
# one colour each.
gridboxes = [34, 30, 27, 23, 20, 10]
colors = plt.cm.plasma.resampled(len(gridboxes))(np.linspace(0, 1, len(gridboxes)))

# Wide scatter panel plus three narrow panels holding one summed value per gridbox.
fig, axs = plt.subplots(1, 4, figsize=(12, 4), width_ratios=[3, 0.5, 0.5, 0.5])

for i, c in zip(gridboxes, colors):
    # Individual superdroplets: mass against multiplicity.
    axs[0].scatter(x_data[i], y_data[i], s=1, color=c, alpha=1, marker=".", label=f"Gridbox {i}")
    # Sums of mass, xi and represented mass for this gridbox, one panel each.
    for j, d in enumerate([x_data, y_data, c_data]):
        axs[j + 1].scatter(
            0,
            ak.sum(d[i]),
            color=c,
        )
        axs[j + 1].set_xticks([])
axs[0].legend(loc="lower left", fontsize=8)
axs[0].set_yscale("log")
axs[0].set_xscale("log")

axs[0].set_xlim(1e-7, None)

axs[0].set_xlabel("Individual droplet mass in g")
axs[0].set_ylabel(r"Multiplicity $\xi$")
axs[1].set_ylabel(r"$\sum m$ per GBX [g]")
axs[2].set_ylabel(r"$\sum \xi$ per GBX")
axs[3].set_ylabel(r"$\sum m_{rep}$ per GBX [g]")

# The title used to end mid-sentence ("... mass and "); it now names both axes.
fig.suptitle("Distribution of SD mass and multiplicity per gridbox")
fig.tight_layout()

In [None]:
# Convert three attributes of the time/sdgbxindex-indexed dataset into DataArrays
# for the figures below.
da_mass_represented = dataset_time.attribute_to_DataArray("mass_represented")
da_mass = dataset_time.attribute_to_DataArray("mass")
da_xi = dataset_time.attribute_to_DataArray("xi")

In [None]:
def _plot_evolution_and_mean(data, label, phase):
    """Draw the time evolution of `data` and its mean over `phase` side by side.

    Only sdgbxindex labels 0 to 37 are shown. The left panel is the 2D field
    over time and sdgbxindex with the edges of `phase` marked in black; the
    right panel is the mean over `phase`. `label` is used for the colourbar and
    for the x-axis of the right panel. Returns the figure and its axes.
    """
    fig, axs = plt.subplots(ncols=2, figsize=(8, 4), sharey=True, width_ratios=[1, 0.5])
    ax_evolution, ax_mean = axs

    selection = data.sel(sdgbxindex=slice(0, 37))
    selection.plot(
        ax=ax_evolution, x="time", y="sdgbxindex", cmap="plasma_r", cbar_kwargs=dict(label=label)
    )
    selection.sel(time=phase).mean("time").plot(ax=ax_mean, y="sdgbxindex")

    for edge in (phase.start, phase.stop):
        ax_evolution.axvline(edge, color="k")
    ax_evolution.set_title("Temporal evolution with stationary phase in black")
    ax_mean.set_title("Mean over stationary phase")
    ax_mean.set_xlabel(label)
    fig.tight_layout()
    return fig, axs


# Range of the time coordinate that is treated as the stationary phase.
stationary_phase = slice(1500, 3500)

# Figure 1: mean xi along ragged_dimension_2, divided by 100**2 * 20.
data = da_xi.mean("ragged_dimension_2") / (100**2 * 20)
label = r"Mean $\xi$ $\left[m^{-3} \right]$"
fig, axs = _plot_evolution_and_mean(data, label, stationary_phase)

# Figure 2: mean individual droplet mass, from time 500 onwards.
data = 1e3 * da_mass.mean("ragged_dimension_2").sel(time=slice(500, None)) / (100**2 * 20)
label = r"Mean ind. droplet mass $\left[ g m^{-3} \right]$"
fig, axs = _plot_evolution_and_mean(data, label, stationary_phase)

# Figure 3: represented mass summed along ragged_dimension_2.
data = 1e3 * da_mass_represented.sum("ragged_dimension_2") / (100**2 * 20)
label = r"Total mass $\left[ g m^{-3} \right]$"
fig, axs = _plot_evolution_and_mean(data, label, stationary_phase)


# plt.figure()
# (1e3 * da_mass_represented.sum("ragged_dimension_2") / (100 **2 * 20)).sel(sdgbxindex = slice(0, 37)).T.plot(cmap = "plasma_r_r", cbar_kwargs = dict(label = r"Total mass $\left[ g m^{-3} \right]$"))

### Time,  and gridbox 

Let's select only the superdroplets which exist in gridboxes 20 to 30 and understand their behaviour there.

In [None]:
# Use the SupersDataNew class to read the dataset
dataset_gridbox = supersdata.SupersDataNew(dataset=ds_zarr, consts=consts)

# Create indexers from the unique values of "sdgbxindex" and "sdId" and store
# them on the dataset.
dataset_gridbox.set_attribute(dataset_gridbox["sdgbxindex"].attribute_to_indexer_unique())
dataset_gridbox.set_attribute(dataset_gridbox["sdId"].attribute_to_indexer_unique())

# Index by sdId first, then by sdgbxindex.
dataset_gridbox.index_by_indexer(dataset_gridbox["sdId"])
dataset_gridbox.index_by_indexer(dataset_gridbox["sdgbxindex"])

# Last expression of the cell: shows the object's repr.
dataset_gridbox

---- Superdrop Properties -----
RHO_L = 998.203 Kg/m^3
RHO_SOL = 2016.5 Kg/m^3
MR_SOL = 0.05844277 Kg/mol
IONIC = 2.0
-------------------------------
Attribute coord1 not found in dataset
Attribute coord2 not found in dataset


<pySD.sdmout_src.supersdata.SupersDataNew at 0x7fff5b00a1e0>

In [None]:
# Positions 20 to 29 along the second axis of the awkward arrays.
# NOTE(review): presumably the gridbox axis after indexing by sdId and
# sdgbxindex -- confirm.
gridbox_slice = slice(20, 30)

# Keep the rows that have more than 10 records in total inside that slice.
records_in_slice = ak.num(dataset_gridbox["mass"].data[:, gridbox_slice, :], axis=-1)
mask = ak.sum(records_in_slice, axis=1) > 10

# Units attached to each attribute of the subset; the key order is also the
# order in which the attributes are created.
units_dict = {
    "mass": "kg",
    "xi": "",
    "radius": "µm",
    "time": "s",
    "sdId": "",
    "sdgbxindex": "",
}

# Flatten the selected part of every variable into one attribute each.
attrs_list = []
for variable, variable_units in units_dict.items():
    data = ak.flatten(dataset_gridbox[variable].data[mask][:, gridbox_slice, :], axis=None)
    attribute = supersdata.SupersAttribute(name=variable, data=data, units=variable_units)
    attrs_list.append(attribute)

dataset_gridbox_subset = supersdata.SupersDataSimple(attributes=attrs_list)

# Re-index the flattened subset by the unique sdId values.
sdid_indexer = dataset_gridbox_subset["sdId"].attribute_to_indexer_unique()
dataset_gridbox_subset.set_attribute(sdid_indexer)
dataset_gridbox_subset.index_by_indexer(dataset_gridbox_subset["sdId"])

print(dataset_gridbox_subset)

Attributes:
--------------
mass (kg)
139913 * var * float64
xi ()
139913 * var * float64
radius (µm)
139913 * var * float64
time (s)
139913 * var * float64
sdId ()
coord: [3, 52, 101, 168, 278, 303, ..., 1819629, 1819634, 1819979, 1820345, 1822268]
139913 * var * uint32
139913 * var * int64
sdgbxindex ()
139913 * var * uint32

Indexes:
--------------
sdId
[3, 52, 101, 168, 278, 303, ..., 1819629, 1819634, 1819979, 1820345, 1822268]



In [None]:
# Convert the attributes of the sdId-indexed subset into DataArrays.
da_mass = dataset_gridbox_subset.attribute_to_DataArray("mass")
da_xi = dataset_gridbox_subset.attribute_to_DataArray("xi")
da_radius = dataset_gridbox_subset.attribute_to_DataArray("radius")
da_time = dataset_gridbox_subset.attribute_to_DataArray("time")
# Subtract the minimum along ragged_dimension_1 and round to whole numbers, so
# that time is measured relative to the earliest record along that dimension.
da_time = np.round(da_time - da_time.min("ragged_dimension_1"))
da_sdId = dataset_gridbox_subset.attribute_to_DataArray("sdId")
da_gridbox = dataset_gridbox_subset.attribute_to_DataArray("sdgbxindex")

In [None]:
# select some random sdIds
# N = 5
N = 4
# Fixed seed so that the same sdIds are drawn on every run.
np.random.seed(42)
# NOTE(review): this first draw is overwritten immediately below. It still
# advances the random state, so removing it would change the final selection.
selected_sdIds = np.random.choice(da_sdId["sdId"], N, replace=False)
# Final selection: N distinct sdIds whose maximum radius along ragged_dimension_1
# exceeds 500. The displayed values are floats (see the output below).
selected_sdIds = np.random.choice(
    da_sdId["sdId"].where(da_radius.max("ragged_dimension_1") > 500, drop=True), N, replace=False
)
selected_sdIds

array([ 852598.,  373045., 1614861., 1374685.])

In [None]:
# Grey trajectory per selected superdroplet, with its records ordered by time.
for i in selected_sdIds:
    time_order = da_time.sel(sdId=i)
    mass_sorted = da_mass.sel(sdId=i).sortby(time_order)
    xi_sorted = da_xi.sel(sdId=i).sortby(time_order)
    plt.plot(mass_sorted, xi_sorted, alpha=0.5, color="grey", linewidth=0.5)

# All records of the selected superdroplets, coloured by sdgbxindex on a
# 9-step colormap.
gridbox_cmap = sns.cubehelix_palette(start=0.5, rot=-0.5, as_cmap=True).resampled(9)
plt.scatter(
    da_mass.sel(sdId=selected_sdIds),
    da_xi.sel(sdId=selected_sdIds),
    c=da_gridbox.sel(sdId=selected_sdIds),
    s=20,
    cmap=gridbox_cmap,
    alpha=1,
    marker=".",
)
plt.colorbar(label="Gridbox")

plt.xlabel("Mass in kg")
plt.ylabel("Multiplicity $\\xi$")
plt.yscale("log")
plt.xscale("log")

In [None]:
# Grey trajectory per selected superdroplet, with its records ordered by time.
for i in selected_sdIds:
    time_order = da_time.sel(sdId=i)
    radius_sorted = da_radius.sel(sdId=i).sortby(time_order)
    xi_sorted = da_xi.sel(sdId=i).sortby(time_order)
    plt.plot(radius_sorted, xi_sorted, alpha=0.5, color="grey", linewidth=0.5)

# All records of the selected superdroplets, coloured by sdgbxindex on a
# 9-step colormap.
gridbox_cmap = sns.cubehelix_palette(start=0.5, rot=-0.5, as_cmap=True).resampled(9)
plt.scatter(
    da_radius.sel(sdId=selected_sdIds),
    da_xi.sel(sdId=selected_sdIds),
    c=da_gridbox.sel(sdId=selected_sdIds),
    s=20,
    cmap=gridbox_cmap,
    alpha=1,
    marker=".",
)
plt.colorbar(label="Gridbox")

plt.xlabel("Radius in µm")
plt.ylabel("Multiplicity $\\xi$")
plt.yscale("log")
plt.xscale("log")

# plt.ylim(3e7, None)

### Pseudo Lagrangian view to understand origin of droplets

Now we want to create an initial radius for each superdroplet.

In [None]:
# Fresh SupersDataNew view of the raw output, indexed by sdId first and by
# sdgbxindex second.
dataset_pseudo = supersdata.SupersDataNew(dataset=ds_zarr, consts=consts)
dataset_pseudo.set_attribute(dataset_pseudo["sdId"].attribute_to_indexer_unique())
dataset_pseudo.set_attribute(dataset_pseudo["sdgbxindex"].attribute_to_indexer_unique())
dataset_pseudo.index_by_indexer(dataset_pseudo["sdId"])
dataset_pseudo.index_by_indexer(dataset_pseudo["sdgbxindex"])

print(dataset_pseudo)

In [None]:
radius = dataset_pseudo["radius"].data
radius_select = radius  # plain alias; no selection is applied here
# Mean radius along the last axis, then the entry at position 20 of the second
# axis (presumably the gridbox axis after the indexing above -- TODO confirm).
radius_init = ak.mean(radius_select, axis=2)[:, 20]
# Replace NaN / missing values by 1e-3, then broadcast the one value per row to
# the full shape of `radius_select`; `radius_select * 0` only supplies that shape.
radius_init = (
    ak.fill_none(ak.nan_to_none(radius_init), 1e-3)[:, np.newaxis, np.newaxis] + radius_select * 0
)
# Store the result on the dataset as a new attribute named "radius_init".
dataset_pseudo.set_attribute(
    supersdata.SupersAttribute(name="radius_init", data=radius_init, units="micro-m")
)

In [None]:
# 151 logarithmically spaced bin values between 10 and 4e3 for "radius_init".
radius_bins = np.geomspace(10, 4e3, 151)

# Drop the current indexing, then create new indexers: unique time, unique
# sdgbxindex, and "radius_init" binned into `radius_bins` (stored under the name
# "radius_bins").
dataset_pseudo.flatten()
print(dataset_pseudo)
dataset_pseudo.set_attribute(dataset_pseudo["time"].attribute_to_indexer_unique())
dataset_pseudo.set_attribute(dataset_pseudo["sdgbxindex"].attribute_to_indexer_unique())
dataset_pseudo.set_attribute(
    dataset_pseudo["radius_init"].attribute_to_indexer_binned(bins=radius_bins, new_name="radius_bins")
)

# Index in the order time, sdgbxindex, radius_bins.
dataset_pseudo.index_by_indexer(dataset_pseudo["time"])
dataset_pseudo.index_by_indexer(dataset_pseudo["sdgbxindex"])
dataset_pseudo.index_by_indexer(dataset_pseudo["radius_bins"])

print(dataset_pseudo)

In [None]:
# Reduce "mass_represented" with ak.sum into a DataArray over the dataset's indexes.
da_mass = dataset_pseudo.attribute_to_DataArray_reduction(
    attribute_name="mass_represented",
    reduction_func=ak.sum,
)
# Use the same dimension name as ds_eulerian and replace non-positive radius bin
# values by 1e-3, as is done for ds_eulerian["radius_bins"].
da_mass = da_mass.rename({"sdgbxindex": "gridbox"})
da_mass["radius_bins"] = da_mass["radius_bins"].where(da_mass["radius_bins"] > 0, 1e-3)

In [None]:
# Summed represented mass per gridbox volume, scaled by 1e3 (presumably kg -> g,
# confirm).
data = 1e3 * da_mass / ds_eulerian["gridbox_volume"]
# Default plot of the array; the recorded output below is a histogram of its values.
data.plot()

(array([1.0111676e+07, 8.8600000e+02, 1.2990000e+03, 1.7400000e+02,
        1.9800000e+02, 1.3500000e+02, 1.3000000e+01, 1.2000000e+01,
        1.5000000e+01, 8.0000000e+00]),
 array([0.        , 0.52142436, 1.04284872, 1.56427309, 2.08569745,
        2.60712181, 3.12854617, 3.64997054, 4.1713949 , 4.69281926,
        5.21424362]),
 <BarContainer object of 10 artists>)

In [None]:
# Mean over all times from 2000 onwards, shown over radius bin (x) and gridbox (y).
late_mean = data.sel(time=slice(2000, None)).mean("time")
# The colour scale is capped at the 99 % quantile of the complete array.
color_cap = data.quantile(0.99)

plt.pcolormesh(data["radius_bins"], data["gridbox"], late_mean, cmap="Reds", vmax=color_cap)
plt.xscale("log")
plt.xlim(1, 1e3)
plt.colorbar()
# plt.figure()
# plt.plot(
#     data.sum("radius_bins").mean("time"),
#     ds_eulerian["gridbox"],
#     color = "k",
#     linewidth = 0.5,
#     alpha = 0.5,
# )

<matplotlib.colorbar.Colorbar at 0x7fff537d8800>