In [None]:
import numpy as np
import xarray as xr
from pathlib import Path
import awkward as ak
from typing import List, Tuple, Dict
import numpy.typing as npt

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import seaborn as sns
from sdm_eurec4a.visulization import set_custom_rcParams, plot_one_one, label_from_attrs
from sdm_eurec4a.reductions import mean_and_stderror_of_mean

from pySD.sdmout_src import sdtracing
from pySD.sdmout_src import supersdata
from pySD.sdmout_src import pygbxsdat, pysetuptxt, supersdata

# Presumably applies the project's matplotlib rcParams (TODO confirm); the return
# value is kept because a later cell indexes into it for a plot color
# (default_colors[0]).
default_colors = set_custom_rcParams()

from sdm_eurec4a import RepositoryPath  # NOTE(review): late import; belongs with the imports above

# RP.repo_dir is used below as the root of the figure output directory.
RP = RepositoryPath("levante")

In [None]:
# Select the microphysics setup to analyse: keep exactly ONE line active.
# The value is used for the input data path and for the figure directory.
# microphysics = "null_microphysics"
# microphysics = "condensation"
microphysics = "collision_condensation"
# microphysics = "coalbure_condensation_large"
# microphysics = "coalbure_condensation_small"

# Figures of this notebook are written below the repository directory,
# one sub-folder per microphysics setup.
fig_dir = RP.repo_dir / "results/notebooks/paper/mass_conservation" / microphysics
fig_dir.mkdir(exist_ok=True, parents=True)

In [None]:
# Output directory of one simulation; the number after the underscore in the
# folder name is taken as the cloud id.
# NOTE(review): absolute, user-specific path - this cell only runs where that
# directory exists.
data_dir = Path(f"/home/m/m301096/CLEO/data/output_v4.0/{microphysics}/cluster_384")
# data_dir = Path(f"/home/m/m301096/CLEO/data/output_v4.0/{microphysics}/cluster_81")
cloud_id = int(data_dir.name.split("_")[1])  # "cluster_384" -> 384

# output_dir = data_dir / "processed"
# output_dir.mkdir(exist_ok=True, parents=False)

# output_path = output_dir / "eulerian_dataset.nc"
# output_path.parent.mkdir(exist_ok=True)

# Input files of the run, all relative to data_dir.
setupfile_path = data_dir / "config" / "eurec4a1d_setup.txt"
statsfile_path = data_dir / "config" / "eurec4a1d_stats.txt"
zarr_path = data_dir / "eurec4a1d_sol.zarr"
gridfile_path = data_dir / "share/eurec4a1d_ddimlessGBxboundaries.dat"


# Read the configuration and constants from the setup .txt file, and the
# gridbox boundaries from the binary grid file (scaled with consts["COORD0"]).
config = pysetuptxt.get_config(str(setupfile_path), nattrs=3, isprint=False)
consts = pysetuptxt.get_consts(str(setupfile_path), isprint=False)
gridbox_dict = pygbxsdat.get_gridboxes(str(gridfile_path), consts["COORD0"], isprint=False)

# Raw model output. The gridbox dimension is renamed to "gridbox" and the time
# coordinate is rounded to one decimal (presumably so that it matches the time
# coordinate of the processed Eulerian dataset below - TODO confirm).
ds_zarr = xr.open_zarr(zarr_path, consolidated=False)
ds_zarr = ds_zarr.rename({"gbxindex": "gridbox"})
ds_zarr["time"] = np.round(ds_zarr["time"], 1)


# Pre-processed Eulerian view of the same run; it must already exist on disk.
ds_eulerian = xr.open_dataset(data_dir / "processed/eulerian_dataset.nc")
ds_eulerian["time"] = np.round(ds_eulerian["time"], 1)
# ds_eulerian['radius_bins'] = ds_eulerian['radius_bins'].where(ds_eulerian['radius_bins'] > 0, 1e-3)
ds_eulerian

Reading binary file:
 /home/m/m301096/CLEO/data/output_v4.0/collision_condensation/cluster_384/share/eurec4a1d_ddimlessGBxboundaries.dat


## Attempt to understand conservation of mass in the setup

This is a bit like in a reservoir.

We have:
- $\mathit{I}$   Inflow of SDs 
- $\mathit{O}$   Outflow of SDs
- $\mathit{R}$   Reservoir of SDs within the domain 


For gridboxes $g \in \left[0, G\right]$,
for time $t \in \left[0, T\right]$,
for superdroplets $i \in \left[0, S\right]$:


- The inflow at time t is given by the first time instance of a droplet in the topmost SUBCLOUD gridbox. This should be the second timestep of its existence.
- The outflow is given by the last timestep of existence.
- The reservoir consists of all OTHER values at this timestep for ALL gridboxes.

The difference should be exactly in line with CLEO's output of ``masscondensed`` $dM$

$I = R + O + dM$

We need to use the represented mass of each SD.

In [None]:
# Wrap the raw zarr output in pySD's super-droplet data container.
dataset = supersdata.SupersDataNew(
    dataset=ds_zarr,
    consts=consts,
)
# Register an indexer built from the unique super-droplet ids and re-index the
# data by it (presumably yielding one ragged time series per super-droplet -
# TODO confirm against pySD).
dataset.set_attribute(dataset["sdId"].attribute_to_indexer_unique())
dataset.index_by_indexer(dataset["sdId"])

---- Superdrop Properties -----
RHO_L = 998.203 Kg/m^3
RHO_SOL = 2016.5 Kg/m^3
MR_SOL = 0.05844277 Kg/mol
IONIC = 2.0
-------------------------------


Attribute coord1 not found in dataset
Attribute coord2 not found in dataset


In [None]:
# Small sketch of the output time axis of one super-droplet: a marker per output
# time, an arrow to the right of each marker and an "m" annotation above it.
id_slice = 3  # first-axis index of the entry whose time series is drawn
time_slice = slice(0, 10)  # restrict to its first ten outputs

time_data = dataset["time"].data[id_slice, time_slice].to_numpy()

# Tick labels of the form "t0= 0s", "t1= 2s", ...
labels = [f"t{j}= {i:.0f}s" for j, i in enumerate(time_data)]

fig, ax = plt.subplots(figsize=(3, 0.5))

ax.scatter(time_data, time_data * 0, label="time")
for t in time_data:
    # NOTE(review): the arrow length of 2 is hard-coded; presumably it equals the
    # output interval in seconds - confirm.
    ax.annotate("", xy=(t + 2, 0), xytext=(t, 0), arrowprops=dict(arrowstyle="->"))
    ax.annotate("m", xy=(t, 1), fontsize=10, color=default_colors[0])

    # ax.annotate("", xy=(t + 2, 2), xytext=(t, 2),
    #         arrowprops=dict(arrowstyle="-", color = 'k'))

    # ax.annotate("e", xy=(t + 0.25, 2.5), fontsize=10, color = default_colors[1])
    # ax.annotate("e", xy=(t + 1.5, 2.5), fontsize=10, color = default_colors[2])


ax.set_yticks([])
ax.set_xticks(time_data, labels)
ax.set_ylim(-0.5, 3)
# Only the first few outputs are shown; the trailing semicolon hides the
# returned limits tuple in the cell output.
ax.set_xlim(-0.5, 6.2);

(-0.5, 6.2)

In [None]:
def create_init_exit_reservoir_data(dataset: supersdata.SupersDataNew) -> Tuple[
    supersdata.SupersDataSimple,
    supersdata.SupersDataSimple,
    supersdata.SupersDataSimple,
]:
    """
    Split every attribute of ``dataset`` into inflow, outflow and reservoir samples.

    Only first-axis entries whose ``"time"`` series holds more than one value are
    used. For each attribute of ``dataset`` and each such entry:

    - inflow    : the value at index 1 (the second recorded value),
    - outflow   : the value at index -1 (the last recorded value),
    - reservoir : the values at indices ``1:-1``, flattened into one 1D array.

    Note
    ----
    The reservoir slice contains the inflow sample but not the outflow sample.
    An entry with exactly two values contributes the same value to inflow and
    outflow and nothing to the reservoir.

    Parameters
    ----------
    dataset : supersdata.SupersDataNew
        Super-droplet data; presumably indexed by super-droplet id so that the
        last axis runs over output times (TODO confirm with the caller).

    Returns
    -------
    Tuple[SupersDataSimple, SupersDataSimple, SupersDataSimple]
        ``(inflow, outflow, reservoir)``, each holding one attribute per attribute
        of ``dataset`` with the original name, units and metadata.
    """
    has_several_outputs = ak.num(dataset["time"].data, axis=-1) > 1

    inflow = supersdata.SupersDataSimple([])
    outflow = supersdata.SupersDataSimple([])
    reservoir = supersdata.SupersDataSimple([])

    for name, attribute in dataset.attributes.items():
        values = attribute.data[has_several_outputs]

        # Evaluate all three selections first, then store them in the same
        # inflow -> outflow -> reservoir order.
        selections = (
            (inflow, values[:, 1]),
            (outflow, values[:, -1]),
            (reservoir, ak.flatten(values[:, 1:-1], axis=-1)),
        )
        for target, selected in selections:
            target.set_attribute(
                supersdata.SupersAttribute(
                    name=name, data=selected, units=attribute.units, metadata=attribute.metadata
                )
            )

    return inflow, outflow, reservoir


# Split the super-droplet data into the three containers used for the mass budget.
dataset_inflow, dataset_outflow, dataset_reservoir = create_init_exit_reservoir_data(dataset)

In [None]:
# Inflow and outflow samples are re-indexed by their (unique) output time.
for _flow in (dataset_inflow, dataset_outflow):
    _flow.set_attribute(_flow["time"].attribute_to_indexer_unique())
    _flow.index_by_indexer(_flow["time"])

# The reservoir is indexed by output time first and by gridbox second. Both
# indexers are registered before either of them is applied.
_reservoir_keys = ("time", "sdgbxindex")
for _key in _reservoir_keys:
    dataset_reservoir.set_attribute(dataset_reservoir[_key].attribute_to_indexer_unique())
for _key in _reservoir_keys:
    dataset_reservoir.index_by_indexer(dataset_reservoir[_key])

We can only use the timesteps up to the last time!!!!

In [None]:
# --- Box model of the mass budget -------------------------------------------
# Budget per output timestep:   -dR/dt + I + O + S = 0
# Integrated budget:            -R + int(I) + int(O) + int(S) = 0
# with inflow I, outflow O (negative), reservoir R and source S.

# Sum the represented mass of all samples per output time (and, for the
# reservoir, per gridbox).
da_inflow = dataset_inflow.attribute_to_DataArray_reduction("mass_represented", reduction_func=ak.sum)
da_outflow = dataset_outflow.attribute_to_DataArray_reduction("mass_represented", reduction_func=ak.sum)
da_reservoir = dataset_reservoir.attribute_to_DataArray_reduction(
    "mass_represented", reduction_func=ak.sum
)

# Sign convention: outflow is negative, so all budget terms can simply be added.
da_outflow = -da_outflow

ds_box_model = xr.Dataset(
    {
        "inflow": da_inflow,
        "outflow": da_outflow,
        "reservoir": da_reservoir,
    }
)

# NOTE: inflow and outflow are masses per output timestep (kg dT^{-1}), NOT kg/s.
ds_box_model = ds_box_model.rename({"sdgbxindex": "gridbox"})
ds_box_model = ds_box_model.fillna(0)
# Keep the attributes so they can be restored after the reduction below.
attrs = {key: ds_box_model[key].attrs.copy() for key in ds_box_model.data_vars}

# Total reservoir mass of the domain.
ds_box_model["reservoir"] = ds_box_model["reservoir"].sum("gridbox")

for key in ds_box_model.data_vars:
    ds_box_model[key].attrs = attrs[key]

ds_box_model["inflow"].attrs["units"] = "kg dT^{-1}"
ds_box_model["outflow"].attrs["units"] = "kg dT^{-1}"
ds_box_model["reservoir"].attrs["units"] = "kg"

# At the first output time the reservoir is set to the inflow of that time.
ds_box_model["reservoir"][0] = ds_box_model["inflow"][0]

# Time-integrated fluxes. They are required by "sum_integrate" below and by the
# plotting cells further down; without them this cell fails with a KeyError.
ds_box_model["inflow_integrate"] = (
    ds_box_model["inflow"].cumsum("time", keep_attrs=True).assign_attrs(units="kg")
)
ds_box_model["outflow_integrate"] = (
    ds_box_model["outflow"].cumsum("time", keep_attrs=True).assign_attrs(units="kg")
)

# Change of the reservoir per output timestep. diff() has no value for the first
# time; it is set to the first reservoir value (change relative to zero mass).
ds_box_model["reservoir_differentiate"] = ds_box_model["reservoir"].diff("time")
ds_box_model["reservoir_differentiate"].attrs["units"] = "kg dT^{-1}"
ds_box_model["reservoir_differentiate"][0] = ds_box_model["reservoir"][0]


# --- Source term from the Eulerian dataset ----------------------------------
da_source = ds_eulerian["massdelta_condensation"]
# make sure to have kg per gridbox
da_source = da_source * ds_eulerian["gridbox_volume"]
# make sure to have per timestep NOT per second
da_source = da_source * ds_eulerian["time"].diff("time").mean()
# only use sub cloud layer gridboxes: the label-based slice drops the gridbox
# with the highest index
da_source = da_source.sel(gridbox=slice(0, ds_eulerian["gridbox"].max() - 1))

ds_box_model["source"] = da_source.sum("gridbox")
ds_box_model["source"].attrs = dict(
    long_name="Source term",
    units="kg dT^{-1}",
)

ds_box_model["source_integrate"] = ds_box_model["source"].cumsum("time", keep_attrs=True)
ds_box_model["source_integrate"].attrs = dict(
    long_name="Integrated source term",
    units="kg",
)

# --- Budget residuals (zero if mass is conserved) ---------------------------
ds_box_model["sum"] = (
    -ds_box_model["reservoir_differentiate"]
    + ds_box_model["inflow"]
    + ds_box_model["outflow"]
    + ds_box_model["source"]
)
ds_box_model["sum_integrate"] = (
    -ds_box_model["reservoir"]
    + ds_box_model["inflow_integrate"]
    + ds_box_model["outflow_integrate"]
    + ds_box_model["source_integrate"]
)

# Restrict to the first 3000 s and replace remaining gaps by zero.
ds_box_model = ds_box_model.sel(time=slice(0, 3000)).fillna(0)

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(12, 5), sharex=True)

# Kept for parity with the multi-panel cells, which address their axes as axs[i].
axs = [None, ax]

# Budget terms drawn as unconnected markers: (variable, legend label, marker).
_budget_terms = (
    ("reservoir_differentiate", "reservoir change", "X"),
    ("inflow", "inflow", "^"),
    ("outflow", "outflow", "v"),
    ("source", "source", "P"),
)
for _var, _label, _marker in _budget_terms:
    ds_box_model[_var].plot(
        ax=axs[1], label=_label, marker=_marker, alpha=0.7, markersize=5, linestyle="None"
    )

# Residual of the budget as a black line on top of the markers.
ds_box_model["sum"].plot(label="SUM", alpha=0.7, markersize=5, ax=axs[1], color="k")

axs[1].legend()
# Symmetric log scale: linear within +-1e-10, logarithmic outside.
axs[1].set_yscale("symlog", linthresh=1e-10)

In [None]:
fig, axs = plt.subplots(2, 1, figsize=(12, 6))

# Upper panel: budget terms per timestep. Lower panel: their time-integrated
# counterparts. Both panels share the same legend labels.
_legend_labels = ("reservoir", "inflow", "outflow", "source")
_panels = (
    (axs[0], ("reservoir_differentiate", "inflow", "outflow", "source"), "sum"),
    (
        axs[1],
        ("reservoir", "inflow_integrate", "outflow_integrate", "source_integrate"),
        "sum_integrate",
    ),
)

for _panel_ax, _term_names, _residual_name in _panels:
    for _term_name, _label in zip(_term_names, _legend_labels):
        ds_box_model[_term_name].plot(ax=_panel_ax, label=_label)
    # The budget residual is always the black solid line.
    ds_box_model[_residual_name].plot(ax=_panel_ax, label="sum", color="k", linestyle="-")

for _ax in axs:
    _ax.set_xlabel(label_from_attrs(ds_box_model["time"]))
    _ax.set_ylabel(label_from_attrs(ds_box_model["source"]))
    _ax.legend()

In [None]:
fig, axs = plt.subplots(2, 1, figsize=(12, 6))

# Consistency check of the two residuals: differentiating the integrated residual
# is compared with the per-timestep one (top), and integrating the per-timestep
# residual is compared with the integrated one (bottom).
_marker_style = dict(marker=".", alpha=0.7, markersize=5, linestyle="None")

_residual = ds_box_model["sum"]
_residual_integrated = ds_box_model["sum_integrate"]

_residual.plot(ax=axs[0], label=r"$- \frac{dR}{dt} + I + O + S$", **_marker_style)
_residual_integrated.diff("time").plot(ax=axs[0], label="Differentiate", **_marker_style)

_residual.cumsum("time").plot(ax=axs[1], label="Integrate", **_marker_style)
_residual_integrated.plot(
    ax=axs[1], label=r"$-R + \int_{0}^{t}I + O + S dt$", **_marker_style
)

for _ax in axs:
    _ax.set_xlabel(label_from_attrs(ds_box_model["time"]))
    _ax.legend()
    # _ax.set_xlim(1000, 1100)

In [None]:
# Integrated mass budget in four steps. The outflow is stored with a negative
# sign, so all terms are ADDED; the legend labels state exactly what is plotted.
fig, axs = plt.subplots(2, 2, figsize=(12, 8), sharex=True)

axs: Tuple[plt.Axes, plt.Axes, plt.Axes, plt.Axes] = axs.flatten()

# (1) integrated inflow and outflow
axs[0].plot(
    ds_box_model["time"],
    ds_box_model["inflow_integrate"],
    label=r"$\int_{0}^{t} I dt$ (inflow mass in topmost gridbox)",
)
axs[0].plot(
    ds_box_model["time"],
    ds_box_model["outflow_integrate"],
    label=r"$\int_{0}^{t} O dt$ (outflow mass in lowest gridbox)",
)
axs[0].legend(loc="center right")

# (2) net integrated flux compared with the reservoir
axs[1].plot(
    ds_box_model["time"],
    ds_box_model["inflow_integrate"] + ds_box_model["outflow_integrate"],
    label=r"$\int_{0}^{t} I + O dt$",
)
data = ds_box_model["reservoir"]
axs[1].plot(data["time"], data, label="R reservoir")
axs[1].legend(loc="upper right")

# (3) what is missing in the reservoir, compared with the integrated source
data = ds_box_model["inflow_integrate"] + ds_box_model["outflow_integrate"] - ds_box_model["reservoir"]
axs[2].plot(data["time"], data, label=r"$-R + \int_{0}^{t} I + O dt$")
axs[2].plot(
    ds_box_model["source_integrate"]["time"],
    ds_box_model["source_integrate"],
    label=r"$\int_{0}^{t} S dt$",
)
axs[2].legend(loc="upper left")

# (4) residual of the integrated budget (zero if mass is conserved)
data = (
    ds_box_model["inflow_integrate"]
    + ds_box_model["outflow_integrate"]
    - ds_box_model["reservoir"]
    + ds_box_model["source_integrate"]
)
axs[3].plot(data["time"], data, label=r"$-R + \int_{0}^{t} I + O + S dt$")
axs[3].legend(loc="upper left");

<matplotlib.legend.Legend at 0x7fff5fc65310>

In [None]:
# Mass budget per output timestep in four steps (outflow is negative, so the
# terms are added).
fig, axs = plt.subplots(2, 2, figsize=(12, 8), sharex=True)

axs: Tuple[plt.Axes, plt.Axes, plt.Axes, plt.Axes] = axs.flatten()
_ax_fluxes, _ax_net, _ax_missing, _ax_residual = axs

_net_flux = ds_box_model["inflow"] + ds_box_model["outflow"]
_reservoir_change = ds_box_model["reservoir_differentiate"]

# (1) inflow and outflow
for _name, _label in (
    ("inflow", "I (inflow mass in topmost gridbox)"),
    ("outflow", "O (outflow mass in lowest gridbox)"),
):
    _ax_fluxes.plot(ds_box_model["time"], ds_box_model[_name], label=_label)
_ax_fluxes.legend(loc="center right")

# (2) net flux compared with the change of the reservoir
_ax_net.plot(ds_box_model["time"], _net_flux, label="$I + O$")
_ax_net.plot(_reservoir_change["time"], _reservoir_change, label=r"$\frac{dR}{dt}$ reservoir")
_ax_net.legend(loc="upper right")

# (3) imbalance without the source, compared with the source term
data = _net_flux - _reservoir_change
_ax_missing.plot(data["time"], data, label=r"$-\frac{dR}{dt} + I + O$")
_ax_missing.plot(ds_box_model["source"]["time"], ds_box_model["source"], label="S")
_ax_missing.legend(loc="upper left")

# (4) residual of the full budget
data = data + ds_box_model["source"]
_ax_residual.plot(data["time"], data, label=r"$-\frac{dR}{dt} + I + O + S$")
_ax_residual.legend(loc="upper left")

<matplotlib.legend.Legend at 0x7fffa11801d0>

In [None]:
# If the budget closes (-dR/dt + I + O + S = 0), the sign-flipped imbalance
# -(I + O - dR/dt) equals the source term S, i.e. the points lie on the 1:1 line.
data = -(ds_box_model["inflow"] + ds_box_model["outflow"] - ds_box_model["reservoir_differentiate"])

# Only times from 1500 s onwards are compared.
_late_times = slice(1500, None)
y_init = data.sel(time=_late_times)
x_init = ds_box_model["source"].sel(time=_late_times)

y, yerr = mean_and_stderror_of_mean(y_init, dims="time")
x, xerr = mean_and_stderror_of_mean(x_init, dims="time")

# NOTE(review): the histogram range [-1.5, -0.5] is hard-coded for this run.
_bin_edges = np.linspace(-1.5, -0.5, 100)

fig, ax = plt.subplots(1, 1, figsize=(12, 5))
ax.hist2d(x_init, y_init, bins=(_bin_edges, _bin_edges), cmap="Reds")
# Mean of both quantities with the standard error of the mean as error bars.
ax.errorbar(x=x, y=y, xerr=xerr, yerr=yerr, fmt="x", color="k", markersize=10, zorder=10)

plot_one_one(ax, linestyle="--")

In [None]:
# Error propagation for a ratio f = A / B (covariance term neglected):
#
#     sigma_f = |f| * sqrt((sigma_A / A)**2 + (sigma_B / B)**2)
#
# The expression is evaluated as `sigma_f` in the next cell, where A, B and their
# errors are defined. Executing it here would raise a NameError on a fresh kernel.

In [None]:
# Outflow as a percentage of the inflow, for times from 1500 s onwards.
# The stored outflow is negative, hence the factor -100.
_late_times = slice(1500, None)
A_init = -100 * ds_box_model["outflow"].sel(time=_late_times)
B_init = ds_box_model["inflow"].sel(time=_late_times)

A, sigma_A = mean_and_stderror_of_mean(A_init, dims="time")
B, sigma_B = mean_and_stderror_of_mean(B_init, dims="time")

# Covariance of both series; currently NOT used in sigma_f below.
sigma_AB = xr.cov(A_init, B_init, dim="time")

# Ratio of the means, with the error of a quotient (covariance term omitted).
f = A / B
_relative_error = np.sqrt((sigma_A / A) ** 2 + (sigma_B / B) ** 2)
sigma_f = np.abs(f) * _relative_error

# Ratio per timestep, with its mean and standard deviation.
data = A_init / B_init
m = data.mean("time")
s = data.std("time")

# Legend entries shared by both panels.
_ratio_label = "f $\\pm$ sigma_f" + f"\n{f:.2f} $\\pm$ {sigma_f:.2f} %"
_spread_label = "mean $\\pm$ STD" + f"\n{m:.2f} $\\pm$ {s:.2f} %"

fig, axs = plt.subplots(ncols=2, figsize=(10, 4), sharey=True, width_ratios=[3, 1])
_ax_series, _ax_hist = axs

# Left panel: time series of the ratio with both uncertainty bands.
_ax_series.plot(data["time"], data, alpha=1, color="grey", zorder=-1)
_ax_series.axhline(f, color="r", linestyle="-", label="mean")
_ax_series.fill_between(
    data["time"], f - sigma_f, f + sigma_f, color="r", alpha=0.3, label=_ratio_label
)
_ax_series.fill_between(data["time"], m - s, m + s, color="b", alpha=0.1, label=_spread_label)
_ax_series.set_xlim(0, 3500)
_ax_series.legend()
_ax_series.set_ylabel("Outflow / Inflow [%]")
_ax_series.set_xlabel("Time [s]")

# Right panel: distribution of the ratio, sharing the y-axis with the left panel.
p, b, _ = _ax_hist.hist(
    data.sel(time=slice(0, None)),
    density=True,
    alpha=0.5,
    color="grey",
    orientation="horizontal",
)

# The bands span slightly more than the tallest histogram bar.
_density_range = [0, 1.1 * np.max(p)]
_ax_hist.fill_between(
    _density_range, f - sigma_f, f + sigma_f, color="r", alpha=0.3, label=_ratio_label
)
_ax_hist.fill_between(_density_range, m - s, m + s, color="b", alpha=0.1, label=_spread_label)

_ax_hist.set_xlabel("Density")
_ax_hist.legend()

# Mean lines are drawn after the legends were created, so they add no legend entries.
for _ax in axs:
    _ax.axhline(m, color="b", linestyle="-", label="mean")
    _ax.axhline(f, color="r", linestyle="-", label="mean")

fig.suptitle("Outflow fraction (Outflow / Inflow)")
fig.tight_layout()
fig.savefig(fig_dir / "outflow_fraction.png", dpi=300)