In [None]:
import numpy as np
import xarray as xr
from pathlib import Path
import awkward as ak
from typing import List, Tuple, Dict

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import seaborn as sns
from sdm_eurec4a.visulization import set_custom_rcParams
from sdm_eurec4a.reductions import mean_and_stderror_of_mean

from pySD.sdmout_src import sdtracing
from pySD.sdmout_src import supersdata
from pySD.sdmout_src import pygbxsdat, pysetuptxt, supersdata

# Apply the project-wide plotting defaults before any figure is created
# (presumably matplotlib rcParams — see sdm_eurec4a.visulization; TODO confirm).
set_custom_rcParams()

from sdm_eurec4a import RepositoryPath

# Repository path helper; its ``repo_dir`` attribute is used below as the root of the
# figure directory. NOTE(review): "levante" presumably selects a machine-specific
# layout — confirm.
RP = RepositoryPath("levante")

In [None]:
# Select the microphysics setup to analyse: keep exactly one assignment active.
# The name is used both for the figure directory below and for the input data path.
# microphysics = "null_microphysics"
# microphysics = "condensation"
# microphysics = "collision_condensation"
microphysics = "coalbure_condensation_large"
# microphysics = "coalbure_condensation_small"

# All figures of this notebook are written to a per-setup directory inside the repository;
# the directory (including missing parents) is created if it does not exist yet.
fig_dir = RP.repo_dir / "results/notebooks/paper/mass_conservation" / microphysics
fig_dir.mkdir(exist_ok=True, parents=True)

In [None]:
# Output directory of one simulation for the selected microphysics setup.
# NOTE(review): hard-coded absolute, user-specific path — the notebook only runs where
# this directory exists; consider making the root configurable.
data_dir = Path(f"/home/m/m301096/CLEO/data/output_v4.0/{microphysics}/cluster_384")
# Numeric suffix of the directory name, e.g. "cluster_384" -> 384.
cloud_id = int(data_dir.name.split("_")[1])

# output_dir = data_dir / "processed"
# output_dir.mkdir(exist_ok=True, parents=False)

# output_path = output_dir / "eulerian_dataset.nc"
# output_path.parent.mkdir(exist_ok=True)

# Input files of the run: setup/stats text files, the zarr output and the gridbox boundaries.
setupfile_path = data_dir / "config" / "eurec4a1d_setup.txt"
statsfile_path = data_dir / "config" / "eurec4a1d_stats.txt"
zarr_path = data_dir / "eurec4a1d_sol.zarr"
gridfile_path = data_dir / "share/eurec4a1d_ddimlessGBxboundaries.dat"


# read in constants and initial setup from setup .txt file
config = pysetuptxt.get_config(str(setupfile_path), nattrs=3, isprint=False)
consts = pysetuptxt.get_consts(str(setupfile_path), isprint=False)
gridbox_dict = pygbxsdat.get_gridboxes(str(gridfile_path), consts["COORD0"], isprint=False)

# Raw model output; the gridbox dimension is renamed so it matches the eulerian dataset.
ds_zarr = xr.open_zarr(zarr_path, consolidated=False)
ds_zarr = ds_zarr.rename({"gbxindex": "gridbox"})
# Round the time coordinate to one decimal (the same rounding is applied to ds_eulerian below).
ds_zarr["time"] = np.round(ds_zarr["time"], 1)


# Pre-processed eulerian dataset of the same run, with the same rounding of the time coordinate.
ds_eulerian = xr.open_dataset(data_dir / "processed/eulerian_dataset.nc")
ds_eulerian["time"] = np.round(ds_eulerian["time"], 1)
# ds_eulerian['radius_bins'] = ds_eulerian['radius_bins'].where(ds_eulerian['radius_bins'] > 0, 1e-3)
ds_eulerian

Reading binary file:
 /home/m/m301096/CLEO/data/output_v4.0/coalbure_condensation_large/cluster_384/share/eurec4a1d_ddimlessGBxboundaries.dat


## Attempt to understand conservation of mass in the setup

This is a bit like in a reservoir.

We have:
- $\mathit{I}$   Inflow of SDs 
- $\mathit{O}$   Outflow of SDs
- $\mathit{R}$   Reservoir of SDs within the domain 


- For gridboxes $g \in \left[0, G\right]$
- For time $t \in \left[0, T\right]$
- For superdroplets (SDs) $i \in \left[0, S\right]$


- The inflow at time t is given by the first time instance of a droplet in the topmost SUBCLOUD gridbox. This should be its second timestep of existence.
- The outflow is given by the last timestep of existence.
- The reservoir consists of all OTHER values at this timestep for ALL gridboxes.

The difference should be exactly in line with CLEO's output of ``masscondensed`` $dM$:

$I = R + O + dM$

We need to use the represented mass of each SD.

In [None]:
# Wrap the raw zarr output as a super-droplet dataset.
dataset = supersdata.SupersDataNew(dataset=ds_zarr, consts=consts)

# Build an indexer from the unique values of "sdId", register it on the dataset and
# index the dataset by it (presumably grouping all entries of one super-droplet —
# TODO confirm against pySD).
sd_id_indexer = dataset["sdId"].attribute_to_indexer_unique()
dataset.set_attribute(sd_id_indexer)
dataset.index_by_indexer(dataset["sdId"])

---- Superdrop Properties -----
RHO_L = 998.203 Kg/m^3
RHO_SOL = 2016.5 Kg/m^3
MR_SOL = 0.05844277 Kg/mol
IONIC = 2.0
-------------------------------


Attribute coord1 not found in dataset
Attribute coord2 not found in dataset


In [None]:
def create_init_exit_reservoir_data(dataset: supersdata.SupersDataNew) -> Tuple[
    supersdata.SupersDataSimple,
    supersdata.SupersDataSimple,
    supersdata.SupersDataSimple,
]:
    """
    Split every attribute of ``dataset`` into inflow, outflow and reservoir parts.

    Only rows with more than one "time" entry are used. For each attribute and each
    remaining row (presumably one row per super-droplet, since the notebook indexes
    the dataset by "sdId" beforehand — TODO confirm):

    - inflow is the second entry (index ``1``),
    - outflow is the last entry (index ``-1``),
    - reservoir is every entry from the second up to, but excluding, the last
      (``1:-1``), flattened along the last axis.

    Parameters
    ----------
    dataset : supersdata.SupersDataNew
        Dataset whose ``attributes`` mapping is split. It must provide a "time" attribute.

    Returns
    -------
    Tuple[SupersDataSimple, SupersDataSimple, SupersDataSimple]
        ``(dataset_init, dataset_exit, dataset_reservoir)``. Each holds one attribute per
        attribute of ``dataset``, with the same name, units and metadata.
    """
    # Rows with a single entry have neither a second nor a distinct last entry: drop them.
    has_multiple_entries = ak.num(dataset["time"].data, axis=-1) > 1

    dataset_init = supersdata.SupersDataSimple([])
    dataset_exit = supersdata.SupersDataSimple([])
    dataset_reservoir = supersdata.SupersDataSimple([])

    for name, attribute in dataset.attributes.items():
        values = attribute.data[has_multiple_entries]

        selections = (
            (dataset_init, values[:, 1]),
            (dataset_exit, values[:, -1]),
            (dataset_reservoir, ak.flatten(values[:, 1:-1], axis=-1)),
        )
        for target, selected in selections:
            target.set_attribute(
                supersdata.SupersAttribute(
                    name=name,
                    data=selected,
                    units=attribute.units,
                    metadata=attribute.metadata,
                )
            )

    return dataset_init, dataset_exit, dataset_reservoir


# Split the super-droplet dataset into its inflow, outflow and reservoir parts.
dataset_init, dataset_exit, dataset_reservoir = create_init_exit_reservoir_data(dataset)

In [None]:
# Inflow: register an indexer built from the unique "time" values, then index by time.
# NOTE(review): these calls appear to modify the datasets in place, so re-running this
# cell without re-running the previous one may index already-indexed data — confirm.
dataset_init.set_attribute(dataset_init["time"].attribute_to_indexer_unique())
dataset_init.index_by_indexer(dataset_init["time"])

# Outflow: same treatment as the inflow.
dataset_exit.set_attribute(dataset_exit["time"].attribute_to_indexer_unique())
dataset_exit.index_by_indexer(dataset_exit["time"])

# Reservoir: indexed by time first and by gridbox ("sdgbxindex") second. Both indexers
# are registered before the first indexing call; keep this order.
dataset_reservoir.set_attribute(dataset_reservoir["time"].attribute_to_indexer_unique())
dataset_reservoir.set_attribute(dataset_reservoir["sdgbxindex"].attribute_to_indexer_unique())
dataset_reservoir.index_by_indexer(dataset_reservoir["time"])
dataset_reservoir.index_by_indexer(dataset_reservoir["sdgbxindex"])

We can only use the timesteps before the last one, so the final timestep is dropped!

In [None]:
# Reduce the represented mass with ak.sum for each dataset (presumably one value per
# index bin — TODO confirm) and drop the last time step from every result.
without_last_step = {"time": slice(0, -1)}

da_init = dataset_init.attribute_to_DataArray_reduction(
    "mass_represented", reduction_func=ak.sum
).isel(**without_last_step)

da_exit = dataset_exit.attribute_to_DataArray_reduction(
    "mass_represented", reduction_func=ak.sum
).isel(**without_last_step)

da_reservoir = dataset_reservoir.attribute_to_DataArray_reduction(
    "mass_represented", reduction_func=ak.sum
).isel(**without_last_step)

In [None]:
# Condensation mass change without the last time step and without the last gridbox.
massdelta = ds_eulerian["massdelta_condensation"].isel(time=slice(0, -1), gridbox=slice(0, -1))
# Mean spacing of the time coordinate, used to convert "per second" into "per timestep".
mean_timestep = ds_eulerian["time"].diff("time").mean()

da_dM = (
    # kg per gridbox (times the gridbox volume), then per timestep instead of per second;
    # the leading minus corrects the sign
    -(massdelta * ds_eulerian["gridbox_volume"] * mean_timestep)
).rename({"gridbox": "sdgbxindex"})  # same dimension name as the super-droplet data

da_dM

In [None]:
# Per-time-step mass budget: inflow I, outflow O, change of the reservoir R and the
# condensed mass dM. Nothing in this cell is a cumulative sum, so the labels carry no
# "cs" subscript, and the reservoir curve is labelled as the change of R it actually shows.

# Quantities shared by several of the figures below.
net_inflow = da_init - da_exit
reservoir_change = da_reservoir.sum("sdgbxindex").diff("time")
dM_total = da_dM.sum("sdgbxindex")

# Start from a fresh figure so the first plot never lands on axes left over from a
# previously executed cell.
plt.figure()
da_init.rolling(time=10).mean().plot(label="I smoothed\n(inflow mass in topmost gridbox)")
# The outflow is taken at the last timestep of existence of each SD, not in the topmost gridbox.
da_exit.rolling(time=10).mean().plot(label="O smoothed\n(outflow mass at last timestep of existence)")
plt.legend(loc="center right")

plt.savefig(fig_dir / "mass_conservation_inflow_outflow.png", dpi=300)

# Net inflow against the change of the reservoir between consecutive time steps.
plt.figure()
net_inflow.plot(label="$I - O$")

reservoir_change.plot(label=r"$\Delta R$ (change of reservoir)")
plt.legend(loc="upper right")
plt.savefig(fig_dir / "mass_conservation_reservoir.png", dpi=300)

# Part of the budget not explained by the reservoir, against the condensed mass.
plt.figure()
(net_inflow - reservoir_change).plot(label=r"$I - O - \Delta R$")

dM_total.plot(label="dM")

plt.legend(loc="upper left")

plt.savefig(fig_dir / "mass_conservation_residual_dM.png", dpi=300)

# Remaining residual of the budget; it should vanish if mass is conserved.
plt.figure()
(net_inflow - reservoir_change - dM_total).plot(label=r"$I - O - \Delta R - dM$", color="k")
plt.legend(loc="upper right")

plt.savefig(fig_dir / "mass_conservation_residual.png", dpi=300)

In [None]:
fig, ax = plt.subplots()

# Outflow expressed as a percentage of the inflow for every time step.
data = 100 * da_exit / da_init

# Time window (in units of the time coordinate) used for the mean and its standard error.
averaging_window = slice(1000, 1300)
m, sem = mean_and_stderror_of_mean(data.sel(time=averaging_window), dims="time")
# NOTE(review): the standard deviation is taken over the full time series, while the mean
# uses only the averaging window — confirm this is intended.
s = data.std("time")
ax.plot(data["time"], data)

# Reference line at 100 % (outflow equals inflow) and the window mean.
ax.axhline(100, color="k", linestyle="--")
ax.axhline(m, color="b", linestyle="-")

# Blue band: mean +/- standard error of the mean; red band: mean +/- standard deviation.
ax.fill_between(data["time"], m - sem, m + sem, color="b", alpha=0.3)
ax.fill_between(data["time"], m - s, m + s, color="r", alpha=0.3)

# A figure has to be readable on its own: name the axes and the plotted quantity.
ax.set_title("Outflow relative to inflow")
ax.set_xlabel("time")
ax.set_ylabel("O / I [%]")
ax.set_xlim(0, 3500)

(0.0, 3500.0)

In [None]:
# Cumulative mass budget: the time-integrated inflow minus outflow is compared with the
# mass stored in the reservoir R and with the cumulative condensed mass dM.

# Quantities shared by several of the figures below ("cs" = cumulative sum over time).
inflow_cs = da_init.cumsum("time")
outflow_cs = da_exit.cumsum("time")
reservoir_total = da_reservoir.sum("sdgbxindex")
dM_cs = da_dM.sum("sdgbxindex").cumsum("time")

# Start from a fresh figure so the first plot never lands on axes left over from a
# previously executed cell.
plt.figure()
da_init.rolling(time=10).mean().plot(label="I smoothed\n(inflow mass in topmost gridbox)")
# The outflow is taken at the last timestep of existence of each SD, not in the topmost gridbox.
da_exit.rolling(time=10).mean().plot(label="O smoothed\n(outflow mass at last timestep of existence)")
plt.legend(loc="center right")

plt.savefig(fig_dir / "cumsum_mass_conservation_inflow_outflow_smoothed.png", dpi=300)

# Cumulative inflow and outflow.
plt.figure()
inflow_cs.plot(label="$I_{cs}$ (cumulative sum over time)")
outflow_cs.plot(label="$O_{cs}$ (cumulative sum over time)")
plt.legend(loc="center right")
plt.savefig(fig_dir / "cumsum_mass_conservation_inflow_outflow.png", dpi=300)

# Cumulative net inflow against the mass currently held in the reservoir.
plt.figure()
(inflow_cs - outflow_cs).plot(label="$I_{cs} - O_{cs}$")

reservoir_total.plot(label="R (reservoir)")
plt.legend(loc="center right")
plt.savefig(fig_dir / "cumsum_mass_conservation_reservoir.png", dpi=300)

# Part of the budget not explained by the reservoir, against the cumulative condensed mass.
plt.figure()
(inflow_cs - outflow_cs - reservoir_total).plot(label="$I_{cs} - O_{cs} - R$")

dM_cs.plot(label="$dM_{cs}$ (cumulative sum over time)")

plt.legend(loc="center left")

plt.savefig(fig_dir / "cumsum_mass_conservation_residual_dM.png", dpi=300)

# Remaining residual of the cumulative budget; it should vanish if mass is conserved.
plt.figure()
(inflow_cs - outflow_cs - reservoir_total - dM_cs).plot(
    label="$I_{cs} - O_{cs} - R - dM_{cs}$", color="k"
)
plt.legend(loc="center right")
plt.savefig(fig_dir / "cumsum_mass_conservation_residual.png", dpi=300)