In [None]:
from pathlib import Path
import textwrap
from tqdm import tqdm

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.patches as patches
import matplotlib.collections as mcollections

import xarray as xr
from typing import Tuple, Literal, Union

import seaborn as sns

from sdm_eurec4a.visulization import (
    set_paper_rcParams,
    adjust_lightness_array,
    adjust_lightness,
    label_from_attrs,
    add_additional_axis,
    add_subplotlabel,
    save_figure,
)
from sdm_eurec4a import RepositoryPath
from sdm_eurec4a import data_loading
from sdm_eurec4a.constants import TimeSlices
from sdm_eurec4a import conversions

from sdm_eurec4a.reductions import mean_and_stderror_of_mean


# Apply the paper plotting style and keep its return value as the default colors.
default_colors = set_paper_rcParams()
# make sure that figures are not cut off
# plt.rcParams.update({'figure.autolayout': True})

# Variant of the default colors with the lightness adjusted by a factor of 0.75
# (presumably darker, as the name suggests — confirm in adjust_lightness_array).
default_dark_colors = adjust_lightness_array(default_colors, 0.75)

# Repository paths for the "levante" setup.
RepoPaths = RepositoryPath("levante")

# CLEO output directories: `data_dir` is the version analysed in this notebook,
# the older versions are kept for comparisons.
data_dir = RepoPaths.CLEO_data_dir / Path("output_v4.4-CLEO_v0.39.7-input_v4.2")
data_dir_v43 = RepoPaths.CLEO_data_dir / Path("output_v4.3-CLEO_v0.39.7-input_v4.2")
data_dir_v42 = RepoPaths.CLEO_data_dir / Path("output_v4.2")
data_dir_v41 = RepoPaths.CLEO_data_dir / Path("output_v4.1")
data_dir_v40 = RepoPaths.CLEO_data_dir / Path("output_v4.0")

# Figure output directories. `parents=False` means the parent directory must
# already exist, otherwise mkdir raises FileNotFoundError.
fig_dir = RepoPaths.fig_dir / Path("paper-v4.4-clara")
fig_dir.mkdir(exist_ok=True, parents=False)
appendix_fig_dir = fig_dir / "appendix"
appendix_fig_dir.mkdir(exist_ok=True, parents=False)
more_fig_dir = fig_dir / "more"
more_fig_dir.mkdir(exist_ok=True, parents=False)


def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=256):
    """
    Create a colormap which only covers the range ``[minval, maxval]`` of ``cmap``.

    Adapted from https://stackoverflow.com/a/18926541/16372843

    Parameters
    ----------
    cmap
        Colormap to truncate. It is called with positions in ``[0, 1]`` and
        needs to provide a ``name`` attribute.
    minval : float
        Lower position of the retained range.
    maxval : float
        Upper position of the retained range.
    n : int
        Number of colors sampled from ``cmap`` between ``minval`` and ``maxval``.

    Returns
    -------
    matplotlib.colors.LinearSegmentedColormap
        The colormap built from the sampled colors.
    """
    # colors of the original colormap at n evenly spaced positions
    sampled_colors = cmap(np.linspace(minval, maxval, n))
    truncated_name = "trunc({n},{a:.2f},{b:.2f})".format(n=cmap.name, a=minval, b=maxval)
    return mcolors.LinearSegmentedColormap.from_list(truncated_name, sampled_colors)


# Alternative colormap, kept for reference:
# strength_cmap = sns.cubehelix_palette(start=0.5, rot=-0.5, as_cmap=True)
# Full seaborn "mako_r" palette as a colormap object.
full_strength_cmap = sns.color_palette("mako_r", as_cmap=True)
# Use only the range [0.05, 1] of the full colormap.
strength_cmap = truncate_colormap(full_strength_cmap, 0.05, 1)
# Last expression of the cell: displays the colormap in the notebook.
strength_cmap

In [None]:
# Figure sizes as (width, height), used as ``figsize`` for the plots.
# The small figure has a 16:9 aspect ratio.
small_fig_size = np.array([16, 9]) / 3
square_fig_size = (small_fig_size[0],) * 2

# Large figures are the small ones scaled by a constant factor.
large_figure_multiplicator = 12 / 8.3
large_fig_size = large_figure_multiplicator * small_fig_size
large_square_fig_size = (large_fig_size[0],) * 2

# Wide figures combine the large width with the small height.
wide_fig_size = (large_fig_size[0], small_fig_size[1])

# Data preprocessing

In [None]:
# Eulerian dataset of the "coalbure_condensation_small" setup:
# open it, restrict it to the `TimeSlices.full_state` time range and post-process it.
ds_eulerian = xr.open_dataset(
    data_loading.__eulerian_data_path__(data_dir=data_dir, microphysic="coalbure_condensation_small")
)
ds_eulerian = ds_eulerian.sel(time=TimeSlices.full_state)
ds_eulerian = data_loading.__post_process_eulerian_dataset__(ds=ds_eulerian)

# Conservation dataset of the same setup, restricted to the same time range.
ds_conservation = xr.open_dataset(
    data_loading.__conservation_data_path__(data_dir=data_dir, microphysic="coalbure_condensation_small")
)
ds_conservation = ds_conservation.sel(time=TimeSlices.full_state)
# The post-processing receives the surface area averaged over all gridboxes
# and the mean spacing of the time coordinate as the timestep.
ds_conservation = data_loading.__post_process_conservation_dataset__(
    ds=ds_conservation,
    da_surface_area=ds_eulerian["surface_area"].mean("gridbox"),
    timestep=ds_conservation["time"].diff("time").mean().values,
)

In [None]:
# Style registry of the microphysics setups. Below it is iterated over to get the
# setup names and queried with `get_style(...)` for the scatter plot styles.
microphysics_styles = data_loading.MicrophysicsStyles()

In [None]:
def propagate_mean_sem(data, data_std, dim: str):
    """
    Standard error of the mean along ``dim`` including the individual uncertainties.

    Two contributions are added in quadrature:

    1. the spread of the values in ``data`` along ``dim``, expressed as standard
       error (sample standard deviation with ``ddof=1`` divided by ``sqrt(N)``),
    2. the uncertainties ``data_std`` of the individual values, summed in
       quadrature along ``dim`` and divided by ``N``.

    Parameters
    ----------
    data
        Values to average, e.g. an ``xarray.DataArray`` with dimension ``dim``.
    data_std
        Uncertainty of each value in ``data``, with the same dimension ``dim``.
    dim : str
        Name of the dimension along which the mean is taken.

    Returns
    -------
    Combined uncertainty, reduced along ``dim``.
    """
    n_members = len(data[dim])

    # contribution of the scatter between the individual values
    sem_between = data.std(dim=dim, ddof=1) / n_members**0.5

    # contribution of the uncertainties of the individual values
    squared_errors = data_std**2
    sem_within = squared_errors.sum(dim) ** 0.5 / n_members

    # both contributions combined in quadrature
    return (sem_between**2 + sem_within**2) ** 0.5


def propagate_mean_std(data, data_std, dim: str):
    """
    Spread of ``data`` along ``dim`` combined with the individual uncertainties.

    In contrast to ``propagate_mean_sem`` the spread between the values is used
    as plain sample standard deviation (``ddof=1``), it is NOT divided by
    ``sqrt(N)``. The uncertainties ``data_std`` of the individual values are
    summed in quadrature along ``dim`` and divided by ``N``. Both contributions
    are then added in quadrature.

    Parameters
    ----------
    data
        Values to average, e.g. an ``xarray.DataArray`` with dimension ``dim``.
    data_std
        Uncertainty of each value in ``data``, with the same dimension ``dim``.
    dim : str
        Name of the dimension along which the statistics are taken.

    Returns
    -------
    Combined uncertainty, reduced along ``dim``.
    """
    n_members = len(data[dim])

    # scatter between the individual values (sample standard deviation)
    std_between = data.std(dim=dim, ddof=1)

    # contribution of the uncertainties of the individual values
    squared_errors = data_std**2
    error_within = squared_errors.sum(dim) ** 0.5 / n_members

    # both contributions combined in quadrature
    return (std_between**2 + error_within**2) ** 0.5

In [None]:
def scatter_and_errorbar(
    ax: plt.Axes,
    x_var: str,
    y_var: str,
    ds: xr.Dataset,
    ds_sem: xr.Dataset,
    microphysics: Literal[
        "null_microphysics",
        "condensation",
        "collision_condensation",
        "coalbure_condensation_small",
        "coalbure_condensation_large",
    ] = "condensation",
    x_multiply: float = 1.0,
    y_multiply: float = 1.0,
    plot_patch: bool = True,
    plot_annotations: bool = True,
    patch_width: float = 1,
    scatter_kwargs: Union[dict, None] = None,
    error_kwargs: Union[dict, None] = None,
    annotation_kwargs: Union[dict, None] = None,
) -> Tuple[dict, dict]:
    """
    Scatter two variables of all clouds and mark their mean with error bars.

    The values of ``x_var`` and ``y_var`` for the chosen ``microphysics`` are
    plotted as scatter points. The mean over ``cloud_id`` is drawn with error
    bars whose length is given by ``propagate_mean_std``. Optionally the mean
    is annotated with its value and a shaded rectangle spanning
    ``± patch_width`` error-bar lengths is added.

    Relies on the module-level ``microphysics_styles`` (for the default scatter
    style) and ``propagate_mean_std``.

    Parameters
    ----------
    ax : plt.Axes
        Axes to draw on.
    x_var, y_var : str
        Names of the variables in ``ds`` and ``ds_sem``.
    ds : xr.Dataset
        Dataset with the values. Needs the dimensions ``microphysics`` and ``cloud_id``.
    ds_sem : xr.Dataset
        Dataset with the uncertainty of each value in ``ds``.
    microphysics : str
        Microphysics setup to select.
    x_multiply, y_multiply : float
        Factors applied to the values and their uncertainties (e.g. ``-1`` to flip the sign).
    plot_patch : bool
        If True, add the shaded rectangle around the mean.
    plot_annotations : bool
        If True, annotate the mean with ``mean ± spread`` for both axes.
    patch_width : float
        Half width of the rectangle in units of the error-bar length. For values
        above 1 an additional faint error bar of that length is drawn.
    scatter_kwargs : dict or None
        Keyword arguments for ``ax.scatter``. ``None`` uses
        ``microphysics_styles.get_style(key=microphysics)``.
    error_kwargs : dict or None
        Keyword arguments for ``ax.errorbar``. ``None`` uses
        ``dict(fmt="", label="mean ± SEM", color="black", capsize=5, linewidth=2)``.
    annotation_kwargs : dict or None
        Keyword arguments for ``ax.annotate``. ``None`` uses
        ``dict(fontsize=12, color="black")``.

    Returns
    -------
    Tuple[dict, dict]
        First dict: ``x_mean``, ``y_mean``, ``x_std``, ``y_std``, ``x_label``, ``y_label``.
        Second dict: the created artists ``pathcollection``, ``error_container``,
        ``rect``, ``y_annotation``, ``x_annotation`` (``None`` for artists which
        were not drawn).
    """
    # Defaults are created per call, so no dict is shared between calls.
    if scatter_kwargs is None:
        scatter_kwargs = microphysics_styles.get_style(key=microphysics)
    if error_kwargs is None:
        error_kwargs = dict(fmt="", label="mean ± SEM", color="black", capsize=5, linewidth=2)
    if annotation_kwargs is None:
        annotation_kwargs = dict(fontsize=12, color="black")

    x = x_multiply * ds[x_var].sel(microphysics=microphysics)
    x_sem = x_multiply * ds_sem[x_var].sel(microphysics=microphysics)
    x_mean = x.mean("cloud_id")
    x_std = propagate_mean_std(x, x_sem, dim="cloud_id")

    y = y_multiply * ds[y_var].sel(microphysics=microphysics)
    y_sem = y_multiply * ds_sem[y_var].sel(microphysics=microphysics)
    y_mean = y.mean("cloud_id")
    y_std = propagate_mean_std(y, y_sem, dim="cloud_id")

    pathcollection = ax.scatter(x, y, **scatter_kwargs)

    error_container = ax.errorbar(
        x=x_mean,
        y=y_mean,
        xerr=x_std,
        yerr=y_std,
        **error_kwargs,
    )

    # The annotation text is placed outside of the (possibly widened) patch.
    increase = max(patch_width, 1)
    offset = 1.1  # offset by 10 % to the right and top

    x_xy = ((x_mean.data + x_std.data), y_mean.data)
    x_xytext = (offset * (x_mean.data + increase * x_std.data), y_mean.data)
    x_label = rf"{x_mean.data:.2f}$\pm${x_std.data:.2f}"

    y_xy = (x_mean.data, (y_mean.data + y_std.data))
    y_xytext = (x_mean.data, offset * (y_mean.data + increase * y_std.data))
    y_label = rf"{y_mean.data:.2f}$\pm${y_std.data:.2f}"

    x_annotation = None
    y_annotation = None
    if plot_annotations:
        x_annotation = ax.annotate(
            x_label,
            xy=x_xy,
            xytext=x_xytext,
            ha="left",
            va="center",
            **annotation_kwargs,
        )
        y_annotation = ax.annotate(
            y_label,
            xy=y_xy,
            xytext=y_xytext,
            ha="center",
            va="bottom",
            rotation=90,
            **annotation_kwargs,
        )

    rect = None
    if plot_patch:
        if patch_width > 1.0:
            # faint error bar which spans the full width of the patch
            wide_error_kwargs = {**error_kwargs, "alpha": 0.1}
            ax.errorbar(
                x=x_mean,
                y=y_mean,
                xerr=patch_width * x_std,
                yerr=patch_width * y_std,
                **wide_error_kwargs,
            )

        # rectangle spanning mean ± patch_width * spread in both directions
        xy = x_mean.data - patch_width * x_std.data, y_mean.data - patch_width * y_std.data
        dx = patch_width * 2 * x_std.data
        dy = patch_width * 2 * y_std.data

        rect = patches.Rectangle(xy, dx, dy, linewidth=1, edgecolor="None", facecolor="k", alpha=0.1)
        ax.add_patch(rect)

    return (
        dict(
            x_mean=x_mean,
            y_mean=y_mean,
            x_std=x_std,
            y_std=y_std,
            x_label=x_label,
            y_label=y_label,
        ),
        dict(
            pathcollection=pathcollection,
            error_container=error_container,
            rect=rect,
            y_annotation=y_annotation,
            x_annotation=x_annotation,
        ),
    )

### Load the cleo output data

- Data in physical gridbox coordinates
- Data normalized by cloud base height.
- Data without ventilation coefficient

In [None]:
# Loader for the current CLEO output, for all setups listed in `microphysics_styles`.
cleo_dataset = data_loading.CleoDataset(
    data_dir=data_dir,
    microphysics=tuple(microphysics_styles),
)
# get the CLEO output data on physical height coordinates (values and their SEM)
ds, ds_sem = cleo_dataset()
# NOTE: order matters — after this call the loader returns the normalized data
cleo_dataset.normalize_gridboxes()
# get the CLEO output data on normalized height coordinates
ds_normalized, ds_normalized_sem = cleo_dataset()


# get the non-ventilated CLEO output data (read from the v4.1 output directory)
cleo_dataset_no_ventilation = data_loading.CleoDataset(
    data_dir=data_dir_v41,
    microphysics=tuple(microphysics_styles),
)
ds_no_ventilation, ds_sem_no_ventilation = cleo_dataset_no_ventilation()
# cleo_dataset.normalize_gridboxes()
# ds_normalized, ds_normalized_sem = cleo_dataset()

# valid_cleo_monitor_dataset = data_loading.CleoDataset(
#     data_dir= data_dir_v40,
#     microphysics=tuple(microphysics_styles),
# )
# # get the CLEO output data on physical height coordinates
# ds_valid_cleo_monitor, ds_valid_cleo_monitor_sem = valid_cleo_monitor_dataset()

condensation


collision_condensation
coalbure_condensation_small
coalbure_condensation_large
condensation
collision_condensation
coalbure_condensation_small
coalbure_condensation_large


## Integrity of the combined dataset

There seems to be an issue: the combined dataset can contain values that differ from those in the individual datasets.

Therefore, we identify the cloud ids for which the values differ.
We omit these clouds!

In [None]:
# Compare the combined dataset `ds` against the per-cloud conservation files.
# - `invalid_combined_dataset_ids`: clouds for which any compared variable differs
#   by more than `atol` for at least one microphysics setup
# - `error_combined_dataset_ids`: clouds for which a per-cloud file is missing
atol = 1e-10
invalid_combined_dataset_ids = set()
error_combined_dataset_ids = set()
compared_variables = ("inflow", "outflow", "source")

for mp in microphysics_styles:
    print(mp)
    for cloud_id in tqdm(ds["cloud_id"]):
        cloud_id = int(cloud_id.data)
        p = data_dir / Path(f"{mp}/cluster_{cloud_id}/processed/conservation_dataset.nc")

        if not p.is_file():
            error_combined_dataset_ids.add(cloud_id)
            continue

        # The context manager closes the file again; otherwise one open file
        # handle per cloud and microphysics setup is leaked.
        with xr.open_dataset(p) as ds_file:
            ds_single = ds_file.sel(time=TimeSlices.quasi_stationary_state)
            # temporal mean of the single dataset vs. the value in the combined
            # dataset; evaluated inside the `with` block while the file is open
            mismatch = any(
                np.abs(
                    ds_single[var].mean("time").data
                    - ds[var].sel(microphysics=mp).sel(cloud_id=cloud_id).data
                )
                > atol
                for var in compared_variables
            )

        if mismatch:
            invalid_combined_dataset_ids.add(cloud_id)

print(
    f"The following clouds have invalid data {invalid_combined_dataset_ids.union(error_combined_dataset_ids)}"
)

condensation


  7%|▋         | 9/126 [00:00<00:06, 18.19it/s]

100%|██████████| 126/126 [00:07<00:00, 17.77it/s]


collision_condensation


100%|██████████| 126/126 [00:05<00:00, 21.09it/s]


coalbure_condensation_small


100%|██████████| 126/126 [00:05<00:00, 22.19it/s]


coalbure_condensation_large


100%|██████████| 126/126 [00:05<00:00, 23.62it/s]

The following clouds have invalid data {89, 411}





Integrity check for the non-ventilated dataset

In [None]:
# atol = 1e-10
# invalid_combined_dataset_ids_no_ventilation = set()
# error_combined_dataset_ids_no_ventilation = set()

# for mp in microphysics_styles:
#     for cloud_id in ds_no_ventilation["cloud_id"]:
#         cloud_id = int(cloud_id.data)
#         p = Path(
#             f"/home/m/m301096/CLEO/data/output_v4.1/{mp}/cluster_{cloud_id}/processed/conservation_dataset.nc"
#         )

#         if p.is_file():
#             ds_single = xr.open_dataset(p).sel(time=TimeSlices.quasi_stationary_state)
#             inflow_diff = np.abs(
#                 ds_single["inflow"].mean("time").data
#                 - ds_no_ventilation["inflow"].sel(microphysics=mp).sel(cloud_id=cloud_id).data
#             )
#             outflow_diff = np.abs(
#                 ds_single["outflow"].mean("time").data
#                 - ds_no_ventilation["outflow"].sel(microphysics=mp).sel(cloud_id=cloud_id).data
#             )
#             source_diff = np.abs(
#                 ds_single["source"].mean("time").data
#                 - ds_no_ventilation["source"].sel(microphysics=mp).sel(cloud_id=cloud_id).data
#             )

#             if inflow_diff > atol or outflow_diff > atol or source_diff > atol:
#                 invalid_combined_dataset_ids_no_ventilation.add(cloud_id)

#         else:
#             error_combined_dataset_ids_no_ventilation.add(cloud_id)

# print(
#     f"The following clouds have invalid data {invalid_combined_dataset_ids_no_ventilation.union(error_combined_dataset_ids_no_ventilation)}"
# )

Option to get more information about these clouds: a more detailed analysis of the clouds with invalid data from the concatenation or with missing simulations.

In [None]:
# for cloud_id in invalid_combined_dataset_ids:
#     print('-------------')
#     print(cloud_id)


#     print(
#         'MP'.ljust(28),
#         'I-conc'.ljust(10),
#         'I-true'.ljust(10),
#         'I-DIFF'.ljust(10),
#         '|',
#         'O-conc'.ljust(10),
#         'O-true'.ljust(10),
#         'O-DIFF'.ljust(10),
#         '|',
#         'S-conc'.ljust(10),
#         'S-true'.ljust(10),
#         'S-DIFF'.ljust(10),
#     )

#     for mp in microphysics_styles :
#         p = Path(f'/home/m/m301096/CLEO/data/output_v4.2/{mp}/cluster_{cloud_id}/processed/conservation_dataset.nc')

#         if p.is_file():
#             ds_single = xr.open_dataset(p).sel(time = TimeSlices.quasi_stationary_state)

#             print(
#                 str(mp).ljust(28),
#                 f'{ds['inflow'].sel(microphysics = mp).sel(cloud_id = cloud_id ).data:.2e}'.ljust(10),
#                 f'{ds_single['inflow'].mean('time').data:.2e}'.ljust(10),
#                 f'{ds_single['inflow'].mean('time').data - ds['inflow'].sel(microphysics = mp).sel(cloud_id = cloud_id ).data:.2e}'.ljust(10),
#                 '|',
#                 f'{ds['outflow'].sel(microphysics = mp).sel(cloud_id = cloud_id).data:.2e}'.ljust(10),
#                 f'{ds_single['outflow'].mean('time').data:.2e}'.ljust(10),
#                 f'{ds_single['outflow'].mean('time').data - ds['outflow'].sel(microphysics = mp).sel(cloud_id = cloud_id ).data:.2e}'.ljust(10),
#                 '|',
#                 f'{ds['source'].sel(microphysics = mp).sel(cloud_id = cloud_id ).data:.2e}'.ljust(10),
#                 f'{ds_single['source'].mean('time').data:.2e}'.ljust(10),
#                 f'{ds_single['source'].mean('time').data - ds['source'].sel(microphysics = mp).sel(cloud_id = cloud_id ).data:.2e}'.ljust(10),
#                 # f'{ds_eulerian['massdelta_condensation'].sel(cloud_id = cloud_id).sum('gridbox').mean('time').data:.2e}'.ljust(10),
#                 )
#         else :
#             pass

## Integrity of conservation datasets

We have seen that the conservation datasets seem to show some errors when it comes to the total value of $A = I+O+S-\frac{dR}{dt}$.

Due to the lower sampling resolution of the condensation monitor, we get $|A| > 0$.
In the following, we will analyse the magnitude of this error.

We analyse this in detail below and find that this is the case for some clouds.
We require that the error does not exceed 10% relative to any of the conservation variables
- inflow
- outflow
- source

So we require $|A / V| < 10\,\% \,\, \forall \,\, V \in \{I, O, S\}$.

#### Compare all datasets visually

Use only the temporal mean from the CLEO output dataset itself.

In [None]:
# fig, axs = plt.subplots(nrows = 4, figsize = (8,6))


# for plot_dataset, label in zip(
#     [ds_valid_cleo_monitor, ds_no_ventilation, ds],
#     ['good monitor', 'non ventilated', 'ventilated'],
# ):
#     total = (
#         plot_dataset["inflow"] +
#         plot_dataset["outflow"] +
#         plot_dataset["source"] +
#         - plot_dataset["reservoir_change"]
#     )

#     error = {}
#     for key in ['inflow', 'outflow', 'source'] :
#         error[key] = total / plot_dataset[key] * 100
#         error[key].attrs.update(plot_dataset[key].attrs)
#         error[key].attrs.update(units = r'\%', description = f"Relative error of {key} per gridbox per cloud")
#         error[key] = error[key].expand_dims(which = [key])

#     da_error = xr.concat(
#         error.values(),
#         dim = 'which',
#     )
#     da_maximum_error = np.abs(da_error).max(dim = 'which').expand_dims(which = ['maximum'])
#     error['maximum'] = da_maximum_error

#     da_error = xr.concat(
#         error.values(),
#         dim = 'which',
#     )


#     for _ax, key in zip(axs, da_error['which'].data):
#         _ax.plot(
#             da_error.sel(which = key)['cloud_id'].astype(str),
#             np.abs(da_error.sel(which = key)).max('microphysics'),
#             label = label,
#             linestyle = '-',
#             marker = '+',
#         )
#         # _ax.plot(
#         #     da_error.sel(which = key)['cloud_id'].astype(str),
#         #     np.abs(da_error.sel(which = key)).sel(microphysics = 'condensation'),
#         #     label = label,
#         #     linestyle = ':',
#         #     marker = 'x',
#         # )
#         _ax.tick_params(axis='x', rotation=90)# .sel(which = 'maximum').max('microphysics')
#         _ax.set_title(key)
#         _ax.legend()
#         _ax.set_ylim(0, 10)

# # fig, axs = plt.subplots(
# #     nrows = 4,
# #     ncols = 1,
# #     sharey=True,
# #     figsize=(10, 10))

# # for _ax, key in zip(axs, error):
# #     data = error[key]
# #     pcm = _ax.pcolormesh(
# #         data['cloud_id'].astype(str),
# #         [microphysics_styles[mp]['name'] for mp in data['microphysics'].data],
# #         data,
# #         cmap = strength_cmap,
# #     )
# #     fig.colorbar(pcm , ax = _ax, label = label_from_attrs(data, linebreak=True))

# # for _ax in axs :
# #     _ax.set_xticks([])
# fig.tight_layout()

Calculate the error for all timesteps and AFTERWARDS calculate the temporal mean.

In [None]:
# fig, axs = plt.subplots(nrows = 4, figsize = (8, 6))


# for plot_dataset, label, conservation_data_dir in zip(
#     [ds_valid_cleo_monitor, ds_no_ventilation, ds],
#     ['good monitor', 'non ventilated', 'ventilated'],
#     [data_dir_v40, data_dir_v41, data_dir],
# ):
#     conservation_list = []
#     for mp in microphysics_styles :
#         _ds = xr.open_dataset(
#             data_loading.__conservation_data_path__(data_dir=conservation_data_dir, microphysic=mp)
#         )
#         conservation_list.append(_ds.expand_dims(microphysics = [mp]))

#     select_ds_conservation = xr.concat(
#         conservation_list,
#         dim = 'microphysics',
#     )

#     total = (
#         select_ds_conservation["inflow"] +
#         select_ds_conservation["outflow"] +
#         select_ds_conservation["source"]
#         - select_ds_conservation["reservoir_change"]
#     )
#     total = total.sel(time = TimeSlices.quasi_stationary_state).mean('time')

#     error = {}
#     for key in ['inflow', 'outflow', 'source'] :
#         error[key] = total / plot_dataset[key] * 100
#         error[key].attrs.update(plot_dataset[key].attrs)
#         error[key].attrs.update(units = r'\%', description = f"Relative error of {key} per gridbox per cloud")
#         error[key] = error[key].expand_dims(which = [key])

#     da_error = xr.concat(
#         error.values(),
#         dim = 'which',
#     )
#     da_maximum_error = np.abs(da_error).max(dim = 'which').expand_dims(which = ['maximum'])
#     error['maximum'] = da_maximum_error

#     da_error = xr.concat(
#         error.values(),
#         dim = 'which',
#     )

#     for _ax, key in zip(axs, da_error['which'].data):
#         _ax.plot(
#             da_error.sel(which = key)['cloud_id'].astype(str),
#             np.abs(da_error.sel(which = key)).max('microphysics'),
#             label = label,
#             linestyle = '-',
#             marker = '+',
#         )
#         # _ax.plot(
#         #     da_error.sel(which = key)['cloud_id'].astype(str),
#         #     np.abs(da_error.sel(which = key)).sel(microphysics = 'condensation'),
#         #     label = label,
#         #     linestyle = ':',
#         #     marker = 'x',
#         # )
#         _ax.tick_params(axis='x', rotation=90)# .sel(which = 'maximum').max('microphysics')
#         _ax.set_title(key)
#         _ax.set_ylim(0, 10)
#         _ax.legend()

# # fig, axs = plt.subplots(
# #     nrows = 4,
# #     ncols = 1,
# #     sharey=True,
# #     figsize=(10, 10))

# # for _ax, key in zip(axs, error):
# #     data = error[key]
# #     pcm = _ax.pcolormesh(
# #         data['cloud_id'].astype(str),
# #         [microphysics_styles[mp]['name'] for mp in data['microphysics'].data],
# #         data,
# #         cmap = strength_cmap,
# #     )
# #     fig.colorbar(pcm , ax = _ax, label = label_from_attrs(data, linebreak=True))

# # for _ax in axs :
# #     _ax.set_xticks([])
# fig.tight_layout()

#### Select only the ventilation cloud_ids

In [None]:
## NOTE: the outflow can be very weak for strong evaporation, so comparing only
## against the inflow and source variables would be sufficient (see the
## commented alternative below). Currently all three variables are used.
# relative_to_variables = ["inflow", "source"]
relative_to_variables = ["inflow", "outflow", "source"]
# setups for which the conservation error is evaluated (includes "null_microphysics")
error_microphysics = (
    "null_microphysics",
    "condensation",
    "collision_condensation",
    "coalbure_condensation_small",
    "coalbure_condensation_large",
)

conservation_data_dir = data_dir
cleo_dataset_error = data_loading.CleoDataset(
    data_dir=data_dir,
    microphysics=error_microphysics,
)
# get the CLEO output data on physical height coordinates
ds_error, ds_error_sem = cleo_dataset_error()


# Load the conservation dataset of each setup and stack them along a new
# "microphysics" dimension.
conservation_list = []
for mp in error_microphysics:
    _ds = xr.open_dataset(
        data_loading.__conservation_data_path__(data_dir=conservation_data_dir, microphysic=mp)
    )
    conservation_list.append(_ds.expand_dims(microphysics=[mp]))

select_ds_conservation = xr.concat(
    conservation_list,
    dim="microphysics",
)

# Residual of the mass budget, A = I + O + S - dR/dt, computed per timestep
# and only afterwards averaged over the quasi stationary state.
total = (
    select_ds_conservation["inflow"]
    + select_ds_conservation["outflow"]
    + select_ds_conservation["source"]
    - select_ds_conservation["reservoir_change"]
)
total = total.sel(time=TimeSlices.quasi_stationary_state).mean("time")

# Relative error in percent of the residual with respect to each budget term.
error = {}
for key in ["inflow", "outflow", "source"]:
    e = total / ds_error[key] * 100
    # non-finite values (e.g. from a division by zero) are replaced by NaN
    e = e.where(np.isfinite(e), np.nan)
    error[key] = e
    error[key].attrs.update(ds_error[key].attrs)
    error[key].attrs.update(units=r"\%", description=f"Relative error of {key} per gridbox per cloud")
    error[key] = error[key].expand_dims(which=[key])

da_error = xr.concat(
    error.values(),
    dim="which",
)
# largest absolute relative error among the variables in `relative_to_variables`
da_maximum_error = (
    np.abs(da_error)
    .sel(which=relative_to_variables)
    .max(dim="which", skipna=True)
    .expand_dims(which=["maximum"])
)
error["maximum"] = da_maximum_error

# concatenate again, now including the "maximum" entry
da_error = xr.concat(
    error.values(),
    dim="which",
)

# where is the error of the conservation 10% or larger relative to any of the inflow, outflow, source
# (the maximum is taken over all microphysics setups)
invalid_derivate_mass_conservation_ids = set(
    da_error["cloud_id"]
    .where(da_error.sel(which="maximum").max("microphysics") >= 10, drop=True)
    .data.astype(int)
    .tolist()
)

print(f"The following clouds have invalid conservation of mass {invalid_derivate_mass_conservation_ids}")

# plt.figure()
# plt.pcolormesh(
#     da_error['cloud_id'].astype(str),
#     da_error['which'].astype(str),
#     da_error.sel(microphysics="null_microphysics"), vmax = 10, vmin = 0, cmap = strength_cmap,
#     shading = 'auto',
#     )
# plt.tick_params(axis = 'x', rotation = 90)
# plt.colorbar()

# plt.figure()
# plt.pcolormesh(
#     da_error['cloud_id'].astype(str),
#     da_error['which'].astype(str),
#     da_error.sel(microphysics="condensation"), vmax = 10, vmin = 0, cmap = strength_cmap,
#     shading = 'auto',
#     )
# plt.tick_params(axis = 'x', rotation = 90)
# plt.colorbar()

null_microphysics
condensation
collision_condensation
coalbure_condensation_small
coalbure_condensation_large
The following clouds have invalid conservation of mass {549, 239, 83, 86, 88, 569, 250}


In [None]:
# Display the setups known to the style registry (note: no "null_microphysics").
microphysics_styles.available_setups

('condensation',
 'collision_condensation',
 'coalbure_condensation_small',
 'coalbure_condensation_large')

# Remove outliers

We omit the following clouds:
- Where the cloud base precipitation exceeds the inter-cloud mean by more than 4 standard deviations
- Where the combined data is not the same as the individual datasets
- Where the conservation dataset has a relative error of 10% or more
- Where the column integrated evaporation exceeds the inter-cloud mean by more than 4 standard deviations (about 2.4 mm/h)

## Maximum cloud base precipitation

We exclude clouds with precipitation which exceed the inter-cloud mean by more than 4 standard deviations. 

In [None]:
# Cloud base precipitation of the "condensation" setup and its uncertainty per cloud.
data = ds_error["inflow_precipitation"].sel(microphysics="condensation")
data_sem = ds_error_sem["inflow_precipitation"].sel(microphysics="condensation")

# Inter-cloud mean and spread; the outlier threshold is mean + 4 * spread.
m = data.mean("cloud_id").data
s = propagate_mean_std(data, data_sem, dim="cloud_id")
precipitation_threshold = m + 4 * s

print(f"mean: {m:.2f}, std: {s:.2f} mm/h")
print(f"mean + 4 std: {precipitation_threshold:.2f} mm/h")

# Ids of all clouds above the threshold.
precipitation_outliers = ds["cloud_id"].where(data > precipitation_threshold, drop=True)
invalid_cloud_base_precipitation_ids = {int(_d) for _d in precipitation_outliers.data}
invalid_cloud_base_precipitation_ids

mean: 1.73, std: 5.36 mm/h
mean + 4 std: 23.18 mm/h


{384}

## Maximum column integrated evaporation

We exclude clouds with column integrated evaporation, which exceed the inter-cloud mean by more than 4 standard deviations.

In [None]:
# Column integrated evaporation of the "condensation" setup (sign flipped, so that
# evaporation is positive) and its uncertainty per cloud.
data = -ds_error["source_precipitation"].sel(microphysics="condensation")
data_sem = ds_error_sem["source_precipitation"].sel(microphysics="condensation")

# Inter-cloud mean and spread; the outlier threshold is mean + 4 * spread.
m = data.mean("cloud_id").data
s = propagate_mean_std(data, data_sem, dim="cloud_id")
evaporation_threshold = m + 4 * s

print(f"mean: {m:.2f}, std: {s:.2f} mm/h")
print(f"mean + 4 std: {evaporation_threshold:.2f} mm/h")

# Ids of all clouds above the threshold.
evaporation_outliers = ds["cloud_id"].where(data > evaporation_threshold, drop=True)
invalid_column_integrated_evaporation_ids = {int(_d) for _d in evaporation_outliers.data}
invalid_column_integrated_evaporation_ids

mean: 0.27, std: 0.53 mm/h
mean + 4 std: 2.37 mm/h


{150, 384}

## Combine and visualize outliers

In [None]:
all_cloud_ids = set(ds["cloud_id"].data.astype(int).tolist())

# Clouds with broken data: mismatch in the combined dataset, missing files,
# or a too large error in the mass conservation.
invalid_data_cloud_ids = (
    invalid_combined_dataset_ids | error_combined_dataset_ids | invalid_derivate_mass_conservation_ids
)
# Clouds with outlier values in precipitation or evaporation.
invalid_value_cloud_ids = invalid_cloud_base_precipitation_ids | invalid_column_integrated_evaporation_ids
set_invalid_cloud_ids = invalid_data_cloud_ids | invalid_value_cloud_ids

# remove invalid clouds
set_valid_cloud_ids = all_cloud_ids - set_invalid_cloud_ids
valid_cloud_ids = sorted(set_valid_cloud_ids)

# Store the valid ids as a YAML list next to the CLEO data:
# two comment lines, the key and one " - <id>" entry per cloud.
with open(data_dir / Path("valid_cloud_ids.yaml"), "w") as f:
    f.write(
        "# The following cloud ids are valid for the CLEO data\n"
        "# and can be used for the analysis\n"
        "valid_cloud_ids:\n"
    )
    for _id in valid_cloud_ids:
        f.write(f" - {_id}\n")

print(f"Number of cloud with valid CLEO data is {len(valid_cloud_ids)} of {len(all_cloud_ids)}")

Number of cloud with valid CLEO data is 115 of 126


In [None]:
# Appendix figure: cloud base precipitation vs. column integrated evaporation for
# the "condensation" setup, with all omitted clouds marked as black dots.
fig, ax = plt.subplots()

# NOTE(review): only `x` and `y` are used further within this cell (scatter of the
# invalid clouds and axis labels). `x_sem`, `x_mean`, `x_std`, `y_sem`, `y_mean`,
# `y_std` and `style` are not used here; `scatter_and_errorbar` recomputes the
# statistics from `ds` / `ds_sem`.
# NOTE(review): `x` is taken from `ds_error` while `y` is taken from `ds` — confirm
# that both contain identical values for the "condensation" setup.
x = ds_error["inflow_precipitation"].sel(microphysics="condensation")
x_sem = ds_error_sem["inflow_precipitation"].sel(microphysics="condensation")
# x_sem = x * 0
x_mean = x.mean("cloud_id")
x_std = propagate_mean_std(x, x_sem, dim="cloud_id")

y = -ds["source_precipitation"].sel(microphysics="condensation")
y_sem = -ds_sem["source_precipitation"].sel(microphysics="condensation")
y_mean = y.mean("cloud_id")
y_std = propagate_mean_std(y, y_sem, dim="cloud_id")

style = microphysics_styles.get_style("condensation")
# invalid clouds as black dots, drawn on top (zorder=10)
ax.scatter(
    x.sel(cloud_id=sorted(set_invalid_cloud_ids)),
    y.sel(cloud_id=sorted(set_invalid_cloud_ids)),
    color="k",
    marker="o",
    zorder=10,
)

# all clouds with mean, error bars and a patch of ± 4 times the spread
# (default setup of the function is "condensation"; y is sign-flipped)
scatter_and_errorbar(
    ax=ax,
    x_var="inflow_precipitation",
    y_var="source_precipitation",
    ds=ds,
    ds_sem=ds_sem,
    x_multiply=1.0,
    y_multiply=-1.0,
    plot_patch=True,
    plot_annotations=True,
    patch_width=4,
)

# ax.set_xlim(0, None)
# ax.set_ylim(0, None)
# ax.set_xscale('linear')
# ax.set_yscale('linear')

# logarithmic axes with fixed limits
ax.set_xlim(1e-3, 70)
ax.set_ylim(1e-3, 5)
ax.set_xscale("log")
ax.set_yscale("log")

ax.set_xlabel(label_from_attrs(x))
ax.set_ylabel(label_from_attrs(y, name_width=25))
fig.tight_layout()

save_figure(fig, appendix_fig_dir / "outlier_scatter")

## Remove outliers from all used datasets

From our set of 117 clouds with valid data, we exclude the 2 outlier clouds whose values exceed the inter-cloud mean by more than 4 standard deviations.

Thus, we retain 115 of these 117 clouds (98.3%).

In [None]:
# Share of the clouds with valid data (all clouds minus `invalid_data_cloud_ids`)
# which remain after the value-based outliers are removed as well.
f"{len(valid_cloud_ids) / len(all_cloud_ids - invalid_data_cloud_ids) * 100:.2f}% of the clouds are not outliers"

'98.29% of the clouds are not outliers'

In [None]:
# Restrict all datasets to the valid cloud ids. The names are overwritten, so from
# here on every dataset only contains the valid clouds.
ds_eulerian = ds_eulerian.sel(cloud_id=valid_cloud_ids)
ds_conservation = ds_conservation.sel(cloud_id=valid_cloud_ids)

ds = ds.sel(cloud_id=valid_cloud_ids)
ds_sem = ds_sem.sel(cloud_id=valid_cloud_ids)

ds_normalized = ds_normalized.sel(cloud_id=valid_cloud_ids)
ds_normalized_sem = ds_normalized_sem.sel(cloud_id=valid_cloud_ids)

# NOTE(review): the non-ventilated data is read from another output directory
# (`data_dir_v41`); this selection assumes that it contains all valid cloud ids — confirm.
ds_no_ventilation = ds_no_ventilation.sel(cloud_id=valid_cloud_ids)
ds_sem_no_ventilation = ds_sem_no_ventilation.sel(cloud_id=valid_cloud_ids)

## add some more data vars to the ds

In [None]:
# Cloud base height: vertical coordinate of the gridbox given by `max_gridbox`.
ds["cloud_base_height"] = ds["gridbox_coord3"].sel(gridbox=ds["max_gridbox"])
ds["cloud_base_height"].attrs = {
    "long_name": "Cloud base height",
    "units": ds["gridbox_coord3"].attrs["units"],
}
# Mean relative humidity over all gridboxes, weighted by the gridbox volume.
ds["relative_humidity_mean"] = (
    ds["relative_humidity"] * ds["gridbox_volume"] / ds["gridbox_volume"].sum("gridbox")
).sum("gridbox")
ds["relative_humidity_mean"].attrs = {
    "long_name": "Mean relative humidity",
    "units": ds["relative_humidity"].attrs["units"],
}

# Calculate mean evaporation height
# for each gridbox the evaporation energy is given by E * V
ev = ds["evaporation_rate_energy"] * ds["gridbox_volume"]
# MEH is then the height average weighted by E * V.
# We need to weight, because the gridbox volume is not constant along the vertical
meh = ((ds["gridbox_coord3"] * ev) / ev.sum("gridbox")).sum("gridbox")
# normalize by the height of the `max_gridbox` gridbox (the cloud base height above)
meh = meh / ds["gridbox_coord3"].sel(gridbox=ds["max_gridbox"])

ds["mean_evaporation_height"] = meh
ds["mean_evaporation_height"].attrs = dict(
    long_name="Mean evaporation height",
    units=ds_normalized["normalized_gridbox_coord3"].attrs["units"],
)

ds["radius_bins"].attrs.update(
    long_name="Radius",
    units="µm",
)

# Width of each radius bin from the central difference of the neighbouring bins.
# The shifts leave the first and the last bin undefined (NaN).
# NOTE(review): `interpolate_na` is meant to fill these; confirm that the edge bins
# are actually filled, since linear interpolation may not extrapolate beyond the
# valid range.
radius_bin_width = (ds["radius_bins"].shift(radius_bins=-1) - ds["radius_bins"].shift(radius_bins=1)) / 2
radius_bin_width = radius_bin_width.interpolate_na(dim="radius_bins", method="linear")
ds["radius_bin_width"] = radius_bin_width
ds["radius_bin_width"].attrs = dict(
    long_name="Radius bin width",
    units="µm",
    description="Width of the radius bin given by a linear interpolation of the radius bins",
)

# Number concentration per gridbox volume and per radius bin width,
# derived from the temporal mean of xi.
ds["number_concentration"] = ds["xi_temporal_mean"] / ds["gridbox_volume"] / ds["radius_bin_width"]
ds["number_concentration"].attrs = dict(
    long_name="Number concentration",
    units="m^{-3} µm^{-1}",
)

# Visualization

In [None]:
def plot_individual_clouds(ds, ds_normalized, cloud_ids):
    """
    Plot all clouds in light grey and highlight the clouds given by ``cloud_ids``.

    Only ``microphysics="condensation"`` is shown. The figure has one row of six
    panels:

    0. number concentration at ``ds["max_gridbox"]`` against the radius bins (log-log)
    1. relative humidity against the normalized height
    2. negated evaporation rate against the normalized height (log x-axis)
    3. liquid water content against the normalized height (log x-axis)
    4. histogram of ``cloud_mass_radius_mean``
    5. histogram of ``cloud_liquid_water_content``

    Parameters
    ----------
    ds
        Dataset providing ``radius_bins``, ``number_concentration``, ``max_gridbox``,
        ``cloud_mass_radius_mean`` and ``cloud_liquid_water_content``.
    ds_normalized
        Dataset providing ``relative_humidity``, ``evaporation_rate_energy``,
        ``liquid_water_content`` and ``normalized_gridbox_coord3``.
    cloud_ids
        Labels passed to ``.sel(cloud_id=...)`` to pick the highlighted clouds.

    Returns
    -------
    fig, axs
        The created figure and its array of six axes.
    """
    grey_line = [0.9, 0.9, 0.9, 0.3]
    grey_bar = [0.9, 0.9, 0.9, 1]
    highlight_kwargs = dict(linewidth=2, alpha=0.75)

    def condensation_only(da):
        # select the condensation setup and put cloud_id last so each cloud is one line
        return da.sel(microphysics="condensation").transpose(..., "cloud_id")

    fig, axs = plt.subplots(nrows=1, ncols=6, figsize=(15, 4))

    # gather everything that is plotted before drawing
    radius = ds["radius_bins"]
    concentration = condensation_only(ds["number_concentration"].sel(gridbox=ds["max_gridbox"]))
    height = ds_normalized["normalized_gridbox_coord3"]
    profiles = [
        condensation_only(ds_normalized["relative_humidity"]),
        -condensation_only(ds_normalized["evaporation_rate_energy"]),
        condensation_only(ds_normalized["liquid_water_content"]),
    ]
    histograms = [
        (condensation_only(ds["cloud_mass_radius_mean"]), np.linspace(0, 1800, 20)),
        (condensation_only(ds["cloud_liquid_water_content"]), np.linspace(0, 0.5, 20)),
    ]

    # panel 0: number concentration at the max_gridbox
    spectrum_ax = axs[0]
    spectrum_ax.plot(radius, concentration, color=grey_line)
    spectrum_ax.plot(radius, concentration.sel(cloud_id=cloud_ids), **highlight_kwargs)
    spectrum_ax.set_xlabel(label_from_attrs(radius))
    spectrum_ax.set_ylabel(label_from_attrs(concentration))
    spectrum_ax.set_xscale("log")
    spectrum_ax.set_yscale("log")
    spectrum_ax.set_xlim(50, None)

    # panels 1-3: vertical profiles against the normalized height
    for profile_ax, profile in zip(axs[1:4], profiles):
        profile_ax.plot(profile, height, color=grey_line)
        profile_ax.plot(profile.sel(cloud_id=cloud_ids), height, **highlight_kwargs)
        profile_ax.set_xlabel(label_from_attrs(profile))
        profile_ax.set_ylabel(label_from_attrs(height))

    for log_ax in (axs[2], axs[3]):
        log_ax.set_xscale("log")

    # panels 4-5: histograms over all clouds with the selection drawn on top in red
    for hist_ax, (values, bin_edges) in zip(axs[4:], histograms):
        selected = values.sel(cloud_id=cloud_ids)
        if len(selected) == 1:
            # wrap a single selected cloud so that hist receives one dataset
            selected = [selected]

        hist_ax.hist(values, bins=bin_edges, color=grey_bar)
        hist_ax.hist(selected, bins=bin_edges, color="r")

        hist_ax.set_xlabel(label_from_attrs(values, name_width=20))
        hist_ax.set_ylabel("Counts")

    fig.tight_layout()

    return fig, axs

# Data and Methods



### Precipitation plot

We want to show the stationary state of the simulation.
And we want to show the values of precipitation to show the errors.

In [None]:
# Temporal evolution of the negated "outflow_precipitation" of all clouds,
# drawn as rolling means together with inter-cloud statistics.
rolling_indices = 30  # window length (number of time steps) of the centred rolling mean
xlim = (0, 3600)
ylim = (0, 10)

fig, ax = plt.subplots()
ax: plt.Axes = ax

x = ds_conservation["time"]

y = -ds_conservation["outflow_precipitation"].transpose("time", ...)
y_rolling = y.rolling(time=rolling_indices, center=True).mean()
x_rolling = x.rolling(time=rolling_indices, center=True).mean()

# per-cloud temporal mean and its standard error within the quasi stationary state
y_mean, y_sem = mean_and_stderror_of_mean(y.sel(time=TimeSlices.quasi_stationary_state), dims=("time",))

# inter-cloud statistics used in the legend labels
total_mean = y_mean.mean("cloud_id")
total_std = propagate_mean_std(data=y_mean, data_std=y_sem, dim="cloud_id")

total_median = y_mean.median("cloud_id")

# individual clouds
ax.plot(
    x_rolling,
    y_rolling,
    color="grey",
    alpha=0.2,
    linewidth=0.5,
    zorder=10,
)

# inter-cloud mean and standard deviation of the rolling means
ax.plot(
    x_rolling,
    y_rolling.mean("cloud_id"),
    color="k",
    alpha=1,
    linestyle="--",
    zorder=10,
    label=rf"Mean: {total_mean.data:.2f}$\pm${total_std.data:.2f} {label_from_attrs(y, return_name=False)}",
)
ax.fill_between(
    x_rolling,
    y_rolling.mean("cloud_id") + y_rolling.std("cloud_id"),
    y_rolling.mean("cloud_id") - y_rolling.std("cloud_id"),
    label="Std.Dev.",
    color=adjust_lightness("grey", 1.7),
    alpha=1,
    zorder=3,
)

# inter-cloud median and interquartile range of the rolling means
ax.plot(
    x_rolling,
    y_rolling.median("cloud_id"),
    color="k",
    alpha=1,
    linestyle="-",
    zorder=10,
    label=f"Median: {total_median.data:.2f} {label_from_attrs(y, return_name=False)}",
)

ax.fill_between(
    x_rolling,
    y_rolling.quantile(0.25, "cloud_id"),
    y_rolling.quantile(0.75, "cloud_id"),
    label="25-75%",
    color=adjust_lightness("grey", 1.2),
    alpha=1,
    zorder=4,
)

ax.set_ylim(ylim)
ax.set_ylabel(label_from_attrs(y, name_width=25))
ax.set_xlabel(r"Simulation time $[s]$")
ax.set_xlim(xlim)

# mark the start of the quasi stationary state
ax.axvline(
    TimeSlices.quasi_stationary_state.start,
    color="red",
    linestyle="--",
    linewidth=1,
    alpha=1,
    zorder=20,
    label="Stationary State",
)

# The legend only contains artists that exist when it is created, so it has to be
# built after the labelled vertical line has been added.
ax.legend(loc="upper right")

fig.tight_layout()
save_figure(fig=fig, filepath=fig_dir / "precipitation-temporal-evolution-stationary-state-all")

  return fnb._ureduce(a,


Scatter of precipitation and rain water content

In [None]:
# Scatter of cloud liquid water content against the negated outflow precipitation
# with marginal histograms on top (x) and on the right (y).
fig, axs = plt.subplots(2, 2, figsize=(5, 5), width_ratios=[1, 0.3], height_ratios=[0.3, 1])

# the upper right panel stays empty
ax_empty: plt.Axes = axs[0, 1]
ax_empty.axis("off")
ax_x_hist: plt.Axes = axs[0, 0]
ax_y_hist: plt.Axes = axs[1, 1]
ax_scatter: plt.Axes = axs[1, 0]

# the marginal histograms share the corresponding axis with the scatter panel
ax_x_hist.sharex(ax_scatter)
ax_y_hist.sharey(ax_scatter)

x = ds["cloud_liquid_water_content"]
y = -ds["outflow_precipitation"]

x_median = x.median("cloud_id")
y_median = y.median("cloud_id")


# the histogram bins also define the axis limits of the scatter panel
x_bins = np.arange(0, 1, 0.05)
ax_scatter.set_xlim(x_bins[0], x_bins[-1])

y_bins = np.arange(0, 16.5, 0.5)
ax_scatter.set_ylim(y_bins[0], y_bins[-1])

for mp in ["condensation"]:

    # NOTE(review): `mp` is not passed to scatter_and_errorbar here; presumably its
    # default selects "condensation" — confirm.
    statistic_dict, plot_dict = scatter_and_errorbar(
        ax=ax_scatter,
        x_var="cloud_liquid_water_content",
        y_var="outflow_precipitation",
        ds=ds,
        ds_sem=ds_sem,
        x_multiply=1,
        y_multiply=-1,
        plot_patch=False,
        plot_annotations=True,
    )

    # add median annotation to the text

    y_annotation = plot_dict["y_annotation"]
    y_annotation.set(
        text=y_annotation.get_text() + f"\n{y_median.sel(microphysics=mp).data:.2f}",
    )
    y_annotation.set(rotation=90, va="bottom", ha="center")

    x_annotation = plot_dict["x_annotation"]
    x_annotation.set(
        text=x_annotation.get_text() + f"\n{x_median.sel(microphysics=mp).data:.2f}",
    )
    x_annotation.set(y=1, x=0.4, va="center", ha="center")

    ax_x_hist.hist(
        x.sel(microphysics=mp),
        bins=x_bins,
        histtype="step",
        color=microphysics_styles.get_style(mp)["color"],
        lw=2,
    )

    ax_y_hist.hist(
        y.sel(microphysics=mp),
        bins=y_bins,
        histtype="step",
        color=microphysics_styles.get_style(mp)["color"],
        lw=2,
        orientation="horizontal",
    )

ax_scatter.set_xlabel(label_from_attrs(x))
ax_scatter.set_ylabel(label_from_attrs(y))

ax_x_hist.set_ylabel("Counts")
ax_y_hist.set_xlabel("Counts")

# NOTE(review): the x label was already set above with the same DataArray;
# this second call looks redundant.
ax_scatter.set_xlabel(label_from_attrs(da=x))


for _ax in axs.flatten():
    _ax.grid(linestyle="-", alpha=0.2, color="grey")

add_subplotlabel([ax_x_hist, ax_scatter, ax_y_hist])

fig.tight_layout()
save_figure(fig=fig, filepath=more_fig_dir / f"{x.name}-{y.name}-scattered-histograms-LINEAR")

Individual histograms

HERENILS

In [None]:
# Two stacked histograms: cloud liquid water content (top) and negated outflow
# precipitation (bottom), each with mean +- std and median marked.
fig, axs = plt.subplots(ncols=1, nrows=2, figsize=(5.33, 4))

# common count axis limits; also the vertical extent of the shaded mean +- std band
ylim = (0, 80)
ax_x_hist: plt.Axes = axs[0]
ax_y_hist: plt.Axes = axs[1]

x = ds["cloud_liquid_water_content"]
x_sem = ds_sem["cloud_liquid_water_content"]
y = -ds["outflow_precipitation"]
# NOTE(review): the standard error is negated together with the data, which makes it
# negative; this is only harmless if propagate_mean_std ignores the sign — confirm.
y_sem = -ds_sem["outflow_precipitation"]

x_mean = x.mean("cloud_id")
x_std = propagate_mean_std(x, x_sem, dim="cloud_id")

y_mean = y.mean("cloud_id")
y_std = propagate_mean_std(y, y_sem, dim="cloud_id")

x_median = x.median("cloud_id")
y_median = y.median("cloud_id")

x_bins = np.arange(0, 1, 0.05)
y_bins = np.arange(0, 17, 0.5)

# bundle everything needed per panel so both panels are drawn by the same loop
x_dict = dict(
    data=x,
    mean=x_mean,
    std=x_std,
    median=x_median,
    bins=x_bins,
)
y_dict = dict(
    data=y,
    mean=y_mean,
    std=y_std,
    median=y_median,
    bins=y_bins,
)

for mp in ["condensation"]:
    style = microphysics_styles.get_style(mp)
    for d, _ax in zip([x_dict, y_dict], [ax_x_hist, ax_y_hist]):

        median = d["median"].sel(microphysics=mp)
        m, s = d["mean"].sel(microphysics=mp), d["std"].sel(microphysics=mp)
        _ax.hist(
            d["data"].sel(microphysics=mp),
            bins=d["bins"],
            histtype="step",
            color=style["color"],
            lw=2,
        )
        # dashed line: mean, shaded band: mean +- std, solid line: median
        _ax.axvline(
            m,
            color=style["color"],
            linestyle="--",
            lw=2,
            label=rf"Mean: {m.data:.2f} $\pm$ {s.data:.2f}",
        )
        _ax.fill_betweenx(
            ylim,
            m - s,
            m + s,
            color=style["color"],
            alpha=0.1,
        )
        _ax.axvline(
            median,
            color=style["color"],
            linestyle="-",
            lw=2,
            label=f"Median: {median.data:.2f}",
        )

ax_x_hist.set_xlabel(label_from_attrs(x))
ax_y_hist.set_xlabel(label_from_attrs(y))

ax_y_hist.set_xticks(np.arange(0, 17, 2))
for _ax in axs.flatten():
    _ax.set_ylabel("Counts")
    _ax.set_ylim(ylim)
    _ax.legend(loc="upper right")
    _ax.set_xlim(0, None)

add_subplotlabel([ax_x_hist, ax_y_hist], location="title", zorder=100)

fig.tight_layout()
save_figure(fig=fig, filepath=fig_dir / f"{x.name}-{y.name}-histograms-LINEAR")

In [None]:
# Keep a handle on the axes currently bound to `axs`; they are compared with
# `axs_evap` in a later cell. This relies on the preceding histogram cell being
# the last one that assigned `axs`.
axs_precip = axs

# Results

## Analysis of EvapOnly

In [None]:
# Relative conservation error against the inflow precipitation,
# coloured by the negated source precipitation.
fig, ax = plt.subplots(ncols=1, nrows=1)

x = ds["inflow_precipitation"]
c = -ds["source_precipitation"]

# residual of the budget terms, expressed in percent of x
y = ds["outflow"] + ds["source"] + ds["inflow"] - ds["reservoir_change"]
y = y / x * 100

y.attrs.update(
    units=r"\%",
    long_name=f"Rel. error of conservation against {x.attrs['long_name']}",
    description=f"Relative error of the sum of all conservation values against the variable {x.attrs['long_name']}",
)


for mp in ["condensation"]:
    sc = ax.scatter(
        x.sel(microphysics=mp),
        y.sel(microphysics=mp),
        c=c.sel(microphysics=mp),
        cmap=strength_cmap,
    )
ax.set_xscale("log")

# `sc` is the scatter created in the (single iteration) loop above
fig.colorbar(sc, ax=ax, label=label_from_attrs(c, name_width=25))
ax.set_xlabel(label_from_attrs(x))
ax.set_ylabel(label_from_attrs(y, name_width=25))

Text(0, 0.5, 'Rel. error of\nconservation against\nCloud Base Precipitation\nFlux $\\left[  \\%  \\right]$')

#### Evaporation fraction and Column Integrated Evaporation comparison


Validate that the column-integrated evaporation and the evaporation reconstructed from the evaporation fraction match exactly.
This will give us confidence in our analysis.

In [None]:
# Consistency check: the negated source precipitation (x) is compared with the
# product of inflow precipitation and evaporation fraction (y).
x = -ds["source_precipitation"]
# 1e-2 converts the evaporation fraction from percent to a fraction
y = ds["inflow_precipitation"] * ds["evaporation_fraction"] * 1e-2

long_name = label_from_attrs(ds["inflow_precipitation"], name_width=20, return_units=False)
long_name += "\nX\n"
long_name += label_from_attrs(ds["evaporation_fraction"], name_width=20, return_units=False)

y.attrs.update(
    long_name=long_name,
    units=ds["inflow_precipitation"].attrs["units"],
)

# Create a dedicated figure for this cell. Drawing with the pyplot state machine
# while saving `fig` would save the figure of a previously executed cell instead
# of this scatter plot.
fig, ax = plt.subplots()
ax.scatter(
    x.sel(microphysics="condensation"),
    y.sel(microphysics="condensation"),
    **microphysics_styles.get_style("condensation"),
)
ax.set_xlabel(label_from_attrs(x))
ax.set_ylabel(label_from_attrs(y, linebreak=True))
print(xr.corr(x, y, dim="cloud_id"))
save_figure(fig=fig, filepath=more_fig_dir / "reconstructed-evaporation-scatter")

<xarray.DataArray (microphysics: 4)> Size: 32B
array([1., 1., 1., 1.])
Coordinates:
  * microphysics  (microphysics) object 32B 'condensation' ... 'coalbure_cond...


In [None]:
# Scatter of the negated source precipitation against the evaporation fraction
# with marginal histograms and a second x axis with converted tick labels.
fig, axs = plt.subplots(2, 2, figsize=(5, 5), width_ratios=[1, 0.3], height_ratios=[0.3, 1])

# the upper right panel stays empty
ax_empty = axs[0, 1]
ax_empty.axis("off")
ax_x_hist = axs[0, 0]
ax_y_hist = axs[1, 1]
ax_scatter = axs[1, 0]

# the marginal histograms share the corresponding axis with the scatter panel
ax_x_hist.sharex(ax_scatter)
ax_y_hist.sharey(ax_scatter)

x = -ds["source_precipitation"]
y = ds["evaporation_fraction"]
c = ds["cloud_liquid_water_content"]

x_median = x.median("cloud_id")
y_median = y.median("cloud_id")

x_bins = np.arange(0, 1.3, 0.05)
ax_scatter.set_xlim(x_bins[0], x_bins[-1])

y_bins = np.arange(0, 101, 5)
ax_scatter.set_ylim(y_bins[0], y_bins[-1])

for mp in ["condensation"]:

    # NOTE(review): `mp` is not passed to scatter_and_errorbar here; presumably its
    # default selects "condensation" — confirm.
    data_dict, plot_dict = scatter_and_errorbar(
        ax=ax_scatter,
        x_var="source_precipitation",
        y_var="evaporation_fraction",
        ds=ds,
        ds_sem=ds_sem,
        x_multiply=-1,
        y_multiply=1,
        plot_patch=False,
        plot_annotations=True,
        patch_width=2,
        annotation_kwargs=dict(
            arrowprops=dict(arrowstyle="->", color="black", alpha=0.3),
        ),
    )
    # append the median to the annotation texts and reposition them
    y_annotation = plot_dict["y_annotation"]
    y_annotation.set(
        text=y_annotation.get_text() + f"\n{y_median.sel(microphysics=mp).data:.2f}",
        y=75,
        rotation=0,
        va="bottom",
        ha="center",
    )

    x_annotation = plot_dict["x_annotation"]
    x_annotation.set(
        text=x_annotation.get_text() + f"\n{x_median.sel(microphysics=mp).data:.2f}", ha="center", x=1
    )

    ax_x_hist.hist(
        x.sel(microphysics=mp),
        bins=x_bins,
        histtype="step",
        color=microphysics_styles.get_style(mp)["color"],
        lw=2,
    )
    ax_y_hist.hist(
        y.sel(microphysics=mp),
        bins=y_bins,
        histtype="step",
        color=microphysics_styles.get_style(mp)["color"],
        lw=2,
        orientation="horizontal",
    )

ax_scatter.set_xlabel(label_from_attrs(x))
ax_scatter.set_ylabel(label_from_attrs(y))

ax_x_hist.set_ylabel("Counts")
ax_y_hist.set_xlabel("Counts")

# overrides the lower limits set from the bins above; the upper limits are left unchanged
ax_scatter.set_xlim(0, None)
ax_scatter.set_ylim(0, None)


# Second x axis: the current tick positions are converted from "precipitation"
# to "energy" with conversions.EvaporationUnits.
x_ticks = xr.DataArray(ax_scatter.get_xticks(), attrs=x.attrs.copy())

new_x_ticks: xr.DataArray = conversions.EvaporationUnits(
    data=x_ticks, input_type="precipitation"
).convert_to("energy")
# the lambda ignores its argument and returns the pre-computed, rounded tick labels
new_ticks_func = lambda _: [f"{round(new_x, 0):.0f}" for x, new_x in zip(x_ticks, new_x_ticks.data)]
add_additional_axis(
    ax=ax_scatter,
    new_ticks_func=new_ticks_func,
    label=label_from_attrs(da=new_x_ticks),
    position="bottom",
    offset_position=["axes", -0.3],
)
ax_scatter.set_xlabel(label_from_attrs(da=x))

correlation = xr.corr(x, y, dim="cloud_id")
# `mp` is the loop variable left over from the (single iteration) loop above
ax_scatter.annotate(
    f"R = {correlation.sel(microphysics=mp).data:.2f}",
    xy=(1, 1),
    xycoords="axes fraction",
    ha="right",
    va="top",
    # fontsize=10,
)

for _ax in axs.flatten():
    _ax.grid(linestyle="-", alpha=0.2, color="grey")

add_subplotlabel([ax_x_hist, ax_scatter, ax_y_hist], location="title")

fig.tight_layout()
save_figure(fig=fig, filepath=appendix_fig_dir / f"{x.name}-{y.name}-scattered-histograms-LINEAR")

HERENILS

In [None]:
# Two stacked histograms: negated source precipitation (top) and evaporation
# fraction (bottom), each with mean +- std and median marked.
fig, axs = plt.subplots(ncols=1, nrows=2, figsize=(5.33, 4.4))

# common count axis limits; also the vertical extent of the shaded mean +- std band
ylim = (0, 40)
ax_x_hist: plt.Axes = axs[0]
ax_y_hist: plt.Axes = axs[1]

x = -ds["source_precipitation"]
# NOTE(review): the standard error is negated together with the data, which makes it
# negative; this is only harmless if propagate_mean_std ignores the sign — confirm.
x_sem = -ds_sem["source_precipitation"]

y = ds["evaporation_fraction"]
y_sem = ds_sem["evaporation_fraction"]

x_mean = x.mean("cloud_id")
x_std = propagate_mean_std(x, x_sem, dim="cloud_id")

y_mean = y.mean("cloud_id")
y_std = propagate_mean_std(y, y_sem, dim="cloud_id")

x_median = x.median("cloud_id")
y_median = y.median("cloud_id")

x_bins = np.arange(0, 1.3, 0.05)
y_bins = np.arange(0, 101, 5)

# bundle everything needed per panel so both panels are drawn by the same loop
x_dict = dict(
    data=x,
    mean=x_mean,
    std=x_std,
    median=x_median,
    bins=x_bins,
)
y_dict = dict(
    data=y,
    mean=y_mean,
    std=y_std,
    median=y_median,
    bins=y_bins,
)

for mp in ["condensation"]:
    style = microphysics_styles.get_style(mp)
    for d, _ax in zip([x_dict, y_dict], [ax_x_hist, ax_y_hist]):

        median = d["median"].sel(microphysics=mp)
        m, s = d["mean"].sel(microphysics=mp), d["std"].sel(microphysics=mp)
        _ax.hist(
            d["data"].sel(microphysics=mp),
            bins=d["bins"],
            histtype="step",
            color=style["color"],
            lw=2,
        )
        # dashed line: mean, shaded band: mean +- std, solid line: median
        _ax.axvline(
            m,
            color=style["color"],
            linestyle="--",
            lw=2,
            label=rf"Mean: {m.data:.2f} $\pm$ {s.data:.2f}",
        )
        _ax.fill_betweenx(
            ylim,
            m - s,
            m + s,
            color=style["color"],
            alpha=0.1,
        )
        _ax.axvline(
            median,
            color=style["color"],
            linestyle="-",
            lw=2,
            label=f"Median: {median.data:.2f}",
        )

ax_x_hist.set_xlabel(label_from_attrs(x))
ax_y_hist.set_xlabel(label_from_attrs(y))

for _ax in axs.flatten():
    _ax.set_ylabel("Counts")
    _ax.set_ylim(ylim)
    _ax.legend(loc="upper right")
    _ax.set_xlim(0, None)

add_subplotlabel([ax_x_hist, ax_y_hist], location="title", zorder=100)

# update the title to be on the left
ax_x_hist.set_title("")

# Second x axis on top of the upper panel: the current tick positions are converted
# from "precipitation" to "energy" with conversions.EvaporationUnits.
x_ticks = xr.DataArray(ax_x_hist.get_xticks(), attrs=x.attrs.copy())
new_x_ticks: xr.DataArray = conversions.EvaporationUnits(
    data=x_ticks, input_type="precipitation"
).convert_to("energy")
# the lambda ignores its argument and returns the pre-computed, rounded tick labels
new_ticks_func = lambda _: [f"{round(new_x, 0):.0f}" for x, new_x in zip(x_ticks, new_x_ticks.data)]
add_additional_axis(
    ax=ax_x_hist,
    new_ticks_func=new_ticks_func,
    label=label_from_attrs(da=new_x_ticks),
    position="top",
    offset_position=["axes", 1],
)
ax_x_hist.set_xlabel(label_from_attrs(da=x))


fig.tight_layout()
save_figure(fig=fig, filepath=fig_dir / f"{x.name}-{y.name}-histograms-LINEAR")

Make sure the two histogram figures have the same axes sizes.

In [None]:
# Keep a handle on the axes currently bound to `axs` for the size comparison with
# `axs_precip` below. This relies on the preceding histogram cell being the last
# one that assigned `axs`.
axs_evap = axs

In [None]:
# Print width and height of the corresponding axes of both histogram figures.
# Each bounding box is converted with the transform of the figure that owns the
# axes, so the result does not depend on whichever figure `fig` currently refers to.
for i in range(1, -1, -1):
    for _ax in (axs_precip[i], axs_evap[i]):
        bbox = _ax.get_window_extent().transformed(_ax.figure.dpi_scale_trans.inverted())
        width, height = bbox.width, bbox.height
        print(width, height)

4.489722222222222 1.006388888888889
4.489722222222222 0.9915000000000006
4.489722222222222 1.006388888888889
4.489722222222222 0.9915000000000003


In [None]:
# Scatter of the negated source precipitation against the evaporation fraction for
# every entry of `microphysics_styles`, with marginal histograms and a second x axis.
fig, axs = plt.subplots(
    2, 2, figsize=large_square_fig_size, width_ratios=[1, 0.3], height_ratios=[0.3, 1]
)

# the upper right panel stays empty
ax_empty = axs[0, 1]
ax_empty.axis("off")
ax_x_hist = axs[0, 0]
ax_y_hist = axs[1, 1]
ax_scatter = axs[1, 0]

# the marginal histograms share the corresponding axis with the scatter panel
ax_x_hist.sharex(ax_scatter)
ax_y_hist.sharey(ax_scatter)

x_var = "source_precipitation"
y_var = "evaporation_fraction"

x = -ds[x_var]
y = ds[y_var]
c = ds["cloud_liquid_water_content"]

x_median = x.median("cloud_id")
y_median = y.median("cloud_id")

x_bins = np.arange(0, 2.5, 0.05)
ax_scatter.set_xlim(x_bins[0], x_bins[-1])

y_bins = np.arange(0, 101, 5)
ax_scatter.set_ylim(y_bins[0], y_bins[-1])

# The correlation does not depend on the loop variable, so it is computed once
# and only selected per microphysics inside the loop.
correlation = xr.corr(-ds[x_var], ds[y_var], dim="cloud_id")

for i, mp in enumerate(microphysics_styles):

    style = microphysics_styles.get_style(mp)

    data_dict, plot_dict = scatter_and_errorbar(
        ax=ax_scatter,
        x_var=x_var,
        y_var=y_var,
        ds=ds,
        ds_sem=ds_sem,
        microphysics=mp,
        x_multiply=-1,
        y_multiply=1,
        plot_patch=False,
        plot_annotations=True,
        patch_width=2.5,
        scatter_kwargs=dict(
            color=adjust_lightness(style["color"], 1.5),
            marker=style["marker"],
            alpha=1,
        ),
        error_kwargs=dict(
            fmt="",
            label="mean ± SEM",
            color=style["color"],
            capsize=5,
            linewidth=2,
        ),
        annotation_kwargs=dict(
            color=style["color"],
        ),
    )

    # Stack the annotations of the individual microphysics vertically (5 units apart).
    # The annotation texts are left as created by scatter_and_errorbar.
    x_annotation = plot_dict["x_annotation"]
    x_annotation.set(
        x=0.5,
        y=80 + 5 * i,
        ha="left",
        va="center",
    )

    y_annotation = plot_dict["y_annotation"]
    y_annotation.set(
        x=2.4,
        y=35 + 5 * i,
        rotation=0,
        ha="right",
        va="center",
    )

    # add correlation annotation
    ax_scatter.annotate(
        f"R = {correlation.sel(microphysics=mp).data:.2f}",
        xy=(2.4, 80 + 5 * i),
        color=style["color"],
        xycoords="data",
        ha="right",
        va="center",
    )

    ax_x_hist.hist(
        x.sel(microphysics=mp),
        bins=x_bins,
        histtype="step",
        color=style["color"],
        lw=2,
    )
    ax_y_hist.hist(
        y.sel(microphysics=mp),
        bins=y_bins,
        histtype="step",
        color=style["color"],
        lw=2,
        orientation="horizontal",
    )

# light grey lines through the unselected (2D) data, drawn below everything else
ax_scatter.plot(
    x,
    y,
    color=adjust_lightness("grey", 1.75),
    zorder=0,
)

ax_scatter.set_xlabel(label_from_attrs(x))
ax_scatter.set_ylabel(label_from_attrs(y))

ax_x_hist.set_ylabel("Counts")
ax_y_hist.set_xlabel("Counts")

# Second x axis: the current tick positions are converted from "precipitation"
# to "energy" with conversions.EvaporationUnits.
x_ticks = xr.DataArray(ax_scatter.get_xticks(), attrs=x.attrs.copy())

new_x_ticks: xr.DataArray = conversions.EvaporationUnits(
    data=x_ticks, input_type="precipitation"
).convert_to("energy")
# the lambda ignores its argument and returns the pre-computed, rounded tick labels
new_ticks_func = lambda _: [f"{round(new_x, 0):.0f}" for x, new_x in zip(x_ticks, new_x_ticks.data)]
add_additional_axis(
    ax=ax_scatter,
    new_ticks_func=new_ticks_func,
    label=label_from_attrs(da=new_x_ticks),
    position="bottom",
    offset_position=["axes", -0.2],
)
ax_scatter.set_xlabel(label_from_attrs(da=x))

for _ax in axs.flatten():
    _ax.grid(linestyle="-", alpha=0.2, color="grey")

add_subplotlabel([ax_scatter, ax_x_hist, ax_y_hist], location="title")

fig.tight_layout()
save_figure(fig=fig, filepath=appendix_fig_dir / f"{x.name}-{y.name}-scattered-histograms-LINEAR-ALL")

## Microphysics

### Ventilation coefficient 

In [None]:
# Replace (not update) the attributes of the radius bins so that the axis label
# uses a LaTeX micro sign; `radii_label` is reused by the plots below.
ds["radius_bins"].attrs = dict(
    long_name="Radius",
    units="$\\mu m$",
)
radii_label = label_from_attrs(ds["radius_bins"])

In [None]:
# Constants used by the theoretical evaporation fraction, the ventilation
# coefficient and the terminal velocity functions defined below.
# NOTE(review): units are not stated here; the values look like SI units — confirm.

# mean over all dimensions of the relative humidity, divided by 100
# (presumably converting from percent to a fraction)
RH = ds["relative_humidity"].mean() / 100
H = 1000

rhow = 0.998e3
rhoa = 1.2
eta = 1.85e-5
g = 9.81
nu = eta / rhoa
T = 294.41807507
p = 1e5
Dv0 = 0.211 * (T / 273.15) ** (1.94) * (1013.25e2 / p) * 1e-4  # PK97 (13-3)
# NOTE(review): Sc is hard-coded and not computed from nu and Dv0 defined above
Sc = 0.71  # nu/Dv0
gamma = 73e-3
Coo = 0.26
Cgamma = 18.4
lgamma = np.sqrt(gamma / (rhow * g))
kb = 1.380649e-23
Rconst = 8.314
Rv = 461.5
lv = 2.5e6
ka = 26.19e-3


def psat_water(T):
    """
    Saturation vapour pressure over water from an exponential fit.

    The temperature ``T`` is shifted by 273.15 before it enters the fit, and the
    result equals ``6.1121e2`` at ``T = 273.15``.
    NOTE(review): presumably ``T`` is in Kelvin and the result in Pa — confirm.
    """
    celsius = T - 273.15
    exponent = (18.678 - celsius / 234.5) * (celsius / (257.14 + celsius))
    return 6.1121e2 * np.exp(exponent)


def rhosat_water(T):
    """
    Saturation vapour density at temperature ``T``.

    Computed from ``psat_water(T)`` as ``p * M / (Rconst * T)``.
    NOTE(review): ``M = 18.01528e-3`` is presumably the molar mass of water
    in kg/mol — confirm.
    """
    molar_mass = 18.01528e-3
    return psat_water(T) * molar_mass / (Rconst * T)


# Diffusivity used by theoretical_evaporation_fraction: Dv0 divided by a correction
# term built from lv, ka, Rv and the saturation vapour density at T.
# NOTE(review): presumably this accounts for the latent heat cooling of the drop —
# confirm against the reference used for the theory.
Dv = Dv0 / (1 + lv * Dv0 * rhosat_water(T) / (ka * T) * (lv / (Rv * T) - 1))


def theoretical_evaporation_fraction(r0s: xr.DataArray) -> xr.DataArray:
    """
    Theoretical evaporation fraction for the initial radii ``r0s``.

    Uses the module level constants ``rhow``, ``rhoa``, ``g``, ``Dv``, ``H``, ``RH``
    and ``T``. Entries for which the expression is NaN are set to 1, and the result
    is capped at 1.

    Parameters
    ----------
    r0s : xr.DataArray
        Initial radii. NOTE(review): presumably in metres, as the callers in this
        notebook pass ``radius_bins * 1e-6`` — confirm.

    Returns
    -------
    xr.DataArray
        Evaporation fraction, at most 1.
    """
    b_u = np.sqrt(8 / 3 * rhow / rhoa * g / 0.5)
    delta_r52 = 5 / 2 * Dv * H / b_u * (1 - RH) * rhosat_water(T) / rhow
    # a negative remainder raised to a fractional power gives NaN, handled by fillna below
    remainder = 1 - delta_r52 / r0s ** (5 / 2)
    fraction = 1 - remainder ** (6 / 5)
    return np.minimum(fraction.fillna(1), 1)


def fv(a, v):
    """
    Ventilation coefficient for a single drop (scalar inputs).

    Parameters
    ----------
    a : float
        Drop radius, not a mass: the Reynolds number is built from the diameter
        ``2 * a`` and the branch is chosen by comparing ``a`` with ``60e-6``.
        NOTE(review): presumably in metres — confirm.
    v : float
        Velocity of the drop; only its absolute value is used.

    Returns
    -------
    float
        ``1 + 0.108 * x**2`` for ``a < 60e-6`` and ``0.78 + 0.308 * x`` otherwise,
        with ``x = Sc ** (1 / 3) * Re ** (1 / 2)``.

    Notes
    -----
    The ``if`` needs a scalar ``a``; ``fv_xr`` is the array version.
    """
    Re = 2 * a * np.abs(v) / nu
    x = Sc ** (1 / 3) * Re ** (1 / 2)
    if a < 60e-6:
        return 1 + 0.108 * x**2
    else:
        return 0.78 + 0.308 * x


def fv_xr(a: xr.DataArray, v: xr.DataArray) -> xr.DataArray:
    """
    Ventilation coefficient evaluated element-wise for arrays.

    Parameters
    ----------
    a : xr.DataArray
        Drop radii, not masses: the Reynolds number is built from the diameter
        ``2 * a`` and the branch is chosen by comparing ``a`` with ``60e-6``.
        NOTE(review): presumably in metres — confirm.
    v : xr.DataArray
        Velocities of the drops; only their absolute value is used.

    Returns
    -------
    xr.DataArray
        ``1 + 0.108 * x**2`` where ``a < 60e-6`` and ``0.78 + 0.308 * x`` elsewhere,
        with ``x = Sc ** (1 / 3) * Re ** (1 / 2)``.
    """
    Re = 2 * a * np.abs(v) / nu
    x = Sc ** (1 / 3) * Re ** (1 / 2)
    # both branches are evaluated for all elements and combined afterwards
    low = 1 + 0.108 * x**2
    high = 0.78 + 0.308 * x

    return xr.where(a < 60e-6, low, high)


def vtlim(a):
    """
    Terminal velocity in m/s for the radius ``a``.

    Same expression as ``vt`` but with the constant coefficient ``Coo ** (1 / 2)``,
    i.e. without the ``Cgamma`` / ``lgamma`` dependent factor.
    """
    drag_term = Coo ** (1 / 2)
    viscous_term = (12 * nu / a) ** (1 / 2)
    gravity_term = (8 * rhow * g * a / (3 * rhoa)) ** (1 / 2)
    discriminant = viscous_term**2 + 4 * drag_term * gravity_term
    root = (np.sqrt(discriminant) - viscous_term) / (2 * drag_term)
    return root**2


def vt(a):
    """
    Terminal velocity in m/s for the radius ``a``.

    The coefficient ``Coo ** (1 / 2)`` is scaled by a factor that grows with
    ``(a / lgamma) ** 3``; otherwise the expression matches ``vtlim``.
    """
    size_factor = (1 + Cgamma * (a / lgamma) ** 3) ** (1 / 6)
    drag_term = Coo ** (1 / 2) * size_factor
    viscous_term = (12 * nu / a) ** (1 / 2)
    gravity_term = (8 * rhow * g * a / (3 * rhoa)) ** (1 / 2)
    discriminant = viscous_term**2 + 4 * drag_term * gravity_term
    root = (np.sqrt(discriminant) - viscous_term) / (2 * drag_term)
    return root**2


# Ventilation coefficient for every radius bin at its terminal velocity.
# The factor 1e-6 rescales the radius bins (presumably from µm to m, given the
# units attribute set on radius_bins — confirm).
ventilation_coefficient = fv_xr(ds["radius_bins"] * 1e-6, vt(ds["radius_bins"] * 1e-6))

# show the temperature, height and relative humidity used for the theory
print(T, H, RH)

294.41807507 1000 <xarray.DataArray 'relative_humidity' ()> Size: 8B
array(0.87606583)


In [None]:
# First figure: ventilation coefficient against the radius bins.
fig, ax = plt.subplots(figsize=(4, 4))
ax.plot(ds["radius_bins"], ventilation_coefficient, label="fv", linestyle="-")
ax.set_xlabel(radii_label)
ax.set_ylabel("fv")
ax.set_ylim(0, 17)
# ax.legend()

# Second figure (pyplot state machine): theoretical evaporation fraction
# without and with the ventilation coefficient.
plt.figure()
evaporation_fraction = theoretical_evaporation_fraction(ds["radius_bins"] * 1e-6)
# the ventilated version scales the fraction by the coefficient and is capped at 1;
# NaN entries are set to 1
evaporation_fraction_ventilation = evaporation_fraction * ventilation_coefficient
evaporation_fraction_ventilation: xr.DataArray = np.minimum(
    evaporation_fraction_ventilation.fillna(1), 1
)

plt.plot(
    ds["radius_bins"],
    evaporation_fraction,
    label="no ventilation",
)
plt.plot(
    ds["radius_bins"],
    evaporation_fraction_ventilation,
    label="ventilation",
)
plt.xlabel(radii_label)
plt.xlim(1e1, None)
plt.ylabel("Evaporation fraction")
plt.loglog()
plt.legend()

<matplotlib.legend.Legend at 0x7ffb64fea780>

Compare no ventilation with ventilation in simple histogram

In [None]:
# Scatter and marginal histograms of the negated source precipitation against the
# evaporation fraction, comparing `ds` with `ds_no_ventilation`.
fig, axs = plt.subplots(2, 2, figsize=(5, 5), width_ratios=[1, 0.3], height_ratios=[0.3, 1])

# the upper right panel stays empty
ax_empty = axs[0, 1]
ax_empty.axis("off")
ax_x_hist = axs[0, 0]
ax_y_hist = axs[1, 1]
ax_scatter = axs[1, 0]

# the marginal histograms share the corresponding axis with the scatter panel
ax_x_hist.sharex(ax_scatter)
ax_y_hist.sharey(ax_scatter)

x = -ds["source_precipitation"]
y = ds["evaporation_fraction"]

x_no_ventilation = -ds_no_ventilation["source_precipitation"]
y_no_ventilation = ds_no_ventilation["evaporation_fraction"]

x_bins = np.arange(0, 1.3, 0.05)
# ax_scatter.set_xlim(x_bins[0], x_bins[-1])

y_bins = np.arange(0, 101, 5)
# ax_scatter.set_ylim(y_bins[0], y_bins[-1])

for mp in ["condensation"]:

    # work on copies so that the styles stored in microphysics_styles are not modified
    style = microphysics_styles.get_style(mp).copy()
    style.update(alpha=0.5)

    no_ventilation_style = style.copy()
    no_ventilation_style.update(marker=".", color="grey")

    # data without ventilation, drawn first
    data_dict, plot_dict = scatter_and_errorbar(
        ax=ax_scatter,
        x_var="source_precipitation",
        y_var="evaporation_fraction",
        ds=ds_no_ventilation,
        ds_sem=ds_sem_no_ventilation,
        x_multiply=-1,
        y_multiply=1,
        plot_patch=False,
        plot_annotations=False,
        patch_width=3,
        scatter_kwargs=no_ventilation_style,
        error_kwargs=dict(
            fmt="",
            color=adjust_lightness(no_ventilation_style["color"], 0.5),
            capsize=5,
            linewidth=2,
        ),
    )

    # data from `ds`, drawn second; overwrites data_dict and plot_dict from the call above
    data_dict, plot_dict = scatter_and_errorbar(
        ax=ax_scatter,
        x_var="source_precipitation",
        y_var="evaporation_fraction",
        ds=ds,
        ds_sem=ds_sem,
        x_multiply=-1,
        y_multiply=1,
        plot_patch=False,
        plot_annotations=False,
        patch_width=2.5,
        scatter_kwargs=style,
        error_kwargs=dict(
            fmt="",
            color=adjust_lightness(style["color"], 0.5),
            capsize=5,
            linewidth=2,
        ),
    )

    ax_x_hist.hist(
        x_no_ventilation.sel(microphysics=mp),
        bins=x_bins,
        histtype="step",
        color=no_ventilation_style["color"],
        lw=2,
    )
    ax_x_hist.hist(
        x.sel(microphysics=mp),
        bins=x_bins,
        histtype="step",
        color=style["color"],
        lw=2,
    )
    ax_y_hist.hist(
        y_no_ventilation.sel(microphysics=mp),
        bins=y_bins,
        histtype="step",
        color=no_ventilation_style["color"],
        lw=2,
        orientation="horizontal",
    )
    ax_y_hist.hist(
        y.sel(microphysics=mp),
        bins=y_bins,
        histtype="step",
        color=style["color"],
        lw=2,
        orientation="horizontal",
    )

ax_scatter.set_xlabel(label_from_attrs(x))
ax_scatter.set_ylabel(label_from_attrs(y))

ax_x_hist.set_ylabel("Counts")
ax_y_hist.set_xlabel("Counts")

ax_scatter.set_xlim(0, None)
ax_scatter.set_ylim(0, None)


# Second x axis: the current tick positions are converted from "precipitation"
# to "energy" with conversions.EvaporationUnits.
x_ticks = xr.DataArray(ax_scatter.get_xticks(), attrs=x.attrs.copy())

new_x_ticks: xr.DataArray = conversions.EvaporationUnits(
    data=x_ticks, input_type="precipitation"
).convert_to("energy")
# the lambda ignores its argument and returns the pre-computed, rounded tick labels
new_ticks_func = lambda _: [f"{round(new_x, 0):.0f}" for x, new_x in zip(x_ticks, new_x_ticks.data)]
add_additional_axis(
    ax=ax_scatter,
    new_ticks_func=new_ticks_func,
    label=label_from_attrs(da=new_x_ticks),
    position="bottom",
    offset_position=["axes", -0.3],
)
ax_scatter.set_xlabel(label_from_attrs(da=x))


for _ax in axs.flatten():
    _ax.grid(linestyle="-", alpha=0.2, color="grey")

add_subplotlabel([ax_x_hist, ax_scatter, ax_y_hist])

fig.tight_layout()
# NOTE(review): "VENILATION" in the file name looks like a typo of "VENTILATION";
# left unchanged because the file name may be referenced elsewhere.
save_figure(
    fig=fig, filepath=appendix_fig_dir / f"VENILATION-{x.name}-{y.name}-scattered-histograms-LINEAR"
)

### Evaporation fraction and Mass radius mean

In [None]:
# Evaporation fraction against the cloud mean mass radius for `ds` and
# `ds_no_ventilation`, together with the two theoretical curves.
fig, ax = plt.subplots(1, 1)

x = ds["cloud_mass_radius_mean"]
y = ds["evaporation_fraction"]

x_no_ventilation = ds_no_ventilation["cloud_mass_radius_mean"]
y_no_ventilation = ds_no_ventilation["evaporation_fraction"]


for mp in ["condensation"]:
    # Work on copies: the label is extended in place, which would otherwise alter the
    # style stored in microphysics_styles (and accumulate when the cell is re-run)
    # if get_style hands out its internal dict.
    style = microphysics_styles.get_style(mp).copy()
    style["label"] += r" $\mathbf{with} \, f_v$"
    ax.scatter(
        x.sel(microphysics=mp),
        y.sel(microphysics=mp),
        **style,
    )
    style = microphysics_styles.get_style(mp, colortype="light").copy()
    style["label"] += r" $\mathbf{without} \, f_v$"
    style["color"] = "grey"
    style["marker"] = "."
    ax.scatter(
        x_no_ventilation.sel(microphysics=mp),
        y_no_ventilation.sel(microphysics=mp),
        **style,
    )

# theoretical curves; the factor 1e2 converts the fraction to percent
ax.plot(
    ds["radius_bins"],
    1e2 * evaporation_fraction_ventilation,
    label=r"Theory $\mathbf{with} \, f_v$",
    color="black",
    linestyle="--",
)

ax.plot(
    ds["radius_bins"],
    1e2 * evaporation_fraction,
    label=r"Theory $\mathbf{without} \, f_v$",
    color="grey",
    linestyle="--",
)

ax.set_xlim(50, None)
ax.set_ylim(1, None)
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel(label_from_attrs(x))
ax.set_ylabel(label_from_attrs(y))
ax.legend(loc="lower left")
fig.tight_layout()

save_figure(fig=fig, filepath=fig_dir / f"theoretical-scatter-{x.name}-{y.name}-VENTILATION")

#### Reason for outliers

In [None]:
# Evaporation fraction against the cloud mean mass radius, coloured by the
# "mass_radius_std" at the max_gridbox, together with the theoretical curve.
fig, ax = plt.subplots(1, 1)

x = ds["cloud_mass_radius_mean"]
y = ds["evaporation_fraction"]
c = ds["mass_radius_std"].sel(gridbox=ds["max_gridbox"])
c.attrs.update(units="µm")
# x_no_ventilation = ds_no_ventilation["cloud_mass_radius_mean"]
# y_no_ventilation = ds_no_ventilation["evaporation_fraction"]


for mp in ["condensation"]:
    # copy the style and drop its fixed color, because the color is given by `c`
    style = microphysics_styles.get_style(mp).copy()
    style.pop("color")
    sc = ax.scatter(
        x.sel(microphysics=mp),
        y.sel(microphysics=mp),
        c=c.sel(microphysics=mp),
        cmap=strength_cmap,
        **style,
    )

# `sc` is the scatter created in the (single iteration) loop above
fig.colorbar(sc, ax=ax, label=label_from_attrs(c, name_width=20))

# ax.plot(
#     x.transpose('microphysics', ...),
#     y.transpose('microphysics', ...),
#     color = 'black',
#     alpha = 0.1,
#     zorder = 1,
# )

# theoretical curve; the factor 1e2 converts the fraction to percent
ax.plot(
    ds["radius_bins"],
    1e2 * evaporation_fraction_ventilation,
    label=r"Theory",
    color="black",
    linestyle="--",
)

# ax.plot(
#     ds["radius_bins"],
#     1e2 * evaporation_fraction,
#     label=r"Theory $\mathbf{without} \, f_v$",
#     color="grey",
#     linestyle="--",
# )

ax.set_xlim(50, None)
ax.set_ylim(1, None)
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel(label_from_attrs(x))
ax.set_ylabel(label_from_attrs(y))
ax.legend(loc="lower left")
fig.tight_layout()

save_figure(fig=fig, filepath=appendix_fig_dir / f"theoretical-scatter-{x.name}-{y.name}-{c.name}")

In [None]:
# Order the cloud ids by `mass_radius_std` (at `max_gridbox`, "condensation" setup)
# and keep the last ten ids of that ordering.
# NOTE(review): with the default ascending sort these are the ten largest values - confirm.
d = ds["mass_radius_std"].sel(gridbox=ds["max_gridbox"]).sel(microphysics="condensation")
cloud_ids = ds["cloud_id"].sortby(d)
ef_outlier_cloud_ids = cloud_ids[-10:]
# trailing expression: shows the last id of the selection as the cell output
ef_outlier_cloud_ids[-1]

In [None]:
# Plot the selected outlier clouds; `plot_individual_clouds` is defined outside this section.
plot_individual_clouds(ds=ds, ds_normalized=ds_normalized, cloud_ids=ef_outlier_cloud_ids)

(<Figure size 1500x400 with 6 Axes>,
 array([<Axes: xlabel='radius_bins $\\left[ ??? \\right]$', ylabel='Number concentration $\\left[  m^{-3} µm^{-1}  \\right]$'>,
        <Axes: xlabel='Relative humidity $\\left[  \\%  \\right]$', ylabel='Normalized height $\\left[    \\right]$'>,
        <Axes: xlabel='Evaporation Rate $\\left[  mW \\, m^{-3}  \\right]$', ylabel='Normalized height $\\left[    \\right]$'>,
        <Axes: xlabel='Rain Water Content $\\left[  g m^{-3}  \\right]$', ylabel='Normalized height $\\left[    \\right]$'>,
        <Axes: xlabel='Cloud Mean Mass\nRadius $\\left[  µm  \\right]$', ylabel='Counts'>,
        <Axes: xlabel='Cloud Rain Water\nContent $\\left[  g m^{-3}  \\right]$', ylabel='Counts'>],
       dtype=object))

#### Microphysics

In [None]:
# Evaporation fraction against the cloud mean mass radius for every microphysics
# setup known to `microphysics_styles`, together with the theory curve.
fig, ax = plt.subplots(1, 1)

x = ds["cloud_mass_radius_mean"]
y = ds["evaporation_fraction"]

# x_no_ventilation = ds_no_ventilation["cloud_mass_radius_mean"]
# y_no_ventilation = ds_no_ventilation["evaporation_fraction"]


# one scatter call per setup, each drawn with the style of that setup
for mp in microphysics_styles:
    style = microphysics_styles.get_style(mp)
    # style["label"] = None
    ax.scatter(
        x.sel(microphysics=mp),
        y.sel(microphysics=mp),
        **style,
    )

# With "microphysics" as leading dimension matplotlib draws one faint line per column.
# NOTE(review): presumably each line connects the same cloud across the setups - confirm
# that "cloud_id" is the only remaining dimension.
ax.plot(
    x.transpose("microphysics", ...),
    y.transpose("microphysics", ...),
    color="black",
    alpha=0.1,
    zorder=1,
)

# Theory curve; `evaporation_fraction_ventilation` comes from an earlier cell.
# NOTE(review): the factor 1e2 presumably converts a fraction to percent - confirm.
ax.plot(
    ds["radius_bins"],
    1e2 * evaporation_fraction_ventilation,
    label=r"Theory",
    color="black",
    linestyle="--",
)

# ax.plot(
#     ds["radius_bins"],
#     1e2 * evaporation_fraction,
#     label=r"Theory $\mathbf{without} \, f_v$",
#     color="grey",
#     linestyle="--",
# )

# only the lower limits are fixed, the upper limits stay automatic
ax.set_xlim(50, None)
ax.set_ylim(1, None)
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel(label_from_attrs(x))
ax.set_ylabel(label_from_attrs(y))
ax.legend(loc="lower left")
fig.tight_layout()

save_figure(fig=fig, filepath=fig_dir / f"theoretical-scatter-{x.name}-{y.name}-ALL")

### Column integrated evaporation and cloud base precipitation flux

In [None]:
# Negated `source_precipitation` against `inflow_precipitation` for the
# "condensation" setup on log-log axes, with dashed guide lines y = p * x.
fig, ax = plt.subplots(1, 1)

x = ds["inflow_precipitation"]
y = -ds["source_precipitation"]

for mp in ["condensation"]:
    style = microphysics_styles.get_style(mp)
    ax.scatter(x.sel(microphysics=mp), y.sel(microphysics=mp), **style)

ax.set_xscale("log")
ax.set_yscale("log")

# shared value range: both y limits and the upper x limit are taken from it
lims = np.array([1e-6, 2.5e1])
ax.set_ylim(lims.min(), lims.max())
ax.set_xlim(1e-4, lims.max())

p_x_values = np.geomspace(lims.min(), lims.max(), 100)

values_label_size = 10

# Guide lines for y = p * x; every line is labelled twice, once at the right
# (x = 15, text shifted by 10 points) and once at the left (x = 1e-3, no shift).
for p in [1, 0.1, 0.01]:
    # smaller fractions are drawn more transparent
    style = dict(color="grey", alpha=p ** (1 / 5))
    lines = ax.plot(p_x_values, p * p_x_values, "--", linewidth=1, zorder=0, **style)
    line = lines[0]
    for _x, text_offset in [(15, (10, 10)), (1e-3, (0, 0))]:
        _y = p * _x
        ax.annotate(
            f"{100 * p:.0f} %",
            xy=(_x, _y),
            xytext=text_offset,
            textcoords="offset points",
            va="top",
            ha="left",
            size=values_label_size,
            **style,
        )

# the labels are built from the dataset variables, not from the local `x` and `y`
ax.set_xlabel(label_from_attrs(ds["inflow_precipitation"]))
ax.set_ylabel(label_from_attrs(ds["source_precipitation"], name_width=20))

fig.tight_layout()

save_figure(fig=fig, filepath=fig_dir / f"{x.name}-{y.name}-scatter")

In [None]:
# Negated `source_precipitation` against `inflow_precipitation` for all
# microphysics setups on log-log axes, with dashed guide lines y = p * x.
fig, ax = plt.subplots(1, 1)

x = ds["inflow_precipitation"]
y = -ds["source_precipitation"]

# one scatter call per setup, each drawn with the style of that setup
for mp in microphysics_styles:
    style = microphysics_styles.get_style(mp)
    ax.scatter(
        x.sel(microphysics=mp),
        y.sel(microphysics=mp),
        **style,
    )

# With "microphysics" as leading dimension matplotlib draws one line per column.
# NOTE(review): presumably each line connects the same cloud across the setups - confirm.
ax.plot(
    x.transpose("microphysics", ...),
    y.transpose("microphysics", ...),
    color="grey",
    alpha=0.5,
)

ax.set_xscale("log")
ax.set_yscale("log")

# shared value range: both y limits and the upper x limit are taken from it
lims = np.array([1e-6, 2.5e1])
ax.set_ylim(lims.min(), lims.max())
ax.set_xlim(1e-4, lims.max())

p_x_values = np.geomspace(lims.min(), lims.max(), 100)

values_label_size = 10

# Guide lines for y = p * x; every line is labelled twice, once at the right
# (x = 15, text shifted by 10 points) and once at the left (x = 1e-3, no shift).
for p in [1, 0.1, 0.01]:
    # smaller fractions are drawn more transparent
    style = dict(color="grey", alpha=p ** (1 / 5))
    lines = ax.plot(p_x_values, p * p_x_values, "--", linewidth=1, zorder=0, **style)
    line = lines[0]
    for _x, text_offset in [(15, (10, 10)), (1e-3, (0, 0))]:
        _y = p * _x
        ax.annotate(
            f"{100 * p:.0f} %",
            xy=(_x, _y),
            xytext=text_offset,
            textcoords="offset points",
            va="top",
            ha="left",
            size=values_label_size,
            **style,
        )

# the labels are built from the dataset variables, not from the local `x` and `y`
ax.set_xlabel(label_from_attrs(ds["inflow_precipitation"]))
ax.set_ylabel(label_from_attrs(ds["source_precipitation"], name_width=20))

# Reverse the legend entries. The order is derived from the number of labelled
# artists instead of a fixed [3, 2, 1, 0], so the cell neither fails with fewer
# than four entries nor drops entries when there are more.
handles, labels = ax.get_legend_handles_labels()
order = list(reversed(range(len(handles))))
legend = ax.legend(
    [handles[idx] for idx in order],
    [labels[idx] for idx in order],
    loc="upper left",
    bbox_to_anchor=(-0.05, 1.05),
    frameon=False,
    facecolor=[0, 0, 0, 0],
)

fig.tight_layout()

save_figure(fig=fig, filepath=fig_dir / f"{x.name}-{y.name}-scatter-ALL")

#### Colored scatter

In [None]:
# fig, ax = plt.subplots(1, 1)

# y = ds["evaporation_fraction"]
# x = -ds["source_precipitation"]
# c = ds["cloud_mass_radius_mean"]

# for mp in ["condensation"]:
#     style = microphysics_styles.get_style(mp).copy()
#     style.pop("color")
#     style.update(alpha=1.0)
#     style["label"] += r" $\mathbf{with} \, f_v$"
#     sc = ax.scatter(
#         x.sel(microphysics=mp),
#         y.sel(microphysics=mp),
#         c=c.sel(microphysics=mp),
#         cmap=strength_cmap,
#         **style,
#     )

# fig.colorbar(sc, ax=ax, label=label_from_attrs(c, name_width=20))

# ax.set_xscale("log")
# ax.set_yscale("log")
# ax.set_xlabel(label_from_attrs(x, name_width=20))
# ax.set_ylabel(label_from_attrs(y, name_width=20))
# # ax.legend(loc="lower left")
# fig.tight_layout()

# save_figure(fig=fig, filepath=appendix_fig_dir / f"COLORED-SCATTER-{x.name}-{y.name}-color-{c.name}")

# fig, ax = plt.subplots(1, 1)

# y = ds["evaporation_fraction"]
# x = -ds["source_precipitation"]
# c = ds["cloud_liquid_water_content"]

# for mp in ["condensation"]:
#     style = microphysics_styles.get_style(mp).copy()
#     style.pop("color")
#     style.update(alpha=1.0)
#     style["label"] += r" $\mathbf{with} \, f_v$"
#     sc = ax.scatter(
#         x.sel(microphysics=mp),
#         y.sel(microphysics=mp),
#         c=c.sel(microphysics=mp),
#         cmap=strength_cmap,
#         **style,
#     )

# fig.colorbar(sc, ax=ax, label=label_from_attrs(c, name_width=20))

# ax.set_xscale("log")
# ax.set_yscale("log")
# ax.set_xlabel(label_from_attrs(x, name_width=20))
# ax.set_ylabel(label_from_attrs(y, name_width=20))
# # ax.legend(loc="lower left")
# fig.tight_layout()
# save_figure(fig=fig, filepath=appendix_fig_dir / f"COLORED-SCATTER-{x.name}-{y.name}-color-{c.name}")

# fig, ax = plt.subplots(1, 1)

# y = ds["evaporation_fraction"]
# x = -ds["source_precipitation"]
# c = ds["relative_humidity_mean"]

# for mp in ["condensation"]:
#     style = microphysics_styles.get_style(mp).copy()
#     style.pop("color")
#     style.update(alpha=1.0)
#     style["label"] += r" $\mathbf{with} \, f_v$"
#     sc = ax.scatter(
#         x.sel(microphysics=mp),
#         y.sel(microphysics=mp),
#         c=c.sel(microphysics=mp),
#         cmap=strength_cmap,
#         **style,
#     )

# fig.colorbar(sc, ax=ax, label=label_from_attrs(c, name_width=20))

# ax.set_xscale("log")
# ax.set_yscale("log")
# ax.set_xlabel(label_from_attrs(x, name_width=20))
# ax.set_ylabel(label_from_attrs(y, name_width=20))
# # ax.legend(loc="lower left")
# fig.tight_layout()
# save_figure(fig=fig, filepath=appendix_fig_dir / f"COLORED-SCATTER-{x.name}-{y.name}-color-{c.name}")

### Relative differences

In [None]:
def plot_relative_differences(
    ax: plt.Axes,
    y_var_name: str,
    x_var_name: str,
    ds: xr.Dataset,
    ds_sem: xr.Dataset,
    microphysics_list: Union[list, None] = None,
):
    """
    Scatter the relative difference of a variable to the ``"condensation"`` setup.

    The y-axis shows ``100 * (A - B) / B`` in percent, where ``A`` is
    ``ds[y_var_name]`` and ``B`` is the same variable selected at
    ``microphysics="condensation"``. The x-axis shows ``ds[x_var_name]``.
    Both are restricted to the setups in ``microphysics_list``.

    The styles and the display name of the reference setup are read from the
    notebook-level ``microphysics_styles`` object.

    Parameters
    ----------
    ax : plt.Axes
        Axes to draw on.
    y_var_name : str
        Variable in ``ds`` whose relative difference is drawn on the y-axis.
        Its ``long_name`` attribute is required for the axis label.
    x_var_name : str
        Variable in ``ds`` drawn on the x-axis.
    ds : xr.Dataset
        Dataset with a ``microphysics`` dimension that contains ``"condensation"``.
    ds_sem : xr.Dataset
        Accepted for interface compatibility. The plot does not use it; the
        propagated error that used to be derived from it was never drawn.
    microphysics_list : list, optional
        Setups to show. ``None`` selects ``collision_condensation``,
        ``coalbure_condensation_small`` and ``coalbure_condensation_large``.

    Returns
    -------
    plt.Axes
        The axes that was passed in.
    """
    if microphysics_list is None:
        microphysics_list = [
            "collision_condensation",
            "coalbure_condensation_small",
            "coalbure_condensation_large",
        ]
    # a real list keeps xarray's label selection unambiguous for any sequence type
    microphysics_list = list(microphysics_list)

    values = ds[y_var_name]
    reference = values.sel(microphysics="condensation")

    # relative difference f = (A - B) / B, expressed in percent
    relative_difference = (values - reference) / reference * 100
    relative_difference.attrs.update(
        long_name=f"{values.attrs['long_name']} relative difference to {microphysics_styles['condensation']['name']}",
        units=r"\%",
    )

    x = ds[x_var_name].sel(microphysics=microphysics_list)
    y = relative_difference.sel(microphysics=microphysics_list)

    for mp in microphysics_list:
        style = microphysics_styles.get_style(mp).copy()
        ax.scatter(
            x.sel(microphysics=mp),
            y.sel(microphysics=mp),
            **style,
        )

    # with "microphysics" as leading dimension matplotlib draws one faint line per column
    ax.plot(
        x.transpose("microphysics", ...),
        y.transpose("microphysics", ...),
        color="grey",
        alpha=0.1,
    )

    # zero line: no difference to the reference setup
    ax.axhline(0, color="black", linestyle="--", linewidth=0.5, zorder=0)

    ax.set_xlabel(label_from_attrs(x))
    ax.set_ylabel(label_from_attrs(y, name_width=20))
    ax.legend()

    return ax


# (x variable, y variable) of each panel, in row-major panel order
variable_combinations = [
    ("inflow_precipitation", "source_precipitation"),
    ("cloud_mass_radius_mean", "source_precipitation"),
    # ("mean_evaporation_height", "source_precipitation"),
    ("inflow_precipitation", "evaporation_fraction"),
    ("cloud_mass_radius_mean", "evaporation_fraction"),
    # ('mean_evaporation_height', "evaporation_fraction"),
]

# setups that are compared against the reference in this figure
compared_setups = ["collision_condensation", "coalbure_condensation_small"]

fig, axs = plt.subplots(nrows=2, ncols=2, figsize=large_fig_size, sharey=True)

for _ax, (_x, _y) in zip(axs.ravel(), variable_combinations):
    _ax = plot_relative_differences(
        ax=_ax,
        x_var_name=_x,
        y_var_name=_y,
        ds=ds,
        ds_sem=ds_sem,
        microphysics_list=compared_setups,
    )
    # logarithmic x-axis, linear y-axis with a fixed range of -20 to 80
    _ax.set_xscale("log")
    _ax.set_yscale("linear")
    _ax.set_ylim(-20, 80)

add_subplotlabel(axs=axs.flatten())

fig.tight_layout()

save_figure(fig=fig, filepath=appendix_fig_dir / "microphysics-relative-differences-COMBINED-PLOTS")

In [None]:
def plot_relative_differences(
    ax: plt.Axes,
    y_var_name: str,
    x_var_name: str,
    ds: xr.Dataset,
    ds_sem: xr.Dataset,
    microphysics_list: Union[list, None] = None,
):
    """
    Scatter the relative difference of a variable to the ``"condensation"`` setup.

    The y-axis shows ``100 * (A - B) / B`` in percent, where ``A`` is
    ``ds[y_var_name]`` and ``B`` is the same variable selected at
    ``microphysics="condensation"``. The x-axis shows ``ds[x_var_name]``.
    Both are restricted to the setups in ``microphysics_list``.

    The styles and the display name of the reference setup are read from the
    notebook-level ``microphysics_styles`` object.

    Parameters
    ----------
    ax : plt.Axes
        Axes to draw on.
    y_var_name : str
        Variable in ``ds`` whose relative difference is drawn on the y-axis.
        Its ``long_name`` attribute is required for the axis label.
    x_var_name : str
        Variable in ``ds`` drawn on the x-axis.
    ds : xr.Dataset
        Dataset with a ``microphysics`` dimension that contains ``"condensation"``.
    ds_sem : xr.Dataset
        Accepted for interface compatibility. The plot does not use it; the
        propagated error that used to be derived from it was never drawn.
    microphysics_list : list, optional
        Setups to show. ``None`` selects ``collision_condensation``,
        ``coalbure_condensation_small`` and ``coalbure_condensation_large``.

    Returns
    -------
    plt.Axes
        The axes that was passed in.
    """
    if microphysics_list is None:
        microphysics_list = [
            "collision_condensation",
            "coalbure_condensation_small",
            "coalbure_condensation_large",
        ]
    # a real list keeps xarray's label selection unambiguous for any sequence type
    microphysics_list = list(microphysics_list)

    values = ds[y_var_name]
    reference = values.sel(microphysics="condensation")

    # relative difference f = (A - B) / B, expressed in percent
    relative_difference = (values - reference) / reference * 100
    relative_difference.attrs.update(
        long_name=f"{values.attrs['long_name']} relative difference to {microphysics_styles['condensation']['name']}",
        units=r"\%",
    )

    x = ds[x_var_name].sel(microphysics=microphysics_list)
    y = relative_difference.sel(microphysics=microphysics_list)

    for mp in microphysics_list:
        style = microphysics_styles.get_style(mp).copy()
        ax.scatter(
            x.sel(microphysics=mp),
            y.sel(microphysics=mp),
            **style,
        )

    # with "microphysics" as leading dimension matplotlib draws one faint line per column
    ax.plot(
        x.transpose("microphysics", ...),
        y.transpose("microphysics", ...),
        color="grey",
        alpha=0.1,
    )

    # zero line: no difference to the reference setup
    ax.axhline(0, color="black", linestyle="--", linewidth=0.5, zorder=0)

    ax.set_xlabel(label_from_attrs(x))
    ax.set_ylabel(label_from_attrs(y, name_width=20))
    ax.legend()

    return ax


# (x variable, y variable) of each panel, in row-major panel order
variable_combinations = [
    ("inflow_precipitation", "source_precipitation"),
    ("cloud_mass_radius_mean", "source_precipitation"),
    # ("mean_evaporation_height", "source_precipitation"),
    ("inflow_precipitation", "evaporation_fraction"),
    ("cloud_mass_radius_mean", "evaporation_fraction"),
    # ('mean_evaporation_height', "evaporation_fraction"),
]

# setups that are compared against the reference in this figure
compared_setups = [
    "collision_condensation",
    "coalbure_condensation_small",
    "coalbure_condensation_large",
]

fig, axs = plt.subplots(nrows=2, ncols=2, figsize=large_fig_size, sharey=True)

for _ax, (_x, _y) in zip(axs.ravel(), variable_combinations):
    _ax = plot_relative_differences(
        ax=_ax,
        x_var_name=_x,
        y_var_name=_y,
        ds=ds,
        ds_sem=ds_sem,
        microphysics_list=compared_setups,
    )
    # logarithmic x-axis, linear y-axis with a fixed range of -20 to 80
    _ax.set_xscale("log")
    _ax.set_yscale("linear")
    _ax.set_ylim(-20, 80)

add_subplotlabel(axs=axs.flatten())

fig.tight_layout()

save_figure(fig=fig, filepath=appendix_fig_dir / "microphysics-relative-differences-COMBINED-PLOTS-all")

## Evaporation Profiles

In [None]:
# Profiles of the negated `evaporation_rate_energy` over the normalized height for
# the "condensation" setup: individual clouds, median with interquartile range, and mean.
y_ticks = np.arange(0, 1.01, 0.25)

fig = plt.figure()
gs = fig.add_gridspec(nrows=1, ncols=1)

ax = fig.add_subplot(gs[:, :])

plot_microphysics = ["condensation"]

x = -ds_normalized["evaporation_rate_energy"]
# NOTE(review): the standard error is negated here as well; it only feeds `md_sem`,
# which is not drawn in this cell - confirm whether the sign flip is intended.
x_sem = -ds_normalized_sem["evaporation_rate_energy"]
# not read again in this cell
attrs = x.attrs.copy()
y = ds_normalized["normalized_gridbox_coord3"]

for mp in plot_microphysics:
    _x = x.sel(microphysics=mp)
    _x_sem = x_sem.sel(microphysics=mp)
    md_mean = _x.mean("cloud_id")
    # only referenced by the commented-out band further down
    md_sem = propagate_mean_std(_x, _x_sem, dim="cloud_id")
    style_full = microphysics_styles[mp].copy()

    # faint grey background lines of the transposed per-cloud data
    ax.plot(
        _x.T,
        y.T,
        color=adjust_lightness("grey", 1.5),
        alpha=0.3,
        zorder=1,
    )

    # tick labels are removed here and set again after the loop
    ax.set_yticks(y_ticks)
    ax.set_yticklabels([])

    # median over all clouds
    ax.plot(
        _x.median("cloud_id"),
        y,
        label=style_full["name"] + " Median",
        color=style_full["dark_color"],
        linestyle="-",
        zorder=4,
    )
    # interquartile range (25 % to 75 % quantile) over all clouds
    ax.fill_betweenx(
        y,
        _x.quantile(0.25, "cloud_id"),
        _x.quantile(0.75, "cloud_id"),
        alpha=0.3,
        color=adjust_lightness(style_full["light_color"], 1.5),
        zorder=2,
        label=style_full["name"] + " IQR",
    )

    # mean over all clouds
    ax.plot(
        md_mean,
        y,
        label=style_full["name"] + " Mean",
        color=style_full["color"],
        linestyle="--",
        zorder=4,
    )
    # ax.fill_betweenx(
    #     y,
    #     md_mean - md_sem,
    #     md_mean + md_sem,
    #     alpha=0.3,
    #     color=adjust_lightness(style_full["light_color"], 1.7),
    #     zorder=1,
    #     label=" Mean ± Std.Dev",
    # )


# ticks with their values as labels
ax.set_yticks(y_ticks, y_ticks)
ax.legend(loc="upper right")
ax.set_xlim(0, None)
ax.set_ylim(0, 1)

# ax.axvline(0, color="k", linestyle="--", alpha=1, zorder=10)
# ax.set_xscale('log')
ax.set_xlabel(label_from_attrs(x))
ax.set_ylabel(label_from_attrs(y, return_units=False, name_width=25))
# fig.suptitle("Evaporation profiles | Difference to EvapOnly")

fig.tight_layout()
save_figure(fig, fig_dir / f"profiles-{x.name}-{y.name}-LINEAR")

In [None]:
# Profiles of the negated `evaporation_rate_energy` over the normalized height for
# the "condensation" setup. Each cloud is drawn as a line whose segments are
# coloured by `liquid_water_content`; median, interquartile range and mean are on top.
y_ticks = np.arange(0, 1.01, 0.25)

fig = plt.figure(figsize=wide_fig_size)
gs = fig.add_gridspec(nrows=1, ncols=1)
ax = fig.add_subplot(gs[:, :])

plot_microphysics = ["condensation"]

x = -ds_normalized["evaporation_rate_energy"]
x_sem = -ds_normalized_sem["evaporation_rate_energy"]
attrs = x.attrs.copy()
y = ds_normalized["normalized_gridbox_coord3"]
c = ds_normalized["liquid_water_content"]

# colour scale from zero to the maximum of `cloud_liquid_water_content`
norm = mcolors.Normalize(vmin=0, vmax=ds["cloud_liquid_water_content"].max().data)

# select all but the top most gridboxes
below_top = dict(normalized_gridbox_coord3=slice(0, 0.99))

for mp in plot_microphysics:
    _x = x.sel(microphysics=mp).sel(**below_top)
    _x_sem = x_sem.sel(microphysics=mp)
    _y = y.sel(**below_top)
    _c = c.sel(**below_top)

    md_mean = _x.mean("cloud_id")
    md_sem = propagate_mean_std(_x, _x_sem, dim="cloud_id")
    style_full = microphysics_styles[mp].copy()

    _xx = _x
    _yy = _y.expand_dims(cloud_id=_x["cloud_id"])
    _cc = _c.sel(microphysics=mp)

    # Markers of size zero: nothing is drawn, the scatter only provides the
    # mappable (cmap and norm) for the colorbar.
    sc = ax.scatter(
        _xx,
        _yy,
        c=_cc,
        s=0,
        alpha=1,
        marker=".",
        cmap=strength_cmap,
        norm=norm,
    )

    # one coloured line per cloud
    for cloud_id in x["cloud_id"]:
        xx = np.flip(_xx.sel(cloud_id=cloud_id).data)
        yy = np.flip(_yy.sel(cloud_id=cloud_id).data)
        cc = np.flip(_cc.sel(cloud_id=cloud_id).data)

        # consecutive points form the segments, array of shape (n - 1, 2, 2)
        points = np.column_stack([xx, yy])
        lines = np.stack([points[:-1], points[1:]], axis=1)
        # each segment takes the colour of its first point
        colors = strength_cmap(norm(cc[:-1]))

        lc = mcollections.LineCollection(segments=lines, colors=colors)
        lc.set_linewidth(0.75)
        lc.set_alpha(1)  # fully opaque
        ax.add_collection(lc)

    # tick labels are removed here and set again after the loop
    ax.set_yticks(y_ticks)
    ax.set_yticklabels([])

    # median over all clouds
    ax.plot(
        _x.median("cloud_id"),
        _y,
        label=style_full["name"] + " Median",
        color=style_full["dark_color"],
        linestyle="-",
        zorder=4,
    )
    # interquartile range (25 % to 75 % quantile) over all clouds
    ax.fill_betweenx(
        _y,
        _x.quantile(0.25, "cloud_id"),
        _x.quantile(0.75, "cloud_id"),
        alpha=0.3,
        color=adjust_lightness(style_full["light_color"], 1.5),
        zorder=2,
        label=style_full["name"] + " IQR",
    )
    # mean over all clouds
    ax.plot(
        md_mean,
        _y,
        label=style_full["name"] + " Mean",
        color=style_full["color"],
        linestyle="--",
        zorder=4,
    )

# ticks with their values as labels
ax.set_yticks(y_ticks, y_ticks)
ax.legend(loc="upper right")

ax.set_xlim(0, None)
ax.set_ylim(0, 1)

ax.set_xlabel(label_from_attrs(x))
ax.set_ylabel(label_from_attrs(y, return_units=False, name_width=25))

fig.colorbar(sc, ax=ax, label=label_from_attrs(c, name_width=20, linebreak=True))

fig.tight_layout()

save_figure(fig, fig_dir / f"vertical-profiles-{x.name}-{y.name}-color-{c.name}")

### Differences between microphysics 

In [None]:
# Relative difference of the negated `evaporation_rate_energy` to the
# "condensation" setup as a function of the normalized height, one panel per setup.
x_all = -ds_normalized["evaporation_rate_energy"]
x_refernce = x_all.sel(microphysics="condensation")

x_sem_all = ds_normalized_sem["evaporation_rate_energy"]
x_sem_refernce = x_sem_all.sel(microphysics="condensation")

attrs = x_all.attrs.copy()

# Relative difference and its propagated uncertainty:
#   f  = (A - B) / B
#   df = sqrt((df/dA * dA)**2 + (df/dB * dB)**2)
#      = sqrt((1/B * dA)**2 + (-A/B**2 * dB)**2)
A, B = x_all, x_refernce
dA, dB = x_sem_all, x_sem_refernce

f = (A - B) / B
df = ((1 / B * dA) ** 2 + (-A / B**2 * dB) ** 2) ** 0.5

# both in percent
x = f * 100
x_sem = df * 100

x.attrs.update(
    long_name=f"Relative difference of {attrs['long_name']} compared to {microphysics_styles['condensation']['name']}",
    units=r"\%",
)

y = ds_normalized["normalized_gridbox_coord3"]

y_ticks = [0, 0.5, 1]

fig, axs = plt.subplots(nrows=3, ncols=1, figsize=(5, 5), sharex=True, sharey=True)

plot_microphysics = [
    "collision_condensation",
    "coalbure_condensation_small",
    "coalbure_condensation_large",
]

# (lower quantile, upper quantile, lightness factor, label), drawn from the
# widest to the narrowest band with increasing zorder
quantile_bands = [
    (0.1, 0.9, 1.5, "10-90%"),
    (0.25, 0.75, 1.4, "25-75%"),
    (0.33, 0.66, 1.3, "33-66%"),
]

for _ax, mp in zip(axs, plot_microphysics):
    _x = x.sel(microphysics=mp)
    _x_std = x_sem.sel(microphysics=mp)
    md_mean = _x.mean("cloud_id")
    md_std = propagate_mean_std(_x, _x_std, dim="cloud_id")

    style_full = microphysics_styles[mp].copy()

    _ax.set_title(microphysics_styles.get_setup(mp)["name"])

    # mean (dashed) and median (solid) over all clouds
    _ax.plot(
        md_mean,
        y,
        color=style_full["dark_color"],
        linestyle="--",
        label="Mean",
        zorder=10,
    )
    _ax.plot(
        _x.median("cloud_id"),
        y,
        color=style_full["dark_color"],
        linestyle="-",
        label="Median",
        zorder=10,
    )

    for band_zorder, (q_low, q_high, lightness, band_label) in enumerate(quantile_bands, start=1):
        _ax.fill_betweenx(
            y,
            _x.quantile(q_low, "cloud_id"),
            _x.quantile(q_high, "cloud_id"),
            zorder=band_zorder,
            color=adjust_lightness(style_full["light_color"], lightness),
            label=band_label,
        )

for _ax in axs:
    # zero line: no difference to the reference setup
    _ax.axvline(0, color="k", linestyle="--", alpha=0.5, zorder=10)
    _ax.set_xlim(-15, 15)
    _ax.set_ylim(0, 1)
    _ax.set_yticks(y_ticks)

fig.supxlabel(label_from_attrs(x, name_width=40))
fig.supylabel(label_from_attrs(y))
fig.tight_layout()

add_subplotlabel(axs=list(axs))

# second layout pass after the subplot labels were added
fig.tight_layout()

save_figure(fig, fig_dir / "evaporation_profiles_diff-percentile-normalized-NO-COMBINATION")

In [None]:
# Relative difference of the negated `evaporation_rate_energy` to the
# "condensation" setup as a function of the normalized height, one panel per setup.
# In addition to the statistics, the per-cloud profiles are drawn as thin lines.
x_all = -ds_normalized["evaporation_rate_energy"]
x_refernce = x_all.sel(microphysics="condensation")

x_sem_all = ds_normalized_sem["evaporation_rate_energy"]
x_sem_refernce = x_sem_all.sel(microphysics="condensation")

attrs = x_all.attrs.copy()

# Relative difference and its propagated uncertainty:
#   f  = (A - B) / B
#   df = sqrt((df/dA * dA)**2 + (df/dB * dB)**2)
#      = sqrt((1/B * dA)**2 + (-A/B**2 * dB)**2)
A, B = x_all, x_refernce
dA, dB = x_sem_all, x_sem_refernce

f = (A - B) / B
df = ((1 / B * dA) ** 2 + (-A / B**2 * dB) ** 2) ** 0.5

# both in percent
x = f * 100
x_sem = df * 100

x.attrs.update(
    long_name=f"Relative difference of {attrs['long_name']} compared to {microphysics_styles['condensation']['name']}",
    units=r"\%",
)

y = ds_normalized["normalized_gridbox_coord3"]

y_ticks = [0, 0.5, 1]

fig, axs = plt.subplots(nrows=3, ncols=1, figsize=(5, 5), sharex=True, sharey=True)

plot_microphysics = [
    "collision_condensation",
    "coalbure_condensation_small",
    "coalbure_condensation_large",
]

# (lower quantile, upper quantile, lightness factor, label), drawn from the
# widest to the narrowest band with increasing zorder
quantile_bands = [
    (0.1, 0.9, 1.5, "10-90%"),
    (0.25, 0.75, 1.4, "25-75%"),
    (0.33, 0.66, 1.2, "33-66%"),
]

for _ax, mp in zip(axs, plot_microphysics):
    _x = x.sel(microphysics=mp)
    _x_std = x_sem.sel(microphysics=mp)
    md_mean = _x.mean("cloud_id")
    md_std = propagate_mean_std(_x, _x_std, dim="cloud_id")

    style_full = microphysics_styles[mp].copy()

    _ax.set_title(microphysics_styles.get_setup(mp)["name"])

    # thin grey lines of the transposed per-cloud data, above the quantile bands
    _ax.plot(
        _x.T,
        y.T,
        color=adjust_lightness("grey", 1.2),
        alpha=0.3,
        linewidth=0.5,
        zorder=5,
    )

    # mean (dashed) and median (solid) over all clouds
    _ax.plot(
        md_mean,
        y,
        color=style_full["dark_color"],
        linestyle="--",
        label="Mean",
        zorder=10,
    )
    _ax.plot(
        _x.median("cloud_id"),
        y,
        color=style_full["dark_color"],
        linestyle="-",
        label="Median",
        zorder=10,
    )

    for band_zorder, (q_low, q_high, lightness, band_label) in enumerate(quantile_bands, start=1):
        _ax.fill_betweenx(
            y,
            _x.quantile(q_low, "cloud_id"),
            _x.quantile(q_high, "cloud_id"),
            zorder=band_zorder,
            color=adjust_lightness(style_full["light_color"], lightness),
            label=band_label,
        )

for _ax in axs:
    # zero line: no difference to the reference setup
    _ax.axvline(0, color="k", linestyle="--", alpha=0.5, zorder=10)
    _ax.set_xlim(-15, 15)
    _ax.set_ylim(0, 1)
    _ax.set_yticks(y_ticks)

fig.supxlabel(label_from_attrs(x, name_width=40))
fig.supylabel(label_from_attrs(y))
fig.tight_layout()

add_subplotlabel(axs=list(axs))

# second layout pass after the subplot labels were added
fig.tight_layout()

save_figure(
    fig, appendix_fig_dir / "evaporation_profiles_diff-percentile-normalized-NO-COMBINATION-individual"
)

## Correlations

### Calculate correlation coefficients

linear

In [None]:
# Pearson correlation (over "cloud_id") between each target quantity and each
# variable in `correlation_vars`. One dataset per target, one variable per entry.
correlation_vars = (
    "cloud_mass_radius_mean",
    "cloud_liquid_water_content",
    "inflow_precipitation",
    "relative_humidity_mean",
    "cloud_base_height",
)

# target quantities, keyed by the suffix of the resulting dataset name
correlation_targets = {
    "CIE": -ds["source_precipitation"],
    "EF": ds["evaporation_fraction"],
    "MEH": ds["mean_evaporation_height"],
}

# a single construction replaces three copy-pasted loops
linear_correlations = {
    key: xr.Dataset({var: xr.corr(target, ds[var], dim="cloud_id") for var in correlation_vars})
    for key, target in correlation_targets.items()
}

# store correlations in datasets
ds_correlations_CIE = linear_correlations["CIE"]
ds_correlations_EF = linear_correlations["EF"]
ds_correlations_MEH = linear_correlations["MEH"]

logarithmic

In [None]:
# Pearson correlation (over "cloud_id") between the natural logarithm of each
# target quantity and the natural logarithm of each variable in `correlation_vars`.
correlation_vars = (
    "cloud_mass_radius_mean",
    "cloud_liquid_water_content",
    "inflow_precipitation",
    "relative_humidity_mean",
    "cloud_base_height",
)

# target quantities, keyed by the suffix of the resulting dataset name
correlation_targets = {
    "CIE": -ds["source_precipitation"],
    "EF": ds["evaporation_fraction"],
    "MEH": ds["mean_evaporation_height"],
}

# a single loop replaces three copy-pasted ones
log_correlations = {}
for key, target in correlation_targets.items():
    # the logarithm of the target is the same for every variable, so it is taken once
    log_target = np.log(target)
    log_correlations[key] = xr.Dataset(
        {var: xr.corr(log_target, np.log(ds[var]), dim="cloud_id") for var in correlation_vars}
    )

# store correlations in datasets
ds_correlations_log_CIE = log_correlations["CIE"]
ds_correlations_log_EF = log_correlations["EF"]
ds_correlations_log_MEH = log_correlations["MEH"]

In [None]:
# Two-panel figure for the "condensation" setup:
# left: evaporation fraction against the cloud mean mass radius,
# right: negated `source_precipitation` against the cloud liquid water content.
# Each title shows the linear correlation coefficient computed in an earlier cell.
fig, axs = plt.subplots(ncols=2)

axs_ef: plt.Axes = axs[0]
axs_cie: plt.Axes = axs[1]

condensation = dict(microphysics="condensation")

# (axes, y data, name of the x variable, dataset holding the correlations)
panels = (
    (
        axs_ef,
        ds["evaporation_fraction"].sel(**condensation),
        "cloud_mass_radius_mean",
        ds_correlations_EF,
    ),
    (
        axs_cie,
        -ds["source_precipitation"].sel(**condensation),
        "cloud_liquid_water_content",
        ds_correlations_CIE,
    ),
)

for panel, y, predictor, correlation_source in panels:
    x = ds[predictor].sel(**condensation)
    correlation = correlation_source[predictor].sel(**condensation)
    panel.set_title(f"R = {correlation.data:.2f}")
    panel.scatter(
        x,
        y,
        **microphysics_styles.get_style("condensation"),
    )
    panel.set_xlabel(label_from_attrs(x, name_width=20))
    panel.set_ylabel(label_from_attrs(y, name_width=20))

add_subplotlabel(axs=axs)

fig.tight_layout()

save_figure(fig, fig_dir / "leading-correlation-evaporation_fraction-source_precipitation")

### All correlations as a mosaic

both 

In [None]:
# Mosaic of all correlations for the "condensation" microphysics setup.
# One row per correlated quantity (evaporation fraction, negated precipitation
# source, mean evaporation height) and one column per entry of `correlation_vars`.
# Each panel title shows the linear (R) and the log-log (R_log) coefficient.
n_vars = len(correlation_vars)

# squeeze=False keeps `axs` two-dimensional even for a single column, so that
# the row-wise indexing below works for any number of correlation variables.
fig, axs = plt.subplots(nrows=3, ncols=n_vars, figsize=(2 * n_vars, 7.5), squeeze=False)

axs_ef, axs_cie, axs_meh = axs

# (row of axes, y values, linear correlations, log-log correlations)
rows = (
    (
        axs_ef,
        ds["evaporation_fraction"].sel(microphysics="condensation"),
        ds_correlations_EF,
        ds_correlations_log_EF,
    ),
    (
        axs_cie,
        -ds["source_precipitation"].sel(microphysics="condensation"),
        ds_correlations_CIE,
        ds_correlations_log_CIE,
    ),
    (
        axs_meh,
        ds["mean_evaporation_height"].sel(microphysics="condensation"),
        ds_correlations_MEH,
        ds_correlations_log_MEH,
    ),
)

for row_axs, y, ds_corr, ds_corr_log in rows:
    # Share the y axis within the row for *all* columns, not only the first three.
    # Axes.sharey is used (instead of sharey="row") so tick labels stay visible.
    for _ax in row_axs[1:]:
        _ax.sharey(row_axs[0])

    for _ax, var in zip(row_axs, correlation_vars):
        x = ds[var].sel(microphysics="condensation")
        correlation = ds_corr[var].sel(microphysics="condensation")
        correlation_log = ds_corr_log[var].sel(microphysics="condensation")
        _ax.set_title(
            r"$R$" + f"={correlation.data:.2f}\n" + r"$R_{log}$" + f"={correlation_log.data:.2f}"
        )
        _ax.scatter(
            x,
            y,
            **microphysics_styles.get_style("condensation"),
        )
        _ax.set_xlabel(label_from_attrs(x, name_width=20, linebreak=True))

    # only the left-most panel of a row carries the y label
    row_axs[0].set_ylabel(label_from_attrs(y, name_width=15))

add_subplotlabel(axs=axs.flatten(), location="upper center")

fig.tight_layout()
save_figure(fig, appendix_fig_dir / "all-correlations")

### Vertical correlations

In [None]:
# Quick interactive look at the dimensions of `ds_normalized`.
ds_normalized.dims



In [None]:
fig, ax = plt.subplots(ncols=1, nrows=1)

# Per-cloud profiles for the "condensation" setup: liquid water content (x)
# against relative humidity (y), coloured by the negated evaporation rate (c).
y = ds["relative_humidity"].sel(microphysics="condensation")
x = ds["liquid_water_content"].sel(microphysics="condensation")
c = (-ds["evaporation_rate_energy"]).sel(microphysics="condensation")

# one colour normalisation shared by all clouds
norm = mcolors.Normalize(vmin=c.min(), vmax=c.max())

# NOTE(review): `style` is prepared here but not passed to any plotting call in this cell.
style = microphysics_styles.get_style("condensation").copy()
style.pop("color")
style.update(marker=".", alpha=0.75)

# Zero-size scatter: draws nothing visible but provides the mappable for the colorbar.
sc = ax.scatter(
    x,
    y,
    c=c,
    s=0,
    alpha=1,
    marker=".",
    cmap=strength_cmap,
    norm=norm,
)

for cloud_id in x["cloud_id"]:
    # label-based gridbox selection from 0 up to (max_gridbox - 1) of this cloud
    upper_gridbox = ds["max_gridbox"].sel(cloud_id=cloud_id, microphysics="condensation") - 1
    gridboxes = slice(0, upper_gridbox)
    selection = dict(cloud_id=cloud_id, gridbox=gridboxes)

    xx = np.flip(x.sel(**selection).data)
    yy = np.flip(y.sel(**selection).data)
    cc = np.flip(c.sel(**selection).data)

    # consecutive points form the line segments; each segment takes the colour
    # of the value at its first point
    points = np.column_stack((xx, yy))
    lines = np.stack((points[:-1], points[1:]), axis=1)
    colors = strength_cmap(norm(cc[:-1]))

    lc = mcollections.LineCollection(segments=lines, colors=colors)
    lc.set_linewidth(2)
    lc.set_alpha(0.75)  # semi-transparent lines
    ax.add_collection(lc)

fig.colorbar(sc, ax=ax, label=label_from_attrs(c, name_width=20, linebreak=True))

ax.set_xscale("log")
ax.set_xlabel(label_from_attrs(x, name_width=20))
ax.set_ylabel(label_from_attrs(y, name_width=20))
ax.set_xlim(8e-6, None)
fig.tight_layout()

save_figure(fig, fig_dir / f"scatter-{x.name}-{y.name}-color-{c.name}")

In [None]:
# Wide (8 x 3) variant of the liquid water content / relative humidity profile
# figure, coloured by the negated evaporation rate, for the "condensation" setup.
fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(8, 3))

condensation = dict(microphysics="condensation")
y = ds["relative_humidity"].sel(**condensation)
x = ds["liquid_water_content"].sel(**condensation)
c = (-ds["evaporation_rate_energy"]).sel(**condensation)

# NOTE(review): `style` is prepared here but not passed to any plotting call in this cell.
style = microphysics_styles.get_style("condensation").copy()

norm = mcolors.Normalize(vmin=c.min(), vmax=c.max())

style.pop("color")
style.update(marker=".", alpha=0.75)

# Invisible (size 0) scatter that only serves as mappable for the colorbar.
sc = ax.scatter(x, y, c=c, s=0, alpha=1, marker=".", cmap=strength_cmap, norm=norm)

for cloud_id in x["cloud_id"]:
    gridboxes = slice(0, ds["max_gridbox"].sel(cloud_id=cloud_id, **condensation) - 1)

    xx, yy, cc = (
        np.flip(variable.sel(cloud_id=cloud_id, gridbox=gridboxes).data) for variable in (x, y, c)
    )

    # pair each point with its successor to build the segments of this profile
    vertices = list(zip(xx, yy))
    lines = [[start, end] for start, end in zip(vertices[:-1], vertices[1:])]
    # a segment is coloured by the value at its starting point
    colors = strength_cmap(norm(cc[:-1]))

    lc = mcollections.LineCollection(segments=lines, colors=colors)
    lc.set_linewidth(2)
    lc.set_alpha(0.75)  # semi-transparent lines
    ax.add_collection(lc)

fig.colorbar(sc, ax=ax, label=label_from_attrs(c, name_width=20, linebreak=True))

ax.set_xscale("log")
ax.set_xlabel(label_from_attrs(x, name_width=20))
ax.set_ylabel(label_from_attrs(y, name_width=20))
ax.set_xlim(8e-6, None)
fig.tight_layout()

save_figure(fig, fig_dir / f"scatter-{x.name}-{y.name}-color-{c.name}-wide")

In [None]:
# Correlation along the "gridbox" dimension between the negated evaporation
# rate and each of the variables listed in `x_vars`.
x_vars = ("relative_humidity", "liquid_water_content", "mass_radius_mean")

correlated_var = -ds["evaporation_rate_energy"].transpose("gridbox", ...)

correlations = {
    var: xr.corr(correlated_var, ds[var].transpose("gridbox", ...), dim="gridbox")
    for var in x_vars
}

# collect the per-variable coefficients in a single dataset
ds_correlations = xr.Dataset(correlations)

In [None]:
# Histogram of the per-cloud vertical correlation coefficients for the
# "condensation" setup, one bar series per variable in `ds_correlations`.
fig, ax = plt.subplots()

bins = np.arange(-1, 1.1, 0.25)

y = ds_correlations.sel(microphysics="condensation")

# mean and standard deviation over all clouds, shown in the legend labels
m, s = y.mean("cloud_id"), y.std("cloud_id")

keys = list(y.data_vars)

# Legend text that should replace the attribute-derived label. Keyed by the
# variable name so that the override stays correct if the order of the
# variables in `ds_correlations` changes.
label_overrides = {"mass_radius_mean": "Mean mass radius"}

labels = []
for key in keys:
    base_label = label_overrides.get(key, label_from_attrs(ds[key], return_units=False))
    labels.append(base_label + rf" : {m[key].data:.2f}$\pm${s[key].data:.2f}")

data = [y[key] for key in keys]

colors = [
    adjust_lightness(default_colors[1], 0.75),
    adjust_lightness(default_colors[1], 1.1),
    adjust_lightness(default_colors[1], 1.3),
]

n, b, p = ax.hist(x=data, bins=bins, label=labels, color=colors, alpha=1, align="mid")

# put a tick on every bin edge
ax.set_xticks(b)
ax.tick_params(axis="x", rotation=90)

ax.set_xlabel(
    f"Correlation with {label_from_attrs(correlated_var, return_units = False)} along altitude"
)
ax.set_ylabel("Number of clouds")
ax.legend()

fig.tight_layout()

save_figure(fig=fig, filepath=fig_dir / f"vertical-correlation-{correlated_var.name}-histograms")

In [None]:
# Set-up for the bar chart of the vertical correlations ("condensation" setup).
fig, ax = plt.subplots(figsize=small_fig_size)

keys = ["mass_radius_mean", "liquid_water_content", "relative_humidity"]

y = ds_correlations.sel(microphysics="condensation")
# one row per entry of `keys`
data = np.array([y[key].data for key in keys])

m, s = y.mean("cloud_id"), y.std("cloud_id")

labels = [label_from_attrs(ds[key], return_units=False) for key in keys]
# shorter, hand-written label for the first key ("mass_radius_mean")
labels[0] = "Mean mass radius"

# three shades of the same base colour, from light to dark
colors = [adjust_lightness(default_colors[1], amount) for amount in (1.3, 1.1, 0.75)]


def adjacent_values(vals, q1, q3):
    """Return the lower and upper whisker positions for a box/violin plot.

    The whiskers reach 1.5 times the interquartile range beyond the quartiles
    and are clipped to the range of the data.

    Parameters
    ----------
    vals : array-like
        Data values. They do not need to be sorted; the minimum and maximum
        are determined explicitly.
    q1, q3 : float
        First and third quartile of ``vals``.

    Returns
    -------
    tuple
        ``(lower_adjacent_value, upper_adjacent_value)``.
    """
    vals = np.asarray(vals)
    whisker_reach = (q3 - q1) * 1.5

    # never below q3 and never above the largest data value
    upper_adjacent_value = np.clip(q3 + whisker_reach, q3, np.max(vals))

    # never above q1 and never below the smallest data value
    lower_adjacent_value = np.clip(q1 - whisker_reach, np.min(vals), q1)
    return lower_adjacent_value, upper_adjacent_value


# Row-wise statistics of `data`
quartile1, medians, quartile3 = np.percentile(data, [25, 50, 75], axis=1)
means = np.mean(data, axis=1)
stds = np.std(data, axis=1)
inds = np.arange(1, len(medians) + 1)

# horizontal bar reaching from zero to the mean
for bar_width, bar_position, bar_color in zip(means, inds, colors):
    ax.barh(y=bar_position, width=bar_width, height=0.5, color=bar_color)

# median (circle) with the interquartile range as dashed grey line
for values, marker in ((medians, "o"), (quartile1, "|"), (quartile3, "|")):
    ax.scatter(values, inds, marker=marker, color="grey", s=30, zorder=8)
ax.hlines(inds, quartile1, quartile3, color="grey", linestyle="--", lw=1, zorder=4)

# mean (cross) with +- one standard deviation as solid black line
for values, marker in ((means, "X"), (means - stds, "|"), (means + stds, "|")):
    ax.scatter(values, inds, marker=marker, color="k", s=30, zorder=10)
ax.hlines(inds, means - stds, means + stds, color="k", linestyle="-", lw=1, zorder=3)

ax.set_ylim(0.5, len(medians) + 0.5)
ax.set_yticks(inds, labels)
ax.set_xticks(np.arange(-1, 1.1, 0.5))
ax.xaxis.set_tick_params(rotation=90)

# wrap the long x label at 30 characters
xlabel = f"Correlation with {label_from_attrs(correlated_var, return_units = False)} along altitude"
ax.set_xlabel("\n".join(textwrap.wrap(xlabel, 30)))
fig.tight_layout()

In [None]:
# Set-up for the violin plot of the vertical correlations ("condensation" setup).
fig, ax = plt.subplots(figsize=small_fig_size)

keys = [
    "mass_radius_mean",
    "liquid_water_content",
    "relative_humidity",
]

y = ds_correlations.sel(microphysics="condensation")

# stack the values of all keys: one row per key
rows = [y[key].data for key in keys]
data = np.array(rows)

m = y.mean("cloud_id")
s = y.std("cloud_id")

labels = []
for key in keys:
    labels.append(label_from_attrs(ds[key], return_units=False))
# shorter, hand-written label for the first key ("mass_radius_mean")
labels[0] = "Mean mass radius"

# light to dark shades of the same base colour
base_color = default_colors[1]
colors = [
    adjust_lightness(base_color, 1.3),
    adjust_lightness(base_color, 1.1),
    adjust_lightness(base_color, 0.75),
]


def adjacent_values(vals, q1, q3):
    """Compute whisker end points (1.5 x IQR rule) clipped to the data range.

    Parameters
    ----------
    vals : array-like
        Data values in arbitrary order. The smallest and largest value are
        looked up explicitly instead of assuming a sorted array.
    q1, q3 : float
        First and third quartile of ``vals``.

    Returns
    -------
    tuple
        ``(lower_adjacent_value, upper_adjacent_value)``.
    """
    vals = np.asarray(vals)
    smallest, largest = np.min(vals), np.max(vals)
    iqr = q3 - q1

    # upper whisker: q3 + 1.5 * IQR, kept within [q3, largest value]
    upper_adjacent_value = q3 + iqr * 1.5
    upper_adjacent_value = np.clip(upper_adjacent_value, q3, largest)

    # lower whisker: q1 - 1.5 * IQR, kept within [smallest value, q1]
    lower_adjacent_value = q1 - iqr * 1.5
    lower_adjacent_value = np.clip(lower_adjacent_value, smallest, q1)
    return lower_adjacent_value, upper_adjacent_value


# One horizontal violin per row of `data` (violinplot expects one column per violin).
parts = ax.violinplot(
    data.T, showmeans=False, showmedians=False, showextrema=False, orientation="horizontal"
)

for pc, color in zip(parts["bodies"], colors):
    pc.set_facecolor(color)
    pc.set_edgecolor("None")
    pc.set_alpha(1)

# Row-wise statistics of `data`
quartile1, medians, quartile3 = np.percentile(data, [25, 50, 75], axis=1)
means, stds = np.mean(data, axis=1), np.std(data, axis=1)

# Whiskers per row of `data`. The rows are sorted first and iterated directly
# (not the columns of `data.T`), so that each row is paired with its own quartiles.
# The whiskers are computed here but not drawn in this cell.
whiskers = np.array(
    [
        adjacent_values(sorted_array, q1, q3)
        for sorted_array, q1, q3 in zip(np.sort(data, axis=1), quartile1, quartile3)
    ]
)
whiskers_min, whiskers_max = whiskers[:, 0], whiskers[:, 1]

inds = np.arange(1, len(medians) + 1)

# median (circle) with the interquartile range as dashed grey line
ax.scatter(medians, inds, marker="o", color="grey", s=30, zorder=8)
ax.scatter(quartile1, inds, marker="|", color="grey", s=30, zorder=8)
ax.scatter(quartile3, inds, marker="|", color="grey", s=30, zorder=8)
ax.hlines(inds, quartile1, quartile3, color="grey", linestyle="--", lw=1, zorder=4)

# mean (cross) with +- one standard deviation as solid black line
ax.scatter(means, inds, marker="X", color="k", s=30, zorder=10)
ax.scatter(means - stds, inds, marker="|", color="k", s=30, zorder=10)
ax.scatter(means + stds, inds, marker="|", color="k", s=30, zorder=10)
ax.hlines(inds, means - stds, means + stds, color="k", linestyle="-", lw=1, zorder=3)


# ax.set_xlim(-1, 1)
ax.set_ylim(0.5, len(medians) + 0.5)
ax.set_yticks(inds, labels)
ax.set_xticks(np.arange(-1, 1.1, 0.5))
ax.xaxis.set_tick_params(rotation=90)

# wrap the long x label at 30 characters
ax.set_xlabel(
    "\n".join(
        textwrap.wrap(
            f"Correlation with {label_from_attrs(correlated_var, return_units = False)} along altitude",
            30,
        )
    )
)
fig.tight_layout()

# Check the evolution of the DSDs along altitude

In [None]:
# Quick-look image of `xi`.
# NOTE(review): in this part of the notebook `xi` is only assigned in a cell
# further below - confirm it is defined earlier, otherwise this cell fails on
# a fresh kernel (Restart & Run All).
plt.imshow(xi)

<matplotlib.image.AxesImage at 0x7ffb17adaf90>

In [None]:
from sdm_eurec4a.conversions import msd_from_psd

In [None]:
# NOTE(review): leftover scratch expression; it evaluates to the one-element
# tuple (561,) and has no effect on the analysis.
561,

(561,)

In [None]:
# Size distribution and vertical profiles of a randomly chosen cloud for the
# "condensation" setup.
# NOTE(review): the random choice is not seeded, so every run shows a different cloud.
cloud_id = np.random.choice(ds["cloud_id"].data)
selected_ds = ds.sel(microphysics="condensation").sel(cloud_id=cloud_id)
# keep only the gridboxes from 0 up to `max_gridbox` of the selected cloud
selected_ds = selected_ds.sel(gridbox=slice(0, selected_ds["max_gridbox"]))

xi = (
    msd_from_psd(selected_ds, psd_name="xi_temporal_mean", scale_name="radius_bins", scale_factor=1e-6)
    / selected_ds["gridbox_volume"]
)
xi.attrs.update(
    long_name="Mean size distribution",
    units="kg m^{-3} um^{-1}",
    description="Mean size distribution computed from the temporal mean of the PSD xi divided by the gridbox volume",
)
fig, axs = plt.subplot_mosaic(
    [
        ["dsd", "evap", "mmr", "relhum", "rwc"],
    ],
    figsize=(12, 4),
    gridspec_kw={"width_ratios": [3, 1, 1, 1, 1]},
    sharey=True,
)

axs["dsd"].pcolormesh(selected_ds["radius_bins"], selected_ds["gridbox_coord3"], xi, cmap=strength_cmap)
axs["dsd"].set_xscale("log")
axs["dsd"].set_xlim(1e2, 3e3)
axs["dsd"].set_xlabel("Radius in um")
axs["dsd"].set_ylabel(label_from_attrs(selected_ds["gridbox_coord3"], name_width=20))
fig.colorbar(axs["dsd"].collections[0], ax=axs["dsd"], label=label_from_attrs(xi, linebreak=True))

# panel name -> variable shown as vertical profile
profile_panels = {
    "evap": "evaporation_rate_energy",
    "mmr": "mass_radius_mean",
    "relhum": "relative_humidity",
    "rwc": "liquid_water_content",
}
for name, var_name in profile_panels.items():
    data = selected_ds[var_name]
    if var_name == "evaporation_rate_energy":
        # the evaporation rate is plotted with flipped sign
        data = -data
    axs[name].plot(
        data,
        selected_ds["gridbox_coord3"],
    )
    axs[name].set_xlabel(label_from_attrs(selected_ds[var_name], name_width=10))
    # Hide the y ticks of this panel only. `set_yticks([])` must not be used
    # here: all panels share the y axis (sharey=True) and therefore the tick
    # locator, so it would also remove the altitude ticks of the "dsd" panel.
    axs[name].tick_params(axis="y", left=False, labelleft=False)
    axs[name].xaxis.set_tick_params(rotation=-45)

fig.suptitle(f"Cloud ID: {cloud_id.item()}")
fig.tight_layout()

In [None]:
# Same figure as in the cell above, drawn for another randomly chosen cloud
# ("condensation" setup): size distribution plus four vertical profiles.
# NOTE(review): unseeded random choice - the selected cloud changes between runs.
cloud_id = np.random.choice(ds["cloud_id"].data)
selected_ds = ds.sel(microphysics="condensation").sel(cloud_id=cloud_id)
# restrict to the gridboxes from 0 up to `max_gridbox` of the selected cloud
selected_ds = selected_ds.sel(gridbox=slice(0, selected_ds["max_gridbox"]))

xi = (
    msd_from_psd(selected_ds, psd_name="xi_temporal_mean", scale_name="radius_bins", scale_factor=1e-6)
    / selected_ds["gridbox_volume"]
)
xi.attrs.update(
    long_name="Mean size distribution",
    units="kg m^{-3} um^{-1}",
    description="Mean size distribution computed from the temporal mean of the PSD xi divided by the gridbox volume",
)
fig, axs = plt.subplot_mosaic(
    [
        ["dsd", "evap", "mmr", "relhum", "rwc"],
    ],
    figsize=(12, 4),
    gridspec_kw={"width_ratios": [3, 1, 1, 1, 1]},
    sharey=True,
)

axs["dsd"].pcolormesh(selected_ds["radius_bins"], selected_ds["gridbox_coord3"], xi, cmap=strength_cmap)
axs["dsd"].set_xscale("log")
axs["dsd"].set_xlim(1e2, 3e3)
axs["dsd"].set_xlabel("Radius in um")
axs["dsd"].set_ylabel(label_from_attrs(selected_ds["gridbox_coord3"], name_width=20))
fig.colorbar(axs["dsd"].collections[0], ax=axs["dsd"], label=label_from_attrs(xi, linebreak=True))

for name, var_name in zip(
    ["evap", "mmr", "relhum", "rwc"],
    ["evaporation_rate_energy", "mass_radius_mean", "relative_humidity", "liquid_water_content"],
):
    data = selected_ds[var_name]
    if var_name == "evaporation_rate_energy":
        # the evaporation rate is plotted with flipped sign
        data = -data
    axs[name].plot(
        data,
        selected_ds["gridbox_coord3"],
    )
    axs[name].set_xlabel(label_from_attrs(selected_ds[var_name], name_width=10))
    # The y axis (and with it the tick locator) is shared between all panels,
    # so `set_yticks([])` would wipe the altitude ticks of the "dsd" panel as
    # well. Only hide the tick marks and labels of this profile panel instead.
    axs[name].tick_params(axis="y", left=False, labelleft=False)
    axs[name].xaxis.set_tick_params(rotation=-45)

fig.suptitle(f"Cloud ID: {cloud_id.item()}")
fig.tight_layout()

In [None]:
# Collect the path of the processed Eulerian dataset for every "cluster_*"
# entry found in the condensation output directory.
master_data_dir = data_dir / "condensation"
relative_path_to_eulerian_dataset = Path("processed/eulerian_dataset.nc")
pattern = "cluster_*/"

# sorted so that the order of the paths is deterministic
cluster_dirs = sorted(master_data_dir.glob(pattern))
data_dir_list = np.array(cluster_dirs)
# element-wise path join on the array of directories
eulerian_dataset_path_list = data_dir_list / relative_path_to_eulerian_dataset

In [None]:
from sdm_eurec4a.constants import TimeSlices

In [None]:
# Number of super-droplets of one randomly chosen cloud during the
# quasi-stationary state, for gridboxes selected with slice(0, 20).
# NOTE(review): unseeded random choice - the selected cloud changes between runs.
path = np.random.choice(eulerian_dataset_path_list)

# Load the required selection into memory inside the context manager, so the
# file handle is closed again instead of staying open for the whole session.
with xr.open_dataset(path) as eul:
    data = (
        eul["number_superdroplets"]
        .sel(time=TimeSlices.quasi_stationary_state)
        .sel(gridbox=slice(0, 20))
        .load()
    )

fig, axs = plt.subplot_mosaic(
    [["temp_mean", "all"], ["rain", "aerosol"]], figsize=(12, 7), gridspec_kw={"width_ratios": [1, 1]}
)

# temporal mean of the selection with radius_bins >= 0
data.sel(radius_bins=slice(0, None)).mean("time").plot(ax=axs["temp_mean"], cmap=strength_cmap)
axs["temp_mean"].set_xscale("log")
axs["temp_mean"].set_xlim(10, 3e3)

# NOTE(review): "rain" and "aerosol" are separated at radius_bins == 0 -
# confirm that this is the intended threshold for the `radius_bins` coordinate.
for name, sli in zip(["all", "rain", "aerosol"], [slice(None, None), slice(0, None), slice(None, 0)]):
    _ax = axs[name]
    # sum over all radius bins of the selection
    data.sel(radius_bins=sli).sum("radius_bins").T.plot(ax=_ax, cmap=strength_cmap)
    _ax.set_title(name)

fig.suptitle(f"Cloud: {path.parent.parent.name}")
fig.tight_layout()

'cluster_234'

In [None]:
# Temporal mean and standard deviation of the number of super-droplets per
# gridbox for every cloud, restricted to radius_bins >= 0.
# For a quick test on a random subset use instead:
# paths = np.random.choice(eulerian_dataset_path_list, 20)
paths = eulerian_dataset_path_list
mm = []
ss = []
for path in paths:
    # clusters without a processed Eulerian dataset are skipped
    if not path.is_file():
        continue

    # the context manager closes the file again, so that not one open file
    # handle per cloud is kept for the rest of the session
    with xr.open_dataset(path) as eul:
        data = (
            eul["number_superdroplets"]
            .sel(time=TimeSlices.quasi_stationary_state)
            .sel(gridbox=slice(0, eul["gridbox"].max() - 1))
            .sel(radius_bins=slice(0, None))
        )
        # sum over the radius bins once and load it before the file is closed
        sd_per_gridbox = data.sum("radius_bins").load()
        m, s = sd_per_gridbox.mean("time"), sd_per_gridbox.std("time")

    mm.append(m)
    ss.append(s)

# stack the per-cloud results along a new "cloud_id" dimension
mm = xr.concat(mm, dim="cloud_id")
ss = xr.concat(ss, dim="cloud_id")

In [None]:
# Temporal mean and standard deviation of the number of super-droplets per
# gridbox for every cloud, summed over all radius bins.
# For a quick test on a random subset use instead:
# paths = np.random.choice(eulerian_dataset_path_list, 20)
paths = eulerian_dataset_path_list
amm = []
ass = []
for path in paths:
    # clusters without a processed Eulerian dataset are skipped
    if not path.is_file():
        continue

    # the context manager closes the file again, so that not one open file
    # handle per cloud is kept for the rest of the session
    with xr.open_dataset(path) as eul:
        data = (
            eul["number_superdroplets"]
            .sel(time=TimeSlices.quasi_stationary_state)
            .sel(gridbox=slice(0, eul["gridbox"].max() - 1))
        )
        # sum over the radius bins once and load it before the file is closed
        sd_per_gridbox = data.sum("radius_bins").load()
        m, s = sd_per_gridbox.mean("time"), sd_per_gridbox.std("time")

    amm.append(m)
    ass.append(s)

# stack the per-cloud results along a new "cloud_id" dimension
amm = xr.concat(amm, dim="cloud_id")
ass = xr.concat(ass, dim="cloud_id")

In [None]:
# Profiles of the temporal-mean number of super-droplets per gridbox, one thin
# line per cloud: `mm` in red and `amm` in grey.
plt.figure(figsize=wide_fig_size)

thin_line = dict(linewidth=0.5, alpha=0.5)
for profiles, line_color, legend_label in (
    (mm, "red", "Rain drops"),
    (amm, "grey", "Rain AND aerosol drops"),
):
    # all clouds at once (no label, otherwise every line would get a legend entry)
    plt.plot(profiles.T, profiles["gridbox"], color=line_color, **thin_line)
    # first cloud again, only to create a single legend entry
    plt.plot(profiles[0], profiles["gridbox"], color=line_color, label=legend_label, **thin_line)

plt.ylabel("Gridbox")
plt.xlabel("SDs per gridbox")
plt.legend()
plt.title("SDs per gridbox\nTemporal mean over stationary state")

Text(0.5, 1.0, 'SDs per gridbox\nTemporal mean over stationary state')

In [None]:
# `xi` of the currently selected cloud over radius bins (x) and gridbox index (y)
fig, ax = plt.subplots()

ax.pcolormesh(
    selected_ds["radius_bins"],
    selected_ds["gridbox"],
    xi,
    shading="auto",
    cmap=strength_cmap,
)

# ax.set_xscale("log")

### Appendix correlations

In [None]:
# Highlight a single cloud (red) against all clouds (grey) for the
# "condensation" setup in three log-log panels.
cloud_id = 396

fig, axs = plt.subplot_mosaic([["dsd", "cloud", "evap"]], figsize=(9, 3))

# --- distributions: number concentration selected at `max_gridbox` of each cloud
x = ds["radius_bins"]
y = ds["number_concentration"].sel(gridbox=ds["max_gridbox"]).sel(microphysics="condensation").T

axs["dsd"].plot(x, y, color="grey", alpha=0.1)
axs["dsd"].plot(x, y.sel(cloud_id=cloud_id), color="red", alpha=1)

axs["dsd"].set_xlim(50, None)

axs["dsd"].set_xlabel(label_from_attrs(x, name_width=20), fontsize=10)
axs["dsd"].set_ylabel(label_from_attrs(y, name_width=15), fontsize=10)

# --- scatter panels: (panel name, x values, y values)
scatter_panels = (
    (
        "cloud",
        ds["inflow_precipitation"].sel(microphysics="condensation"),
        ds["cloud_mass_radius_mean"].sel(microphysics="condensation"),
    ),
    (
        "evap",
        -ds["source_precipitation"].sel(microphysics="condensation"),
        ds["evaporation_fraction"].sel(microphysics="condensation"),
    ),
)

for _name, x, y in scatter_panels:
    # all clouds as faint background
    axs[_name].scatter(x, y, color="grey", alpha=0.1)
    # the selected cloud on top
    axs[_name].scatter(
        x.sel(cloud_id=cloud_id),
        y.sel(cloud_id=cloud_id),
        color="red",
        label=f"Cloud {cloud_id}",
        s=10,
    )

    axs[_name].set_xlabel(label_from_attrs(x, name_width=15), fontsize=10)
    axs[_name].set_ylabel(label_from_attrs(y, name_width=15), fontsize=10)

# every panel uses logarithmic axes
for _name in axs:
    axs[_name].set_yscale("log")
    axs[_name].set_xscale("log")

fig.tight_layout()

In [None]:
# Open the relative humidity parameter file of the model input (v4.2) and
# print its summary.
relative_humidity_parameters_path = (
    RepositoryPath("levante").repo_dir / "data/model/input_v4.2/relative_humidity_parameters.nc"
)
_d = xr.open_dataset(relative_humidity_parameters_path)

print(_d)

<xarray.Dataset> Size: 10kB
Dimensions:   (cloud_id: 260)
Coordinates:
  * cloud_id  (cloud_id) int64 2kB 80 81 82 83 84 85 ... 567 568 569 570 571 572
Data variables:
    f_0       (cloud_id) float64 2kB ...
    slope_1   (cloud_id) float64 2kB ...
    x_split   (cloud_id) float64 2kB ...
    slope_2   (cloud_id) float64 2kB ...
