#### Imports

In [None]:
from typing import Union
from typing_extensions import NotRequired, TypedDict
from functools import reduce

from pathlib import Path
import textwrap

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.colors as mcolors
import numpy as np
import xarray as xr


from sdm_eurec4a.visulization import (
    adjust_lightness_array,
    set_custom_rcParams,
    handler_map_alpha,
)
from sdm_eurec4a.reductions import mean_and_stderror_of_mean

from sdm_eurec4a import RepositoryPath
import warnings

warnings.filterwarnings("ignore")

In [None]:
def set_xticks_time(ax):
    """Put the x-axis ticks of ``ax`` at 0, 500 and 1000."""
    ax.set_xticks([0, 500, 1000])


def set_yticks_height(ax):
    """Put the y-axis ticks of ``ax`` at 0, 500, 1000, 1500 and 2000."""
    ax.set_yticks([0, 500, 1000, 1500, 2000])


def set_yticks_height_km(ax):
    """Put the y-axis ticks of ``ax`` at 0, 0.5, 1, 1.5 and 2."""
    ax.set_yticks([0, 0.5, 1, 1.5, 2])


def set_logxticks_meter(ax):
    """Put x ticks at 1e-6 and 1e-3 and label them as powers of ten."""
    # Tick position -> LaTeX label; insertion order fixes the tick order.
    tick_labels = {1e-6: r"$10^{-6}$", 1e-3: r"$10^{-3}$"}
    ax.set_xticks(list(tick_labels), list(tick_labels.values()))


def set_logxticks_micrometer(ax):
    """Put x ticks at 1e-3, 1e0 and 1e3 and label them as powers of ten."""
    # Tick position -> LaTeX label; insertion order fixes the tick order.
    tick_labels = {1e-3: r"$10^{-3}$", 1e0: r"$10^{0}$", 1e3: r"$10^{3}$"}
    ax.set_xticks(list(tick_labels), list(tick_labels.values()))


def set_logtyticks_psd(ax):
    """Put y ticks at 1e0 and 1e6 and label them as powers of ten."""
    # Tick position -> LaTeX label; insertion order fixes the tick order.
    tick_labels = {1e0: r"$10^0$", 1e6: r"$10^6$"}
    ax.set_yticks(list(tick_labels), list(tick_labels.values()))


def set_yticks_lwc(ax):
    """Put the y-axis ticks of ``ax`` at 0, 0.1 and 0.2."""
    lwc_ticks = [0, 0.1, 0.2]
    ax.set_yticks(lwc_ticks)

In [None]:
import statsmodels.api as sm


def linear_fit(x: xr.DataArray, y: xr.DataArray):
    """
    Fit ``y = const + slope * x`` by ordinary least squares.

    Both inputs are flattened, ordered by ascending ``x`` and reduced to the
    pairs in which ``x`` and ``y`` are both finite.

    Returns
    -------
    results
        The fitted statsmodels OLS results object.
    corr
        The 2x2 matrix returned by ``np.corrcoef`` for the cleaned pairs.
    x, y
        The sorted, finite-only 1-D arrays that were used for the fit.
    """
    x_flat = x.values.flatten()
    y_flat = y.values.flatten()

    # Order the pairs by x first, then discard non-finite pairs.
    order = np.argsort(x_flat)
    x_sorted, y_sorted = x_flat[order], y_flat[order]
    finite = np.isfinite(x_sorted) & np.isfinite(y_sorted)
    x_fit, y_fit = x_sorted[finite], y_sorted[finite]

    # Design matrix: the x column plus the constant column added by statsmodels.
    design = sm.add_constant(x_fit.reshape(-1, 1))

    corr = np.corrcoef(x_fit, y_fit)
    results = sm.OLS(y_fit, design).fit()

    return results, corr, x_fit, y_fit


def linear_fit_new(x: Union[np.ndarray, xr.DataArray], y: Union[np.ndarray, xr.DataArray]):
    """
    Fit ``y = const + slope * x`` by ordinary least squares.

    Accepts ``xr.DataArray`` or ``np.ndarray`` inputs. Both are flattened,
    ordered by ascending ``x`` and reduced to the pairs in which ``x`` and
    ``y`` are both finite.

    Returns
    -------
    results
        The fitted statsmodels OLS results object.
    corr
        The 2x2 matrix returned by ``np.corrcoef`` for the cleaned pairs.

    Raises
    ------
    ValueError
        If ``x`` or ``y`` is neither an ``xr.DataArray`` nor an ``np.ndarray``.
    """

    def _as_flat_array(values):
        # Reduce either supported container to a flat numpy array.
        if isinstance(values, xr.DataArray):
            return values.values.flatten()
        if isinstance(values, np.ndarray):
            return values.flatten()
        raise ValueError("x and y must be either xr.DataArray or np.ndarray")

    x_flat = _as_flat_array(x)
    y_flat = _as_flat_array(y)

    # Order the pairs by x first, then discard non-finite pairs.
    order = np.argsort(x_flat)
    x_sorted, y_sorted = x_flat[order], y_flat[order]
    finite = np.isfinite(x_sorted) & np.isfinite(y_sorted)
    x_fit, y_fit = x_sorted[finite], y_sorted[finite]

    # Design matrix: the x column plus the constant column added by statsmodels.
    design = sm.add_constant(x_fit.reshape(-1, 1))

    corr = np.corrcoef(x_fit, y_fit)
    results = sm.OLS(y_fit, design).fit()

    return results, corr


def linear_fit_plot(ax, x: xr.DataArray, y: xr.DataArray, alpha: float = 0.05):
    """
    Draw an OLS regression line and its interval bounds on ``ax``.

    Parameters
    ----------
    ax
        Axes-like object; only its ``plot`` method is used.
    x, y
        Data passed to ``linear_fit``, which flattens, sorts and cleans them.
    alpha
        Significance level passed to ``summary_frame`` for the interval.

    The solid red line is the fitted values, the dotted red lines are the
    ``obs_ci_lower`` / ``obs_ci_upper`` columns of the prediction summary.
    """
    results, corr, x, y = linear_fit(x, y)

    # Build the prediction summary once; it holds both interval bounds.
    summary = results.get_prediction().summary_frame(alpha=alpha)
    iv_l = summary["obs_ci_lower"]
    iv_u = summary["obs_ci_upper"]

    # NOTE(review): the obs_ci_* columns are the interval for new observations
    # (prediction interval), while the legend entry calls it "CI" - confirm
    # which interval the figure is meant to show.
    ax.plot(x, results.fittedvalues, "r-", label="OLS fit ($\\rho$= {:.2f})".format(corr[0, 1]))
    ax.plot(x, iv_u, "r:", label=f"CI with $\\alpha$ = {alpha}")
    ax.plot(x, iv_l, "r:")

In [None]:
def label_from_attrs(
    da: xr.DataArray,
    return_name: bool = True,
    return_units: bool = True,
    linebreak: bool = False,
    name_width: Union[int, None] = None,
) -> str:
    """
    Build an axis label from the ``long_name`` and ``units`` attributes of ``da``.

    Parameters
    ----------
    da
        Object with an ``attrs`` mapping and a ``name`` attribute.
    return_name
        Include the name part. The name is ``attrs["long_name"]`` if present,
        otherwise ``da.name``.
    return_units
        Include the units part. Units are wrapped in LaTeX square brackets;
        if ``attrs`` has no ``"units"`` entry the placeholder ``[???]`` is used.
    linebreak
        Separate name and units by a newline instead of a space.
    name_width
        If given, the name is wrapped to this width with ``textwrap.fill``.

    Returns
    -------
    str
        The requested parts joined together, or ``""`` if both are disabled.
    """
    parts = []

    if return_name:
        if "long_name" in da.attrs:
            name = f"{da.attrs['long_name']}"
        else:
            name = f"{da.name}"
        if name_width is not None:
            name = textwrap.fill(name, name_width)
        parts.append(name)

    if return_units:
        if "units" in da.attrs:
            units = f"{da.attrs['units']}"
            if "$" not in units:
                units = f"${units}$"
            # The units end up inside a single math environment below, so any
            # math delimiters of their own are turned into plain spaces.
            units = units.replace("$", " ")
            units = rf"$\left[ {units} \right]$"
        else:
            units = "[???]"
        parts.append(units)

    separator = "\n" if linebreak else " "
    return separator.join(parts)

In [None]:
# Plot style: start from matplotlib's defaults, apply the project rcParams
# (the call returns the colour list used throughout this notebook), then
# switch off all four axes spines.
plt.style.use("default")
default_colors = set_custom_rcParams()
plt.rcParams.update(
    {
        "axes.spines.top": False,
        "axes.spines.right": False,
        "axes.spines.left": False,
        "axes.spines.bottom": False,
    }
)

# Variant of the colour list produced by adjust_lightness_array with amount=0.5.
dark_colors = adjust_lightness_array(default_colors, amount=0.5)

repo_path = RepositoryPath("levante")()
print(repo_path)

# NOTE(review): the data and script locations below are absolute, user-specific
# paths; the notebook only runs unchanged on a machine with this layout.
subdata_dir = "output_v3.5"
data_path = Path("/home/m/m301096/CLEO/data/") / subdata_dir

# Location of the combined dataset inside each microphysics sub-directory.
ds_subpath = "combined/eulerian_dataset_combined_v2.nc"

# THE PATH TO THE SCRIPT DIRECTORY
script_dir = Path("/home/m/m301096/repositories/sdm-eurec4a/notebooks/thesis/results/")
print(script_dir)


# Figures go to <repo>/results/<script dir relative to repo>/<subdata_dir>/environment.
# relative_to() raises ValueError if script_dir is not located inside repo_path.
fig_dir = repo_path / "results" / script_dir.relative_to(repo_path) / subdata_dir / "environment"
print(fig_dir)

fig_dir.mkdir(parents=True, exist_ok=True)

/home/m/m301096/repositories/sdm-eurec4a
/home/m/m301096/repositories/sdm-eurec4a/notebooks/thesis/results
/home/m/m301096/repositories/sdm-eurec4a/results/notebooks/thesis/results/output_v3.5/environment


# Set time slice to use for temporal mean and median

In [None]:
# Time window in seconds; passed to ``.sel(time=...)`` before temporal statistics.
time_slice = slice(1500, 3590)  # seconds
# Radius threshold in µm (not referenced in the cells visible here).
radius_split = 45  # µm
# Radius range in µm starting at 1 with an open upper end (not referenced in the cells visible here).
radius_slice = slice(1e0, None)  # µm

# Load datasets

In [None]:
class MicrophysicDict(TypedDict):
    """Record describing one microphysics setup and its simulation output."""

    # The dataset of this setup (an empty placeholder until the file is opened).
    dataset: xr.Dataset
    # Human-readable label of the setup; also stored in the dataset attributes.
    microphysics: str
    # Path of the netCDF file the dataset is opened from.
    path: Path
    # linestyle: Union[str, tuple]
    # Colour assigned to this setup for plotting.
    color: str


class OptionalDictOfMicrophysicDict(TypedDict):
    """
    Mapping from microphysics setup key to its record, every key optional.

    Carries the same keys as ``DictOfMicrophysicDict`` so that any subset of
    the setups can be described, including ``coalbure_condensation_cke``.
    """

    null_microphysics: NotRequired[MicrophysicDict]
    condensation: NotRequired[MicrophysicDict]
    collision_condensation: NotRequired[MicrophysicDict]
    coalbure_condensation_cke: NotRequired[MicrophysicDict]
    coalbure_condensation_large: NotRequired[MicrophysicDict]
    coalbure_condensation_small: NotRequired[MicrophysicDict]


class DictOfMicrophysicDict(TypedDict):
    """Mapping from microphysics setup key to its record, all six keys required."""

    null_microphysics: MicrophysicDict
    condensation: MicrophysicDict
    collision_condensation: MicrophysicDict
    coalbure_condensation_cke: MicrophysicDict
    coalbure_condensation_large: MicrophysicDict
    coalbure_condensation_small: MicrophysicDict


def _placeholder_entry(label: str) -> MicrophysicDict:
    """Return a record with the given label and placeholder path, dataset and colour."""
    return MicrophysicDict(
        microphysics=label,
        path=Path(),
        dataset=xr.Dataset(),
        color="k",
    )


# Path, dataset and colour of every entry are filled in by the loops below.
data_dict = DictOfMicrophysicDict(
    null_microphysics=_placeholder_entry("Null microphysics"),
    condensation=_placeholder_entry("Condensation/Evaporation"),
    collision_condensation=_placeholder_entry("Coll-coal, cond./evap."),
    coalbure_condensation_cke=_placeholder_entry("Coll-coal-breakup n by CKE\nand cond./evap."),
    coalbure_condensation_large=_placeholder_entry("Coll-coal-breakup (n=125)\nand cond./evap."),
    coalbure_condensation_small=_placeholder_entry("Coll-coal-breakup (n=5)\nand cond./evap."),
)

# Plot colour per microphysics setup.
colors_dict = {
    "null_microphysics": "grey",
    "condensation": default_colors[0],
    "collision_condensation": default_colors[1],
    "coalbure_condensation_cke": default_colors[2],
    "coalbure_condensation_large": default_colors[3],
    "coalbure_condensation_small": default_colors[4],
}

# The setup key is also the name of the sub-directory holding its output.
for mp, entry in data_dict.items():
    entry["path"] = data_path / f"{mp}" / ds_subpath
    entry["color"] = colors_dict[mp]

# Open every dataset in chunks of two clouds and tag it with its setup.
for key, entry in data_dict.items():
    ds = xr.open_dataset(entry["path"], chunks={"cloud_id": 2})
    # ds = ds.sel(cloud_id = [18, 301])
    ds.attrs.update(
        microphysics=entry["microphysics"],
        microphysics_short=key,
    )
    entry["dataset"] = ds

# ---------------------------------------------------- #
# Put all datasets on one common set of radius bins
# ---------------------------------------------------- #

# Sorted union of the radius bins found in any of the datasets.
radius_bins_per_setup = [entry["dataset"]["radius_bins"].values for entry in data_dict.values()]
combined_radius_bins = reduce(np.union1d, radius_bins_per_setup)

# Bins that a dataset does not have are filled with this value.
fill_value = np.nan
print("Number of radius bins:", len(combined_radius_bins))
print("Fill value:", fill_value)

for mp, entry in data_dict.items():
    entry["dataset"] = entry["dataset"].reindex(
        radius_bins=combined_radius_bins,
        fill_value=fill_value,
    )

Number of radius bins: 76
Fill value: nan


### All clouds which are simulated

In [None]:
# Cloud ids that are present in every one of the datasets.
cloud_ids_per_setup = [entry["dataset"]["cloud_id"].data for entry in data_dict.values()]
intersect_cloud_ids = reduce(np.intersect1d, cloud_ids_per_setup)

# Number of clouds available per setup.
for mp, entry in data_dict.items():
    print(mp, len(entry["dataset"]["cloud_id"]))

print("\nIntersect:", len(intersect_cloud_ids))
print("cloud_ids:", intersect_cloud_ids)

null_microphysics 94
condensation 78
collision_condensation 83
coalbure_condensation_cke 91
coalbure_condensation_large 93
coalbure_condensation_small 94

Intersect: 66
cloud_ids: [  9  11  18  20  21  22  65  67  68  71  72  73  74  88  94 110 113 114
 130 135 136 142 194 197 198 199 201 203 205 207 208 211 212 213 214 215
 217 218 219 220 221 222 223 224 230 233 235 236 237 292 293 295 296 301
 303 305 306 307 308 309 311 312 314 359 361 362]


In [None]:
# Two clouds singled out by id, each with its own colour.
clouds_dict = {
    "222": {"cloud_id": 222, "color": "r"},
    "142": {"cloud_id": 142, "color": "b"},
}

# Report for every setup whether each of these clouds is part of its dataset.
for key, entry in data_dict.items():
    ds = entry["dataset"]
    for cloud in clouds_dict.values():
        cloud_id = cloud["cloud_id"]
        is_in = cloud_id in ds["cloud_id"]
        print(f"{cloud_id}, {key}, {is_in}")

222, null_microphysics, True
142, null_microphysics, True
222, condensation, True
142, condensation, True
222, collision_condensation, True
142, collision_condensation, True
222, coalbure_condensation_cke, True
142, coalbure_condensation_cke, True
222, coalbure_condensation_large, True
142, coalbure_condensation_large, True
222, coalbure_condensation_small, True
142, coalbure_condensation_small, True


### Add further variables e.g. latent cooling

In [None]:
# NOTE: For now, it needs to be divided by 2 s to get values per second due to a bug.
# NOTE(review): no division by 2 is performed anywhere in this function - confirm
# whether the correction is applied upstream or is still missing.
def add_variables(
    ds: xr.Dataset, latent_heat_of_condensation: float = 2.265e6, time_slice=time_slice  # latent heat in J kg-1
) -> None:
    """
    Add derived variables and plotting attributes to ``ds`` in place.

    Parameters
    ----------
    ds
        Dataset to modify. Nothing is returned; all results are written to ``ds``.
    latent_heat_of_condensation
        Factor used to convert mass rates into latent heating.
    time_slice
        Selection passed to ``.sel(time=...)``. The default is bound to the
        module-level ``time_slice`` at the moment this function is defined.

    Variables written: ``sub_cloud_layer_mask`` (as bool), ``gridbox_thickness``,
    ``cloud_altitude``, ``xi`` (as float, large values masked), ``xi_per_volume``,
    ``number_superdroplets_per_volume``, ``mass_represented_per_volume``,
    ``evaporation_full``, ``latent_heating``, ``latent_heating_full``,
    ``latent_heating_mean``, ``latent_heating_sum``, ``latent_heating_radius_bins``,
    ``mask_latent_heating``, ``latent_heating_radius_bins_masked`` and
    ``latent_heating_radius_bins_time_median``.
    """
    # fix masks
    # Missing mask values count as "not in the sub-cloud layer".
    ds["sub_cloud_layer_mask"] = ds["sub_cloud_layer_mask"].fillna(0).astype(bool)

    # fix attributes
    ds["mass_represented"].attrs["long_name"] = "Mass"

    ds["radius_bins"].attrs["long_name"] = "Radius"
    ds["radius_bins"].attrs["units"] = "$\\mu m$"

    ds["relative_humidity"].attrs["long_name"] = "Relative humidity"
    ds["relative_humidity"].attrs["units"] = "$\\%$"

    ds["gridbox_thickness"] = ds["gridbox_top"] - ds["gridbox_bottom"]
    # Value of gridbox_coord3 at the gridbox given by max_gridbox.
    ds["cloud_altitude"] = ds["gridbox_coord3"].sel(gridbox=ds["max_gridbox"])

    # It seems that xi was stored as an integer. This is not wanted, because nan values will just be large integers.
    # Values of 1e12 and above are therefore replaced by NaN.
    ds["xi"] = ds["xi"].astype(float)
    ds["xi"] = ds["xi"].where(ds["xi"] < 1e12)

    ds["xi"].attrs["units"] = "$\\#$"
    ds["xi"].attrs["long_name"] = "Real droplet num. conc."

    ds["xi_per_volume"] = ds["xi"] / ds["gridbox_volume"]
    ds["xi_per_volume"].attrs["long_name"] = ds["xi"].attrs["long_name"]
    ds["xi_per_volume"].attrs["units"] = "$\\# m^{-3}$"

    # NOTE(review): the value is multiplied by 1000 while the unit label reads
    # 10^3 m^-3 - confirm that factor and label agree.
    ds["number_superdroplets_per_volume"] = 1000 * ds["number_superdroplets"] / ds["gridbox_volume"]
    ds["number_superdroplets_per_volume"].attrs["units"] = "$10^3m^{-3}$"
    ds["number_superdroplets_per_volume"].attrs["long_name"] = "Number of superdroplets per volume"

    # Factor 1e3: presumably kg -> g (assumes mass_represented is in kg - TODO confirm).
    ds["mass_represented_per_volume"] = 1e3 * ds["mass_represented"] / ds["gridbox_volume"]
    ds["mass_represented_per_volume"].attrs["units"] = "$g m^{-3}$"
    ds["mass_represented_per_volume"].attrs["long_name"] = "Mass"

    # Condensation mass change inside time_slice, kept only where the
    # sub-cloud-layer mask is True (NaN elsewhere).
    ds["evaporation_full"] = 1e3 * ds["massdelta_condensation"].sel(time=time_slice).where(
        ds["sub_cloud_layer_mask"]
    )
    ds["evaporation_full"].attrs["units"] = "$g m^{-3} s^{-1}$"
    ds["evaporation_full"].attrs["long_name"] = "Evaporation rate"
    ds["evaporation_full"].attrs["description"] = "Evaporation rate for all timesteps"

    # ds["evaporation"] = (
    #     ds["evaporation_full"].sel(time=time_slice).mean("time", keep_attrs=True, skipna=True)
    # )
    # ds["evaporation"].attrs["units"] = "$g m^{-3} s^{-1}$"
    # ds["evaporation"].attrs["long_name"] = "Evaporation rate"

    # ds["evaporation"].attrs["units"] = "$g m^{-3} s^{-1}$"
    # ds["evaporation"].attrs["long_name"] = "Evaporation rate"

    # Time mean of the evaporation rate; the factor 1e-3 undoes the 1e3 applied above.
    ds["latent_heating"] = (
        1e-3
        * ds["evaporation_full"].sel(time=time_slice).mean("time", keep_attrs=True, skipna=True)
        * latent_heat_of_condensation
    )  # kg m-3 s-1 * J kg-1 = W m-3
    ds["latent_heating"].attrs["units"] = "$W m^{-3}$"
    ds["latent_heating"].attrs["long_name"] = "Latent heating"

    # Same conversion without the time mean.
    ds["latent_heating_full"] = (
        1e-3 * ds["evaporation_full"].sel(time=time_slice) * latent_heat_of_condensation
    )  # kg m-3 s-1 * J kg-1 = W m-3
    ds["latent_heating_full"].attrs["units"] = "$W m^{-3}$"
    ds["latent_heating_full"].attrs["long_name"] = "Latent heating"
    ds["latent_heating_full"].attrs["description"] = "Latent heating for all timesteps"

    ds["latent_heating_mean"] = ds["latent_heating"].mean("gridbox", keep_attrs=True)
    # Thickness-weighted sum over the gridboxes, i.e. the column integral.
    ds["latent_heating_sum"] = (ds["latent_heating"] * ds["gridbox_thickness"]).sum(
        "gridbox", keep_attrs=True
    )
    ds["latent_heating_sum"].attrs["units"] = "$W m^{-2}$"
    ds["latent_heating_sum"].attrs["long_name"] = "Column int. latent heating"

    # Latent heating from mass_difference_per_volume, restricted to time_slice
    # and the sub-cloud layer.
    # NOTE(review): unlike above no unit factor is applied here - confirm the
    # units of mass_difference_per_volume.
    ds["latent_heating_radius_bins"] = (
        ds["mass_difference_per_volume"].sel(time=time_slice).where(ds["sub_cloud_layer_mask"])
        * latent_heat_of_condensation
    )
    ds["latent_heating_radius_bins"].attrs["units"] = "$W m^{-3}$"
    ds["latent_heating_radius_bins"].attrs["long_name"] = "Latent heating"

    # In order to mask out outliers in the latent heating field,
    # the total latent heating at a timestep for a gridbox should not exceed
    # the minimum or maximum value of the latent heating field from CLEOs monitor output.
    # NOTE(review): the code below keeps values strictly between the 5 % and
    # 95 % quantiles over time, which is not the min/max criterion described
    # here and in the "description" attribute - confirm which one is intended.
    ds["mask_latent_heating"] = (
        ds["latent_heating_radius_bins"] > ds["latent_heating_radius_bins"].quantile(0.05, "time")
    ) & (ds["latent_heating_radius_bins"] < ds["latent_heating_radius_bins"].quantile(0.95, "time"))
    ds["mask_latent_heating"].attrs[
        "description"
    ] = "In order to mask out outliers in the latent heating field, the total latent heating at a timestep for a gridbox should not exceed the minimum or maximum value of the latent heating field from CLEOs monitor output."

    ds["latent_heating_radius_bins_masked"] = ds["latent_heating_radius_bins"].where(
        ds["mask_latent_heating"]
    )

    # Median over time of the unmasked per-radius-bin latent heating.
    ds["latent_heating_radius_bins_time_median"] = (
        ds["latent_heating_radius_bins"]
        .sel(time=time_slice)
        .median("time", keep_attrs=True, skipna=True)
    )
    ds["latent_heating_radius_bins_time_median"].attrs["units"] = "$W m^{-3}$"
    ds["latent_heating_radius_bins_time_median"].attrs["long_name"] = "Latent heating"
    ds["latent_heating_radius_bins_time_median"].attrs[
        "description"
    ] = "Median latent heating for all timesteps"


# Extend every dataset in place; add_variables returns nothing.
for mp in data_dict:
    ds = data_dict[mp]["dataset"]
    add_variables(ds)

# Analysis of the domain and metrics

## Identify outliers


Cloud ``296`` appears to be an outlier; it is excluded from the cloud selection below.

In [None]:
# Clouds present in every setup, without the outlier cloud_id 296.
# (The outlier was looked for with a quick plot of the mass in gridbox 0,
# summed over the radius bins, for each setup.)
cloud_id_selection = intersect_cloud_ids[intersect_cloud_ids != 296]

# One dataset per setup, restricted to the selected clouds.
null_microphysics: xr.Dataset = data_dict["null_microphysics"]["dataset"].sel(
    cloud_id=cloud_id_selection
)
condensation: xr.Dataset = data_dict["condensation"]["dataset"].sel(
    cloud_id=cloud_id_selection
)
collision_condensation: xr.Dataset = data_dict["collision_condensation"]["dataset"].sel(
    cloud_id=cloud_id_selection
)
coalbure_condensation_cke: xr.Dataset = data_dict["coalbure_condensation_cke"]["dataset"].sel(
    cloud_id=cloud_id_selection
)
coalbure_condensation_large: xr.Dataset = data_dict["coalbure_condensation_large"]["dataset"].sel(
    cloud_id=cloud_id_selection
)
coalbure_condensation_small: xr.Dataset = data_dict["coalbure_condensation_small"]["dataset"].sel(
    cloud_id=cloud_id_selection
)

# Cloud properties

#### Cloud height impact

In [None]:
data = condensation

fig, ax = plt.subplots(ncols=1, figsize=(5, 5), sharey=False)

# Vertical profiles of the time-mean latent heating inside the sub-cloud layer;
# ax.plot draws one line per column of the transposed arrays.
sub_cloud_latent_heating = data["latent_heating"].where(data["sub_cloud_layer_mask"])
profile_style = dict(linewidth=1.5, alpha=0.5, color=default_colors[0])
ax.plot(sub_cloud_latent_heating.T, data["gridbox_coord3"].T, **profile_style)

ax.set(
    xlabel=label_from_attrs(data["latent_heating"]),
    ylabel="Height [m]",
)

ax.set_yticks([0, 250, 500, 750, 1000])
ax.set_ylim([0, 1100])

fig.tight_layout()
fig.savefig(fig_dir / "profiles_latent_heating.svg")
fig.savefig(fig_dir / "profiles_latent_heating.pdf")

In [None]:
data = condensation

fig, ax = plt.subplots(ncols=1, figsize=(5, 5), sharey=False)

# Thickness-weighted latent heating per gridbox, ordered by descending gridbox
# index so that the cumulative sum starts at the highest index.
layer_heating = data["latent_heating"] * data["gridbox_thickness"]
layer_heating_descending = layer_heating.sortby(-data["gridbox"])
lh_cumsum = (
    layer_heating_descending.cumsum("gridbox", skipna=True)
    .where(data["sub_cloud_layer_mask"])
    .T
)
# Back to ascending gridbox order for plotting.
lh_cumsum = lh_cumsum.sortby(lh_cumsum["gridbox"])

lh_cumsum.attrs.update(
    {
        "long_name": "Column int. latent heating",
        "units": "$W m^{-2}$",
    }
)

# Vertical profiles, selecting gridbox labels from 1 upwards.
plotted_gridboxes = slice(1, None)
ax.plot(
    lh_cumsum.sel(gridbox=plotted_gridboxes),
    data["gridbox_coord3"].sel(gridbox=plotted_gridboxes).T,
    linewidth=1.5,
    alpha=0.5,
    color=default_colors[0],
)
ax.set(xlabel=label_from_attrs(lh_cumsum), ylabel="Height [m]")

ax.set_yticks([0, 250, 500, 750, 1000])
ax.set_ylim([0, 1100])
ax.set_xlim([-60, 0])

fig.tight_layout()
fig.savefig(fig_dir / "profiles_column_integrated_latent_heating.svg")
fig.savefig(fig_dir / "profiles_column_integrated_latent_heating.pdf")

### Histogram of column integrated latent heating

In [None]:
data = condensation.sel(time=time_slice)
ylim = [0, 12]

# Column integral: thickness-weighted sum of the latent heating over all gridboxes.
da = (data["latent_heating_full"] * data["gridbox_thickness"]).sum("gridbox")
da.attrs["units"] = "Wm^{-2}"
da.attrs["long_name"] = "Column int. latent heating"

# First reduce over time for every cloud, then over the clouds; the second
# call is handed the per-cloud standard errors via ``data_std``.
da_time_mean, da_time_sem = mean_and_stderror_of_mean(
    data=da,
    dims=("time",),
)
da_cloud_mean, da_cloud_sem = mean_and_stderror_of_mean(
    data=da_time_mean,
    dims=("cloud_id",),
    data_std=da_time_sem,
)


fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(
    da_time_mean,
    bins=np.arange(-60, -10, 2),
    color="k",
    alpha=0.5,
    density=False,
)
# Raw f-string: ``\pm`` must reach matplotlib's mathtext unchanged, and in a
# normal string literal ``\p`` is an invalid escape sequence.
ax.axvline(
    da_cloud_mean,
    color="darkorange",
    label=rf"Mean: {da_cloud_mean.values:.1f} $\pm$ {da_cloud_sem.values:.1f} " + "$Wm^{-2}$",
)

# Shaded band of two standard errors around the mean over all clouds.
ax.fill_betweenx(
    ylim,
    da_cloud_mean - 2 * da_cloud_sem,
    da_cloud_mean + 2 * da_cloud_sem,
    color="orange",
    alpha=0.3,
    label="2 SEM",
)

ax.set_xlabel(label_from_attrs(da))
ax.set_ylabel("Occurrence")
# ax.set_title(f"Histogram of {label_from_attrs(da, return_units=False).lower()}")
ax.legend()
ax.set_ylim(ylim)
ax.set_xlim([-60, -10])

# The file names are kept exactly as before so existing references stay valid.
fig.savefig(fig_dir / "histogram_column_integreate_latent_heating.svg")
fig.savefig(fig_dir / "histogram_column_integreate_latent_heating.pdf")

### Column integrated latent heating vs. LWC at cloud base

In [None]:
data = condensation.sel(time=time_slice)
ylim = [0, 12]

# Attributes used for labelling the two quantities.
lhci_attrs = {"units": "Wm^{-2}", "long_name": "Column int. latent heating"}
lwc_attrs = {"units": "g m^{-3}", "long_name": "Liquid water content"}

# Column integrated latent heating: thickness-weighted sum over the gridboxes.
lhci = (data["latent_heating_full"] * data["gridbox_thickness"]).sum("gridbox")
lhci.attrs.update(lhci_attrs)
lhci = lhci.compute()

# Mass per volume at the gridbox given by max_gridbox, summed over all radius bins.
mass_at_max_gridbox = data["mass_represented_per_volume"].sel(gridbox=data["max_gridbox"])
lwc = mass_at_max_gridbox.sum("radius_bins", keep_attrs=True)
lwc = lwc.compute()
lwc.attrs.update(lwc_attrs)


# Mean and standard error over time, per cloud.
lhci_time_mean, lhci_time_sem = mean_and_stderror_of_mean(data=lhci, dims=("time",))
lwc_time_mean, lwc_time_sem = mean_and_stderror_of_mean(data=lwc, dims=("time",))

lhci_time_mean = lhci_time_mean.compute()
lhci_time_mean.attrs.update(lhci_attrs)
lhci_time_sem = lhci_time_sem.compute()

lwc_time_mean = lwc_time_mean.compute()
lwc_time_mean.attrs.update(lwc_attrs)
lwc_time_sem = lwc_time_sem.compute()


# Mean and standard error over all clouds. Only the latent heating call
# passes on the per-cloud standard errors.
lhci_cloud_mean, lhci_cloud_sem = mean_and_stderror_of_mean(
    data=lhci_time_mean,
    dims=("cloud_id",),
    data_std=lhci_time_sem,
)
lwc_cloud_mean, lwc_cloud_sem = mean_and_stderror_of_mean(
    data=lwc_time_mean,
    dims=("cloud_id",),
    data_std=None,
)

lhci_cloud_mean, lhci_cloud_sem = lhci_cloud_mean.compute(), lhci_cloud_sem.compute()
lwc_cloud_mean, lwc_cloud_sem = lwc_cloud_mean.compute(), lwc_cloud_sem.compute()

In [None]:
fig, ax = plt.subplots(figsize=(5, 4.5))

# One point per cloud: time-mean liquid water content against time-mean
# column integrated latent heating.
point_style = dict(marker=".", linestyle="", label="Individual cloud")
ax.scatter(x=lwc_time_mean, y=lhci_time_mean, **point_style)

# Regression line and interval bounds on top of the points.
linear_fit_plot(ax, lwc_time_mean, lhci_time_mean)

ax.set(
    xlabel=label_from_attrs(lwc_time_mean),
    ylabel=label_from_attrs(lhci_time_mean),
)
ax.set_xlim([0, 0.17])
ax.set_xticks([0, 0.05, 0.1, 0.15])
ax.set_ylim([-60, -10])
ax.legend()
fig.tight_layout()
fig.savefig(fig_dir / "scatter-column_integrated_latent_heating-liquid_water_content.svg")

# Thermodynamics

In [None]:
# State the input explicitly instead of relying on the ``data`` left behind by
# an earlier cell; this is the same selection the preceding analysis cells use.
data = condensation.sel(time=time_slice)

# Time-mean latent heating scaled by 1e3, labelled with an "m" (milli) prefix
# in front of the original unit string.
latent_heating = 1e3 * condensation["latent_heating"]
latent_heating.attrs["units"] = "m" + condensation["latent_heating"].attrs["units"].replace("$", "")
latent_heating.attrs["long_name"] = condensation["latent_heating"].attrs["long_name"]

# Mass per volume summed over the radius bins and averaged over the time
# window, kept only inside the sub-cloud layer.
liquid_water_content = (
    data["mass_represented_per_volume"]
    .sum("radius_bins")
    .sel(time=time_slice)
    .mean("time", keep_attrs=True)
    .astype(float)
)
liquid_water_content = liquid_water_content.where(data["sub_cloud_layer_mask"]).compute()
liquid_water_content.attrs.update(
    units="g m^{-3}",
    long_name="Liquid water content",
)
# Value one gridbox below max_gridbox, repeated along the gridbox dimension.
liquid_water_content_init_1d = liquid_water_content.sel(gridbox=data["max_gridbox"] - 1).compute()
# ``drop_vars`` replaces the deprecated ``DataArray.drop``: the leftover
# ``gridbox`` coordinate has to go before the dimension can be re-created.
liquid_water_content_init = liquid_water_content_init_1d.drop_vars("gridbox").expand_dims(
    gridbox=data["gridbox"]
)

relative_humidity = data["relative_humidity"]
relative_humidity = relative_humidity.where(data["sub_cloud_layer_mask"]).compute()

coord3 = data["gridbox_coord3"]

In [None]:
# calculate the slope of a linear regression for the relative humidity for all clouds


def slope_intercept(x, y, **kwargs):
    """
    Fit a polynomial with ``np.polyfit`` to the finite pairs of ``x`` and ``y``.

    Parameters
    ----------
    x, y
        Arrays of equal shape. Pairs in which either value is not finite are
        left out of the fit.
    **kwargs
        Passed on to ``np.polyfit``. ``deg=1`` is expected, because the result
        is unpacked into exactly two coefficients.

    Returns
    -------
    tuple
        ``(slope, intercept)``, or ``(nan, nan)`` if the fit is not possible:
        no finite pair is left, the inputs do not fit together, the fit does
        not yield exactly two coefficients, or the least-squares solver fails.
    """
    try:
        idx = np.isfinite(x) & np.isfinite(y)
        slope, intercept = np.polyfit(x[idx], y[idx], **kwargs)
    # Only failures of the fit itself are turned into NaN; anything else,
    # e.g. a KeyboardInterrupt, is no longer swallowed.
    except (TypeError, ValueError, IndexError, np.linalg.LinAlgError):
        return np.nan, np.nan
    return slope, intercept


# Slope and intercept of relative humidity against height, one fit per cloud.
# The height coordinate is scaled by 1e-3 so that slopes are per kilometre.
relative_humidity_slopes, rh_inter = xr.apply_ufunc(
    slope_intercept,
    1e-3 * data["gridbox_coord3"].chunk(dict(cloud_id=-1)).compute(),
    relative_humidity.chunk(dict(cloud_id=-1)).compute(),
    input_core_dims=[["gridbox"], ["gridbox"]],
    output_core_dims=[[], []],
    exclude_dims={"gridbox"},
    # output_sizes={"slope": 1, "intercept": 1},
    # output_dtypes=[[float], [float]],
    vectorize=True,
    dask="parallelized",
    kwargs=dict(deg=1),
)
# The units are taken from ``relative_humidity``; the name ``rh`` used here
# before is not defined anywhere in this notebook.
relative_humidity_slopes.attrs["units"] = relative_humidity.attrs["units"] + " (km)^{-1}"
relative_humidity_slopes.attrs["long_name"] = "Relative humidity vertical gradient"
relative_humidity_slopes = relative_humidity_slopes.compute()

# Same fit for the latent heating.
latent_heating_slopes, latent_heating_inter = xr.apply_ufunc(
    slope_intercept,
    1e-3 * data["gridbox_coord3"].chunk(dict(cloud_id=-1)).compute(),
    latent_heating.chunk(dict(cloud_id=-1)).compute(),
    input_core_dims=[["gridbox"], ["gridbox"]],
    output_core_dims=[[], []],
    exclude_dims={"gridbox"},
    # output_sizes={"slope": 1, "intercept": 1},
    # output_dtypes=[[float], [float]],
    vectorize=True,
    dask="parallelized",
    kwargs=dict(deg=1),
)

latent_heating_slopes.attrs["units"] = latent_heating.attrs["units"] + " (km)^{-1}"
latent_heating_slopes.attrs["long_name"] = "Latent heating vertical gradient"
latent_heating_slopes = latent_heating_slopes.compute()

In [None]:
fig, ax = plt.subplots(figsize=(10, 4.5))

# Liquid water content against relative humidity, coloured by latent heating
# on a colour scale fixed to the range -100 ... 0.
marker_style = dict(marker=".", alpha=0.8, cmap="inferno")
sc = ax.scatter(
    liquid_water_content,
    relative_humidity,
    c=latent_heating,
    vmin=-100,
    vmax=0,
    **marker_style,
)
ax.set(
    xlabel=label_from_attrs(liquid_water_content),
    ylabel=label_from_attrs(relative_humidity),
)
fig.colorbar(sc, ax=ax, label=label_from_attrs(latent_heating))

fig.savefig(fig_dir / "scatter_latent_heating_mass_rh.svg")
fig.savefig(fig_dir / "scatter_latent_heating_mass_rh.pdf")

In [None]:
fig, axs = plt.subplots(figsize=(10, 6.5), sharey=True, nrows=2)
marker_style = dict(marker=".", alpha=0.8, cmap="inferno")

# Upper panel: latent heating against height, coloured by liquid water content.
sc = axs[0].scatter(latent_heating, coord3, c=liquid_water_content, **marker_style)
fig.colorbar(sc, ax=axs[0], label=label_from_attrs(liquid_water_content))
axs[0].set_xlabel(label_from_attrs(latent_heating))

# Impact of relative humidity slope
# Lower panel: latent heating relative to its value two gridboxes below
# max_gridbox, coloured by the per-cloud relative humidity gradient.
latent_heating_reference = latent_heating.sel(gridbox=data["max_gridbox"] - 2)
slope_per_gridbox = relative_humidity_slopes.expand_dims(gridbox=data["gridbox"])
sc = axs[1].scatter(
    latent_heating - latent_heating_reference,
    coord3,
    c=slope_per_gridbox.T,
    **marker_style,
)
fig.colorbar(sc, ax=axs[1], label=label_from_attrs(relative_humidity_slopes, name_width=25))
axs[1].set_xlabel(f"Difference to cloud layer of {label_from_attrs(latent_heating)}")

for ax in axs:
    ax.set_ylabel("Height [m]")
    ax.set_ylim([0, 1100])

fig.tight_layout()
fig.savefig(fig_dir / "scatter_latent_heating.svg")
fig.savefig(fig_dir / "scatter_latent_heating.pdf")

In [None]:
fig, ax = plt.subplots(figsize=(5, 4.5))

# One point per cloud: relative humidity gradient against latent heating gradient.
point_style = dict(marker=".", linestyle="", label="Individual cloud")
ax.scatter(x=relative_humidity_slopes, y=latent_heating_slopes, **point_style)

# Regression line and interval bounds on top of the points.
linear_fit_plot(ax, relative_humidity_slopes, latent_heating_slopes)

ax.set(
    xlabel=label_from_attrs(relative_humidity_slopes),
    ylabel=label_from_attrs(latent_heating_slopes, linebreak=True),
)
ax.legend(loc="upper left")
fig.tight_layout()
fig.savefig(fig_dir / "scatter-latent_heating_slope-relative_humidity_slope.svg")

# Combination RH slope and LWC

In [None]:
# Left panel: LWC against column integrated latent heating.
# Right panel: relative humidity gradient against latent heating gradient.
varibles = [lwc_time_mean, relative_humidity_slopes]
y_variables = [lhci_time_mean, latent_heating_slopes]
fig, axs = plt.subplots(figsize=(10, 4.5), ncols=len(varibles))

for i, (var, y_var) in enumerate(zip(varibles, y_variables)):
    panel = axs[i]
    panel.scatter(var, y_var, marker=".", alpha=1, label="Individual clouds")
    linear_fit_plot(ax=panel, x=var, y=y_var, alpha=0.05)
    panel.set_xlabel(label_from_attrs(var))
    panel.set_ylabel(label_from_attrs(y_var))

left_panel, right_panel = axs

left_panel.set_xlim([0, 0.17])
left_panel.set_xticks([0, 0.05, 0.1, 0.15])

# The right-hand y label is long, so it is replaced by a two-line version.
right_panel.set_ylabel(label_from_attrs(latent_heating_slopes, linebreak=True))

left_panel.legend(loc="upper right")
right_panel.legend(loc="upper left")
fig.tight_layout()

fig.savefig(fig_dir / "scatter-cloud_properties-thermodynamics.svg")
fig.savefig(fig_dir / "scatter-cloud_properties-thermodynamics.pdf")

In [None]:
# NOTE(review): this cell repeats the previous figure with half-transparent
# markers (alpha=0.5) and saves under the same file names, so it overwrites
# the files written by the previous cell - confirm which version is wanted.
varibles = [lwc_time_mean, relative_humidity_slopes]
y_variables = [lhci_time_mean, latent_heating_slopes]
fig, axs = plt.subplots(figsize=(10, 4.5), ncols=len(varibles))

for i, (var, y_var) in enumerate(zip(varibles, y_variables)):
    panel = axs[i]
    panel.scatter(var, y_var, marker=".", alpha=0.5, label="Individual clouds")
    linear_fit_plot(ax=panel, x=var, y=y_var, alpha=0.05)
    panel.set_xlabel(label_from_attrs(var))
    panel.set_ylabel(label_from_attrs(y_var))

left_panel, right_panel = axs

left_panel.set_xlim([0, 0.17])
left_panel.set_xticks([0, 0.05, 0.1, 0.15])

# The right-hand y label is long, so it is replaced by a two-line version.
right_panel.set_ylabel(label_from_attrs(latent_heating_slopes, linebreak=True))

left_panel.legend(loc="upper right")
right_panel.legend(loc="upper left")
fig.tight_layout()

fig.savefig(fig_dir / "scatter-cloud_properties-thermodynamics.svg")
fig.savefig(fig_dir / "scatter-cloud_properties-thermodynamics.pdf")

In [None]:
def linear_fit_uncertainty(x: xr.DataArray, y: xr.DataArray):
    """
    Fit ``y`` against ``x`` with ``linear_fit_new`` and return the first two fit
    parameters together with their standard errors and the correlation.

    Parameters
    ----------
    x : xr.DataArray
        Independent variable, passed on unchanged to ``linear_fit_new``.
    y : xr.DataArray
        Dependent variable, passed on unchanged to ``linear_fit_new``.

    Returns
    -------
    tuple
        ``(params[0], bse[0], params[1], bse[1], corr[1, 0])`` taken from the fit
        result and the correlation matrix returned by ``linear_fit_new``.
        NOTE(review): presumably ``params[0]`` is the intercept and ``params[1]``
        the slope - confirm against ``linear_fit_new``.
    """
    fit, correlation = linear_fit_new(x=x, y=y)
    coefficients, standard_errors = fit.params, fit.bse

    return (
        coefficients[0],
        standard_errors[0],
        coefficients[1],
        standard_errors[1],
        correlation[1, 0],
    )


# Apply ``linear_fit_uncertainty`` along the ``gridbox`` dimension, i.e. one fit per
# remaining index (vectorize=True), for relative humidity and for latent heating.
# The x-variable is ``gridbox_coord3`` scaled by 1e-3
# (presumably metres -> kilometres; confirm the units of ``gridbox_coord3``).
height_scaled = 1e-3 * data["gridbox_coord3"].chunk(dict(cloud_id=-1)).compute()

# Settings shared by both calls: both inputs are consumed along ``gridbox`` and
# five scalar outputs are produced per fit.
per_cloud_fit_kwargs = dict(
    input_core_dims=[["gridbox"], ["gridbox"]],
    output_core_dims=[[] for _ in range(5)],
    exclude_dims={"gridbox"},
    vectorize=True,
    dask="parallelized",
)

_, _, rh_slopes, rh_slopes_error, rh_corr = xr.apply_ufunc(
    linear_fit_uncertainty,
    height_scaled,
    relative_humidity.chunk(dict(cloud_id=-1)).compute(),
    **per_cloud_fit_kwargs,
)

_, _, lh_slopes, lh_slopes_error, lh_corr = xr.apply_ufunc(
    linear_fit_uncertainty,
    height_scaled,
    latent_heating.chunk(dict(cloud_id=-1)).compute(),
    **per_cloud_fit_kwargs,
)

Example of linear regression with uncertainty

In [None]:
# Regress the latent heating slopes on the relative humidity slopes and show the
# fitted line with a band built from +-2 standard errors of both fit parameters.
i, ie, s, se, corr = linear_fit_uncertainty(x=rh_slopes, y=lh_slopes)

x = np.arange(0, 30)
lower_band = (s - 2 * se) * x + i - 2 * ie
upper_band = (s + 2 * se) * x + i + 2 * ie
fit_line = s * x + i

plt.fill_between(x=x, y1=lower_band, y2=upper_band, alpha=0.3, color="grey")
plt.plot(x, fit_line, label=f"y = {s:.2f}x + {i:.2f}", color="k")
plt.scatter(x=rh_slopes, y=lh_slopes)

plt.legend()

<matplotlib.legend.Legend at 0x7ffe044be510>

# Histogram of LWC

In [None]:
# Bin edges shared by the LWC histograms below: 0, 5, ..., 170
lwc_bins = np.arange(start=0, stop=171, step=5)

Cloud LWC

In [None]:
# Histogram of the per-cloud, time-averaged liquid water content (LWC) in the gridbox
# ``max_gridbox`` of every cloud, with the mean over all clouds and a +-2 SEM band.
data = condensation.sel(time=time_slice)
ylim = (0, 12)

# Total LWC over all radius bins, scaled by 1e6 and labelled as mg m^-3
da = 1e6 * data["liquid_water_content"].sum("radius_bins")
da.attrs["units"] = "$mg m^{-3}$"
da.attrs["long_name"] = "Liquid water content"

da = da.sel(gridbox=data["max_gridbox"])

# Step 1: mean and standard error of the mean over time, per cloud
da_time_mean, da_time_sem = mean_and_stderror_of_mean(
    data=da,
    dims=("time",),
)
da_time_mean = da_time_mean.compute()
da_time_sem = da_time_sem.compute()

# Step 2: mean over all clouds; the per-cloud SEM is handed over as ``data_std``
da_cloud_mean, da_cloud_sem = mean_and_stderror_of_mean(
    data=da_time_mean,
    dims=("cloud_id",),
    data_std=da_time_sem,
)
da_cloud_mean = da_cloud_mean.compute()
da_cloud_sem = da_cloud_sem.compute()


fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(
    da_time_mean,
    bins=lwc_bins,
    color="k",
    alpha=0.5,
    density=False,
)
ax.axvline(
    da_cloud_mean,
    color="darkorange",
    # Raw f-string: "\p" is not a valid Python escape sequence, so a plain f-string
    # triggers a SyntaxWarning on Python >= 3.12. The rendered text is unchanged.
    label=rf"Mean: {da_cloud_mean.values:.2f} $\pm$ {da_cloud_sem.values:.2f} "
    + label_from_attrs(da, return_name=False),
)

ax.fill_betweenx(
    ylim,
    da_cloud_mean - 2 * da_cloud_sem,
    da_cloud_mean + 2 * da_cloud_sem,
    color="orange",
    alpha=0.3,
    label="2 SEM",
)

ax.set_xlabel(label_from_attrs(da))
ax.set_ylabel("Occurence")
ax.set_title(f"Histogram of {label_from_attrs(da, return_units=False).lower()} in cloud layer")
ax.legend()

ax.set_ylim(ylim)
ax.set_xlim([lwc_bins[0], lwc_bins[-1]])
fig.savefig(fig_dir / "liquid_water_content_histogram_cloud.svg")

Top sub cloud layer

In [None]:
# Histogram of the per-cloud, time-averaged liquid water content (LWC) two gridboxes
# below ``max_gridbox``, with the mean over all clouds and a +-2 SEM band.
data = condensation.sel(time=time_slice)
ylim = (0, 12)

# Total LWC over all radius bins, scaled by 1e6 and labelled as mg m^-3
da = 1e6 * data["liquid_water_content"].sum("radius_bins")
da.attrs["units"] = "$mg m^{-3}$"
da.attrs["long_name"] = "Liquid water content"

da = da.sel(gridbox=data["max_gridbox"] - 2)

# Step 1: mean and standard error of the mean over time, per cloud
da_time_mean, da_time_sem = mean_and_stderror_of_mean(
    data=da,
    dims=("time",),
)
da_time_mean = da_time_mean.compute()
da_time_sem = da_time_sem.compute()

# Step 2: mean over all clouds; the per-cloud SEM is handed over as ``data_std``
da_cloud_mean, da_cloud_sem = mean_and_stderror_of_mean(
    data=da_time_mean,
    dims=("cloud_id",),
    data_std=da_time_sem,
)
da_cloud_mean = da_cloud_mean.compute()
da_cloud_sem = da_cloud_sem.compute()


fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(
    da_time_mean,
    bins=lwc_bins,
    color="k",
    alpha=0.5,
    density=False,
)
ax.axvline(
    da_cloud_mean,
    color="darkorange",
    # Raw f-string: "\p" is not a valid Python escape sequence, so a plain f-string
    # triggers a SyntaxWarning on Python >= 3.12. The rendered text is unchanged.
    label=rf"Mean: {da_cloud_mean.values:.2f} $\pm$ {da_cloud_sem.values:.2f} "
    + label_from_attrs(da, return_name=False),
)

ax.fill_betweenx(
    ylim,
    da_cloud_mean - 2 * da_cloud_sem,
    da_cloud_mean + 2 * da_cloud_sem,
    color="orange",
    alpha=0.3,
    label="2 SEM",
)

ax.set_xlabel(label_from_attrs(da))
ax.set_ylabel("Occurence")
ax.set_title(f"Histogram of {label_from_attrs(da, return_units=False).lower()} in sub-cloud layer")
ax.legend()

ax.set_ylim(ylim)
ax.set_xlim([lwc_bins[0], lwc_bins[-1]])

fig.savefig(fig_dir / "liquid_water_content_histogram_subcloud.svg")

Surface LWC

In [None]:
# Histogram of the per-cloud, time-averaged liquid water content (LWC) in gridbox 0,
# with the mean over all clouds and a +-2 SEM band.
data = condensation.sel(time=time_slice)
ylim = (0, 12)

# Total LWC over all radius bins, scaled by 1e6 and labelled as mg m^-3
da = 1e6 * data["liquid_water_content"].sum("radius_bins")
da.attrs["units"] = "$mg m^{-3}$"
da.attrs["long_name"] = "Liquid water content"

da = da.sel(gridbox=0)

# Step 1: mean and standard error of the mean over time, per cloud
da_time_mean, da_time_sem = mean_and_stderror_of_mean(
    data=da,
    dims=("time",),
)
da_time_mean = da_time_mean.compute()
da_time_sem = da_time_sem.compute()

# Step 2: mean over all clouds; the per-cloud SEM is handed over as ``data_std``
da_cloud_mean, da_cloud_sem = mean_and_stderror_of_mean(
    data=da_time_mean,
    dims=("cloud_id",),
    data_std=da_time_sem,
)
da_cloud_mean = da_cloud_mean.compute()
da_cloud_sem = da_cloud_sem.compute()

fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(
    da_time_mean,
    bins=lwc_bins,
    color="k",
    alpha=0.5,
    density=False,
)
ax.axvline(
    da_cloud_mean,
    color="darkorange",
    # Raw f-string: "\p" is not a valid Python escape sequence, so a plain f-string
    # triggers a SyntaxWarning on Python >= 3.12. The rendered text is unchanged.
    label=rf"Mean: {da_cloud_mean.values:.2f} $\pm$ {da_cloud_sem.values:.2f} "
    + label_from_attrs(da, return_name=False),
)

ax.fill_betweenx(
    ylim,
    da_cloud_mean - 2 * da_cloud_sem,
    da_cloud_mean + 2 * da_cloud_sem,
    color="orange",
    alpha=0.3,
    label="2 SEM",
)

ax.set_xlabel(label_from_attrs(da))
ax.set_ylabel("Occurence")
ax.set_title(f"Histogram of {label_from_attrs(da, return_units=False).lower()} in surface layer")
ax.legend()

ax.set_ylim(ylim)
ax.set_xlim([lwc_bins[0], lwc_bins[-1]])

fig.savefig(fig_dir / "liquid_water_content_histogram_surface.svg")

#### Difference

Total

In [None]:
# Histogram of the per-cloud difference in time-averaged LWC between the gridbox two
# below ``max_gridbox`` (layer 1) and gridbox 0 (layer 0).
data = condensation.sel(time=time_slice)
ylim = (0, 15)

# Total LWC over all radius bins, scaled by 1e6 and labelled as mg m^-3
da = 1e6 * data["liquid_water_content"].sum("radius_bins")
da.attrs["units"] = "$mg m^{-3}$"
da.attrs["long_name"] = "Liquid water content"

# Stack both levels along a new ``layer`` dimension: index 0 -> gridbox 0,
# index 1 -> ``max_gridbox - 2``
da = xr.concat(
    [da.sel(gridbox=0), da.sel(gridbox=data["max_gridbox"] - 2)],
    dim="layer",
)

da_raw_time_mean, da_raw_time_sem = mean_and_stderror_of_mean(
    data=da,
    dims=("time",),
)

# The total difference (layer 1 minus layer 0); the standard errors of both layers
# are added in quadrature.
da_time_mean = da_raw_time_mean.sel(layer=1) - da_raw_time_mean.sel(layer=0)
da_time_sem: xr.DataArray = np.sqrt(
    da_raw_time_sem.sel(layer=1) ** 2 + da_raw_time_sem.sel(layer=0) ** 2
)
da_time_mean = da_time_mean.compute()
da_time_sem = da_time_sem.compute()


# Mean over all clouds; the per-cloud SEM is handed over as ``data_std``
da_cloud_mean, da_cloud_sem = mean_and_stderror_of_mean(
    data=da_time_mean,
    dims=("cloud_id",),
    data_std=da_time_sem,
)

da_cloud_mean = da_cloud_mean.compute()
da_cloud_sem = da_cloud_sem.compute()


fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(
    da_time_mean,
    bins=20,
    color="k",
    alpha=0.5,
    density=False,
)
ax.axvline(
    da_cloud_mean,
    color="darkorange",
    # Raw f-string: "\p" is not a valid Python escape sequence, so a plain f-string
    # triggers a SyntaxWarning on Python >= 3.12. The rendered text is unchanged.
    label=rf"Mean: {da_cloud_mean.values:.1f} $\pm$ {da_cloud_sem.values:.1f} "
    + label_from_attrs(da, return_name=False),
)

ax.fill_betweenx(
    ylim,
    da_cloud_mean - 2 * da_cloud_sem,
    da_cloud_mean + 2 * da_cloud_sem,
    color="orange",
    alpha=0.3,
    label="2 SEM",
)

ax.set_xlabel("Difference in " + label_from_attrs(da))
ax.set_ylabel("Occurence")
ax.set_title("Difference in LWC top sub cloud layer to surface")
ax.legend()

ax.set_ylim(ylim)
fig.savefig(fig_dir / "liquid_water_content_histogram_diff_total.svg")

Fraction

In [None]:
# Covariance, taken across clouds, between the time-mean LWC of layer 1 and layer 0
xr.cov(
    da_raw_time_mean.sel(layer=1),
    da_raw_time_mean.sel(layer=0),
    dim="cloud_id",
).values

array(1047.03182537)

In [None]:
# Histogram of the per-cloud fraction (in percent) of time-averaged LWC in gridbox 0
# (layer 0) relative to the gridbox two below ``max_gridbox`` (layer 1).
data = condensation.sel(time=time_slice)
ylim = (0, 30)

# Total LWC over all radius bins, scaled by 1e6 and labelled as mg m^-3
da = 1e6 * data["liquid_water_content"].sum("radius_bins")
da.attrs["units"] = "$mg m^{-3}$"
da.attrs["long_name"] = "Liquid water content"

# Stack both levels along a new ``layer`` dimension: index 0 -> gridbox 0,
# index 1 -> ``max_gridbox - 2``
da = xr.concat(
    [da.sel(gridbox=0), da.sel(gridbox=data["max_gridbox"] - 2)],
    dim="layer",
)

da_raw_time_mean, da_raw_time_sem = mean_and_stderror_of_mean(
    data=da,
    dims=("time",),
)

lower_layer_mean = da_raw_time_mean.sel(layer=0)
upper_layer_mean = da_raw_time_mean.sel(layer=1)
lower_layer_sem = da_raw_time_sem.sel(layer=0)
upper_layer_sem = da_raw_time_sem.sel(layer=1)

# The fraction, already expressed in percent
da_time_mean = 100 * lower_layer_mean / upper_layer_mean
da_time_mean = da_time_mean.compute()

# Error propagation for a ratio f = a / b:
#   sigma_f = |f| * sqrt((sigma_a / a)^2 + (sigma_b / b)^2 - 2 cov(a, b) / (a b))
# NOTE(review): the covariance is estimated across clouds from the time means,
# not per cloud over time - confirm that this is the intended estimator.
relative_variance = (
    (upper_layer_sem / upper_layer_mean) ** 2
    + (lower_layer_sem / lower_layer_mean) ** 2
    - 2
    * xr.cov(upper_layer_mean, lower_layer_mean, dim="cloud_id")
    / (upper_layer_mean * lower_layer_mean)
)
# ``da_time_mean`` is already in percent, so the propagated uncertainty is in percent
# as well; multiplying by 100 a second time would inflate it by a factor of 100.
da_time_sem: xr.DataArray = np.sqrt(da_time_mean**2 * relative_variance)
da_time_sem = da_time_sem.compute()


# Mean over all clouds; the per-cloud SEM is handed over as ``data_std``
da_cloud_mean, da_cloud_sem = mean_and_stderror_of_mean(
    data=da_time_mean,
    dims=("cloud_id",),
    data_std=da_time_sem,
)

da_cloud_mean = da_cloud_mean.compute()
da_cloud_sem = da_cloud_sem.compute()


fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(
    da_time_mean,
    bins=20,
    color="k",
    alpha=0.5,
    density=False,
)
ax.axvline(
    da_cloud_mean,
    color="darkorange",
    # Raw f-string: "\p" is not a valid Python escape sequence, so a plain f-string
    # triggers a SyntaxWarning on Python >= 3.12. The rendered text is unchanged.
    label=rf"Mean: {da_cloud_mean.values:.1f} $\pm$ {da_cloud_sem.values:.1f} " + "[%]",
)

ax.fill_betweenx(
    ylim,
    da_cloud_mean - 2 * da_cloud_sem,
    da_cloud_mean + 2 * da_cloud_sem,
    color="orange",
    alpha=0.3,
    label="2 SEM",
)

ax.set_xlabel("Fraction in [%]")
ax.set_ylabel("Occurence")
ax.set_title("Fraction of liquid water content reaching the surface")
ax.legend(loc="upper left")

ax.set_ylim(ylim)
fig.savefig(fig_dir / "liquid_water_content_histogram_diff_fraction.svg")

## Comparison to ATR data

In [None]:
# NOTE(review): both paths are absolute and user specific, so this cell only runs on
# the machine they were written for; consider deriving them from a configurable
# repository root.
cloud_composite_path = "/home/m/m301096/repositories/sdm-eurec4a/data/observation/cloud_composite/processed/cloud_composite_si_units.nc"
identified_clusters_path = "/home/m/m301096/repositories/sdm-eurec4a/data/observation/cloud_composite/processed/identified_clusters/identified_clusters_rain_mask_5.nc"

# ATR cloud composite observations and the cloud clusters identified in them
cloud_composite = xr.open_dataset(cloud_composite_path)
identified_clouds = xr.open_dataset(identified_clusters_path)

In [None]:
from sdm_eurec4a.identifications import match_clouds_and_cloudcomposite, select_individual_cloud_by_id


# match_clouds_and_cloudcomposite(
#     ds_clouds = identified_clouds,
#     ds_cloudcomposite = cloud_composite,
#     )

In [None]:
# For every simulated cloud, select the matching time window of the ATR cloud
# composite and collect the time mean / SEM of the observed liquid water content.
lwc_means = []
lwc_sems = []
lwc_datas = dict()

for cloud_id in condensation["cloud_id"]:
    # Start and end time of the identified cloud with this id
    da = select_individual_cloud_by_id(identified_clouds, cloud_id)
    start = da["start"].values[0]
    end = da["end"].values[0]
    ds_match = cloud_composite.sel(time=slice(start, end))
    lwc = ds_match["liquid_water_content_original"]
    lwc_mean, lwc_sem = mean_and_stderror_of_mean(
        data=lwc,
        dims=("time",),
    )
    lwc_mean = lwc_mean.compute()
    lwc_sem = lwc_sem.compute()

    # Add a length-one ``cloud_id`` dimension so the results can be concatenated
    lwc_mean = lwc_mean.expand_dims(dim=dict(cloud_id=[cloud_id]))
    lwc_sem = lwc_sem.expand_dims(dim=dict(cloud_id=[cloud_id]))
    # ``drop_vars`` replaces the deprecated ``DataArray.drop`` for removing a coordinate
    lwc = lwc.expand_dims(dim=dict(cloud_id=[cloud_id])).drop_vars("time")

    lwc_means.append(lwc_mean)
    lwc_sems.append(lwc_sem)
    lwc_datas[str(cloud_id.values)] = lwc

lwc_mean = xr.concat(lwc_means, dim="cloud_id")
lwc_sem = xr.concat(lwc_sems, dim="cloud_id")

In [None]:
# ATR (cloud composite) statistics under explicit names; these are aliases of
# ``lwc_mean`` / ``lwc_sem``, not copies.
cloud_composite_lwc_mean = lwc_mean
cloud_composite_lwc_sem = lwc_sem

# CLEO LWC at ``max_gridbox`` within the evaluated time window, summed over all
# radius bins and scaled by 1e3 (labelled as g m^-3 below)
cleo_lwc_per_bin = (
    condensation["liquid_water_content"]
    .sel(gridbox=condensation["max_gridbox"])
    .sel(time=time_slice)
)
cleo_lwc = 1e3 * cleo_lwc_per_bin.sum("radius_bins", keep_attrs=True)

cleo_lwc_mean, cleo_lwc_sem = mean_and_stderror_of_mean(data=cleo_lwc, dims=("time",))
cleo_lwc_mean = cleo_lwc_mean.compute()
cleo_lwc_sem = cleo_lwc_sem.compute()

# Plot metadata
cleo_lwc_mean.attrs["units"] = "$g m^{-3}$"
cleo_lwc_mean.attrs["long_name"] = "CLEO Liquid water content"
cloud_composite_lwc_mean.attrs["units"] = "$g m^{-3}$"
cloud_composite_lwc_mean.attrs["long_name"] = "ATR Liquid water content"

Cloud-layer LWC: CLEO against ATR

In [None]:
# CLEO against ATR liquid water content, one point per cloud with +-2 SEM error bars.
# The figure is saved twice: full range first, then zoomed in.
fig, ax = plt.subplots(figsize=(5, 5))

errorbar_style = dict(linestyle="None", marker=".", alpha=0.5)
ax.errorbar(
    x=cleo_lwc_mean,
    y=cloud_composite_lwc_mean,
    xerr=2 * cleo_lwc_sem,
    yerr=2 * cloud_composite_lwc_sem,
    **errorbar_style,
)
# 1:1 reference line
ax.plot([0, 1], [0, 1], color="k", linestyle="--")

ax.set_xlabel(label_from_attrs(cleo_lwc_mean))
ax.set_ylabel(label_from_attrs(cloud_composite_lwc_mean))
fig.suptitle("Liquid water content comparison\nbetween CLEO and ATR")
ax.set_box_aspect(1)

for axis_max, file_name in (
    (3, "lwc_comparison_cleo_atr.svg"),
    (0.6, "lwc_comparison_cleo_atr_zoom.svg"),
):
    ax.set_xlim(0, axis_max)
    ax.set_ylim(0, axis_max)
    fig.tight_layout()
    fig.savefig(fig_dir / file_name)

In [None]:
# Collect the observed particle size distribution (PSD) of every cloud from the ATR
# cloud composite, keyed by the cloud id as string.
psd_datas = dict()

for cloud_id in null_microphysics["cloud_id"]:
    # Start and end time of the identified cloud with this id
    da = select_individual_cloud_by_id(identified_clouds, cloud_id)
    start = da["start"].values[0]
    end = da["end"].values[0]
    ds_match = cloud_composite.sel(time=slice(start, end))
    psd = ds_match["particle_size_distribution"]
    # ``drop_vars`` replaces the deprecated ``DataArray.drop`` for removing a coordinate
    psd = psd.expand_dims(dim=dict(cloud_id=[cloud_id])).drop_vars("time")

    psd_datas[str(cloud_id.values)] = psd

# CLEO distributions at ``max_gridbox``: time mean over the evaluated window ...
cleo_psd_mean = (
    condensation["xi_per_volume"]
    .sel(gridbox=condensation["max_gridbox"])
    .sel(time=time_slice)
    .mean("time", keep_attrs=True)
    .compute()
)
# ... and the state at time 0 for the condensation and the null-microphysics runs
cleo_psd_init = (
    condensation["xi_per_volume"].sel(gridbox=condensation["max_gridbox"]).sel(time=0).compute()
)
cleo_psd_init_nmp = (
    null_microphysics["xi_per_volume"]
    .sel(gridbox=null_microphysics["max_gridbox"])
    .sel(time=0)
    .compute()
)

In [None]:
from sdm_eurec4a.input_processing import transfer
import lmfit
from sdm_eurec4a.reductions import shape_dim_as_dataarray


def fit_particle_size_distribution(
    ds_cloudcomposite: xr.DataArray,
    particle_split_radius: float = 45e-6,  # 45 micrometer
) -> transfer.PSD_LnNormal:
    """
    Fits the particle size distribution (PSD) of cloud and rain droplets
    independently and returns the sum of both fits.

    Note
    ----
    The PSD is fitted with a bimodal Lognormal distribution.
    For the cloud droplets, the PSD is fitted with
    - geometric mean between 0.1 micrometer and the split radius.
    - geometric sigma between 0 and 1.7.
    For the rain droplets, the PSD is fitted with
    - geometric mean within the range of radius values provided.

    Parameters
    ----------
    ds_cloudcomposite : xr.DataArray
        PSD values with a ``radius`` coordinate. The values are accessed through
        ``.data``, so a DataArray (not a Dataset) is expected, despite the
        parameter name.
    particle_split_radius : float, optional
        The radius at which to split the data into cloud and rain droplets. Default is 45 micrometers.

    Returns
    -------
    psd_fit : transfer.PSD_LnNormal
        The fitted particle size distribution (``psd_rain_fit + psd_cloud_fit``).
    """

    # Split data into cloud and rain.
    # Both slices are label based, so a radius exactly equal to the split radius is
    # presumably part of both subsets - confirm if that matters.
    ds_small_droplets = ds_cloudcomposite.sel(radius=slice(None, particle_split_radius))
    ds_rain_droplets = ds_cloudcomposite.sel(radius=slice(particle_split_radius, None))

    # ======================================
    # Fit the PSDs
    # ======================================

    # Use the PSD_LnNormal model, one independent instance per droplet class
    psd_rain_fit = transfer.PSD_LnNormal()
    psd_cloud_fit = transfer.PSD_LnNormal()

    # ---------
    # Rain
    # ---------
    data = ds_rain_droplets
    # radius values arranged for element-wise use with ``data`` in the fit
    # (assumption based on how both are passed to ``fit`` below - TODO confirm)
    radi2d = shape_dim_as_dataarray(da=data, output_dim="radius")
    # NOTE(review): here the model is requested before the parameter bounds are
    # updated, whereas the cloud branch below requests it afterwards.
    psd_model = psd_rain_fit.get_model()

    # update geometric mean to be within range of the data
    psd_rain_fit.update_individual_model_parameters(
        lmfit.Parameter(
            name="geometric_means",
            min=data["radius"].min().data,
            max=data["radius"].max().data,
        )
    )

    # fit model parameters and update them
    model_result = psd_model.fit(
        data=data.data, radii=radi2d.data, params=psd_rain_fit.get_model_parameters(), nan_policy="omit"
    )
    psd_rain_fit.lmfitParameterValues_to_dict(model_result.params)

    # ---------
    # Small cloud and drizzle
    # ---------
    # For this, the parameters need to be updated

    # update geometric mean to be within range of 0.1 micrometer and the split radius
    psd_cloud_fit.update_individual_model_parameters(
        lmfit.Parameter(
            name="geometric_means",
            value=1e-5,
            min=0.1e-6,  # at least 0.1 micrometer
            max=particle_split_radius,  # at most the split radius (default 45 micrometer)
        )
    )
    # update geometric sigma to be within range of 0 and 1.7.
    # NOTE: No real physical meaning, but it is a good range for the fit
    psd_cloud_fit.update_individual_model_parameters(
        lmfit.Parameter(
            name="geometric_sigmas",
            value=1.1,
            min=0,
            max=1.7,
        )
    )

    data = ds_small_droplets
    radi2d = shape_dim_as_dataarray(da=data, output_dim="radius")
    psd_model = psd_cloud_fit.get_model()

    # fit model parameters and update them
    model_result = psd_model.fit(
        data=data.data, radii=radi2d.data, params=psd_cloud_fit.get_model_parameters(), nan_policy="omit"
    )
    psd_cloud_fit.lmfitParameterValues_to_dict(model_result.params)

    # --------
    # Combine the fits
    # --------

    psd_fit = psd_rain_fit + psd_cloud_fit

    return psd_fit

In [None]:
from sdm_eurec4a.visulization import ncols_nrows_from_N
from sdm_eurec4a.input_processing.transfer import fit_lnnormal_for_psd, fit_2lnnormal_for_psd
from sdm_eurec4a.conversions import lwc_from_psd


# Reproducibly draw nine clouds, fit a bimodal PSD to the ATR observations of each
# and derive the liquid water content (LWC) of the CLEO initial state and of the fits.
np.random.seed(879345)
random_cloud_ids = np.random.choice(condensation["cloud_id"], 9, replace=False)

psd_fits = dict()
lwc_fits = dict()
for cloud_id in random_cloud_ids:
    cloud_id_str = str(cloud_id)
    psd = psd_datas[cloud_id_str]
    # Mask exact zeros before averaging over the ``cloud_id`` dimension
    psd = psd.where(psd != 0, drop=True)
    psd = psd.mean("cloud_id")
    psd_fits[cloud_id_str] = fit_particle_size_distribution(
        ds_cloudcomposite=psd,
    )

# LWC of the CLEO state at time 0, scaled by 1e3 (labelled as g m^-3 below)
lwc_cleo_init = 1e3 * lwc_from_psd(
    ds=xr.Dataset(data_vars=dict(particle_size_distribution=cleo_psd_init)),
    sum_dim="radius_bins",
    scale_name="radius_bins",
    scale_factor=1e-6,
)
lwc_cleo_init.attrs.update(units="$g m^{-3}$", long_name="CLEO Liquid water content")

# LWC of every fitted PSD, evaluated at the radii of the (unfiltered) observations
for cloud_id in random_cloud_ids:
    cloud_id_str = str(cloud_id)
    psd = psd_datas[cloud_id_str]
    psd_fit = psd_fits[cloud_id_str].eval_func(psd["radius"])

    lwc_fit = 1e3 * lwc_from_psd(xr.Dataset(data_vars=dict(particle_size_distribution=psd_fit)))
    lwc_fits[cloud_id_str] = lwc_fit

In [None]:
# One panel per randomly selected cloud: ATR observations, the fit to them and the
# CLEO initial distribution; the legend entries carry the respective LWC.
fig, axs = plt.subplots(
    figsize=(15, 15),
    sharex=True,
    sharey=True,
    **ncols_nrows_from_N(len(random_cloud_ids)),
)
flat_axs = axs.flatten()

for idx, cloud_id in enumerate(random_cloud_ids):
    cloud_id_str = str(cloud_id)
    cc_psd = psd_datas[cloud_id_str].mean("cloud_id")
    psd_fit = psd_fits[cloud_id_str]

    sel_cleo_psd_mean = cleo_psd_mean.sel(cloud_id=cloud_id)
    sel_cleo_psd_init = cleo_psd_init.sel(cloud_id=cloud_id)
    sel_cleo_psd_init_nmp = cleo_psd_init_nmp.sel(cloud_id=cloud_id)

    lwc_cleo = lwc_cleo_init.sel(cloud_id=cloud_id)
    lwc_fit = lwc_fits[cloud_id_str]
    lwc_atr = cloud_composite_lwc_mean.sel(cloud_id=cloud_id)

    ax = flat_axs[idx]

    # Observations (semi-transparent); the radius is scaled by 1e6 for the x-axis
    ax.plot(
        1e6 * cc_psd["radius"],
        cc_psd,
        marker="+",
        linestyle="None",
        color="grey",
        alpha=0.5,
    )

    # Invisible proxy point so the legend shows an opaque marker for the observations
    ax.plot(
        np.nan,
        np.nan,
        marker="+",
        linestyle="None",
        color="grey",
        alpha=1,
        label=f"ATR data {lwc_atr.values:.2f} $g m^{{-3}}$",
    )

    ax.plot(
        1e6 * cc_psd["radius"],
        psd_fit.eval_func(cc_psd["radius"]),
        linestyle="-",
        color="b",
        label=f"ATR Fit {lwc_fit.values:.2f} $g m^{{-3}}$",
    )

    ax.plot(
        sel_cleo_psd_init["radius_bins"],
        sel_cleo_psd_init,
        marker="x",
        linestyle="-",
        color="r",
        label=f"CLEO init {lwc_cleo.values:.2f} $g m^{{-3}}$",
    )

    ax.legend()
    ax.set_title(f"Cloud ID: {cloud_id}")


# The axes are shared, so setting scales and limits on the first panel is sufficient
ax = flat_axs[0]
ax.set_xscale("log")
ax.set_yscale("symlog", linthresh=1e0, linscale=1)
ax.set_ylim(0, 1e8)

for ax in axs[-1, :]:
    ax.set_xlabel("Radius [µm]")

for ax in axs[:, 0]:
    # Raw string: "\#" and "\m" are not valid Python escape sequences
    # (SyntaxWarning on Python >= 3.12). The rendered text is unchanged.
    ax.set_ylabel(r"Number concentration $[\# m^{-3} (log(\mu m))^{-1}]$")


fig.savefig(fig_dir / "comparison_psd_fit_and_lwc_cleo_atr.svg")
fig.savefig(fig_dir / "comparison_psd_fit_and_lwc_cleo_atr.pdf")

## Column-integrated latent heating for all microphysics

In [None]:
# Histogram of the column-integrated latent heating for every microphysics setup,
# each overlaid with its mean over all clouds and a +-2 SEM band.
datas = [
    condensation,
    collision_condensation,
    coalbure_condensation_cke,
    coalbure_condensation_large,
    coalbure_condensation_small,
]

bins = np.arange(-500, 50, 2)

fig, ax = plt.subplots(figsize=(10, 5))

histogram_style = dict(bins=bins, color=[0.85, 0.85, 0.85], alpha=1, density=True, zorder=1)

for dataset in datas:
    mp = dataset.attrs["microphysics_short"]
    print(mp)
    color = colors_dict[mp]

    data = dataset.sel(time=time_slice)

    # Column integral: latent heating weighted with the gridbox thickness
    lh_sum = (data["latent_heating_full"] * data["gridbox_thickness"]).sum("gridbox")
    lh_sum.attrs["units"] = "Wm^{-2}"
    lh_sum.attrs["long_name"] = "Column int. latent heating"

    # Mean / SEM over time per cloud, then over all clouds
    lh_sum_time_mean, lh_sum_time_sem = mean_and_stderror_of_mean(data=lh_sum, dims=("time",))
    lh_sum_time_mean = lh_sum_time_mean.compute()
    lh_sum_time_sem = lh_sum_time_sem.compute()

    lh_sum_cloud_mean, lh_sum_cloud_sem = mean_and_stderror_of_mean(
        data=lh_sum_time_mean,
        dims=("cloud_id",),
        data_std=lh_sum_time_sem,
    )
    lh_sum_cloud_mean = lh_sum_cloud_mean.compute()
    lh_sum_cloud_sem = lh_sum_cloud_sem.compute()

    ax.hist(lh_sum_time_mean, **histogram_style)
    ax.axvline(lh_sum_cloud_mean, color=color, label="Mean", zorder=3)
    ax.fill_betweenx(
        [0, 0.1],
        lh_sum_cloud_mean - 2 * lh_sum_cloud_sem,
        lh_sum_cloud_mean + 2 * lh_sum_cloud_sem,
        color=color,
        alpha=0.1,
        label="2 SEM",
        zorder=2,
    )


# Labels are taken from the attributes of the last processed dataset
ax.set_xlabel(label_from_attrs(lh_sum))
ax.set_ylabel("Density")
ax.set_title(f"Histogram of {label_from_attrs(lh_sum, return_units=False)}")

ax.set_ylim(0, 0.065)
ax.set_xlim(-100, -10)

fig.savefig(fig_dir / "histogram_column_integrated_latent_heating_all_microphysics.svg")
fig.savefig(fig_dir / "histogram_column_integrated_latent_heating_all_microphysics.pdf")

condensation
collision_condensation
coalbure_condensation_cke
coalbure_condensation_large
coalbure_condensation_small


# Impact of thermodynamics

``NOTE:`` 

**It is good to see that the evaporation rates are in line with the literature**

*... of 15–352 Wm−2 over a 700 m deep sub-cloud layer is equivalent to 2–50 K d−1 of evaporative cooling. This is comparable to the typical stratocumulus cloud-top radiative longwave cooling (4–10 K d−1) and with the rain evaporation cooling rate at cloud base in the marine sub-cloud stratocumulus deck of 2–20 K d−1 (shown in Wood, 2005).*
([Sarkar et al., 2023, p. 12685](zotero://select/library/items/G2B2A8IK)) ([pdf](zotero://open-pdf/library/items/ZNPPEKFT?page=15&annotation=K6IIGBHX))

We can see that the height of the cloud slightly alters the mean evaporation rate,
with more evaporation for lower clouds.
We do not yet know why this is the case.
However, this could also be because higher clouds were sampled on different days!

In the end there seems to be no big correlation.

BUT: the height certainly plays a big role in terms of the total evaporation rate.

#### Thermodynamics

In [None]:
data = condensation

# Latent heating scaled by 1e3 (unit prefix "m" is added below) and restricted to
# the sub-cloud layer. ``where`` returns a new array, so its result has to be
# assigned; otherwise the mask is silently not applied.
latent_heating = 1e3 * data["latent_heating"]
latent_heating = latent_heating.where(data["sub_cloud_layer_mask"])
latent_heating.attrs["long_name"] = data["latent_heating"].attrs["long_name"]
latent_heating.attrs["units"] = "m" + data["latent_heating"].attrs["units"].replace("$", "")


# Deviation of the latent heating from its mean over all gridboxes, same scaling.
# NOTE(review): the anomaly is computed from the unmasked field - confirm whether it
# should be restricted to the sub-cloud layer as well.
latent_heating_anomaly = 1e3 * (
    data["latent_heating"] - data["latent_heating"].mean("gridbox", keep_attrs=True)
)
latent_heating_anomaly.attrs["long_name"] = "Latent heating vertical anomaly"
latent_heating_anomaly.attrs["units"] = latent_heating.attrs["units"]

In [None]:
# Time-mean mass per volume summed over all radius bins, restricted to the sub-cloud layer
m = (
    data["mass_represented_per_volume"]
    .sum("radius_bins")
    .sel(time=time_slice)
    .mean("time", keep_attrs=True)
    .astype(float)
)
m = m.where(data["sub_cloud_layer_mask"]).compute()
m_anomaly = m - m.mean("gridbox", keep_attrs=True).compute()

# Mass in the gridbox directly below ``max_gridbox``, once per cloud and once
# expanded along ``gridbox``.
# ``drop_vars`` replaces the deprecated ``DataArray.drop`` for removing a coordinate.
m_init_1d = m.sel(gridbox=data["max_gridbox"] - 1).drop_vars("gridbox").compute()
m_init = m_init_1d.expand_dims(gridbox=data["gridbox"])

# Relative humidity in the sub-cloud layer and its deviation from the gridbox mean
rh = data["relative_humidity"]
rh = rh.where(data["sub_cloud_layer_mask"]).compute()
rh_anomaly = (rh - rh.mean("gridbox")).compute()

coord3 = data["gridbox_coord3"]

In [None]:
# Calculate, for all clouds, the slope of a linear regression of relative humidity and of latent heating against `gridbox_coord3`


def slope_intercept(x, y, **kwargs):
    """
    Fit a polynomial with ``np.polyfit`` to the finite pairs of ``x`` and ``y``.

    Parameters
    ----------
    x, y : np.ndarray
        1D arrays of equal length; pairs with a non-finite value in either array
        are ignored.
    **kwargs
        Passed on to ``np.polyfit``. ``deg=1`` is expected, because exactly two
        coefficients are unpacked.

    Returns
    -------
    slope, intercept : float
        The two fit coefficients, or ``(nan, nan)`` if the fit is not possible,
        e.g. when no finite pair is left.
    """
    try:
        finite = np.isfinite(x) & np.isfinite(y)
        slope, intercept = np.polyfit(x[finite], y[finite], **kwargs)
    # Only the failures of an impossible fit are mapped to NaN (empty or badly shaped
    # input, wrong number of coefficients, failed least squares). A bare ``except``
    # would also swallow KeyboardInterrupt and unrelated programming errors.
    except (TypeError, ValueError, np.linalg.LinAlgError):
        return np.nan, np.nan
    return slope, intercept


# Apply ``slope_intercept`` along the ``gridbox`` dimension, i.e. one first-degree
# fit per remaining index, for relative humidity and for latent heating.
gridbox_height = data["gridbox_coord3"].chunk(dict(cloud_id=-1)).compute()

# Settings shared by both calls: both inputs are consumed along ``gridbox`` and
# two scalar outputs are produced per fit.
slope_fit_kwargs = dict(
    input_core_dims=[["gridbox"], ["gridbox"]],
    output_core_dims=[[], []],
    exclude_dims={"gridbox"},
    vectorize=True,
    dask="parallelized",
    kwargs=dict(deg=1),
)

rh_slopes, rh_inter = xr.apply_ufunc(
    slope_intercept,
    gridbox_height,
    rh.chunk(dict(cloud_id=-1)).compute(),
    **slope_fit_kwargs,
)
latent_heating_slopes, latent_heating_inter = xr.apply_ufunc(
    slope_intercept,
    gridbox_height,
    latent_heating.chunk(dict(cloud_id=-1)).compute(),
    **slope_fit_kwargs,
)

In [None]:
# Pure values: relative humidity against mass, coloured by latent heating
print("plot latent heating")
fig, ax = plt.subplots(figsize=(10, 4.5))
sc = ax.scatter(
    m,
    rh,
    c=latent_heating,
    marker=".",
    alpha=0.8,
    cmap="inferno",
    vmin=-100,
    vmax=0,
)
ax.set_xlabel(label_from_attrs(data["mass_represented_per_volume"]))
ax.set_ylabel(label_from_attrs(data["relative_humidity"]))
fig.colorbar(sc, ax=ax, label=label_from_attrs(latent_heating))

fig.suptitle("Latent heating against mass in gridbox and relative humidity")

# Anomaly: the same figure for the deviations from the mean over all gridboxes.
# NOTE(review): the axis labels are taken from the non-anomaly variables.

fig, ax = plt.subplots(figsize=(10, 4.5))
sc = ax.scatter(
    m_anomaly,
    rh_anomaly,
    c=latent_heating_anomaly,
    marker=".",
    alpha=0.8,
    cmap="RdBu",
    vmin=-30,
    vmax=30,
)
ax.set_xlabel(label_from_attrs(data["mass_represented_per_volume"]))
ax.set_ylabel(label_from_attrs(data["relative_humidity"]))
fig.colorbar(sc, ax=ax, label=label_from_attrs(latent_heating_anomaly))

# The title was missing the space between "anomaly" and "against"
fig.suptitle("Latent heating anomaly against mass in gridbox and relative humidity")

plot latent heating


Text(0.5, 0.98, 'Latent heating anomalyagainst mass in gridbox and relative humidity')

In [None]:
# Calculate, for all clouds, the slope of a linear regression of relative humidity and of latent heating against `gridbox_coord3`


def slope_intercept(x, y, **kwargs):
    """
    Fit a polynomial with ``np.polyfit`` to the finite pairs of ``x`` and ``y``.

    Parameters
    ----------
    x, y : np.ndarray
        1D arrays of equal length; pairs with a non-finite value in either array
        are ignored.
    **kwargs
        Passed on to ``np.polyfit``. ``deg=1`` is expected, because exactly two
        coefficients are unpacked.

    Returns
    -------
    slope, intercept : float
        The two fit coefficients, or ``(nan, nan)`` if the fit is not possible,
        e.g. when no finite pair is left.
    """
    try:
        finite = np.isfinite(x) & np.isfinite(y)
        slope, intercept = np.polyfit(x[finite], y[finite], **kwargs)
    # Only the failures of an impossible fit are mapped to NaN (empty or badly shaped
    # input, wrong number of coefficients, failed least squares). A bare ``except``
    # would also swallow KeyboardInterrupt and unrelated programming errors.
    except (TypeError, ValueError, np.linalg.LinAlgError):
        return np.nan, np.nan
    return slope, intercept


# Apply ``slope_intercept`` along the ``gridbox`` dimension, i.e. one first-degree
# fit per remaining index, for relative humidity and for latent heating.
gridbox_height = data["gridbox_coord3"].chunk(dict(cloud_id=-1)).compute()

# Settings shared by both calls: both inputs are consumed along ``gridbox`` and
# two scalar outputs are produced per fit.
slope_fit_kwargs = dict(
    input_core_dims=[["gridbox"], ["gridbox"]],
    output_core_dims=[[], []],
    exclude_dims={"gridbox"},
    vectorize=True,
    dask="parallelized",
    kwargs=dict(deg=1),
)

rh_slopes, rh_inter = xr.apply_ufunc(
    slope_intercept,
    gridbox_height,
    rh.chunk(dict(cloud_id=-1)).compute(),
    **slope_fit_kwargs,
)
latent_heating_slopes, latent_heating_inter = xr.apply_ufunc(
    slope_intercept,
    gridbox_height,
    latent_heating.chunk(dict(cloud_id=-1)).compute(),
    **slope_fit_kwargs,
)

In [None]:
# Latent heating slopes against the relative humidity slopes and against the mass
# per volume in ``m_init_1d``, one marker per cloud, with a linear fit.
varibles = [rh_slopes, m_init_1d]

fig, axs = plt.subplots(ncols=2, figsize=(16, 6))

scatter_style = dict(marker=".", alpha=0.5, label="Individual clouds")
for panel, var in zip(axs, varibles):
    panel.scatter(var, latent_heating_slopes, **scatter_style)
    linear_fit_plot(ax=panel, x=var, y=latent_heating_slopes)

    # NOTE(review): the y-label and title use the attributes of ``latent_heating``
    # although its slopes are plotted.
    x_label = label_from_attrs(var)
    y_label = label_from_attrs(latent_heating)
    panel.set_xlabel(x_label)
    panel.set_ylabel(y_label)
    panel.set_title(f"{y_label} against {x_label}")
    panel.legend()

In [None]:
# Latent heating slopes against the relative humidity slopes and against the mass
# per volume in ``m_init_1d``, one marker per cloud, with a linear fit.
varibles = [rh_slopes, m_init_1d]

fig, axs = plt.subplots(ncols=2, figsize=(16, 6))

scatter_style = dict(marker=".", alpha=0.5, label="Individual clouds")
for panel, var in zip(axs, varibles):
    panel.scatter(var, latent_heating_slopes, **scatter_style)
    linear_fit_plot(ax=panel, x=var, y=latent_heating_slopes)

    # NOTE(review): the y-label and title use the attributes of ``latent_heating``
    # although its slopes are plotted.
    x_label = label_from_attrs(var)
    y_label = label_from_attrs(latent_heating)
    panel.set_xlabel(x_label)
    panel.set_ylabel(y_label)
    panel.set_title(f"{y_label} against {x_label}")
    panel.legend()

In [None]:
# Latent heating against relative humidity and against mass per volume, with a linear fit
varibles = [rh, m]

fig, axs = plt.subplots(ncols=2, figsize=(16, 6))

scatter_style = dict(marker=".", alpha=0.5, label="Individual clouds")
for panel, var in zip(axs, varibles):
    panel.scatter(var, latent_heating, **scatter_style)
    linear_fit_plot(ax=panel, x=var, y=latent_heating)

    x_label = label_from_attrs(var)
    y_label = label_from_attrs(latent_heating)
    panel.set_xlabel(x_label)
    panel.set_ylabel(y_label)
    panel.set_title(f"{y_label} against {x_label}")
    panel.legend()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Ordinary least squares of the latent heating slopes against each variable, plotted
# with the interval given by the ``obs_ci_*`` columns of the prediction summary.
varibles = [rh_slopes, m_init_1d]

fig, axs = plt.subplots(figsize=(16, 6), ncols=2)

for i, var in enumerate(varibles):
    x = var.data
    y = latent_heating_slopes.values
    # Sort by x so the fitted line and the interval bounds are drawn left to right
    idx = np.argsort(x)
    x = x[idx]
    y = y[idx]

    # Design matrix with a constant column for the intercept
    X = np.column_stack((x,))

    X = sm.add_constant(X)

    model = sm.OLS(y, X)
    results = model.fit()
    print(results.summary())

    # Build the summary frame once and read both interval bounds from it
    pred_ols = results.get_prediction()
    pred_frame = pred_ols.summary_frame()
    iv_l = pred_frame["obs_ci_lower"]
    iv_u = pred_frame["obs_ci_upper"]

    axs[i].plot(x, y, "o", label="data")
    axs[i].plot(x, results.fittedvalues, "r-", label="OLS")
    axs[i].plot(x, iv_u, "r:")
    axs[i].plot(x, iv_l, "r:")
    axs[i].legend(loc="best")
    axs[i].set_xlabel(label_from_attrs(var))
    axs[i].set_ylabel(label_from_attrs(latent_heating_slopes))

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Ordinary least squares fit of the latent heating against each variable in
# `varibles`, one panel per variable.
varibles = [rh, m]

fig, axs = plt.subplots(figsize=(16, 6), ncols=len(varibles))

for i, var in enumerate(varibles):
    x = var.values.flatten()
    y = latent_heating.values.flatten()

    # Keep only pairs where both values are finite.
    idx = np.isfinite(x) & np.isfinite(y)
    x = x[idx]
    y = y[idx]

    # Sort by x so the fitted line and the interval bounds plot as curves.
    idx = np.argsort(x)
    x = x[idx]
    y = y[idx]

    # Design matrix: constant (intercept) column plus x.
    X = np.column_stack((x,))
    X = sm.add_constant(X)

    model = sm.OLS(y, X)
    results = model.fit()
    print(results.summary())

    # Build the prediction table once and read both interval bounds from it.
    pred_ols = results.get_prediction()
    pred_frame = pred_ols.summary_frame()
    iv_l = pred_frame["obs_ci_lower"]
    iv_u = pred_frame["obs_ci_upper"]

    axs[i].plot(x, y, "o", label="data")
    axs[i].plot(x, results.fittedvalues, "r-", label="OLS")
    axs[i].plot(x, iv_u, "r:")
    axs[i].plot(x, iv_l, "r:")
    axs[i].legend(loc="best")
    axs[i].set_xlabel(label_from_attrs(var))
    # y is `latent_heating` here, so the label must come from that array
    # (the previous label was taken from `latent_heating_slopes`).
    axs[i].set_ylabel(label_from_attrs(latent_heating))

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Ordinary least squares fit of the latent heating anomaly against each
# variable in `varibles`, one panel per variable.
varibles = [
    rh,
    m,
]

# One column per variable; a fixed `ncols=3` left an empty third panel.
fig, axs = plt.subplots(figsize=(16, 6), ncols=len(varibles))

for i, var in enumerate(varibles):
    x = var.values.flatten()
    y = latent_heating_anomaly.values.flatten()

    # Keep only pairs where both values are finite.
    idx = np.isfinite(x) & np.isfinite(y)
    x = x[idx]
    y = y[idx]

    # Sort by x so the fitted line and the interval bounds plot as curves.
    idx = np.argsort(x)
    x = x[idx]
    y = y[idx]

    # Design matrix: constant (intercept) column plus x.
    X = np.column_stack((x,))
    X = sm.add_constant(X)

    model = sm.OLS(y, X)
    results = model.fit()
    print(results.summary())

    # Build the prediction table once and read both interval bounds from it.
    pred_ols = results.get_prediction()
    pred_frame = pred_ols.summary_frame()
    iv_l = pred_frame["obs_ci_lower"]
    iv_u = pred_frame["obs_ci_upper"]

    axs[i].scatter(x, y, marker=".", label="data")
    axs[i].plot(x, results.fittedvalues, "r-", label="OLS")
    axs[i].plot(x, iv_u, "r:")
    axs[i].plot(x, iv_l, "r:")
    axs[i].legend(loc="best")
    axs[i].set_xlabel(label_from_attrs(var))
    # y is `latent_heating_anomaly` here, so the label must come from that
    # array (the previous label was taken from `latent_heating_slopes`).
    axs[i].set_ylabel(label_from_attrs(latent_heating_anomaly))

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Ordinary least squares fit of the latent heating anomaly against each anomaly
# variable in `varibles`, one panel per variable.
varibles = [rh_anomaly, m_anomaly]

fig, axs = plt.subplots(figsize=(16, 6), ncols=len(varibles))

for i, var in enumerate(varibles):
    x = var.values.flatten()
    y = latent_heating_anomaly.values.flatten()

    # Keep only pairs where both values are finite.
    idx = np.isfinite(x) & np.isfinite(y)
    x = x[idx]
    y = y[idx]

    # Sort by x so the fitted line and the interval bounds plot as curves.
    idx = np.argsort(x)
    x = x[idx]
    y = y[idx]

    # Design matrix: constant (intercept) column plus x.
    X = np.column_stack((x,))
    X = sm.add_constant(X)

    model = sm.OLS(y, X)
    results = model.fit()
    print(results.summary())

    # Build the prediction table once and read both interval bounds from it.
    pred_ols = results.get_prediction()
    pred_frame = pred_ols.summary_frame()
    iv_l = pred_frame["obs_ci_lower"]
    iv_u = pred_frame["obs_ci_upper"]

    axs[i].scatter(x, y, marker=".", label="data")
    axs[i].plot(x, results.fittedvalues, "r-", label="OLS")
    axs[i].plot(x, iv_u, "r:")
    axs[i].plot(x, iv_l, "r:")
    axs[i].legend(loc="best")
    axs[i].set_xlabel(label_from_attrs(var))
    # y is `latent_heating_anomaly` here, so the label must come from that
    # array (the previous label was taken from `latent_heating_slopes`).
    axs[i].set_ylabel(label_from_attrs(latent_heating_anomaly))

In [None]:
# Here we want to analyse the relation of the latent heating slope to:
# 1. the relative humidity slope
# 2. the initial mass in the gridbox

# Further, the correlation coefficients will be calculated and a linear regression will be performed.
# The linear regression shall be plotted with its uncertainty.


fig, axs = plt.subplots(ncols=2, figsize=(14, 5))

# Left panel: latent heating slopes against `rh_slopes`.
axs[0].scatter(rh_slopes, latent_heating_slopes, alpha=1)

# Right panel: latent heating slopes against `m_init_1d`.
axs[1].scatter(m_init_1d, latent_heating_slopes, alpha=1)

<matplotlib.collections.PathCollection at 0x7ffe642306e0>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress

# Linear regression of the latent heating slopes (y) on the relative humidity
# slopes (x), plotted together with the 1-sigma uncertainty of the fitted line.
x = rh_slopes
y = latent_heating_slopes

# Sort by x so the regression line and its band are drawn left to right.
idx = np.argsort(x.data)
x = x.data[idx]
y = y.data[idx]

# Perform linear regression; `std_err` is the standard error of the slope.
slope, intercept, r_value, p_value, std_err = linregress(x, y)

# Calculate the regression line
regression_line = slope * x + intercept

n_obs = len(x)
x_mean = np.mean(x)
sum_sq_dev = np.sum((x - x_mean) ** 2)

# Standard errors of the fitted parameters.
slope_uncertainty = std_err
# SE(intercept) = SE(slope) * sqrt(sum(x^2) / n). The centred sum of squares
# used before gives the uncertainty of the line at x = mean(x), not at x = 0.
intercept_uncertainty = std_err * np.sqrt(np.sum(x**2) / n_obs)

# 1-sigma standard error of the fitted line at every x:
#   SE(y_hat(x)) = SE(slope) * sqrt((x - mean(x))^2 + sum((x - mean(x))^2) / n)
# The slope's standard error has units of y/x and must not be added to y directly.
regression_line_uncertainty = std_err * np.sqrt((x - x_mean) ** 2 + sum_sq_dev / n_obs)

# Plotting
plt.figure(figsize=(10, 6))
plt.scatter(x, y, label="Data points")
plt.plot(x, regression_line, color="red", label="Linear regression")
plt.fill_between(
    x,
    regression_line - regression_line_uncertainty,
    regression_line + regression_line_uncertainty,
    color="red",
    alpha=0.2,
    label="Uncertainty",
)
# Label the axes from the data arrays instead of the placeholders "X" / "Y".
plt.xlabel(label_from_attrs(rh_slopes))
plt.ylabel(label_from_attrs(latent_heating_slopes))
plt.legend()
plt.title("Linear Regression with Uncertainty")
plt.show()
print(f"Slope: {slope:.3f} ± {slope_uncertainty:.3f}")
print(f"Intercept: {intercept:.3f} ± {intercept_uncertainty:.3f}")
print(f"R-squared: {r_value**2:.3f}")
print(f"P-value: {p_value:.3f}")

Slope: 2.644 ± 0.234
Intercept: -0.016 ± 0.002
R-squared: 0.670
P-value: 0.000


In [None]:
# Quick look at the band around the regression line, using `x`,
# `regression_line` and `slope_uncertainty` from the regression cell above.
plt.plot(x, regression_line + slope_uncertainty)
# Shade between the lower and the upper bound along x. The previous call passed
# the lower bound twice to `fill_betweenx`, which fills nothing and treats `x`
# as the vertical coordinate.
plt.fill_between(
    x,
    regression_line - slope_uncertainty,
    regression_line + slope_uncertainty,
    alpha=0.2,
)

<matplotlib.collections.PolyCollection at 0x7ffe244d7a10>

In [None]:
fig, axs = plt.subplots(ncols=2, figsize=(14, 5))

# Marker styling shared by both panels.
scatter_kwargs = dict(marker=".", alpha=0.5)

# Latent heating anomaly against `rh_anomaly` (left) and against the transposed
# `m_init` (right).
axs[0].scatter(rh_anomaly, latent_heating_anomaly, **scatter_kwargs)
axs[1].scatter(m_init.T, latent_heating_anomaly, **scatter_kwargs)

<matplotlib.collections.PathCollection at 0x7ffe441e2840>

In [None]:
# Print, in order: two correlations (`xr.corr`) followed by four covariances
# (`xr.cov`) between the listed pairs of arrays.
for stat_func, first, second in (
    (xr.corr, rh, latent_heating),
    (xr.corr, rh_anomaly, latent_heating_anomaly),
    (xr.cov, rh, latent_heating_anomaly),
    (xr.cov, m, latent_heating),
    (xr.cov, m_anomaly, latent_heating_anomaly),
    (xr.cov, m, latent_heating_anomaly),
):
    print(stat_func(first, second).values)

0.4981414062727827
0.8972324609381398
30.123976579476608
-0.37322667649540714
0.008303319842886014
0.008303319842886013


In [None]:
# Correlation of `m` with the latent heating anomaly (shown as the cell output).
xr.corr(m, latent_heating_anomaly).values

array(0.03129359)

In [None]:
# Figure 1: latent heating against relative humidity, saved as SVG.
cond_latent_heating = condensation["latent_heating"]
cond_relative_humidity = condensation["relative_humidity"]

fig, ax = plt.subplots(figsize=(7, 5))
ax.scatter(cond_relative_humidity, cond_latent_heating, marker=".", alpha=0.5)
ax.set_xlabel(label_from_attrs(cond_relative_humidity))
ax.set_ylabel(label_from_attrs(cond_latent_heating))
fig.suptitle("Latent heating rate vs. relative humidity")
fig.savefig(fig_dir / "latent_heating_against_humidity.svg")

# Figure 2: latent heating against the mass summed over the radius bins and
# averaged over `time_slice`, saved as SVG.
cond_mass = condensation["mass_represented_per_volume"]
cond_mass_time_mean = cond_mass.sum("radius_bins").sel(time=time_slice).mean("time")

fig, ax = plt.subplots(figsize=(7, 5))
ax.scatter(cond_mass_time_mean, cond_latent_heating, marker=".", alpha=0.5)
ax.set_xlabel(label_from_attrs(cond_mass))
ax.set_ylabel(label_from_attrs(cond_latent_heating))
fig.suptitle("Latent heating rate vs. mass")
fig.savefig(fig_dir / "latent_heating_against_mass.svg")

In [None]:
# Latent heating anomaly: deviation of the latent heating from its mean over
# the "gridbox" dimension. Name and units are set explicitly on the result.
cond_latent_heating = condensation["latent_heating"]
latent_heating_anomaly = cond_latent_heating - cond_latent_heating.mean("gridbox", keep_attrs=True)
latent_heating_anomaly.attrs.update(
    long_name="Latent heating anomaly",
    units=cond_latent_heating.attrs["units"],
)

# Figure 1: anomaly against relative humidity, saved as SVG.
cond_relative_humidity = condensation["relative_humidity"]

fig, ax = plt.subplots(figsize=(7, 5))
ax.scatter(cond_relative_humidity, latent_heating_anomaly, marker=".", alpha=0.5)
ax.set_xlabel(label_from_attrs(cond_relative_humidity))
ax.set_ylabel(label_from_attrs(latent_heating_anomaly))
fig.suptitle("Latent heating anomaly vs. relative humidity")
fig.savefig(fig_dir / "latent_heating_anomaly_against_humidity.svg")

# Figure 2: anomaly against the mass summed over the radius bins and averaged
# over `time_slice`, saved as SVG.
cond_mass = condensation["mass_represented_per_volume"]
cond_mass_time_mean = cond_mass.sum("radius_bins").sel(time=time_slice).mean("time")

fig, ax = plt.subplots(figsize=(7, 5))
ax.scatter(cond_mass_time_mean, latent_heating_anomaly, marker=".", alpha=0.5)
ax.set_xlabel(label_from_attrs(cond_mass))
ax.set_ylabel(label_from_attrs(latent_heating_anomaly))
fig.suptitle("Latent heating anomaly vs. mass")
fig.savefig(fig_dir / "latent_heating_anomaly_against_mass.svg")