In [None]:
import warnings
import numpy as np
from scipy.stats import norm
import xarray as xr
import matplotlib.pyplot as plt
import lmfit
from typing import Union, List, Tuple

from sdm_eurec4a.visulization import (
    set_custom_rcParams,
)
from sdm_eurec4a.identifications import select_individual_cloud_by_id, match_clouds_and_cloudcomposite

from sdm_eurec4a.reductions import mean_and_stderror_of_mean

# Silence every warning for the rest of the notebook (this also hides potentially useful ones).
warnings.filterwarnings("ignore")

# The return value is indexed by integer position (default_colors[i]) in the plotting cells below.
# NOTE(review): presumably set_custom_rcParams also applies the project's matplotlib style - confirm.
default_colors = set_custom_rcParams()

In [None]:
import random


def random_integers_sum_to_n(parts, n):
    """
    Divide an integer n into a given number of random non-zero integers.

    ``parts - 1`` distinct break points are drawn from ``1 .. n - 1`` with the
    global ``random`` state (seed it for reproducible splits); the returned
    values are the distances between consecutive break points.

    Parameters:
    parts (int): The number of parts to divide the integer into (at least 1).
    n (int): The integer to be divided.

    Returns:
    List[int]: A list of ``parts`` positive integers that sum up to n.

    Raises:
    ValueError: If ``parts`` is smaller than 1 or greater than ``n``.
    """
    if parts < 1:
        raise ValueError("Number of parts must be at least 1.")
    if parts > n:
        raise ValueError("Number of parts cannot be greater than the integer itself.")

    # Generate random break points
    break_points = sorted(random.sample(range(1, n), parts - 1))
    # Printed so the notebook output records which split was drawn.
    print(break_points)

    # Create the parts by calculating the differences between consecutive edges
    edges = [0] + break_points + [n]
    return [upper - lower for lower, upper in zip(edges[:-1], edges[1:])]

In [None]:
def ln_normal_distribution(
    x: np.ndarray, scale_factor: float, geometric_mean: float, geometric_sigma: float
) -> np.ndarray:
    """
    Evaluate a scaled log-normal distribution (dn/dlnr form) at ``x``.

    Parameters
    ----------
    x : np.ndarray
        Positions at which to evaluate; the natural logarithm of ``x`` is taken.
    scale_factor : float
        Multiplicative factor applied to the distribution.
    geometric_mean : float
        Geometric mean; its logarithm is the centre in log space.
    geometric_sigma : float
        Geometric standard deviation; its logarithm is the width in log space.

    Returns
    -------
    np.ndarray
        Distribution values with the same shape as ``x``.
    """
    log_sigma = np.log(geometric_sigma)
    log_mean = np.log(geometric_mean)

    prefactor = scale_factor / (np.sqrt(2 * np.pi) * log_sigma)
    gauss_exponent = -((np.log(x) - log_mean) ** 2) / (2 * log_sigma**2)

    # eq.5.8 [lohmann intro 2 clouds]
    return prefactor * np.exp(gauss_exponent)


def normal_distribution(x, mu, sigma, scale_factor):
    """
    Evaluate a normal (Gaussian) probability density multiplied by ``scale_factor``.

    ``mu`` is the mean and ``sigma`` the standard deviation; ``x`` may be a scalar or an array.
    """
    amplitude = scale_factor * 1 / (sigma * np.sqrt(2 * np.pi))
    gauss_exponent = -((x - mu) ** 2) / (2 * sigma**2)
    return amplitude * np.exp(gauss_exponent)


def diff_same_size(x: np.ndarray):
    """
    Return a width for every element of a 1D array ``x``, with the same length as ``x``.

    Interior elements get the mean of the distances to their two neighbours;
    the first and last element get the distance to their single neighbour.

    Parameters
    ----------
    x : np.ndarray
        1D array of positions with at least two elements (a shorter input
        raises ``IndexError`` because there is no neighbour distance to use).

    Returns
    -------
    np.ndarray
        Widths with the same shape as ``x``. The result is floating point even
        for integer input, so averaged widths are not truncated.
    """
    x = np.asarray(x)

    # Calculate differences between consecutive x values
    diffs = np.diff(x)

    # Keep an existing floating dtype; integer input is promoted to float64 because the
    # interior widths are averages and must not be rounded down.
    dtype = diffs.dtype if np.issubdtype(diffs.dtype, np.inexact) else np.float64
    dx = np.zeros(x.shape, dtype=dtype)

    # For each x value (except the first and last), average the differences with its neighbours
    dx[1:-1] = (diffs[:-1] + diffs[1:]) / 2

    # The first and last x values only have one neighbour: use that single difference
    dx[0] = diffs[0]
    dx[-1] = diffs[-1]

    return dx

# Create minimum problem example

In [None]:
class TestData:
    """
    Binned one-dimensional data: positions ``x``, values ``y`` and bin widths ``dx``.

    ``y_normalized`` (``y / dx``) is computed once at construction time; it is
    NOT refreshed when ``x``, ``y`` or ``dx`` are reassigned afterwards.
    Instance attributes can also be read and written with item syntax
    (``td["x"]``), which accesses the instance ``__dict__`` directly.
    """

    def __init__(
        self,
        x: Union[np.ndarray, List, Tuple],
        y: Union[np.ndarray, List, Tuple],
        dx: Union[np.ndarray, List, Tuple],
        name: str = "",
    ):
        """
        Parameters
        ----------
        x, y, dx : array-like
            Positions, values and bin widths. Lists and tuples are converted to
            numpy arrays; any other type (e.g. numpy or xarray arrays) is stored as given.
        name : str, optional
            Label used in ``__str__`` and by callers for plot legends.
        """
        self.x = self._as_array(x)
        self.y = self._as_array(y)
        self.dx = self._as_array(dx)

        self.y_normalized = self.y / self.dx
        self.name = name

    @staticmethod
    def _as_array(values):
        """Convert plain lists/tuples to numpy arrays; leave every other type untouched."""
        if isinstance(values, (list, tuple)):
            return np.asarray(values)
        return values

    def __getitem__(self, key):
        return self.__dict__[key]

    def __setitem__(self, key, value):
        self.__dict__[key] = value

    def resample(self, width: np.ndarray) -> "TestData":
        """
        Resample the data in non uniform intervals given by width array
        The width array elements need to sum up to the length of the data array

        Each new bin aggregates ``width[k]`` consecutive original bins:
        ``x`` is the mean of the grouped positions, ``y`` and ``dx`` are summed.

        Parameters:
        -----------
        width: array
            array with the width of the intervals (number of original bins per new bin)

        Returns:
        --------
        TestData object with resampled data (its name is left empty)
        """

        assert np.sum(width) == len(self.x), "width must sum up to the number of data points"

        end = np.cumsum(width)
        start = end - width

        x = []
        y = []
        dx = []
        for s, e in zip(start, end):
            x.append(np.mean(self.x[s:e]))
            y.append(np.sum(self.y[s:e]))
            dx.append(np.sum(self.dx[s:e]))

        return TestData(np.array(x), np.array(y), np.array(dx))

    def normalize(self) -> "TestData":
        """
        Normalize the data by dividing the y values by the dx values

        Returns a new TestData whose ``y`` is ``self.y / self.dx``; ``x``, ``dx``
        and the name are carried over so plot legends keep their label.
        """
        return TestData(
            x=self.x,
            y=self.y / self.dx,
            dx=self.dx,
            name=self.name,
        )

    def __str__(self) -> str:
        return f"{self.name}\nx: {self.x},\ny: {self.y},\ndx: {self.dx}"

    def plot_bar(self, ax=None, normalized=False, **kwargs):
        """
        Draw the data as bars centred at ``x`` with width ``dx``.

        With ``normalized=True`` the stored ``y_normalized`` is drawn instead of ``y``.
        A new figure is created when ``ax`` is None. Extra keyword arguments
        are forwarded to ``ax.bar``. Returns the axes used.
        """
        if ax is None:
            fig, ax = plt.subplots()

        y = self.y_normalized if normalized else self.y

        ax.bar(x=self.x, height=y, width=self.dx, **kwargs)
        return ax

    def plot_scatter(self, ax=None, normalized=False, **kwargs):
        """
        Draw the data as a scatter plot of ``y`` (or ``y_normalized``) against ``x``.

        A new figure is created when ``ax`` is None. Extra keyword arguments
        are forwarded to ``ax.scatter``. Returns the axes used.
        """
        if ax is None:
            fig, ax = plt.subplots()

        y = self.y_normalized if normalized else self.y

        ax.scatter(
            x=self.x,
            y=y,
            **kwargs,
        )
        return ax

    @property
    def fit_result(self):
        """Fit result stored for this data set; raises AttributeError if none was assigned yet."""
        return self._fit_result

    @fit_result.setter
    def fit_result(self, fit_result):
        self._fit_result = fit_result


# Reference data: standard normal pdf sampled on an evenly spaced grid
x_equal = np.arange(-6, 7, 1, dtype=float)
y_equal = norm.pdf(x_equal)
dx_equal = np.full_like(a=x_equal, fill_value=x_equal[1] - x_equal[0])

td1 = TestData(x=x_equal, y=y_equal, dx=dx_equal, name="equal")

# First coarsening: group neighbouring points into bins of unequal size
width = np.array((2, 1, 1, 1, 1, 3, 1, 2, 1))
td2 = td1.resample(width)
td2.name = "uneq.1"

# Second coarsening with a different grouping
width = np.array((2, 1, 3, 3, 1, 2, 1))
td3 = td1.resample(width)
td3.name = "uneq.2"

# Bin-width-normalized counterparts of the three data sets
td1_normalized, td2_normalized, td3_normalized = (td.normalize() for td in (td1, td2, td3))

for td in (td1_normalized, td2_normalized, td3_normalized):
    print(td.dx)

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[2. 1. 1. 1. 1. 3. 1. 2. 1.]
[2. 1. 3. 3. 1. 2. 1.]


In [None]:
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(6, 3), sharex=True, sharey=True)
ax = axs[0]
ax_norm = axs[1]

style = dict(
    # color = 'None',
    alpha=0.8,
    linewidth=2,
)

# Left panel: raw values of the three data sets
markers = ["o", "x", "s"]
for i, td in enumerate([td1, td2, td3]):
    td.plot_scatter(
        ax=ax,
        normalized=False,
        label=td.name,
        marker=markers[i],
        # edgecolor = default_colors[i],
        color=default_colors[i],
        **style,
    )

# Right panel: the *_normalized objects already hold y / dx, so they are drawn with
# normalized=False; normalized=True would divide by the bin width a second time.
# The label is taken from the matching raw data set so the legend is never empty.
for i, (td_raw, td) in enumerate(
    zip([td1, td2, td3], [td1_normalized, td2_normalized, td3_normalized])
):
    td.plot_bar(
        ax=ax_norm,
        normalized=False,
        label=td_raw.name,
        edgecolor=default_colors[i],
        color="None",
        **style,
    )

for _ax in axs:
    _ax.set_xlabel("x")
    _ax.legend()

ax.set_title("normal distribution")
ax.set_ylabel("counts")

ax_norm.set_title("normal distribution\nnormalized by bin width")
ax_norm.set_ylabel("probability")

# ax.set_xscale('log')

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


Text(0, 0.5, 'probability')

### Describe the problem

In [None]:
# normal_distribution is already defined in the helper cell near the top of the notebook;
# it is reused here instead of being defined a second time.
lm_mod = lmfit.Model(normal_distribution, independent_vars=("x",))

# Starting values for the fit; deliberately off the true values (mu=0, sigma=1)
params = lmfit.Parameters()
params.add("scale_factor", value=1)
params.add("mu", value=2)
params.add("sigma", value=2)

# fit the normal distribution to the raw and the bin-width-normalized TestData objects
for td in (td1, td2, td3, td1_normalized, td2_normalized, td3_normalized):
    td.fit_result = lm_mod.fit(data=td.y, x=td.x, **params)

In [None]:
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 3.5), sharex=True, sharey=True)
ax, ax_norm = axs

x = np.arange(-6, 6, 0.1)

# (axes, data sets drawn on them): raw data on the left, normalized data on the right
panels = (
    (ax, (td1, td2, td3)),
    (ax_norm, (td1_normalized, td2_normalized, td3_normalized)),
)
for axis, datasets in panels:
    for td in datasets:
        # the fit curve reuses the color matplotlib picked for the data markers
        lines = axis.plot(td.x, td.y, "o")
        color = lines[0].get_color()
        axis.plot(x, td.fit_result.eval(x=x), color=color)

ax.set_title("normal distribution")
ax.set_ylabel("counts")

ax_norm.set_title("normal distribution\nnormalized by bin width")
ax_norm.set_ylabel("probability")

Text(0, 0.5, 'probability')

### How to maintain the integral over the quantity with different x spacings

The sum of the data is equal in both cases.

In [None]:
# The three data sets hold the same values grouped differently, so their totals agree only up
# to floating-point round-off; exact equality would depend on the summation order.
reference_sum = td1.y.sum()
assert all(np.isclose(td.y.sum(), reference_sum, rtol=1e-12, atol=0) for td in (td2, td3))
for td in (td1, td2, td3):
    print(f"{td.name} {td.y.sum()/ td1.y.sum()}")

equal 1.0
uneq.1 1.0
uneq.2 1.0


But the fits using different x spacings do not give the same sum over the values.

This needs to be solved.
Ask Clara, how she did this.

In [None]:
datasets = (td1, td2, td3)

# Header row: one tab-terminated column per data set
top = "\t" + "".join(f"{td_x.name}\t" for td_x in datasets)
print(top)

# result[i, j]: fit of data set i evaluated at the x positions of data set j, then summed
result = np.zeros((3, 3))

for i, td_x in enumerate(datasets):
    result[i, :] = [np.sum(td_x.fit_result.eval(x=td_y.x)) for td_y in datasets]

    print(f"{td_x.name}\t{np.round(result[i, :], 4)}")

	equal	uneq.1	uneq.2	
equal	[1.     0.5469 0.3005]
uneq.1	[1.9312 0.9871 0.7387]
uneq.2	[3.9657 2.249  1.0003]


### LogNormal Case


In [None]:
np.random.seed(42)

# Log-normal test data sampled at x = exp(u) with u evenly spaced (step 0.25),
# i.e. the observations are equidistant in log space, not in x.
x_equal = np.exp(np.arange(-4, 6, 0.25, dtype=float))

dx_equal = diff_same_size(x_equal)
y_equal = ln_normal_distribution(x_equal, scale_factor=1, geometric_mean=1, geometric_sigma=2)

td1_ln = TestData(x_equal, y_equal, dx_equal, name="equal")

# resample data in non uniform intervals (random split into 14 bins)

N = len(x_equal)

random.seed(42)
width = random_integers_sum_to_n(14, N)
td2_ln = td1_ln.resample(width)
td2_ln.name = "uneq.1"

# resample data in non uniform intervals (random split into 7 bins)

width = random_integers_sum_to_n(7, N)
td3_ln = td1_ln.resample(width)
td3_ln.name = "uneq.2"


td1_ln_normalized = td1_ln.normalize()
td2_ln_normalized = td2_ln.normalize()
td3_ln_normalized = td3_ln.normalize()

[1, 2, 3, 6, 7, 8, 9, 14, 15, 16, 18, 19, 38]
[2, 13, 14, 15, 27, 33]


In [None]:
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(6, 3), sharex=True, sharey=True)
ax = axs[0]
ax_norm = axs[1]

style = dict(
    # color = 'None',
    alpha=0.8,
    linewidth=2,
)

# Left panel: raw values; the legend shows the sum over all points of each data set
markers = ["o", "x", "s"]
for i, td in enumerate([td1_ln, td2_ln, td3_ln]):
    td.plot_scatter(
        ax=ax,
        normalized=False,
        label=rf"{td.name} $\sum$ {td.y.sum():.2f}",
        # edgecolor = default_colors[i],
        marker=markers[i],
        color=default_colors[i],
        **style,
    )

# Right panel: the *_normalized objects already hold y / dx, so they are plotted as they are
for i, td in enumerate([td1_ln_normalized, td2_ln_normalized, td3_ln_normalized]):
    td.plot_scatter(
        ax=ax_norm,
        # normalized = True,
        label=rf"{td.name} $\sum$ {td.y.sum():.2f}",
        marker=markers[i],
        color=default_colors[i],
        # color = "None",
        **style,
    )

for _ax in axs.flatten():
    _ax.set_xlabel("x")
    _ax.legend(loc="upper left")

# This section shows the log-normal test data, so the titles say so
ax.set_title("log-normal distribution")
ax.set_ylabel("counts")

ax_norm.set_title("log-normal distribution\nnormalized by bin width")
ax_norm.set_ylabel("probability")

for _ax in axs:
    _ax.set_xscale("log")

In [None]:
lm_mod = lmfit.Model(ln_normal_distribution, independent_vars=("x",))

# All three parameters start from a fixed guess and are bounded below by zero
params = lmfit.Parameters()
for par_name, start_value in (
    ("scale_factor", 1),
    ("geometric_mean", 3),
    ("geometric_sigma", 2),
):
    params.add(par_name, value=start_value, min=0)

# fit the log-normal distribution to the raw and the normalized TestData objects
ln_datasets = (
    td1_ln,
    td2_ln,
    td3_ln,
    td1_ln_normalized,
    td2_ln_normalized,
    td3_ln_normalized,
)
for td in ln_datasets:
    try:
        td.fit_result = lm_mod.fit(
            data=td.y,
            x=td.x,
            # nan_policy='omit',
            **params,
        )
    except Exception as e:
        # best effort: report the failure and carry on with the next data set
        print(e)

In [None]:
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 3.5), sharex=True, sharey=True)
ax = axs[0]
ax_norm = axs[1]

# Dense, log-spaced evaluation grid for the fitted curves
x = np.arange(-6, 6, 0.01)
x = np.exp(x)
for td in (td1_ln, td2_ln, td3_ln):
    # make sure to use the same color for data and fit
    lines = ax.plot(td.x, td.y, "o")
    color = lines[0].get_color()
    y = td.fit_result.eval(x=x)
    ax.plot(x, y, color=color, label=rf"{td.name} $\sum$ {y.sum():.2f}")

for td in (td1_ln_normalized, td2_ln_normalized, td3_ln_normalized):
    # make sure to use the same color for data and fit
    lines = ax_norm.plot(td.x, td.y, "o")
    color = lines[0].get_color()
    y = td.fit_result.eval(x=x)
    ax_norm.plot(x, y, color=color, label=rf"{td.name} $\sum$ {y.sum():.2f}")

# This section shows the log-normal test data, so the titles say so
ax.set_title("log-normal distribution")
ax.set_ylabel("counts")

ax_norm.set_title("log-normal distribution\nnormalized by bin width")
ax_norm.set_ylabel("probability")

for _ax in axs:
    _ax.set_xscale("log")
    _ax.legend()

In [None]:
fig, ax = plt.subplots()

x = np.exp(np.arange(-6, 6, 0.1))
dx = diff_same_size(x)

td = td1_ln
ax.plot(td.x, td.y, "o", color="black", label=rf"original $\sum$ {td.y.sum():.2f}")


for i, td_norm in enumerate((td1_ln_normalized, td2_ln_normalized, td3_ln_normalized)):

    fit = td_norm.fit_result
    color = default_colors[i]

    # Each normalized fit is turned back into per-bin values by multiplying with the bin
    # widths: first on the original grid ("."), then on the dense grid ("x").
    for marker, x_eval, dx_eval in ((".", td.x, td.dx), ("x", x, dx)):
        counts = fit.eval(x=x_eval) * dx_eval
        ax.plot(x_eval, counts, marker, color=color, label=rf"$\sum$ {counts.sum():.2f}")


ax.set_xscale("log")
ax.legend()

<matplotlib.legend.Legend at 0x7fff246e0320>

# ATR Observations

In [None]:
# NOTE(review): absolute, user-specific paths - the notebook only runs where these files exist.
cloud_composite = xr.open_dataset(
    "/home/m/m301096/repositories/sdm-eurec4a/data/observation/cloud_composite/processed/cloud_composite_si_units.nc"
)
identified_clouds = xr.open_dataset(
    "/home/m/m301096/repositories/sdm-eurec4a/data/observation/cloud_composite/processed/identified_clusters/identified_clusters_rain_mask_5.nc"
)

# Add a copy of the radius scaled by 1e6 and labelled in µm. The "radius" coordinate itself is
# left untouched, so its unit attribute keeps describing the unscaled values that the
# selections below (e.g. slice(50e-6, None)) rely on.
attrs = cloud_composite["radius"].attrs.copy()
attrs.update({"units": "µm"})
radius_micro = 1e6 * cloud_composite["radius"]
radius_micro.attrs = attrs
cloud_composite["radius_micro"] = radius_micro

# cloud_composite = cloud_composite.sel(radius = slice(10, None))

# Keep only clouds that last longer than 100 seconds
identified_clouds = identified_clouds.where(
    identified_clouds.duration.dt.total_seconds() > 100, drop=True
)
identified_clouds

In [None]:
# The observations are fitted with a normal distribution in x = ln(radius)
lm_mod = lmfit.Model(normal_distribution, independent_vars=("x",))

# Rain mode: mean between 80 µm and 2 mm, starting at 300 µm
params_rain = lmfit.Parameters()
params_rain.add("mu", value=np.log(300e-6), min=np.log(80e-6), max=np.log(2e-3))
params_rain.add("scale_factor", value=1, min=0)
params_rain.add("sigma", value=0.5, max=1)

# Cloud mode: mean between 0.1 µm and 50 µm, starting at 10 µm.
# The start value has to lie inside [min, max]; previously the start value (0.1 µm) was
# below the lower bound (10 µm), so the fit started pinned to that bound.
params_cloud = lmfit.Parameters()
params_cloud.add("mu", value=np.log(10e-6), min=np.log(0.1e-6), max=np.log(50e-6))
params_cloud.add("scale_factor", value=1e5, min=0)
params_cloud.add("sigma", value=0.5, max=1)

RADIUS = cloud_composite["radius"]


# for cloud_id in identified_clouds.cloud_id:
def fit_both(cloud_id):
    """
    Fit the cloud and the rain part of one cloud's particle size distribution separately.

    The cloud's time range is read from ``identified_clouds`` and used to select from
    ``cloud_composite``. The particle size distribution is divided by the bin width, split
    at a radius of 50e-6 and every (time, radius) sample with finite values is fitted with
    ``lm_mod`` as a function of ln(radius), using ``params_cloud`` / ``params_rain``.

    Parameters
    ----------
    cloud_id
        Identifier passed to ``select_individual_cloud_by_id``.

    Returns
    -------
    tuple of TestData
        ``(td_cloud, td_rain)`` with flattened, finite-only data and ``fit_result`` set.
    """
    da = select_individual_cloud_by_id(identified_clouds, cloud_id)
    start = da["start"].values[0]
    end = da["end"].values[0]
    ds = cloud_composite.sel(time=slice(start, end))

    # fit the bin-width-normalized distribution
    ds["particle_size_distribution"] = ds["particle_size_distribution"] / ds["bin_width"]

    ds_rain = ds.sel(radius=slice(50e-6, None))
    ds_cloud = ds.sel(radius=slice(None, 50e-6))

    # radius and bin width are broadcast along time so x, y and dx share one (time, radius) shape
    td_cloud = TestData(
        x=np.log(ds_cloud["radius"].expand_dims(time=ds.time).transpose("time", "radius")),
        y=ds_cloud["particle_size_distribution"].transpose("time", "radius"),
        dx=ds_cloud["bin_width"].expand_dims(time=ds.time).transpose("time", "radius"),
        name="cloud",
    )
    td_rain = TestData(
        x=np.log(ds_rain["radius"].expand_dims(time=ds.time).transpose("time", "radius")),
        y=ds_rain["particle_size_distribution"].transpose("time", "radius"),
        dx=ds_rain["bin_width"].expand_dims(time=ds.time).transpose("time", "radius"),
        name="rain",
    )

    for td in (td_cloud, td_rain):

        # td.x = td.x.mean('time')
        # td.y = td.y.mean('time')
        # td.dx = td.dx.mean('time')

        # flatten to 1D and drop samples where x or y is not finite
        td.x = td.x.values.flatten()
        td.y = td.y.values.flatten()
        td.dx = td.dx.values.flatten()
        args = np.isfinite(td.y) & np.isfinite(td.x)
        td.x = td.x[args]
        td.y = td.y[args]
        td.dx = td.dx[args]
    # td.y = td.y * np.exp(td.x) ** 3

    td_cloud.fit_result = lm_mod.fit(data=td_cloud.y, x=td_cloud.x, **params_cloud)
    td_rain.fit_result = lm_mod.fit(data=td_rain.y, x=td_rain.x, **params_rain)
    return td_cloud, td_rain


# Fit results per cloud, keyed by str(cloud_id)
test_dict = {}

for cloud_id in identified_clouds.cloud_id.values:
    cloud_id_str = str(cloud_id)
    try:
        td_cloud, td_rain = fit_both(cloud_id)
    except TypeError:
        # Skip this cloud. Without the `continue` the results of the previously fitted cloud
        # would be stored under this id (or a NameError raised on the first iteration).
        print("error in cloud_id", cloud_id)
        continue
    test_dict[cloud_id_str] = dict(
        cloud=td_cloud,
        rain=td_rain,
    )

In [None]:
# Pick one cloud at random and compare its observations with the stored rain fit
cloud_id = np.random.choice(identified_clouds.cloud_id.values)
# cloud_id = 356
cloud_id_str = str(cloud_id)

ds = match_clouds_and_cloudcomposite(
    ds_clouds=select_individual_cloud_by_id(identified_clouds, cloud_id),
    ds_cloudcomposite=cloud_composite,
)
radius = ds["radius"]
dx = ds["bin_width"]
psd = ds["particle_size_distribution"]


# The fit describes psd / bin_width, so it is multiplied by the bin width again
td_rain = test_dict[cloud_id_str]["rain"]
x = np.log(radius)
psd_fit = td_rain.fit_result.eval(x=x) * dx
lwc_fit = 1000 * psd_fit * 4 / 3 * np.pi * RADIUS**3


fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 3))
ax.plot(psd.radius, psd, marker=".", linestyle="None", color="k", alpha=0.2)
ax.plot(radius, psd_fit, color="r", linestyle=":")

ax.set_xscale("log")
ax.set_yscale("symlog", linthresh=1, linscale=0.1)
ax.set_ylim(-1, None)

fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(7, 3), width_ratios=[1, 0.07])

ax = axs[0]
cax = axs[1]

msd = psd * psd["radius"] ** 3
lwc = ds["liquid_water_content"]

# The radius is scaled by 1e6 so the axis matches its "Radius [µm]" label
# (same convention as the cumulative plots further down).
# NOTE(review): msd here is psd * radius**3 without the 4/3 * pi * density factor - confirm
# that the "MSD [mg/m³]" colorbar label is what is intended.
pcm = ax.pcolormesh(
    psd.time,
    1e6 * psd.radius,
    1e9 * msd,
    cmap="Blues",
    shading="nearest",
    vmin=0,
    vmax=50,
)
fig.colorbar(pcm, cax=cax, label="MSD [mg/m³]")
ax.set_yscale("log")
ax.set_ylabel("Radius [µm]")

ax2 = ax.twinx()
ax2.plot(lwc.time, 1e3 * lwc, color="black", lw=2, linestyle=":")
ax2.axhline(1e3 * lwc.mean("time"), color="black", lw=2, linestyle="-")
# ax2.fill_between(lwc.time,
#                 1e3 * (lwc.mean('time') - lwc.std('time')),
#                 1e3 * (lwc.mean('time') + lwc.std('time')),
#                 color="black",
#                 alpha = 0.1
#     )
ax2.axhline(1e3 * lwc_fit.sum(), color="red", lw=2)
ax2.set_ylabel("LWC [g/m³]")

ax2.set_ylim(0, 2)

fig.add_axes(ax2)

fig.tight_layout()

In [None]:
# Normalized cumulative mass (blue, left axis) and number (red, right axis) of the fitted
# bimodal distribution, one curve per cloud.
fig, ax = plt.subplots()
ax2 = ax.twinx()

radius = cloud_composite.radius
# The fits describe psd / bin_width, so per-bin values need the bin width back
# (as in the single-cloud cell, where the fit is multiplied by ds["bin_width"]).
bin_width = cloud_composite["bin_width"].values

for cloud_id in identified_clouds.cloud_id.values:

    d = test_dict.get(str(cloud_id))
    if d is None:
        # no fit stored for this cloud
        continue
    td_cloud = d["cloud"]
    td_rain = d["rain"]

    psd = (
        td_cloud.fit_result.eval(x=np.log(radius)) + td_rain.fit_result.eval(x=np.log(radius))
    ) * bin_width
    msd = 1000 * 4 / 3 * np.pi * psd * radius**3
    # msd_cumsum = ds['mass_size_distribution'].cumsum('radius')
    psd_cumsum = np.cumsum(psd)
    msd_cumsum = np.cumsum(msd)
    # normalize both cumulative curves to end at 1
    msd_cumsum = msd_cumsum / msd_cumsum[-1]
    psd_cumsum = psd_cumsum / psd_cumsum[-1]

    ax.plot(
        1e6 * radius,
        msd_cumsum,
        # marker = 'o',
        color="blue",
    )

    ax2.plot(
        1e6 * radius,
        psd_cumsum,
        # marker = 'x',
        color="red",
    )

ax.set_xscale("log")
ax2.set_xscale("log")

In [None]:
# Normalized cumulative mass (blue, left axis) and number (red, right axis) of the observed
# distributions, averaged over time, one curve per cloud.
fig, ax = plt.subplots()
ax2 = ax.twinx()

for cloud_id in identified_clouds.cloud_id:

    dsi = select_individual_cloud_by_id(identified_clouds, cloud_id)

    # observations within the time range of this cloud
    ds = cloud_composite.sel(time=slice(dsi.start.values[0], dsi.end.values[0]))

    msd_cumsum = ds["mass_size_distribution"].cumsum("radius")
    psd_cumsum = ds["particle_size_distribution"].cumsum("radius")

    # divide by the value at the largest radius so every cumulative curve ends at 1
    msd_cumsum = msd_cumsum / msd_cumsum.isel(radius=-1)
    psd_cumsum = psd_cumsum / psd_cumsum.isel(radius=-1)

    ax.plot(
        1e6 * msd_cumsum.radius,
        msd_cumsum.mean("time"),
        # marker = 'o',
        color="blue",
    )

    ax2.plot(
        1e6 * psd_cumsum.radius,
        psd_cumsum.mean("time"),
        # marker = 'x',
        color="red",
    )

ax.set_xscale("log")
ax2.set_xscale("log")

In [None]:
# Normalized cumulative mass (blue, left axis) and number (red, right axis) of the fitted
# bimodal distribution, one curve per cloud (repeated for comparison with the observations).
fig, ax = plt.subplots()
ax2 = ax.twinx()

radius = cloud_composite.radius
# The fits describe psd / bin_width, so per-bin values need the bin width back
# (as in the single-cloud cell, where the fit is multiplied by ds["bin_width"]).
bin_width = cloud_composite["bin_width"].values

for cloud_id in identified_clouds.cloud_id.values:

    d = test_dict.get(str(cloud_id))
    if d is None:
        # no fit stored for this cloud
        continue
    td_cloud = d["cloud"]
    td_rain = d["rain"]

    psd = (
        td_cloud.fit_result.eval(x=np.log(radius)) + td_rain.fit_result.eval(x=np.log(radius))
    ) * bin_width
    msd = 1000 * 4 / 3 * np.pi * psd * radius**3
    # msd_cumsum = ds['mass_size_distribution'].cumsum('radius')
    psd_cumsum = np.cumsum(psd)
    msd_cumsum = np.cumsum(msd)
    # normalize both cumulative curves to end at 1
    msd_cumsum = msd_cumsum / msd_cumsum[-1]
    psd_cumsum = psd_cumsum / psd_cumsum[-1]

    ax.plot(
        1e6 * radius,
        msd_cumsum,
        # marker = 'o',
        color="blue",
    )

    ax2.plot(
        1e6 * radius,
        psd_cumsum,
        # marker = 'x',
        color="red",
    )

ax.set_xscale("log")
ax2.set_xscale("log")

In [None]:
# Observed total mass (x, with standard error of the mean over time) against the mass of the
# fitted rain mode (y), one point per cloud; the dashed line marks y = x.
fig, ax = plt.subplots()
ax2 = ax.twinx()

for cloud_id in identified_clouds.cloud_id.values:

    fit = test_dict.get(str(cloud_id))
    if fit is None:
        # no fit stored for this cloud
        continue
    td_rain = fit["rain"]

    dsi = select_individual_cloud_by_id(identified_clouds, cloud_id)
    ds = cloud_composite.sel(time=slice(dsi.start.values[0], dsi.end.values[0]))
    radius = ds.radius

    m_obs, s_obs = mean_and_stderror_of_mean(ds["mass_size_distribution"].sum("radius"), ("time",))

    # The fit describes psd / bin_width; multiply by the bin width to get per-bin values
    # before summing (as in the single-cloud cell).
    # NOTE(review): only the rain mode enters the fitted mass while m_obs sums all radii - confirm.
    psd = td_rain.fit_result.eval(x=np.log(radius)) * ds["bin_width"].values
    msd = 1000 * 4 / 3 * np.pi * psd * radius**3

    m_fit, s_fit = np.sum(msd), 0

    ax.errorbar(
        x=1e3 * m_obs,
        xerr=1e3 * s_obs,
        y=1e3 * m_fit,
        yerr=1e3 * s_fit,
        marker="o",
    )

ax.set_xlim(0, 4)
ax.set_ylim(0, 4)
ax.plot(
    ax.get_xlim(),
    ax.get_ylim(),
    color="black",
    linestyle="--",
)

[<matplotlib.lines.Line2D at 0x7ffed870e480>]

In [None]:
# Pick one cloud at random and fit its rain-sized particles (radius >= 50e-6) in three ways:
# all (time, radius) samples, the time mean, and the bin-width-normalized time mean.
cloud_id = np.random.choice(identified_clouds.cloud_id.values)
# cloud_id = 273
da = select_individual_cloud_by_id(identified_clouds, cloud_id)
start = da["start"].values[0]
end = da["end"].values[0]
ds_match = cloud_composite.sel(time=slice(start, end)).sel(radius=slice(50e-6, None))

lm_mod = lmfit.Model(normal_distribution, independent_vars=("x",))
params = lmfit.Parameters()
params.add("scale_factor", value=1)
params.add("mu", value=-8, min=-11, max=-5)
params.add("sigma", value=0.1, max=5)
RADIUS = ds_match["radius"]

# radius and bin width are broadcast along time so x, y and dx share one (time, radius) shape
td_cloud = TestData(
    x=np.log(ds_match["radius"].expand_dims(time=ds_match.time).transpose("time", "radius")),
    y=ds_match["particle_size_distribution"].transpose("time", "radius"),
    dx=ds_match["bin_width"].expand_dims(time=ds_match.time).transpose("time", "radius"),
    name="cloud",
)


td_cloud_mean = TestData(
    x=np.log(ds_match["radius"]),
    y=ds_match["particle_size_distribution"].mean(dim="time"),
    dx=ds_match["bin_width"],
    name="cloud",
)

td_cloud_mean_norm = td_cloud_mean.normalize()

# flatten to 1D numpy arrays and drop samples where x or y is not finite
for td in (td_cloud, td_cloud_mean, td_cloud_mean_norm):
    td.x = td.x.values.flatten()
    td.y = td.y.values.flatten()
    td.dx = td.dx.values.flatten()

    args = np.isfinite(td.y) & np.isfinite(td.x)
    td.x = td.x[args]
    td.y = td.y[args]
    td.dx = td.dx[args]

# plt.xscale('log')
# fit the normal distribution (in ln(radius)) to the data of all three TestData objects

for td in (td_cloud, td_cloud_mean, td_cloud_mean_norm):
    td.fit_result = lm_mod.fit(data=td.y, x=td.x, **params)

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 3))
ax.scatter(
    td_cloud.x,
    td_cloud.y,
    marker=".",
    # color = 'b',
)
ax.scatter(
    td_cloud_mean.x,
    td_cloud_mean.y,
    marker="o",
    # color = 'r',
)

# td_cloud.x = np.exp(td_cloud.x)

x = np.arange(-12, -6, 0.1)
ax.plot(x, td_cloud.fit_result.eval(x=x), color="r")
ax.plot(x, td_cloud_mean.fit_result.eval(x=x), color="b", linestyle="--")
# the normalized fit lives on a different scale and gets its own y axis
ax1 = ax.twinx()
ax1.plot(x, td_cloud_mean_norm.fit_result.eval(x=x), color="g", linestyle=":")

psd_fit = td_cloud.fit_result.eval(x=np.log(RADIUS))
lwc_fit = 1000 * psd_fit * 4 / 3 * np.pi * RADIUS**3
# plt.xscale('log')

fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(7, 3), width_ratios=[1, 0.07])

ax = axs[0]
cax = axs[1]

psd = ds_match["particle_size_distribution"]
msd = psd * psd["radius"] ** 3
lwc = ds_match["liquid_water_content"]

# The radius is scaled by 1e6 so the axis matches its "Radius [µm]" label
# (same convention as the cumulative plots above).
# NOTE(review): msd is psd * radius**3 with the unscaled radius - confirm the colorbar unit label.
pcm = ax.pcolormesh(
    psd.time,
    1e6 * psd.radius,
    msd,
    cmap="Blues",
    shading="nearest",
)
ax.set_yscale("log")
ax.set_ylabel("Radius [µm]")

ax2 = ax.twinx()
ax2.plot(lwc.time, 1e3 * lwc, color="black", lw=2, linestyle=":")
ax2.axhline(1e3 * lwc.mean("time"), color="black", lw=2, linestyle="-")
ax2.fill_between(
    lwc.time,
    1e3 * (lwc.mean("time") - lwc.std("time")),
    1e3 * (lwc.mean("time") + lwc.std("time")),
    color="black",
    alpha=0.1,
)
ax2.axhline(1e3 * lwc_fit.sum(), color="red", lw=2)
ax2.set_ylabel("LWC [g/m³]")

fig.add_axes(ax2)

fig.colorbar(pcm, cax=cax, label="MSD [µm³/m³]")
fig.tight_layout()

In [None]:
# `cloud_ids` was never defined in the cells above, so this cell failed with a NameError on a
# fresh kernel. All identified clouds are used here.
# NOTE(review): restrict this list if only a subset of clouds was intended.
cloud_ids = identified_clouds.cloud_id.values

# Particle size distribution per cloud, keyed by str(cloud_id), with a leading cloud_id dimension
psd_observations = dict()

for cloud_id in cloud_ids:
    da = select_individual_cloud_by_id(identified_clouds, cloud_id)
    start = da["start"].values[0]
    end = da["end"].values[0]
    ds_match = cloud_composite.sel(time=slice(start, end))

    psd = ds_match["particle_size_distribution"]
    psd = psd.expand_dims(dim=dict(cloud_id=[cloud_id]))

    psd_observations[str(cloud_id)] = psd

In [None]:
# Collect, per cloud, the liquid water content time series plus its mean and standard error
# of the mean over time. The particle size distributions are stored in psd_observations too.
lwc_means = []
lwc_sems = []
lwc_observations = dict()

for cloud_id in cloud_ids:

    # observations within the time range of this cloud
    da = select_individual_cloud_by_id(identified_clouds, cloud_id)
    start = da["start"].values[0]
    end = da["end"].values[0]
    ds_match = cloud_composite.sel(time=slice(start, end))

    # Particle size distribution
    psd = ds_match["particle_size_distribution"]
    psd = psd.expand_dims(dim=dict(cloud_id=[cloud_id]))
    psd_observations[str(cloud_id)] = psd

    # Liquid water content
    lwc = ds_match["liquid_water_content_original"]
    lwc_mean, lwc_sem = mean_and_stderror_of_mean(
        data=lwc,
        dims=("time",),
    )
    lwc_mean = lwc_mean.compute()
    lwc_sem = lwc_sem.compute()

    # add a length-1 cloud_id dimension so the per-cloud results can be concatenated below
    lwc_mean = lwc_mean.expand_dims(dim=dict(cloud_id=[cloud_id]))
    lwc_sem = lwc_sem.expand_dims(dim=dict(cloud_id=[cloud_id]))
    lwc = lwc.expand_dims(dim=dict(cloud_id=[cloud_id]))

    lwc_means.append(lwc_mean)
    lwc_sems.append(lwc_sem)
    lwc_observations[str(cloud_id)] = lwc

# NOTE: lwc_mean / lwc_sem are rebound here from the last per-cloud value to the combined arrays
lwc_mean = xr.concat(lwc_means, dim="cloud_id")
lwc_sem = xr.concat(lwc_sems, dim="cloud_id")

In [None]:
# Observations of the first cloud in cloud_ids: psd * radius**3 as colour map, LWC as line
fig, ax = plt.subplots(1, 1, figsize=(10, 5), sharex=True)


# the mean only removes the length-1 cloud_id dimension added when the data was collected
psd = psd_observations[str(cloud_ids[0])].mean("cloud_id")
msd = psd * psd["radius"] ** 3
lwc = lwc_observations[str(cloud_ids[0])].mean("cloud_id")

# The radius is scaled by 1e6 so the axis matches its "Radius [µm]" label
# (same convention as the cumulative plots above).
ax.pcolormesh(
    psd.time,
    1e6 * psd.radius,
    msd,
    cmap="Reds",
    shading="nearest",
)
ax.set_yscale("log")
ax.set_ylabel("Radius [µm]")

# NOTE(review): other cells plot 1e3 * liquid_water_content as g/m³; this one plots
# liquid_water_content_original unscaled - confirm that variable is already in g/m³.
ax2 = ax.twinx()
ax2.plot(lwc.time, lwc, color="black", lw=2)
ax2.set_ylabel("LWC [g/m³]")

Text(0, 0.5, 'LWC [g/m³]')

In [None]:
def fit_particle_size_distribution(
    ds_cloudcomposite: xr.DataArray,
    particle_split_radius: float = 45,  # 45 micrometer
) -> transfer.PSD_LnNormal:
    """
    Fits a lognormal particle size distribution (PSD) to the rain droplets.

    Note
    ----
    Only the rain mode is fitted at the moment.
    The data is restricted to radii from ``particle_split_radius`` upwards and
    a single ``transfer.PSD_LnNormal`` model is fitted to this selection, with
    - geometric mean bounded by the smallest and largest radius of the selection.
    The fit of the small cloud and drizzle droplets (radii up to the split
    radius) is disabled. The code for it is kept as a comment at the end of
    the function body so that the bimodal fit can be restored.

    Parameters
    ----------
    ds_cloudcomposite : xr.DataArray
        Particle size distribution with a ``radius`` coordinate.
        Despite the parameter name this needs to be a DataArray, because the
        values handed to the fit are read from its ``.data`` attribute.
    particle_split_radius : float, optional
        The radius at which to split the data into cloud and rain droplets. Default is 45 micrometers.

    Returns
    -------
    psd_fit : transfer.PSD_LnNormal
        The rain-mode PSD object, after the fitted parameter values have been
        passed to its ``lmfitParameterValues_to_dict`` method.
    """

    # Keep only the rain droplets (label-based selection on ``radius``)
    ds_rain_droplets = ds_cloudcomposite.sel(radius=slice(particle_split_radius, None))

    # ======================================
    # Fit the PSD
    # ======================================

    # Use the PSD_LnNormal model
    psd_rain_fit = transfer.PSD_LnNormal()

    # ---------
    # Rain
    # ---------
    data = ds_rain_droplets
    # radius values arranged to match ``data`` so they can be passed along with it
    # (presumably broadcast to the same shape - see shape_dim_as_dataarray)
    radi2d = shape_dim_as_dataarray(da=data, output_dim="radius")
    psd_model = psd_rain_fit.get_model()

    # update geometric mean to be within range of the data
    psd_rain_fit.update_individual_model_parameters(
        lmfit.Parameter(
            name="geometric_means",
            min=data["radius"].min().data,
            max=data["radius"].max().data,
        )
    )

    # fit model parameters and update them
    # nan_policy="omit" lets lmfit ignore NaN values in the data
    model_result = psd_model.fit(
        data=data.data, radii=radi2d.data, params=psd_rain_fit.get_model_parameters(), nan_policy="omit"
    )
    psd_rain_fit.lmfitParameterValues_to_dict(model_result.params)

    # TODO: The cloud and drizzle mode is currently not fitted.
    # The disabled code is kept below for reference.
    #
    # # ---------
    # # Small cloud and drizzle
    # # ---------
    # ds_small_droplets = ds_cloudcomposite.sel(radius=slice(None, particle_split_radius))
    # psd_cloud_fit = transfer.PSD_LnNormal()
    # # For this, the parameters need to be updated

    # # update geometric mean to be within range of 0.1 micrometer and the split radius
    # psd_cloud_fit.update_individual_model_parameters(
    #     lmfit.Parameter(
    #         name="geometric_means",
    #         value=1e-5 * 1e6,
    #         min=0.1e-6 * 1e6,  # at least 0.1 micrometer
    #         max=particle_split_radius,  # at most the split radius (default 45 micrometer)
    #     )
    # )
    # # update geometric sigma to be within range of 0 and 1.7.
    # # NOTE: No real physical meaning, but it is a good range for the fit
    # psd_cloud_fit.update_individual_model_parameters(
    #     lmfit.Parameter(
    #         name="geometric_sigmas",
    #         value=1.1,
    #         min=0,
    #         max=1.7,
    #     )
    # )

    # data = ds_small_droplets
    # radi2d = shape_dim_as_dataarray(da=data, output_dim="radius")
    # psd_model = psd_cloud_fit.get_model()
    # # fit model parameters and update them
    # model_result = psd_model.fit(
    #     data=data.data, radii=radi2d.data, params=psd_cloud_fit.get_model_parameters(), nan_policy="omit"
    # )
    # psd_cloud_fit.lmfitParameterValues_to_dict(model_result.params)

    # # --------
    # # Combine the fits
    # # --------

    psd_fit = psd_rain_fit  # + psd_cloud_fit

    return psd_fit

In [None]:
# Logarithmically spaced radius grids. Each grid is stored as a DataArray whose
# values and ``radius`` coordinate are both the grid itself.

# 100 points spanning the radius range of ``psd``
r = np.logspace(np.log10(psd["radius"].min()), np.log10(psd["radius"].max()), num=100)
radius_equal100 = xr.DataArray(data=r, dims=["radius"], coords={"radius": r})

# 100 points from 10**-1 to 10**10, independent of the data
r = np.logspace(start=-1, stop=10, num=100)
radius_equal100_min = xr.DataArray(data=r, dims=["radius"], coords={"radius": r})

# 30 points spanning the radius range of ``psd``
r = np.logspace(np.log10(psd["radius"].min()), np.log10(psd["radius"].max()), num=30)
radius_equal30 = xr.DataArray(data=r, dims=["radius"], coords={"radius": r})

In [None]:
# Fitted PSD objects and their evaluations, all keyed by the cloud id string:
#   psd_fits  / lwc_fits  -> evaluated on the radii of the observations
#   psd_fits2 / lwc_fits2 -> evaluated on ``radius_equal30``
psd_fit_funcs, psd_fits, psd_fits2 = {}, {}, {}
lwc_fits, lwc_fits2 = {}, {}

# First pass: fit every cloud and evaluate the fit on both radius grids
for cloud_id in cloud_ids:
    cloud_id_str = str(cloud_id)
    print(cloud_id_str)

    # psd = psd.where(psd != 0, drop=False)
    psd = psd_observations[cloud_id_str].mean("cloud_id")

    psd_fit_func = fit_particle_size_distribution(ds_cloudcomposite=psd)
    psd_fit_funcs[cloud_id_str] = psd_fit_func

    psd_fits[cloud_id_str] = psd_fit_func.eval_func(psd["radius"])
    psd_fits2[cloud_id_str] = psd_fit_func.eval_func(radius_equal30)


# Second pass: liquid water content of each evaluated fit, scaled by 1e3
# (presumably a unit conversion of the ``lwc_from_psd`` result - TODO confirm)
for cloud_id in cloud_ids:
    cloud_id_str = str(cloud_id)

    psd_fit = psd_fits[cloud_id_str]
    lwc_fit = 1e3 * lwc_from_psd(xr.Dataset(data_vars={"particle_size_distribution": psd_fit}))
    lwc_fits[cloud_id_str] = lwc_fit

    psd_fit = psd_fits2[cloud_id_str]
    lwc_fit = 1e3 * lwc_from_psd(xr.Dataset(data_vars={"particle_size_distribution": psd_fit}))
    lwc_fits2[cloud_id_str] = lwc_fit

273
355
221
118


In [None]:
# One panel per cloud: the fitted PSD (evaluated on two radius grids) on top of
# the observed PSD.
fig, axs = plt.subplots(
    figsize=(6, 6),
    sharex=True,
    sharey=True,
    **ncols_nrows_from_N(len(cloud_ids)),
)

for idx, cloud_id in enumerate(cloud_ids):
    cloud_id_str = str(cloud_id)

    psd_obs = psd_observations[cloud_id_str].mean("cloud_id")
    # LWC values are looked up alongside the PSDs but are not drawn in this figure
    lwc_obs = lwc_observations[cloud_id_str].mean("time")

    psd_fit = psd_fits[cloud_id_str]
    lwc_fit = lwc_fits[cloud_id_str]

    psd_fit2 = psd_fits2[cloud_id_str]
    lwc_fit2 = lwc_fits2[cloud_id_str]

    ax = axs.flatten()[idx]

    # Every artist gets a label, otherwise ax.legend() below has nothing to show
    # and only logs "No artists with labels found to put in legend."
    ax.plot(
        psd_fit["radius"],
        psd_fit,
        alpha=0.5,
        label="Fit on observed radii",
    )

    ax.plot(
        psd_fit2["radius"],
        psd_fit2,
        alpha=0.5,
        label="Fit on radius_equal30",
    )

    # The observations may produce several lines from a single plot call
    # (one per column if psd_obs is two-dimensional). Only the first line is
    # labelled so that the legend shows a single "Observations" entry.
    obs_lines = ax.plot(
        psd_obs["radius"],
        psd_obs,
        marker=".",
        color="grey",
        alpha=0.1,
        linestyle="None",
    )
    if obs_lines:
        obs_lines[0].set_label("Observations")

    # ax.plot(
    #     sel_cleo_psd_mean["radius_bins"],
    #     sel_cleo_psd_mean,
    #     marker = "x",
    #     linestyle = "-",
    #     color = "r",
    #     label = f"CLEO stationary"
    # )

    ax.legend(loc="upper left")
    ax.set_title(f"Cloud ID: {cloud_id}")


# Axes are shared, so the scales and limits only need to be set on one panel
ax = axs.flatten()[0]
ax.set_xscale("log")
ax.set_yscale("symlog", linthresh=1e0, linscale=1)
ax.set_ylim(0, 1e8)

for ax in axs[-1, :]:
    ax.set_xlabel("Radius [µm]")

for ax in axs[:, 0]:
    # raw string: "\m" is not a valid escape sequence in a normal string literal
    ax.set_ylabel(r"Number concentration $[m^{-3} (log(\mu m))^{-1}]$")

add_subplotlabel(axs, location="title")

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


In [None]:
# Three stacked panels for the first cloud, each showing a PSD as a colour mesh
# with the corresponding liquid water content on a second y-axis:
#   1. observations
#   2. fit evaluated on the radii of the observations
#   3. fit evaluated on ``radius_equal30``
fig, axs = plt.subplots(ncols=1, nrows=3, figsize=(8, 8), sharex=True, sharey=True)


psd = psd_observations[str(cloud_ids[0])].mean("cloud_id")
lwc = lwc_observations[str(cloud_ids[0])].mean("cloud_id")

# The fits carry no time information; repeat them along the observation time
# axis so they can be drawn on the same axes as the observations.
psd_fit = psd_fits[str(cloud_ids[0])].expand_dims(time=psd.time)
lwc_fit = lwc_fits[str(cloud_ids[0])].expand_dims(time=psd.time)

psd_fit2 = psd_fits2[str(cloud_ids[0])].expand_dims(time=psd.time)
lwc_fit2 = lwc_fits2[str(cloud_ids[0])].expand_dims(time=psd.time)

# (title, PSD as passed to pcolormesh, LWC) for each panel.
# The expanded fits are transposed, exactly as in the original plotting calls.
panels = (
    ("Observations", psd, lwc),
    ("Fit on observed radii", psd_fit.T, lwc_fit),
    ("Fit on radius_equal30", psd_fit2.T, lwc_fit2),
)

for axpsd, (panel_title, panel_psd, panel_lwc) in zip(axs, panels):
    axlwc = axpsd.twinx()

    mesh = axpsd.pcolormesh(
        panel_psd.time,
        panel_psd.radius,
        panel_psd,
        cmap="Reds",
        shading="nearest",
    )
    axpsd.set_yscale("log")
    axpsd.set_ylabel("Radius [µm]")
    axpsd.set_title(panel_title)

    axlwc.plot(panel_lwc.time, panel_lwc, color="black", lw=2)
    axlwc.set_ylabel("LWC [g/m³]")

    # Attach the colorbar to this panel's own mesh and let it take space from
    # both twinned axes. Passing the data axes as ``cax`` (as done before)
    # draws the colorbar into the panel itself and overwrites the plot.
    # The larger pad leaves room for the LWC axis on the right-hand side.
    fig.colorbar(mesh, ax=[axpsd, axlwc], pad=0.12)

In [None]:
# Compare the radius grid of the cloud composite with the 30-point log-spaced
# grid; both are drawn as markers only, on a logarithmic x-axis.
cloud_composite["radius"].plot(linestyle="None", marker=".")
radius_equal30.plot(linestyle="None", marker=".")
plt.gca().set_xscale("log")