In [None]:
import warnings
import numpy as np
from scipy.stats import norm
import xarray as xr
import matplotlib.pyplot as plt
import lmfit
from typing import Union, List, Tuple

from sdm_eurec4a.visulization import (
    set_custom_rcParams,
)
from sdm_eurec4a.identifications import select_individual_cloud_by_id, match_clouds_and_cloudcomposite

from sdm_eurec4a.reductions import mean_and_stderror_of_mean

# Install a blanket "ignore" filter: no warning is shown from here on.
# NOTE(review): this also hides numerical and fit warnings - consider a narrower filter.
warnings.filterwarnings("ignore")

# Keep the value returned by the project helper; the plotting cells below
# index it to pick one colour per data set (default_colors[i]).
default_colors = set_custom_rcParams()

In [None]:
import random


def random_integers_sum_to_n(parts, n):
    """
    Split the integer ``n`` into ``parts`` random, strictly positive integers.

    ``parts - 1`` distinct break points are drawn from ``1 .. n - 1`` with the
    module-level ``random`` generator (seed it with ``random.seed`` for
    reproducible results). The distances between neighbouring break points,
    including the outer edges ``0`` and ``n``, are the returned parts.

    Parameters:
    parts (int): The number of parts to divide the integer into (at least 1).
    n (int): The integer to be divided.

    Returns:
    List[int]: ``parts`` positive integers that sum up to ``n``.

    Raises:
    ValueError: If ``parts`` is smaller than 1 or greater than ``n``.
    """
    if parts < 1:
        raise ValueError("Number of parts must be at least 1.")
    if parts > n:
        raise ValueError("Number of parts cannot be greater than the integer itself.")

    # Distinct interior break points guarantee that no part is zero.
    break_points = sorted(random.sample(range(1, n), parts - 1))

    # Each part is the distance between two neighbouring edges.
    lower_edges = [0] + break_points
    upper_edges = break_points + [n]
    return [upper - lower for lower, upper in zip(lower_edges, upper_edges)]

In [None]:
def ln_normal_distribution(
    x: np.ndarray, scale_factor: float, geometric_mean: float, geometric_sigma: float
) -> np.ndarray:
    """
    Evaluate a log-normal distribution per unit ``ln(x)``.

    The result is a Gaussian in ``ln(x)`` with mean ``ln(geometric_mean)`` and
    standard deviation ``ln(geometric_sigma)``, multiplied by ``scale_factor``
    (eq. 5.8 in [lohmann intro 2 clouds]).

    Parameters
    ----------
    x : np.ndarray
        Positions at which the distribution is evaluated.
    scale_factor : float
        Multiplicative factor applied to the normalised Gaussian.
    geometric_mean : float
        Geometric mean of the distribution.
    geometric_sigma : float
        Geometric standard deviation of the distribution.

    Returns
    -------
    np.ndarray
        Distribution values at ``x``.
    """
    log_mean = np.log(geometric_mean)
    log_sigma = np.log(geometric_sigma)

    # Prefactor and exponent of the Gaussian in ln(x).
    prefactor = scale_factor / (np.sqrt(2 * np.pi) * log_sigma)
    log_distance = np.log(x) - log_mean

    return prefactor * np.exp(-(log_distance**2) / (2 * log_sigma**2))


def normal_distribution(x, mu, sigma, scale_factor):
    """
    Gaussian with mean ``mu`` and standard deviation ``sigma``, multiplied by ``scale_factor``.
    """
    amplitude = scale_factor * 1 / (sigma * np.sqrt(2 * np.pi))
    squared_distance = (x - mu) ** 2
    return amplitude * np.exp(-squared_distance / (2 * sigma**2))


def diff_same_size(x: np.ndarray):
    """
    Estimate the spacing that belongs to each value of ``x``.

    Interior points get the mean of the distances to their left and right
    neighbour. The first and the last point only have one neighbour and get
    the distance to that neighbour.

    Parameters
    ----------
    x : np.ndarray
        One-dimensional array of positions with at least two values.

    Returns
    -------
    np.ndarray
        Floating point array with the same length as ``x``.

    Raises
    ------
    ValueError
        If ``x`` holds fewer than two values.
    """
    x = np.asarray(x)
    # Work in floating point: an integer result array would silently truncate
    # the averaged spacings (e.g. 1.5 -> 1).
    if not np.issubdtype(x.dtype, np.inexact):
        x = x.astype(float)
    if x.size < 2:
        raise ValueError("x needs at least two values to compute a spacing.")

    # Distances between consecutive x values
    diffs = np.diff(x)

    dx = np.empty_like(x)

    # Interior points: average of the distances to both neighbours
    dx[1:-1] = (diffs[:-1] + diffs[1:]) / 2

    # End points: distance to their single neighbour
    dx[0] = diffs[0]
    dx[-1] = diffs[-1]

    return dx

# Create minimum problem example

In [None]:
class TestData:
    """
    Container for binned test data: positions ``x``, values ``y`` and bin widths ``dx``.

    ``y_normalized`` (``y / dx``) is computed once in ``__init__`` and is NOT
    refreshed when ``x``, ``y`` or ``dx`` are reassigned afterwards.
    Instance attributes can also be read and written with item syntax
    (``td["x"]``), which accesses the instance ``__dict__`` directly.
    """

    def __init__(
        self,
        x: Union[np.ndarray, List, Tuple],
        y: Union[np.ndarray, List, Tuple],
        dx: Union[np.ndarray, List, Tuple],
        name: str = "",
    ):
        # Plain lists/tuples do not support element-wise division, so they are
        # converted to arrays; every other object is stored as given.
        self.x = self._as_array(x)
        self.y = self._as_array(y)
        self.dx = self._as_array(dx)

        self.y_normalized = self.y / self.dx
        self.name = name

    @staticmethod
    def _as_array(values):
        """Convert lists and tuples to numpy arrays, pass anything else through."""
        if isinstance(values, (list, tuple)):
            return np.asarray(values)
        return values

    def __getitem__(self, key):
        """Return the instance attribute stored under ``key``."""
        return self.__dict__[key]

    def __setitem__(self, key, value):
        """Store ``value`` as instance attribute ``key``."""
        self.__dict__[key] = value

    def resample(self, width: np.ndarray) -> "TestData":
        """
        Resample the data in non uniform intervals given by width array
        The width array elements need to sum up to the length of the data array

        Each interval groups ``width[i]`` consecutive data points: ``x`` becomes
        their mean, ``y`` and ``dx`` become their sums. The returned object has
        an empty name.

        Parameters:
        -----------
        width: array
            array with the width (number of data points) of the intervals

        Returns:
        --------
        TestData object with resampled data
        """
        width = np.asarray(width)
        assert np.sum(width) == len(self.x), "width must sum up to the number of data points"

        # slice boundaries of every interval
        end = np.cumsum(width)
        start = end - width
        intervals = list(zip(start, end))

        x = [np.mean(self.x[s:e]) for s, e in intervals]
        y = [np.sum(self.y[s:e]) for s, e in intervals]
        dx = [np.sum(self.dx[s:e]) for s, e in intervals]

        return TestData(np.array(x), np.array(y), np.array(dx))

    def normalize(self) -> "TestData":
        """
        Normalize the data by dividing the y values by the dx values

        The returned object keeps ``x``, ``dx`` and the name; its ``y`` holds
        ``y / dx``. Its own ``y_normalized`` therefore divides by ``dx`` a
        second time, so plot the result with ``normalized=False``.
        """
        return TestData(
            x=self.x,
            y=self.y / self.dx,
            dx=self.dx,
            # keep the name, otherwise legend labels built from it are empty
            name=self.name,
        )

    def __str__(self) -> str:
        return f"{self.name}\nx: {self.x},\ny: {self.y},\ndx: {self.dx}"

    def _y_values(self, normalized):
        """Return ``y_normalized`` if ``normalized`` is true, otherwise ``y``."""
        return self.y_normalized if normalized else self.y

    def plot_bar(self, ax=None, normalized=False, **kwargs):
        """
        Draw the data as bars of width ``dx`` centred on ``x``.

        A new figure is created when ``ax`` is None; ``kwargs`` are passed on
        to ``ax.bar``. Returns the axes that was drawn on.
        """
        if ax is None:
            _, ax = plt.subplots()

        ax.bar(x=self.x, height=self._y_values(normalized), width=self.dx, **kwargs)
        return ax

    def plot_scatter(self, ax=None, normalized=False, **kwargs):
        """
        Draw the data as scatter points at ``x``.

        A new figure is created when ``ax`` is None; ``kwargs`` are passed on
        to ``ax.scatter``. Returns the axes that was drawn on.
        """
        if ax is None:
            _, ax = plt.subplots()

        ax.scatter(x=self.x, y=self._y_values(normalized), **kwargs)
        return ax

    @property
    def fit_result(self):
        """Fit result stored for this data set; AttributeError if none was set yet."""
        return self._fit_result

    @fit_result.setter
    def fit_result(self, fit_result):
        self._fit_result = fit_result


# Standard normal pdf sampled on an equally spaced grid from -6 to 6
x_equal = np.arange(-6, 7, 1, dtype=float)
grid_spacing = x_equal[1] - x_equal[0]
dx_equal = np.full_like(a=x_equal, fill_value=grid_spacing)
y_equal = norm.pdf(x_equal)

td1 = TestData(x=x_equal, y=y_equal, dx=dx_equal, name="equal")

# First non uniform version: group neighbouring points into 9 intervals
width = np.array([2, 1, 1, 1, 1, 3, 1, 2, 1])
td2 = td1.resample(width)
td2.name = "uneq.1"

# Second non uniform version: 7 coarser intervals
width = np.array([2, 1, 3, 3, 1, 2, 1])
td3 = td1.resample(width)
td3.name = "uneq.2"

# Same three data sets with y divided by the bin width
td1_normalized, td2_normalized, td3_normalized = (
    test_data.normalize() for test_data in (td1, td2, td3)
)

# Show the bin widths of every version
for td in (td1_normalized, td2_normalized, td3_normalized):
    print(td.dx)

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[2. 1. 1. 1. 1. 3. 1. 2. 1.]
[2. 1. 3. 3. 1. 2. 1.]


In [None]:
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(6, 3), sharex=True, sharey=True)
ax = axs[0]
ax_norm = axs[1]

style = dict(
    alpha=0.8,
    linewidth=2,
)

markers = ["o", "x", "s"]
for i, td in enumerate([td1, td2, td3]):
    # left panel: raw values per bin
    td.plot_scatter(
        ax=ax,
        normalized=False,
        label=td.name,
        marker=markers[i],
        color=default_colors[i],
        **style,
    )
    # right panel: values divided by the bin width.
    # The raw objects are drawn with normalized=True (y / dx). Drawing the
    # objects returned by normalize() with normalized=True divides by dx a
    # second time, and their empty names leave the legend without entries.
    td.plot_bar(
        ax=ax_norm, normalized=True, label=td.name, edgecolor=default_colors[i], color="None", **style
    )

for _ax in axs:
    _ax.set_xlabel("x")
    _ax.legend()

ax.set_title("normal distribution")
ax.set_ylabel("counts")

ax_norm.set_title("normal distribution\nnormalized by bin width")
ax_norm.set_ylabel("probability")

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


Text(0, 0.5, 'probability')

### Describe the problem

In [None]:
def normal_distribution(x, mu, sigma, scale_factor):
    """
    Scaled Gaussian: ``scale_factor`` times the normal pdf with mean ``mu`` and width ``sigma``.
    """
    prefactor = scale_factor * 1 / (sigma * np.sqrt(2 * np.pi))
    exponent = -((x - mu) ** 2) / (2 * sigma**2)
    return prefactor * np.exp(exponent)


# Gaussian model with "x" as the only independent variable
lm_mod = lmfit.Model(normal_distribution, independent_vars=("x",))

# Initial guesses for the three fit parameters
params = lmfit.Parameters()
for param_name, initial_value in (("scale_factor", 1), ("mu", 2), ("sigma", 2)):
    params.add(param_name, value=initial_value)

# fit the normal distribution to the raw and to the normalized TestData objects
all_test_data = (td1, td2, td3, td1_normalized, td2_normalized, td3_normalized)
for td in all_test_data:
    td.fit_result = lm_mod.fit(data=td.y, x=td.x, **params)

In [None]:
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 3.5), sharex=True, sharey=True)
ax, ax_norm = axs

# fine grid on which the fitted curves are evaluated
x = np.arange(-6, 6, 0.1)

panels = (
    (ax, (td1, td2, td3)),
    (ax_norm, (td1_normalized, td2_normalized, td3_normalized)),
)
for panel_ax, panel_data in panels:
    for td in panel_data:
        # draw the data first, then reuse its colour for the fitted curve
        lines = panel_ax.plot(td.x, td.y, "o")
        color = lines[0].get_color()
        panel_ax.plot(x, td.fit_result.eval(x=x), color=color)

ax.set_title("normal distribution")
ax.set_ylabel("counts")

ax_norm.set_title("normal distribution\nnormalized by bin width")
ax_norm.set_ylabel("probability")

Text(0, 0.5, 'probability')

### How to maintain the integral over the quantity with different x spacings

The sum of the data is equal in both cases.

In [None]:
# Resampling only regroups the values, so the totals have to agree.
# Compare with a tolerance: the partial sums are accumulated in a different
# order, which may change the last bits of a floating point result.
for td in (td2, td3):
    np.testing.assert_allclose(td.y.sum(), td1.y.sum())

for td in (td1, td2, td3):
    print(f"{td.name} {td.y.sum()/ td1.y.sum()}")

equal 1.0
uneq.1 1.0
uneq.2 1.0


But the fits using different x spacings do not give the same sum over the values.

This needs to be solved.
Ask Clara, how she did this.

In [None]:
datasets = (td1, td2, td3)

# header row: a leading tab, then one tab-terminated column title per data set
top = "\t" + "".join(f"{td_x.name}\t" for td_x in datasets)
print(top)

# result[i, j]: fit of data set i evaluated at the x positions of data set j, summed up
result = np.zeros((3, 3))

for i, td_x in enumerate(datasets):
    result[i, :] = [np.sum(td_x.fit_result.eval(x=td_y.x)) for td_y in datasets]

    print(f"{td_x.name}\t{np.round(result[i, :], 4)}")

	equal	uneq.1	uneq.2	
equal	[1.     0.5469 0.3005]
uneq.1	[1.9312 0.9871 0.7387]
uneq.2	[3.9657 2.249  1.0003]


### LogNormal Case


In [None]:
np.random.seed(42)

# x values equally spaced in ln(x) (steps of 0.25 from -4 up to 6), i.e. unequally spaced in x
x_equal = np.exp(np.arange(-4, 6, 0.25, dtype=float))

# width belonging to every x value and the log-normal distribution evaluated at x
dx_equal = diff_same_size(x_equal)
y_equal = ln_normal_distribution(x_equal, scale_factor=1, geometric_mean=1, geometric_sigma=2)

td1_ln = TestData(x_equal, y_equal, dx_equal, name="equal")

N = len(x_equal)

# resample the data into 14 randomly sized groups of neighbouring points
random.seed(42)
width = random_integers_sum_to_n(14, N)
td2_ln = td1_ln.resample(width)
td2_ln.name = "uneq.1"

# resample the data into 7 randomly sized groups of neighbouring points
width = random_integers_sum_to_n(7, N)
td3_ln = td1_ln.resample(width)
td3_ln.name = "uneq.2"


td1_ln_normalized = td1_ln.normalize()
td2_ln_normalized = td2_ln.normalize()
td3_ln_normalized = td3_ln.normalize()

[1, 2, 3, 6, 7, 8, 9, 14, 15, 16, 18, 19, 38]
[2, 13, 14, 15, 27, 33]


In [None]:
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(6, 3), sharex=True, sharey=True)
ax = axs[0]
ax_norm = axs[1]

style = dict(
    alpha=0.8,
    linewidth=2,
)

markers = ["o", "x", "s"]
for i, td in enumerate([td1_ln, td2_ln, td3_ln]):
    # left panel: raw values per bin
    td.plot_scatter(
        ax=ax,
        normalized=False,
        label=rf"{td.name} $\sum$ {td.y.sum():.2f}",
        marker=markers[i],
        color=default_colors[i],
        **style,
    )
    # right panel: values divided by the bin width. y_normalized of the raw
    # object is the same y / dx that normalize() stores as y, but the raw
    # object carries the data set name needed for the legend entry.
    td.plot_scatter(
        ax=ax_norm,
        normalized=True,
        label=rf"{td.name} $\sum$ {td.y_normalized.sum():.2f}",
        marker=markers[i],
        color=default_colors[i],
        **style,
    )

for _ax in axs.flatten():
    _ax.set_xlabel("x")
    _ax.set_xscale("log")
    _ax.legend(loc="upper left")

# the data of this section is log-normal, not normal
ax.set_title("log-normal distribution")
ax.set_ylabel("counts")

ax_norm.set_title("log-normal distribution\nnormalized by bin width")
ax_norm.set_ylabel("probability")

In [None]:
# Log-normal model with "x" as the only independent variable
lm_mod = lmfit.Model(ln_normal_distribution, independent_vars=("x",))

# Initial guesses; every parameter is bounded below by 0
params = lmfit.Parameters()
for param_name, initial_value in (
    ("scale_factor", 1),
    ("geometric_mean", 3),
    ("geometric_sigma", 2),
):
    params.add(param_name, value=initial_value, min=0)

# fit the log-normal distribution to the raw and to the normalized TestData objects
ln_test_data = (
    td1_ln,
    td2_ln,
    td3_ln,
    td1_ln_normalized,
    td2_ln_normalized,
    td3_ln_normalized,
)
for td in ln_test_data:
    try:
        td.fit_result = lm_mod.fit(data=td.y, x=td.x, **params)
    except Exception as e:
        # best effort: report the failure and go on with the next data set;
        # the failed data set is left without a new fit_result
        print(e)

In [None]:
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 3.5), sharex=True, sharey=True)
ax = axs[0]
ax_norm = axs[1]

# fine evaluation grid, equally spaced in ln(x)
x = np.exp(np.arange(-6, 6, 0.01))

raw_data = (td1_ln, td2_ln, td3_ln)
normalized_data = (td1_ln_normalized, td2_ln_normalized, td3_ln_normalized)

for _ax, panel_data in ((ax, raw_data), (ax_norm, normalized_data)):
    for td_raw, td in zip(raw_data, panel_data):
        # make sure to use the same color for data and fit
        lines = _ax.plot(td.x, td.y, "o")
        color = lines[0].get_color()
        y = td.fit_result.eval(x=x)
        # The legend name is taken from the raw data set: the objects returned
        # by normalize() may carry an empty name.
        _ax.plot(x, y, color=color, label=rf"{td_raw.name} $\sum$ {y.sum():.2f}")

# the data of this section is log-normal, not normal
ax.set_title("log-normal distribution")
ax.set_ylabel("counts")

ax_norm.set_title("log-normal distribution\nnormalized by bin width")
ax_norm.set_ylabel("probability")

for _ax in axs:
    _ax.set_xscale("log")
    _ax.legend()

In [None]:
fig, ax = plt.subplots()

# evaluation grid equally spaced in ln(x) and the width belonging to every grid point
x = np.exp(np.arange(-6, 6, 0.1))
dx = diff_same_size(x)

# reference: the original (not resampled) data
td = td1_ln
ax.plot(td.x, td.y, "o", color="black", label=rf"original $\sum$ {td.y.sum():.2f}")

normalized_data = (td1_ln_normalized, td2_ln_normalized, td3_ln_normalized)
for i, td_norm in enumerate(normalized_data):
    fit = td_norm.fit_result
    same_color = dict(color=default_colors[i])

    # The fits were made on width-normalized values, so multiply by the width
    # again: once on the positions of the original data, once on the fine grid.
    y_ln = fit.eval(x=td.x) * td.dx
    y = fit.eval(x=x) * dx

    ax.plot(td.x, y_ln, ".", label=rf"Obs resolved: $\sum$ {y_ln.sum():.2f}", **same_color)
    ax.plot(x, y, "x", label=rf"High resolved: $\sum$ {y.sum():.2f}", **same_color)


ax.set_xscale("log")
ax.legend()

<matplotlib.legend.Legend at 0x7fffa03d0980>

# ATR Observations

In [None]:
# NOTE(review): absolute, user specific paths - the notebook only runs on this machine.
cloud_composite = xr.open_dataset(
    "/home/m/m301096/repositories/sdm-eurec4a/data/observation/cloud_composite/processed/cloud_composite_si_units.nc"
)
identified_clouds = xr.open_dataset(
    "/home/m/m301096/repositories/sdm-eurec4a/data/observation/cloud_composite/processed/identified_clusters/identified_clusters_rain_mask_5.nc"
)

# Additional radius variable scaled by 1e6. It gets a copy of the attributes
# of "radius" in which only the unit is replaced by µm.
# "radius" itself is not converted, so its values and its attributes stay
# untouched (labelling it with µm would contradict its values).
attrs = cloud_composite["radius"].attrs.copy()
attrs.update({"units": "µm"})
cloud_composite["radius_micro"] = 1e6 * cloud_composite["radius"]
cloud_composite["radius_micro"].attrs = attrs

# cloud_composite = cloud_composite.sel(radius = slice(10, None))

# keep clouds that last longer than 50 s and lie between 500 and 1300 in "alt"
identified_clouds = identified_clouds.where(
    (
        (identified_clouds.duration.dt.total_seconds() > 50)
        & (identified_clouds.alt < 1300)
        & (identified_clouds.alt > 500)
    ),
    drop=True,
)

cloud_composite = match_clouds_and_cloudcomposite(identified_clouds, cloud_composite)

cloud_composite

In [None]:
# merge every two neighbouring radius bins by summing them up
coarse_composite = cloud_composite.coarsen({"radius": 2}).sum()
coarse_composite["diameter"] = coarse_composite["radius"] * 2
coarse_composite["flight_number"].plot(marker="o")
coarse_composite

In [None]:
florian = np.array(
    [
        [
            2.500000000000000000e00,
            7.524066670930063765e02,
            5.906341594825936454e02,
            1.531000000000000000e03,
        ],
        [
            3.500000000000000000e00,
            6.857373804622360467e02,
            8.701401747232238222e02,
            1.531000000000000000e03,
        ],
        [
            4.500000000000000000e00,
            2.627194547357931697e03,
            1.149868594710214893e03,
            1.531000000000000000e03,
        ],
        [
            5.500000000000000000e00,
            3.290152446278716525e03,
            1.525809755274765166e03,
            1.531000000000000000e03,
        ],
        [
            6.500000000000000000e00,
            3.358894082843109572e03,
            1.589932407313667227e03,
            1.531000000000000000e03,
        ],
        [
            7.500000000000000000e00,
            4.255396832052146237e03,
            1.514886451437911546e03,
            1.531000000000000000e03,
        ],
        [
            8.500000000000000000e00,
            1.283227208673806444e03,
            1.441732176425029593e03,
            1.531000000000000000e03,
        ],
        [
            9.500000000000000000e00,
            1.927922233153351044e03,
            1.569223026216929611e03,
            1.531000000000000000e03,
        ],
        [
            1.050000000000000000e01,
            2.608506665566600532e03,
            1.276829344293244048e03,
            1.531000000000000000e03,
        ],
        [
            1.150000000000000000e01,
            4.546843793570848902e03,
            1.445092006516387301e03,
            1.531000000000000000e03,
        ],
        [
            1.250000000000000000e01,
            1.530895501746246964e03,
            1.300356932588947529e03,
            1.531000000000000000e03,
        ],
        [
            1.350000000000000000e01,
            2.851085460780006542e03,
            1.030365054478607817e03,
            1.531000000000000000e03,
        ],
        [
            1.500000000000000000e01,
            5.792921982535129928e02,
            5.453403131388952261e02,
            1.531000000000000000e03,
        ],
        [
            1.700000000000000000e01,
            9.274358120761885971e01,
            3.830263568205626257e02,
            1.531000000000000000e03,
        ],
        [
            1.900000000000000000e01,
            2.742140278123054742e01,
            7.827446220615381378e01,
            1.531000000000000000e03,
        ],
        [
            2.100000000000000000e01,
            1.846682337305457011e01,
            1.767619030254932255e01,
            1.531000000000000000e03,
        ],
        [
            2.300000000000000000e01,
            1.143127664169310798e01,
            8.449042853675132747e00,
            1.531000000000000000e03,
        ],
        [
            2.500000000000000000e01,
            1.464226693693925263e01,
            6.260350983966168847e00,
            1.531000000000000000e03,
        ],
        [
            2.700000000000000000e01,
            6.765747028740180191e00,
            4.735553424062196370e00,
            1.531000000000000000e03,
        ],
        [
            2.900000000000000000e01,
            7.027344088101386888e00,
            3.680998814714635969e00,
            1.531000000000000000e03,
        ],
        [
            3.100000000000000000e01,
            4.258879640912515363e00,
            2.283029862306820590e00,
            1.531000000000000000e03,
        ],
        [
            3.300000000000000000e01,
            1.605019736465129521e00,
            1.729194253924844382e00,
            1.531000000000000000e03,
        ],
        [
            3.500000000000000000e01,
            1.616102193901045814e00,
            1.096718855793284986e00,
            1.531000000000000000e03,
        ],
        [
            3.700000000000000000e01,
            1.605150391083352668e00,
            7.279914011009456232e-01,
            1.531000000000000000e03,
        ],
        [
            3.900000000000000000e01,
            1.133988074872151364e00,
            6.327763643932864390e-01,
            1.531000000000000000e03,
        ],
        [
            4.100000000000000000e01,
            8.231062198856311518e-01,
            4.645567842055864372e-01,
            1.531000000000000000e03,
        ],
        [
            4.300000000000000000e01,
            7.178135134426733011e-01,
            2.946604277402520844e-01,
            1.531000000000000000e03,
        ],
        [
            5.000000000000000000e01,
            4.864537681254078177e-02,
            3.408079206320848483e-03,
            1.531000000000000000e03,
        ],
        [
            6.000000000000000000e01,
            2.206950452459015277e-02,
            1.999678338512320564e-03,
            1.525000000000000000e03,
        ],
        [
            7.000000000000000000e01,
            7.571136327868855341e-03,
            5.544005079348928540e-04,
            1.525000000000000000e03,
        ],
        [
            8.000000000000000000e01,
            3.234320786885245061e-03,
            3.179050881822192303e-04,
            1.525000000000000000e03,
        ],
        [
            9.000000000000000000e01,
            2.037113573770491740e-03,
            1.805080958701229437e-04,
            1.525000000000000000e03,
        ],
        [
            1.000000000000000000e02,
            1.243896065573771000e-03,
            1.064224503841847998e-04,
            1.525000000000000000e03,
        ],
        [
            1.050000000000000000e02,
            8.729019672131145416e-04,
            9.198125694458117072e-05,
            1.525000000000000000e03,
        ],
        [
            1.250000000000000000e02,
            1.032246930111039088e-03,
            7.773271868490957861e-05,
            1.531000000000000000e03,
        ],
        [
            1.450000000000000000e02,
            1.104107086871325676e-03,
            8.191704848117048931e-05,
            1.531000000000000000e03,
        ],
        [
            1.650000000000000000e02,
            1.019126472893534200e-03,
            8.027160434019853512e-05,
            1.531000000000000000e03,
        ],
        [
            1.850000000000000000e02,
            9.330923905943825248e-04,
            8.079750434155904062e-05,
            1.531000000000000000e03,
        ],
        [
            2.050000000000000000e02,
            7.557348465055518374e-04,
            7.942522095522638447e-05,
            1.531000000000000000e03,
        ],
        [
            2.250000000000000000e02,
            1.027861724363161606e-03,
            7.816941077823051219e-05,
            1.531000000000000000e03,
        ],
        [
            2.450000000000000000e02,
            9.775259960809927831e-04,
            8.085899304524133790e-05,
            1.531000000000000000e03,
        ],
        [
            2.650000000000000000e02,
            1.025924810581318970e-03,
            8.365862960857671808e-05,
            1.531000000000000000e03,
        ],
        [
            2.850000000000000000e02,
            1.282061293272371586e-03,
            8.446255111053474962e-05,
            1.531000000000000000e03,
        ],
        [
            3.050000000000000000e02,
            1.224197142390594760e-03,
            8.383709673543741328e-05,
            1.531000000000000000e03,
        ],
        [
            3.250000000000000000e02,
            1.172493902677988059e-03,
            8.514027892888675723e-05,
            1.531000000000000000e03,
        ],
        [
            3.450000000000000000e02,
            1.099576120182886108e-03,
            8.370230674004668526e-05,
            1.531000000000000000e03,
        ],
        [
            3.650000000000000000e02,
            1.140493200522534143e-03,
            8.198054179003846280e-05,
            1.531000000000000000e03,
        ],
        [
            3.850000000000000000e02,
            1.166850146962769737e-03,
            8.100113604989519188e-05,
            1.531000000000000000e03,
        ],
        [
            4.050000000000000000e02,
            1.018122811887654973e-03,
            7.840818101802763318e-05,
            1.531000000000000000e03,
        ],
        [
            4.250000000000000000e02,
            1.023494353363813914e-03,
            7.828361940970533957e-05,
            1.531000000000000000e03,
        ],
        [
            4.450000000000000000e02,
            1.054013082952318971e-03,
            7.634275532033203183e-05,
            1.531000000000000000e03,
        ],
        [
            4.650000000000000000e02,
            1.123284532984977118e-03,
            7.432635471483093280e-05,
            1.531000000000000000e03,
        ],
        [
            4.850000000000000000e02,
            9.460474853037236301e-04,
            7.383691279861058488e-05,
            1.531000000000000000e03,
        ],
        [
            5.050000000000000000e02,
            8.762673938602223407e-04,
            7.107222383806529752e-05,
            1.531000000000000000e03,
        ],
        [
            5.250000000000000000e02,
            8.777944774657086722e-04,
            6.653382455394837565e-05,
            1.531000000000000000e03,
        ],
        [
            5.450000000000000000e02,
            7.693160711952968687e-04,
            6.393225266721248054e-05,
            1.531000000000000000e03,
        ],
        [
            5.650000000000000000e02,
            6.820422175048985319e-04,
            6.100893465624011516e-05,
            1.531000000000000000e03,
        ],
        [
            5.850000000000000000e02,
            6.749431645983023330e-04,
            5.766603470807408143e-05,
            1.531000000000000000e03,
        ],
        [
            6.050000000000000000e02,
            5.768249673416066432e-04,
            5.543452724547827263e-05,
            1.531000000000000000e03,
        ],
        [
            6.250000000000000000e02,
            5.297933474853036345e-04,
            5.212099442593621108e-05,
            1.531000000000000000e03,
        ],
        [
            6.450000000000000000e02,
            4.813405551926848718e-04,
            4.841786249201610813e-05,
            1.531000000000000000e03,
        ],
        [
            6.650000000000000000e02,
            3.575885499673416793e-04,
            4.444656600698994217e-05,
            1.531000000000000000e03,
        ],
        [
            6.850000000000000000e02,
            3.484391051600261701e-04,
            4.179529697064478022e-05,
            1.531000000000000000e03,
        ],
        [
            7.050000000000000000e02,
            2.590791835401697204e-04,
            3.862927446550245533e-05,
            1.531000000000000000e03,
        ],
        [
            7.250000000000000000e02,
            3.041522109732201469e-04,
            3.774906544641808804e-05,
            1.531000000000000000e03,
        ],
        [
            7.450000000000000000e02,
            2.347581645983017626e-04,
            3.394372746186007507e-05,
            1.531000000000000000e03,
        ],
        [
            7.650000000000000000e02,
            2.144754506858262736e-04,
            3.286522687163034645e-05,
            1.531000000000000000e03,
        ],
        [
            7.850000000000000000e02,
            1.731584421946439636e-04,
            3.095363380467460986e-05,
            1.531000000000000000e03,
        ],
        [
            8.050000000000000000e02,
            2.088162736773350678e-04,
            3.011834524234931524e-05,
            1.531000000000000000e03,
        ],
        [
            8.250000000000000000e02,
            1.773135924232528303e-04,
            2.922099654397332651e-05,
            1.531000000000000000e03,
        ],
        [
            8.450000000000000000e02,
            1.692328772044415265e-04,
            2.863288803439338192e-05,
            1.531000000000000000e03,
        ],
        [
            8.650000000000000000e02,
            1.940651828870019221e-04,
            2.783432073937176657e-05,
            1.531000000000000000e03,
        ],
        [
            8.850000000000000000e02,
            1.219478935336381422e-04,
            2.682265099641666691e-05,
            1.531000000000000000e03,
        ],
        [
            9.050000000000000000e02,
            1.321090039190072002e-04,
            2.593559980608856392e-05,
            1.531000000000000000e03,
        ],
        [
            9.250000000000000000e02,
            1.623337883736119907e-04,
            2.443665838086170141e-05,
            1.531000000000000000e03,
        ],
        [
            9.450000000000000000e02,
            1.232049738732854491e-04,
            2.477899170400399358e-05,
            1.531000000000000000e03,
        ],
        [
            9.650000000000000000e02,
            1.200955845852384198e-04,
            2.393052911041335353e-05,
            1.531000000000000000e03,
        ],
        [
            9.850000000000000000e02,
            1.097478412802090211e-04,
            2.098336928769877063e-05,
            1.531000000000000000e03,
        ],
        [
            1.100000000000000000e03,
            7.714765512736778547e-05,
            5.659686539392513139e-06,
            1.531000000000000000e03,
        ],
        [
            1.300000000000000000e03,
            4.334716786414110224e-05,
            4.785859959119767400e-06,
            1.531000000000000000e03,
        ],
        [
            1.500000000000000000e03,
            2.690076126714566193e-05,
            3.758751331319620334e-06,
            1.531000000000000000e03,
        ],
        [
            1.700000000000000000e03,
            1.260058654474199705e-05,
            2.810583978553231883e-06,
            1.531000000000000000e03,
        ],
        [
            1.900000000000000000e03,
            6.390691051600261234e-06,
            2.073772743722045472e-06,
            1.531000000000000000e03,
        ],
        [
            2.100000000000000000e03,
            3.791211952971914786e-06,
            1.457369773372318788e-06,
            1.531000000000000000e03,
        ],
        [
            2.300000000000000000e03,
            9.638011103853690653e-07,
            1.077763339286543808e-06,
            1.531000000000000000e03,
        ],
        [
            2.500000000000000000e03,
            1.267031165438089227e-06,
            8.371180926081535443e-07,
            1.531000000000000000e03,
        ],
    ],
)

# also use radius in m
# Column 0 is scaled by 1e-6 and halved in place; presumably the literal
# above stores diameters in µm in that column - TODO confirm with the data source.
florian[:, 0] = 1e-6 * florian[:, 0] / 2

In [None]:
# differences between consecutive entries of column 0 of `florian`
np.diff(florian[:, 0])

array([5.00e-07, 5.00e-07, 5.00e-07, 5.00e-07, 5.00e-07, 5.00e-07,
       5.00e-07, 5.00e-07, 5.00e-07, 5.00e-07, 5.00e-07, 7.50e-07,
       1.00e-06, 1.00e-06, 1.00e-06, 1.00e-06, 1.00e-06, 1.00e-06,
       1.00e-06, 1.00e-06, 1.00e-06, 1.00e-06, 1.00e-06, 1.00e-06,
       1.00e-06, 1.00e-06, 3.50e-06, 5.00e-06, 5.00e-06, 5.00e-06,
       5.00e-06, 5.00e-06, 2.50e-06, 1.00e-05, 1.00e-05, 1.00e-05,
       1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05,
       1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05,
       1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05,
       1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05,
       1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05,
       1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05,
       1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05, 1.00e-05, 5.75e-05,
       1.00e-04, 1.00e-04, 1.00e-04, 1.00e-04, 1.00e-04, 1.00e-04,
       1.00e-04])

In [None]:
# Compare the radius spacing of both composites with the spacing of column 0 of `florian`.
# Every point shows the distance of a radius value to the previous one.
for label, ds in (("cloud_composite", cloud_composite), ("coarse_composite", coarse_composite)):
    plt.scatter(ds.radius[1:], np.diff(ds.radius), marker="x", label=label)

plt.scatter(
    florian[:, 0][1:],
    np.diff(florian[:, 0]),
    label="florian",
)

plt.xscale("log")
plt.yscale("log")
plt.xlabel("radius")
plt.ylabel("spacing to previous radius")
plt.legend()

In [None]:
lm_mod = lmfit.Model(normal_distribution, independent_vars=("x",))

# mu and its bounds are given as the natural logarithm of a length.
params_rain = lmfit.Parameters()
params_rain.add("mu", value=np.log(300e-6), min=np.log(80e-6), max=np.log(2e-3))
params_rain.add("scale_factor", value=1, min=0)
params_rain.add("sigma", value=0.5, max=1)

params_cloud = lmfit.Parameters()
# The start value has to lie inside [min, max]. The former start value
# log(0.1e-6) was below the lower bound log(10e-6), which pins the start onto
# the bound; start value and lower bound are therefore swapped.
# NOTE(review): confirm that 0.1e-6 is the intended lower bound.
params_cloud.add("mu", value=np.log(10e-6), min=np.log(0.1e-6), max=np.log(50e-6))
params_cloud.add("scale_factor", value=1e5, min=0)
# NOTE(review): sigma has no lower bound in either parameter set - confirm this is intended.
params_cloud.add("sigma", value=0.5, max=1)

RADIUS = coarse_composite["radius"]


# for cloud_id in identified_clouds.cloud_id:
def fit_both(cloud_id):
    """
    Fit ``lm_mod`` separately to the small-radius and large-radius part of one cloud.

    The coarse composite is restricted to the time window of the cloud, the
    particle size distribution is divided by the bin width, and the result is
    split at a radius of 50e-6. All (time, radius) samples of each part are
    flattened, non-finite samples are dropped, and the model is fitted on
    x = ln(radius) using ``params_cloud`` and ``params_rain`` respectively.

    Parameters
    ----------
    cloud_id
        Identifier passed to ``select_individual_cloud_by_id``.

    Returns
    -------
    tuple
        ``(td_cloud, td_rain)``: two ``TestData`` objects with flattened
        ``x``, ``y``, ``dx`` and the lmfit result stored in ``fit_result``.
    """
    da = select_individual_cloud_by_id(identified_clouds, cloud_id)
    start = da["start"].values[0]
    end = da["end"].values[0]
    ds = coarse_composite.sel(time=slice(start, end))

    # Per-bin values -> values per unit bin width
    ds["particle_size_distribution"] = ds["particle_size_distribution"] / ds["bin_width"]

    def on_time_radius(radius_only):
        # Broadcast a radius-only variable onto the (time, radius) grid
        return radius_only.expand_dims(time=ds.time).transpose("time", "radius")

    def build_test_data(ds_part, name):
        # Collect x = ln(radius), y = PSD and dx = bin width of one radius range
        # as flat arrays that contain finite samples only.
        td = TestData(
            x=np.log(on_time_radius(ds_part["radius"])),
            y=ds_part["particle_size_distribution"].transpose("time", "radius"),
            dx=on_time_radius(ds_part["bin_width"]),
            name=name,
        )
        td.x = td.x.values.flatten()
        td.y = td.y.values.flatten()
        td.dx = td.dx.values.flatten()
        valid = np.isfinite(td.y) & np.isfinite(td.x)
        td.x = td.x[valid]
        td.y = td.y[valid]
        td.dx = td.dx[valid]
        return td

    # Label slices include both end points, so a bin at exactly 50e-6 is part
    # of both ranges.
    td_cloud = build_test_data(ds.sel(radius=slice(None, 50e-6)), name="cloud")
    # The rain part used to be labelled "cloud" as well (copy-paste slip).
    td_rain = build_test_data(ds.sel(radius=slice(50e-6, None)), name="rain")

    td_cloud.fit_result = lm_mod.fit(data=td_cloud.y, x=td_cloud.x, **params_cloud)
    td_rain.fit_result = lm_mod.fit(data=td_rain.y, x=td_rain.x, **params_rain)
    return td_cloud, td_rain


# Fit results per cloud, keyed by str(cloud_id).
# Clouds whose fit raises a TypeError are reported and left out, so consumers
# of `test_dict` must not assume that every cloud id is present.
test_dict = {}

for cloud_id in identified_clouds.cloud_id.values:
    cloud_id_str = str(cloud_id)
    try:
        td_cloud, td_rain = fit_both(cloud_id)
    except TypeError as err:
        print("error in cloud_id", cloud_id, err)
        # Without this, the results of the previous cloud would be stored
        # under the id of the failed one.
        continue
    test_dict[cloud_id_str] = dict(
        cloud=td_cloud,
        rain=td_rain,
    )

In [None]:
# Pick a random cloud, but only among the clouds that have stored fit results.
fitted_cloud_ids = [cid for cid in identified_clouds.cloud_id.values if str(cid) in test_dict]
cloud_id = np.random.choice(fitted_cloud_ids)
# cloud_id = 356
cloud_id_str = str(cloud_id)

ds = match_clouds_and_cloudcomposite(
    ds_clouds=select_individual_cloud_by_id(identified_clouds, cloud_id),
    ds_cloudcomposite=coarse_composite,
)
radius = ds["radius"]
dx = ds["bin_width"]
psd = ds["particle_size_distribution"]


td_rain = test_dict[cloud_id_str]["rain"]
x = np.log(radius)
# The fit was made on PSD / bin_width, so multiply by the bin width again
psd_fit = td_rain.fit_result.eval(x=x) * dx
# Use the radius of this dataset instead of the global RADIUS, which another
# cell rebinds to a subset of the radius grid.
lwc_fit = 1000 * psd_fit * 4 / 3 * np.pi * radius**3


# Figure 1: measured PSD samples and the rain fit, on the data grid (red)
# and on a coarse logarithmic grid (blue).
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 3))
ax.plot(psd.radius, psd, marker=".", linestyle="None", color="k", alpha=0.2)
ax.plot(radius, psd_fit, color="r", linestyle=":")
x2 = np.logspace(-6, -3, 10)
dx2 = (x2[2:] - x2[:-2]) / 2  # centred bin width of the inner grid points
x2 = x2[1:-1]
ax.plot(x2, td_rain.fit_result.eval(x=np.log(x2)) * dx2, color="b", linestyle=":")

ax.set_xscale("log")
ax.set_yscale("symlog", linthresh=1, linscale=0.1)
ax.set_ylim(-1, None)
ax.set_xlabel("Radius [m]")

# Figure 2: r³-weighted PSD over time and radius, with the LWC on a twin axis.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(7, 3), width_ratios=[1, 0.07])

ax = axs[0]
cax = axs[1]

# NOTE(review): the colorbar is labelled mg/m³, but this omits the factor
# 1000 * 4 / 3 * pi that is used for lwc_fit above -- confirm.
msd = psd * psd["radius"] ** 3
lwc = ds["liquid_water_content"]

pcm = ax.pcolormesh(
    psd.time,
    1e6 * psd.radius,  # m -> µm, so that the axis matches its label
    1e9 * msd,
    cmap="Blues",
    shading="nearest",
    vmin=0,
    vmax=50,
)
fig.colorbar(pcm, cax=cax, label="MSD [mg/m³]")
ax.set_yscale("log")
ax.set_ylabel("Radius [µm]")

# twinx() already attaches the new axes to the figure
ax2 = ax.twinx()
ax2.plot(lwc.time, 1e3 * lwc, color="black", lw=2, linestyle=":")
ax2.axhline(1e3 * lwc.mean("time"), color="black", lw=2, linestyle="-")
# ax2.fill_between(lwc.time,
#                 1e3 * (lwc.mean('time') - lwc.std('time')),
#                 1e3 * (lwc.mean('time') + lwc.std('time')),
#                 color="black",
#                 alpha = 0.1
#     )
ax2.axhline(1e3 * lwc_fit.sum(), color="red", lw=2)
ax2.set_ylabel("LWC [g/m³]")

ax2.set_ylim(0, 2)

fig.tight_layout()

In [None]:
# Normalised cumulative distributions over radius, one line per cloud:
# mass size distribution on the left axis (blue), particle size distribution
# on the right axis (red).
fig, ax = plt.subplots()
ax2 = ax.twinx()

for cloud_id in identified_clouds.cloud_id.values:

    dsi = select_individual_cloud_by_id(identified_clouds, cloud_id)

    ds = coarse_composite.sel(time=slice(dsi.start.values[0], dsi.end.values[0]))

    msd_cumsum = ds["mass_size_distribution"].cumsum("radius")
    psd_cumsum = ds["particle_size_distribution"].cumsum("radius")

    # Divide by the value at the last radius so that every curve ends at 1
    msd_cumsum = msd_cumsum / msd_cumsum.isel(radius=-1)
    psd_cumsum = psd_cumsum / psd_cumsum.isel(radius=-1)

    ax.plot(
        1e6 * msd_cumsum.radius,
        msd_cumsum.mean("time"),
        # marker = 'o',
        color="blue",
    )

    ax2.plot(
        1e6 * psd_cumsum.radius,
        psd_cumsum.mean("time"),
        # marker = 'x',
        color="red",
    )

ax.set_xscale("log")
ax2.set_xscale("log")
ax.set_xlabel("Radius [µm]")
ax.set_ylabel("Normalised cumulative MSD", color="blue")
ax2.set_ylabel("Normalised cumulative PSD", color="red")

In [None]:
# Normalised cumulative distributions computed from the fitted cloud + rain
# modes, one line per cloud (mass in blue, number in red).
fig, ax = plt.subplots()
ax2 = ax.twinx()

# Loop-invariant grid of the coarse composite
radius = coarse_composite.radius
log_radius = np.log(radius)
bin_width = coarse_composite["bin_width"]

for cloud_id in identified_clouds.cloud_id.values:

    d = test_dict.get(str(cloud_id))
    if d is None:
        # no fit result stored for this cloud
        continue
    td_cloud = d["cloud"]
    td_rain = d["rain"]

    # The fits were made on PSD / bin_width. Multiply by the bin width so that
    # summing over the non-uniform radius grid adds per-bin values.
    psd = (td_cloud.fit_result.eval(x=log_radius) + td_rain.fit_result.eval(x=log_radius)) * bin_width
    msd = 1000 * 4 / 3 * np.pi * psd * radius**3
    # msd_cumsum = ds['mass_size_distribution'].cumsum('radius')
    psd_cumsum = np.cumsum(psd)
    msd_cumsum = np.cumsum(msd)
    msd_cumsum = msd_cumsum / msd_cumsum[-1]
    psd_cumsum = psd_cumsum / psd_cumsum[-1]

    ax.plot(
        1e6 * radius,
        msd_cumsum,
        # marker = 'o',
        color="blue",
    )

    ax2.plot(
        1e6 * radius,
        psd_cumsum,
        # marker = 'x',
        color="red",
    )

ax.set_xscale("log")
ax2.set_xscale("log")
ax.set_xlabel("Radius [µm]")
ax.set_ylabel("Normalised cumulative MSD (fit)", color="blue")
ax2.set_ylabel("Normalised cumulative PSD (fit)", color="red")

In [None]:
# Observed total mass (time mean with standard error of the mean) against the
# mass obtained from the fitted rain mode, one point per cloud.
fig, ax = plt.subplots()

for cloud_id in identified_clouds.cloud_id.values:

    fit = test_dict.get(str(cloud_id))
    if fit is None:
        # no fit result stored for this cloud
        continue

    dsi = select_individual_cloud_by_id(identified_clouds, cloud_id)
    ds = coarse_composite.sel(time=slice(dsi.start.values[0], dsi.end.values[0]))
    radius = ds.radius

    m_obs, s_obs = mean_and_stderror_of_mean(ds["mass_size_distribution"].sum("radius"), ("time",))

    # NOTE(review): only the rain mode enters the fitted mass, while the
    # observed mass sums over all radii -- confirm this is intended.
    td_rain = fit["rain"]

    # The fit was made on PSD / bin_width. Multiply by the bin width to get
    # per-bin values before summing over radius.
    psd = td_rain.fit_result.eval(x=np.log(radius)) * ds["bin_width"]
    msd = 1000 * 4 / 3 * np.pi * psd * radius**3

    m_fit, s_fit = np.sum(msd), 0

    ax.errorbar(
        x=1e3 * m_obs,
        xerr=1e3 * s_obs,
        y=1e3 * m_fit,
        yerr=1e3 * s_fit,
        marker="o",
    )

# Units follow the 1e3 * LWC -> g/m³ convention used for the LWC axes elsewhere
# in this notebook -- assumes mass_size_distribution is in kg/m³, TODO confirm.
ax.set_xlabel("Observed mass [g/m³]")
ax.set_ylabel("Fitted mass, rain mode [g/m³]")
ax.set_xlim(0, 4)
ax.set_ylim(0, 4)
# 1:1 reference line
ax.plot(
    ax.get_xlim(),
    ax.get_ylim(),
    color="black",
    linestyle="--",
)

[<matplotlib.lines.Line2D at 0x7ffe942e8080>]

In [None]:
# Display the coarse composite dataset for inspection
coarse_composite

In [None]:
# Draw one cloud at random and cut the coarse composite to its time window,
# keeping only radius values from 50e-6 upwards.
cloud_id = np.random.choice(identified_clouds.cloud_id.values)
# cloud_id = 273
da = select_individual_cloud_by_id(identified_clouds, cloud_id)
start, end = da["start"].values[0], da["end"].values[0]
ds_match = coarse_composite.sel(time=slice(start, end)).sel(radius=slice(50e-6, None))

# Gaussian model on x = ln(radius) with its start values and bounds
lm_mod = lmfit.Model(normal_distribution, independent_vars=("x",))
params = lmfit.Parameters()
params.add("scale_factor", value=1)
params.add("mu", value=-8, min=-11, max=-5)
params.add("sigma", value=0.1, max=5)
RADIUS = ds_match["radius"]

# Radius-only variables broadcast onto the (time, radius) grid
radius_on_grid = ds_match["radius"].expand_dims(time=ds_match.time).transpose("time", "radius")
bin_width_on_grid = ds_match["bin_width"].expand_dims(time=ds_match.time).transpose("time", "radius")

# All individual (time, radius) samples
td_cloud = TestData(
    x=np.log(radius_on_grid),
    y=ds_match["particle_size_distribution"].transpose("time", "radius"),
    dx=bin_width_on_grid,
    name="cloud",
)

# Time-mean distribution, and what its normalize() method returns
td_cloud_mean = TestData(
    x=np.log(ds_match["radius"]),
    y=ds_match["particle_size_distribution"].mean(dim="time"),
    dx=ds_match["bin_width"],
    name="cloud",
)
td_cloud_mean_norm = td_cloud_mean.normalize()

test_data_sets = (td_cloud, td_cloud_mean, td_cloud_mean_norm)

# Flatten every data set to 1-D and keep only samples with finite x and y
for td in test_data_sets:
    flat_x, flat_y, flat_dx = (getattr(td, key).values.flatten() for key in ("x", "y", "dx"))
    finite = np.isfinite(flat_y) & np.isfinite(flat_x)
    td.x, td.y, td.dx = flat_x[finite], flat_y[finite], flat_dx[finite]

# plt.xscale('log')
# Fit the normal distribution in ln(radius) to each of the three TestData objects
for td in test_data_sets:
    td.fit_result = lm_mod.fit(data=td.y, x=td.x, **params)

# Figure 1: individual samples, time-mean samples and the fitted curves
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 3))
ax.scatter(
    np.exp(td_cloud.x),
    td_cloud.y,
    marker=".",
    # color = 'b',
)
ax.scatter(
    np.exp(td_cloud_mean.x),
    td_cloud_mean.y,
    marker="o",
    # color = 'r',
)

ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel("Radius [m]")
ax.axvline(50e-6, color="black", linestyle="--")
# td_cloud.x = np.exp(td_cloud.x)

# Evaluate the fits on a regular grid in ln(radius)
x = np.arange(-12, -6, 0.1)
ax.plot(np.exp(x), td_cloud.fit_result.eval(x=x), color="r")
ax.plot(np.exp(x), td_cloud_mean.fit_result.eval(x=x), color="b", linestyle="--")
# The fit to the normalised data gets its own y axis
ax1 = ax.twinx()
ax1.plot(np.exp(x), td_cloud_mean_norm.fit_result.eval(x=x), color="g", linestyle=":")

psd_fit = td_cloud.fit_result.eval(x=np.log(RADIUS))
lwc_fit = 1000 * psd_fit * 4 / 3 * np.pi * RADIUS**3
# plt.xscale('log')

# Figure 2: r³-weighted PSD over time and radius, with the LWC on a twin axis
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(7, 3), width_ratios=[1, 0.07])

ax = axs[0]
cax = axs[1]

psd = ds_match["particle_size_distribution"]
# Radius in µm, so that the y axis and the colorbar match their labels
radius_um = 1e6 * psd["radius"]
msd = psd * radius_um**3
lwc = ds_match["liquid_water_content"]

pcm = ax.pcolormesh(
    psd.time,
    radius_um,
    msd,
    cmap="Blues",
    shading="nearest",
)
ax.set_yscale("log")
ax.set_ylabel("Radius [µm]")

# twinx() already attaches the new axes to the figure
ax2 = ax.twinx()
ax2.plot(lwc.time, 1e3 * lwc, color="black", lw=2, linestyle=":")
ax2.axhline(1e3 * lwc.mean("time"), color="black", lw=2, linestyle="-")
ax2.fill_between(
    lwc.time,
    1e3 * (lwc.mean("time") - lwc.std("time")),
    1e3 * (lwc.mean("time") + lwc.std("time")),
    color="black",
    alpha=0.1,
)
ax2.axhline(1e3 * lwc_fit.sum(), color="red", lw=2)
ax2.set_ylabel("LWC [g/m³]")

fig.colorbar(pcm, cax=cax, label="MSD [µm³/m³]")
fig.tight_layout()