In [None]:
import warnings
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

from importlib import reload
import xarray as xr
from typing import Union, Tuple, List, Dict

from sdm_eurec4a.visulization import set_custom_rcParams, adjust_lightness_array, ncols_nrows_from_N
import sdm_eurec4a.input_processing.models as smodels

from tqdm import tqdm
from typing import Dict
from scipy.optimize import Bounds
import warnings
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr

from sdm_eurec4a import RepositoryPath
from pathlib import Path

from sdm_eurec4a.reductions import mean_and_stderror_of_mean

from sdm_eurec4a.visulization import set_custom_rcParams, adjust_lightness_array, label_from_attrs
from sdm_eurec4a.identifications import match_clouds_and_cloudcomposite
from sdm_eurec4a.conversions import msd_from_psd_dataarray, psd_from_msd_dataarray

# NOTE(review): this suppresses every warning category for the rest of the session,
# so numerical warnings (e.g. from np.log(0) further down) will not be shown.
warnings.filterwarnings("ignore")

# Apply the project plotting defaults; the return value is kept as the default palette.
default_colors = set_custom_rcParams()
# Second palette derived from the default one via adjust_lightness_array(..., 0.7);
# it is the palette used for the line colours in the plots below.
dark_colors = adjust_lightness_array(default_colors, 0.7)

In [None]:
# Re-import the models module so that edits made to its source file after the
# first import are picked up without restarting the kernel.
reload(smodels)

<module 'sdm_eurec4a.input_processing.models' from '/home/m/m301096/repositories/sdm-eurec4a/src/sdm_eurec4a/input_processing/models.py'>

In [None]:
class LnParameters:
    """Store one ``(mu, sigma, scale)`` triple and expose it under several naming schemes.

    The three values are kept unchanged in ``self.parameters``. Every property
    builds a new dictionary that holds the same values under different keys;
    no value is converted or rescaled.
    """

    def __init__(self, mu, sigma, scale):
        self.parameters = {"mu": mu, "sigma": sigma, "scale": scale}

    @property
    def custom_parameters(self):
        """Return the parameters in a dictionary with custom keys.

        Keys are:
        - mu1
        - sigma1
        - scale_factor1
        """
        stored = self.parameters
        return {
            "mu1": stored["mu"],
            "sigma1": stored["sigma"],
            "scale_factor1": stored["scale"],
        }

    @property
    def scipy_parameters(self):
        """Return the parameters under the keys ``s`` (sigma), ``loc`` (mu) and ``scale``."""
        stored = self.parameters
        return {
            "s": stored["sigma"],
            "loc": stored["mu"],
            "scale": stored["scale"],
        }

    @property
    def geom_parameters(self):
        """Return the parameters under the keys ``geomean``, ``geosig`` and ``scalefac``."""
        stored = self.parameters
        return {
            "geomean": stored["mu"],
            "geosig": stored["sigma"],
            "scalefac": stored["scale"],
        }

    @property
    def normal_parameters(self):
        """Return a copy of the parameters under the keys ``mu``, ``sigma`` and ``scale``."""
        stored = self.parameters
        return {
            "mu": stored["mu"],
            "sigma": stored["sigma"],
            "scale": stored["scale"],
        }


def mass_from_number(t: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Scale ``y`` by the volume of a sphere of radius ``t``.

    Returns ``4/3 * pi * t**3 * y``. No density factor is applied here.
    """
    sphere_prefactor = 4 / 3 * np.pi
    return sphere_prefactor * t**3 * y

In [None]:
# Evaluation grids between `lower` and `upper`.
# Only `x` is used in this cell; x1, x2 and x3 are alternative grids.
lower = 1e-2
upper = 100
N = 1000
x = np.linspace(lower, upper, N)  # linear spacing, N points
x1 = np.linspace(lower, upper, N * 2)  # linear spacing, twice as many points
x2 = np.logspace(np.log10(lower), np.log10(upper), N)  # logarithmic spacing, base 10
x3 = np.logspace(np.log(lower), np.log(upper), N, base=np.exp(1))  # same range, built with base e

# Example parameter set: mu=3, sigma=2, scale=3
params = LnParameters(3, 2, 3)

# Evaluate the log-normal model for every combination of parameter space and
# space on the same grid. Result layout: results[parameter_space][space].
results = dict()
for ps in ["direct", "geometric", "exact"]:
    results[ps] = dict()
    for space in ["linear", "ln", "cleo"]:
        results[ps][space] = smodels.log_normal_distribution_all(
            x=x,
            parameter_space=ps,
            space=space,
            mu=params.parameters["mu"],
            sigma=params.parameters["sigma"],
            scale=params.parameters["scale"],
            density_scaled=False,
        )

In [None]:
# Compare the "linear" and "cleo" evaluations for the "geometric" and "exact"
# parameter spaces. Both panels show the same curves; the left panel uses a
# logarithmic x-axis, the right panel keeps the linear one.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))

ax1 = axs[0]
ax2 = axs[1]

# `i` selects one colour per parameter space
i = 0
for ps in ["geometric", "exact"]:
    color = dark_colors[i]
    for _ax in axs:
        _ax.plot(x, results[ps]["linear"], label=f"{ps} linear", color=color, linestyle="-")
        # _ax.plot(x, results[ps]['ln'], label=f'{ps} ln', color = color, linestyle='--')
        _ax.plot(x, results[ps]["cleo"], label=f"{ps} cleo", color=color, linestyle=":")
    i += 1

for _ax in axs:
    # reference lines are disabled in this cell
    # _ax.axvline(params.parameters['mu'], color = 'red', linestyle='--')
    # _ax.axhline(params.parameters['scale'], color = 'red', linestyle='--')
    # _ax.axvline(np.log(params.parameters['mu']), color = 'blue', linestyle='--')
    _ax.legend()
# ax2.axvline(10, color = 'k', linestyle='--')
# plt.xlim(xlim)
ax1.set_xscale("log")

# ax1.set_ylim(0, 30)

In [None]:
# Same comparison as in the previous figure, with red dashed reference lines
# added at x = mu (vertical) and y = scale (horizontal).
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))

ax1 = axs[0]
ax2 = axs[1]

# `i` selects one colour per parameter space
i = 0
for ps in ["geometric", "exact"]:
    color = dark_colors[i]
    for _ax in axs:
        _ax.plot(x, results[ps]["linear"], label=f"{ps} linear", color=color, linestyle="-")
        # _ax.plot(x, results[ps]['ln'], label=f'{ps} ln', color = color, linestyle='--')
        _ax.plot(x, results[ps]["cleo"], label=f"{ps} cleo", color=color, linestyle=":")
    i += 1

for _ax in axs:
    # reference lines at the parameter values of `params`
    _ax.axvline(params.parameters["mu"], color="red", linestyle="--")
    _ax.axhline(params.parameters["scale"], color="red", linestyle="--")
    # _ax.axvline(np.log(params.parameters['mu']), color = 'blue', linestyle='--')
    _ax.legend()
# ax2.axvline(10, color = 'k', linestyle='--')
# plt.xlim(xlim)
ax1.set_xscale("log")

# ax1.set_ylim(0, 30)

In [None]:
# Resolve the data directory for the "levante" configuration of the repository.
data_dir = RepositoryPath("levante").get_data_dir()

# Cloud composite observations
cloud_composite = xr.open_dataset(
    data_dir / Path("observation/cloud_composite/processed/cloud_composite_SI_units_20241025.nc"),
)
# Identified cloud clusters (file: rain mask 5)
identified_clusters = xr.open_dataset(
    data_dir
    / Path(
        "observation/cloud_composite/processed/identified_clusters/identified_clusters_rain_mask_5.nc"
    )
)
# Index the clusters by cloud_id instead of time
identified_clusters = identified_clusters.swap_dims({"time": "cloud_id"})

# Keep the radius attributes across the re-assignment below.
# NOTE(review): the assignment is a no-op self-assignment left over from the
# disabled µm conversion; the attrs are copied and written back unchanged.
attrs = cloud_composite["radius"].attrs.copy()
# attrs.update({"units": "µm"})
cloud_composite["radius"] = cloud_composite["radius"]
# cloud_composite["radius_micro"] = 1e6 * cloud_composite["radius"]
cloud_composite["radius"].attrs = attrs

# Radius broadcast along time, stored as a 2D variable
cloud_composite["radius2D"] = cloud_composite["radius"].expand_dims(time=cloud_composite["time"])
# Make "radius" the leading dimension of every variable
cloud_composite = cloud_composite.transpose("radius", ...)


# cloud_composite = cloud_composite.sel(radius = slice(10, None))

# Keep only clusters with duration >= 3 s and altitude strictly between 500 and 1200
# (altitude units as stored in the dataset).
# NOTE(review): `.dt.seconds` is the seconds component of the timedelta (days are
# not included) — presumably fine for short cloud durations; confirm.
identified_clusters = identified_clusters.where(
    (
        (identified_clusters.duration.dt.seconds >= 3)
        & (identified_clusters.altitude < 1200)
        & (identified_clusters.altitude > 500)
    ),
    drop=True,
)

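If we want to coarsen the results, we must apply the coarsening to the **non**-normalized data, i.e. the per-bin values obtained by multiplying with the bin width. Otherwise the sums are not conserved.
Afterwards, we can normalize by the bin width again.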

In [None]:
# Radii at and above `radius_split` are coarsened, smaller radii keep their resolution.
# NOTE(review): the value is 95e-6 (95 µm if radius is stored in metres); the
# previous comment said "50 µm" — confirm which one is intended.
radius_split = 95e-6
# Number of neighbouring radius bins merged into one coarse bin
coarsen_factor = 3


coarse_composite = cloud_composite.sel(radius=slice(radius_split, None)).copy()

# make sure to have non normalized data to be coarsened
# otherwise, the sum will not be conserved
coarse_composite["particle_size_distribution"] = (
    coarse_composite["particle_size_distribution"] * coarse_composite["bin_width"]
)
coarse_composite["mass_size_distribution"] = (
    coarse_composite["mass_size_distribution"] * coarse_composite["bin_width"]
)

# use mean for radius and radius2D
coarse_composite_radius = coarse_composite["radius"].coarsen(radius=coarsen_factor).mean()
coarse_composite_radius2D = coarse_composite["radius2D"].coarsen(radius=coarsen_factor).mean()
# use the sum for the rest (this also sums bin_width, giving the coarse bin widths)
coarse_composite = coarse_composite.coarsen(radius=coarsen_factor).sum()

# replace the summed radius variables by the averaged ones
coarse_composite["radius"] = coarse_composite_radius
coarse_composite["radius2D"] = coarse_composite_radius2D
coarse_composite["diameter"] = 2 * coarse_composite["radius"]

# make sure to have normalized data again (divide by the coarse bin width)
coarse_composite["particle_size_distribution"] = (
    coarse_composite["particle_size_distribution"] / coarse_composite["bin_width"]
)
coarse_composite["mass_size_distribution"] = (
    coarse_composite["mass_size_distribution"] / coarse_composite["bin_width"]
)

# set the attributes explicitly; the unit is taken from the original composite
coarse_composite["particle_size_distribution"].attrs = dict(
    long_name="Number concentration",
    unit=cloud_composite["particle_size_distribution"].attrs["unit"],
)
coarse_composite["mass_size_distribution"].attrs = dict(
    long_name="Mass concentration",
    unit=cloud_composite["mass_size_distribution"].attrs["unit"],
)

# merge the two composites with higher resolution at small radii
# and lower resolution at large radii
# NOTE(review): both slices include `radius_split` as an end point — presumably no
# radius coordinate equals it exactly; verify.
coarse_composite = xr.merge(
    [
        coarse_composite.sel(radius=slice(radius_split, None)),
        cloud_composite.sel(radius=slice(None, radius_split)),
    ]
)


# Test liquid water content is conserved
np.testing.assert_allclose(
    (coarse_composite["bin_width"] * coarse_composite["mass_size_distribution"]).sum("radius"),
    (cloud_composite["bin_width"] * cloud_composite["mass_size_distribution"]).sum("radius"),
    rtol=0.001,
)
# Test particle concentration is conserved
np.testing.assert_allclose(
    (coarse_composite["bin_width"] * coarse_composite["particle_size_distribution"]).sum("radius"),
    (cloud_composite["bin_width"] * cloud_composite["particle_size_distribution"]).sum("radius"),
    rtol=0.001,
)

In [None]:
class PSDBoundsWRONG:
    """Earlier bounds and start values for the double log-normal PSD fit.

    The ``WRONG`` suffix is the author's marker; presumably the class is kept
    for reference only (NOTE(review): confirm before using it in a fit).
    Each parameter is stored as ``[lower bound, initial guess, upper bound]``.
    Unlike ``PSDBounds``, the scale entries hold ``np.log`` of the scale, so
    the lower bound ``log(0)`` is ``-inf``.

    The accessors return this class's own attributes. They previously returned
    ``PSDBounds._*``, so ``bounds()`` and ``x0()`` yielded another class's
    values and the ``*_micrometer`` accessors raised ``AttributeError``.
    """

    mu1 = np.array([1e-6, 3e-6, 10e-6])
    sig1 = np.array([1.1, 2, 4])
    mu2 = np.array([200e-6, 300e-6, 500e-6])
    sig2 = np.array([1.1, 2, 3])
    # log(0) = -inf is intended as "no lower bound"; silence only that warning
    with np.errstate(divide="ignore"):
        sc1 = np.log(np.array([0, 1e10, 1e16]))
        sc2 = np.log(np.array([0, 1e6, 1e10]))

    # initial guess, parameter order: mu1, sig1, sc1, mu2, sig2, sc2
    _x0 = np.array([mu1[1], sig1[1], sc1[1], mu2[1], sig2[1], sc2[1]])

    # same guess with mu1 and mu2 multiplied by 1e6
    _x0_micrometer = np.array([mu1[1] * 1e6, sig1[1], sc1[1], mu2[1] * 1e6, sig2[1], sc2[1]])

    _bounds = Bounds(
        # mu1, sig1, sc1, mu2, sig2, sc2
        lb=[mu1[0], sig1[0], sc1[0], mu2[0], sig2[0], sc2[0]],
        ub=[mu1[2], sig1[2], sc1[2], mu2[2], sig2[2], sc2[2]],
        # keep_feasible = [True, True, True, False, True, True]
    )

    _bounds_micrometer = Bounds(
        # mu1, sig1, sc1, mu2, sig2, sc2
        lb=[mu1[0] * 1e6, sig1[0], sc1[0], mu2[0] * 1e6, sig2[0], sc2[0]],
        ub=[mu1[2] * 1e6, sig1[2], sc1[2], mu2[2] * 1e6, sig2[2], sc2[2]],
        # keep_feasible = [True, True, True, False, True, True]
    )

    @staticmethod
    def bounds():
        """Return the bounds of this class."""
        return PSDBoundsWRONG._bounds

    @staticmethod
    def bounds_micrometer():
        """Return the bounds with mu1 and mu2 multiplied by 1e6."""
        return PSDBoundsWRONG._bounds_micrometer

    @staticmethod
    def x0():
        """Return the initial guess of this class."""
        return PSDBoundsWRONG._x0

    @staticmethod
    def x0_micrometer():
        """Return the initial guess with mu1 and mu2 multiplied by 1e6."""
        return PSDBoundsWRONG._x0_micrometer


class PSDBounds:
    """Start values and bounds for the double log-normal fit of the particle size distribution.

    Parameter order everywhere: mu1, sig1, sc1, mu2, sig2, sc2.
    """

    # initial guess
    _x0 = np.array([3e-6, 1.5, 1e13, 300e-6, 1.5, 1e3])
    _bounds = Bounds(
        # mu1, sig1, sc1, mu2, sig2, sc2
        lb=[1e-6, 1.3, 1e-20, 100e-6, 1.3, 1e-20],
        # The mu1 upper bound was 50e6, which left mu1 effectively unbounded
        # from above. 50e-6 matches the magnitude of the lower bound and start
        # value and the mu1 upper bound used in MSDBounds.
        ub=[50e-6, 3.0, 1e16, 0.5e-3, 3.0, 1e10],
        # keep_feasible = [True, True, True, False, True, True]
    )

    @staticmethod
    def bounds():
        """Return the parameter bounds."""
        return PSDBounds._bounds

    @staticmethod
    def x0():
        """Return the initial guess."""
        return PSDBounds._x0


class MSDBoundsWRONG:
    """Earlier bounds and start values for the double log-normal MSD fit.

    The ``WRONG`` suffix is the author's marker; presumably the class is kept
    for reference only (NOTE(review): confirm before using it in a fit).
    Each parameter is stored as ``[lower bound, initial guess, upper bound]``.
    Unlike ``MSDBounds``, the scale entries hold ``np.log`` of the scale, so
    the lower bound ``log(0)`` is ``-inf``.

    The accessors return this class's own attributes. They previously returned
    ``MSDBounds._*``, so ``bounds()`` and ``x0()`` yielded another class's
    values and the ``*_micrometer`` accessors raised ``AttributeError``.
    """

    mu1 = np.array([1e-6, 3e-6, 10e-6])
    sig1 = np.array([1.1, 2.0, 4.0])
    mu2 = np.array([200e-6, 300e-6, 500e-6])
    sig2 = np.array([1.3, 2, 3.0])
    # log(0) = -inf is intended as "no lower bound"; silence only that warning
    with np.errstate(divide="ignore"):
        sc1 = np.log(np.array([0, 1e-1, 1e2]))
        sc2 = np.log(np.array([0, 1e-3, 1e1]))

    # initial guess, parameter order: mu1, sig1, sc1, mu2, sig2, sc2
    _x0 = np.array([mu1[1], sig1[1], sc1[1], mu2[1], sig2[1], sc2[1]])

    # same guess with mu1 and mu2 multiplied by 1e6
    _x0_micrometer = np.array([mu1[1] * 1e6, sig1[1], sc1[1], mu2[1] * 1e6, sig2[1], sc2[1]])

    _bounds = Bounds(
        # mu1, sig1, sc1, mu2, sig2, sc2
        lb=[mu1[0], sig1[0], sc1[0], mu2[0], sig2[0], sc2[0]],
        ub=[mu1[2], sig1[2], sc1[2], mu2[2], sig2[2], sc2[2]],
        # keep_feasible = [True, True, True, False, True, True]
    )

    _bounds_micrometer = Bounds(
        # mu1, sig1, sc1, mu2, sig2, sc2
        lb=[mu1[0] * 1e6, sig1[0], sc1[0], mu2[0] * 1e6, sig2[0], sc2[0]],
        ub=[mu1[2] * 1e6, sig1[2], sc1[2], mu2[2] * 1e6, sig2[2], sc2[2]],
        # keep_feasible = [True, True, True, False, True, True]
    )

    @staticmethod
    def bounds():
        """Return the bounds of this class."""
        return MSDBoundsWRONG._bounds

    @staticmethod
    def bounds_micrometer():
        """Return the bounds with mu1 and mu2 multiplied by 1e6."""
        return MSDBoundsWRONG._bounds_micrometer

    @staticmethod
    def x0():
        """Return the initial guess of this class."""
        return MSDBoundsWRONG._x0

    @staticmethod
    def x0_micrometer():
        """Return the initial guess with mu1 and mu2 multiplied by 1e6."""
        return MSDBoundsWRONG._x0_micrometer


class MSDBounds:
    """Start values and bounds for the double log-normal fit of the mass size distribution.

    Parameter order everywhere: mu1, sig1, sc1, mu2, sig2, sc2.
    """

    # One row per parameter: (lower bound, initial guess, upper bound)
    _table = (
        (1e-6, 3e-6, 50e-6),  # mu1
        (1.3, 1.5, 3.5),  # sig1
        (1e-20, 1e-1, 1e2),  # sc1
        (100e-6, 300e-6, 0.5e-3),  # mu2
        (1.3, 2, 3.0),  # sig2
        (1e-20, 1e-4, 1e1),  # sc2
    )
    _lower, _guess, _upper = (list(column) for column in zip(*_table))

    _x0 = np.array(_guess)
    _bounds = Bounds(
        lb=_lower,
        ub=_upper,
        # keep_feasible = [True, True, True, False, True, True]
    )

    # the helper names are only needed while the class body is built
    del _table, _lower, _guess, _upper

    @staticmethod
    def bounds():
        """Return the parameter bounds."""
        return MSDBounds._bounds

    @staticmethod
    def x0():
        """Return the initial guess."""
        return MSDBounds._x0

In [None]:
def double_ln_normal_distribution(
    t: np.ndarray,
    mu1: float,
    sigma1: float,
    scale_factor1: float,
    mu2: float,
    sigma2: float,
    scale_factor2: float,
) -> np.ndarray:
    """Evaluate the sum of two log-normal modes at ``t``.

    Each mode is

        scale_factor / (sqrt(2 * pi) * ln(sigma)) * exp(-(ln(t) - ln(mu))**2 / (2 * ln(sigma)**2))

    Parameters:
        t: Points at which the distribution is evaluated.
        mu1, sigma1, scale_factor1: Parameters of the first mode; the
            logarithms of ``mu`` and ``sigma`` enter the formula.
        mu2, sigma2, scale_factor2: Parameters of the second mode.

    Returns:
        The summed modes, with the same shape as ``t``.
    """

    # zero template with the shape (and container type) of t
    result = t * 0

    for mu, sigma, scale_factor in zip(
        (mu1, mu2),
        (sigma1, sigma2),
        (scale_factor1, scale_factor2),
    ):
        sigtilda = np.log(sigma)
        mutilda = np.log(mu)

        norm = scale_factor / (np.sqrt(2 * np.pi) * sigtilda)
        exponent = -((np.log(t) - mutilda) ** 2) / (2 * sigtilda**2)

        dn_dlnr = norm * np.exp(exponent)  # eq.5.8 [lohmann intro 2 clouds]

        # Out-of-place addition: `result += dn_dlnr` fails with a casting error
        # when t has an integer dtype, because the float mode cannot be written
        # into the integer template.
        result = result + dn_dlnr

    return result


def double_ln_normal_distribution_cost(
    x: Tuple[float, float, float, float, float, float],
    t: np.ndarray,
    y: np.ndarray,
    variance: Union[None, float, int, np.ndarray] = None,
    variance_scale: float = 0.01,
    variance_minimal: float = 1e-12,
) -> np.ndarray:
    """Return the flattened residuals ``prediction - y`` of the double log-normal model.

    Parameters:
        x: The six model parameters, passed positionally to
            ``double_ln_normal_distribution``.
        t: The independent variable.
        y: The observed values.
        variance, variance_scale, variance_minimal: Accepted but not used;
            the residuals are divided by ``sqrt(1)``.

    Returns:
        The residuals as a 1D array.
    """

    unit_variance = 1
    residual = double_ln_normal_distribution(t, *x) - y
    return np.ravel(residual / np.sqrt(unit_variance))

In [None]:
from numpy import ndarray

from importlib import reload

# Reload the models module so the fit classes below subclass the current
# version of smodels.LeastSquareFit.
reload(smodels)


class LinearDoubleLnNormal(smodels.LeastSquareFit):
    """
    Least-squares fit of a double log-normal distribution evaluated in linear space.

    The model is the sum of two modes. Each mode is computed by
    ``smodels.log_normal_distribution_all`` with ``parameter_space="exact"``
    and ``space="linear"``. The fitting machinery itself comes from
    ``smodels.LeastSquareFit``; this class supplies the model function and the
    residual (cost) function.

    Attributes:
        cost_func (Callable): ``__weighted_cost_func__`` if ``weighted_cost_use``
            was set, otherwise ``__default_cost_func__``.
    """

    def __init__(
        self,
        name: str,
        x0: np.ndarray,
        bounds: Bounds,
        t_train: Union[xr.DataArray, np.ndarray],
        y_train: Union[xr.DataArray, np.ndarray],
        fit_kwargs: Dict = dict(),
        plot_kwargs: Dict = dict(),
        weighted_cost_use: bool = False,
        func_kwargs: Dict = dict(),
    ):
        """
        Initialize the LinearDoubleLnNormal instance.

        Parameters:
            name (str): The name of the fitting instance.
            x0 (np.ndarray): Initial guess for the parameters, in the order
                mu1, sigma1, scale_factor1, mu2, sigma2, scale_factor2.
            bounds (Bounds): Bounds on the parameters.
            t_train (np.ndarray): Training data for the independent variable.
            y_train (np.ndarray): Training data for the dependent variable.
            fit_kwargs (Dict): Forwarded to ``smodels.LeastSquareFit``.
            plot_kwargs (Dict): Forwarded to ``smodels.LeastSquareFit``.
            weighted_cost_use (bool): If true, residuals are multiplied by ``t**3``.
            func_kwargs (Dict): Extra keyword arguments passed to
                ``smodels.log_normal_distribution_all`` for both modes.
        """

        # The default dicts are created once and shared by every call. Work on
        # copies so that no instance can modify another instance's settings.
        fit_kwargs = dict(fit_kwargs)
        plot_kwargs = dict(plot_kwargs)
        mode_kwargs = dict(func_kwargs)

        def this_func(
            t: np.ndarray,
            mu1: float,
            sigma1: float,
            scale_factor1: float,
            mu2: float,
            sigma2: float,
            scale_factor2: float,
        ) -> np.ndarray:
            """Sum of the two log-normal modes."""

            d1 = smodels.log_normal_distribution_all(
                x=t,
                mu=mu1,
                sigma=sigma1,
                scale=scale_factor1,
                parameter_space="exact",
                space="linear",
                **mode_kwargs,
            )
            d2 = smodels.log_normal_distribution_all(
                x=t,
                mu=mu2,
                sigma=sigma2,
                scale=scale_factor2,
                parameter_space="exact",
                space="linear",
                **mode_kwargs,
            )

            return d1 + d2

        super().__init__(
            name=name,
            func=this_func,
            x0=x0,
            bounds=bounds,
            t_train=t_train,
            y_train=y_train,
            fit_kwargs=fit_kwargs,
            plot_kwargs=plot_kwargs,
        )

        # choose the residual definition used by the optimizer
        if weighted_cost_use:
            self.cost_func = self.__weighted_cost_func__
        else:
            self.cost_func = self.__default_cost_func__

    def __default_cost_func__(self, x: np.ndarray, t: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
        """
        The cost function to minimize: plain residuals.

        Parameters:
            x (np.ndarray): The parameters to estimate.
            t (np.ndarray): The independent variable.
            y (np.ndarray): The dependent variable.
            **kwargs: Accepted and ignored.

        Returns:
            np.ndarray: The flattened difference ``y - func(t, *x)`` without NaN entries.
        """
        diff = np.ravel(y - self.func(t, *x))

        # only use the non-NaN values
        return diff[~np.isnan(diff)]

    def __weighted_cost_func__(
        self, x: np.ndarray, t: np.ndarray, y: np.ndarray, **kwargs
    ) -> np.ndarray:
        """
        The cost function to minimize: residuals weighted by ``t**3``.

        Parameters:
            x (np.ndarray): The parameters to estimate.
            t (np.ndarray): The independent variable.
            y (np.ndarray): The dependent variable.
            **kwargs: Accepted and ignored.

        Returns:
            np.ndarray: The flattened ``t**3 * (y - func(t, *x))`` without NaN entries.
        """
        diff = np.ravel(t**3 * (y - self.func(t, *x)))

        # only use the non-NaN values
        return diff[~np.isnan(diff)]


class CleoDoubleLnNormal(smodels.LeastSquareFit):
    """
    Least-squares fit of a double log-normal distribution.

    The model handed to ``smodels.LeastSquareFit`` is the notebook-level
    ``double_ln_normal_distribution``. The fitting machinery itself comes from
    the base class; this class supplies the model and the residual (cost)
    function.

    Attributes:
        cost_func (Callable): ``__weighted_cost_func__`` if ``weighted_cost_use``
            was set, otherwise ``__default_cost_func__``.
    """

    def __init__(
        self,
        name: str,
        x0: np.ndarray,
        bounds: Bounds,
        t_train: Union[xr.DataArray, np.ndarray],
        y_train: Union[xr.DataArray, np.ndarray],
        fit_kwargs: Dict = dict(),
        plot_kwargs: Dict = dict(),
        weighted_cost_use: bool = False,
        func_kwargs: Dict = dict(),
    ):
        """
        Initialize the CleoDoubleLnNormal instance.

        Parameters:
            name (str): The name of the fitting instance.
            x0 (np.ndarray): Initial guess for the parameters, in the order
                mu1, sigma1, scale_factor1, mu2, sigma2, scale_factor2.
            bounds (Bounds): Bounds on the parameters.
            t_train (np.ndarray): Training data for the independent variable.
            y_train (np.ndarray): Training data for the dependent variable.
            fit_kwargs (Dict): Forwarded to ``smodels.LeastSquareFit``.
            plot_kwargs (Dict): Forwarded to ``smodels.LeastSquareFit``.
            weighted_cost_use (bool): If true, residuals are multiplied by ``t**3``.
            func_kwargs (Dict): Only used by ``this_func``, which is currently
                not the fitted model (see the note in the body).
        """

        # The default dicts are created once and shared by every call. Work on
        # copies so that no instance can modify another instance's settings.
        fit_kwargs = dict(fit_kwargs)
        plot_kwargs = dict(plot_kwargs)
        mode_kwargs = dict(func_kwargs)

        # NOTE(review): `this_func` (geometric parameters, "cleo" space) is defined
        # but NOT passed to the base class below, so `func_kwargs` has no effect on
        # the fit. It is unclear which model is intended. The existing behaviour is
        # kept so that fit results do not change; confirm and remove one of the two.
        def this_func(
            t: np.ndarray,
            mu1: float,
            sigma1: float,
            scale_factor1: float,
            mu2: float,
            sigma2: float,
            scale_factor2: float,
        ) -> np.ndarray:
            """Sum of the two log-normal modes (currently unused)."""

            d1 = smodels.log_normal_distribution_all(
                x=t,
                mu=mu1,
                sigma=sigma1,
                scale=scale_factor1,
                parameter_space="geometric",
                space="cleo",
                **mode_kwargs,
            )
            d2 = smodels.log_normal_distribution_all(
                x=t,
                mu=mu2,
                sigma=sigma2,
                scale=scale_factor2,
                parameter_space="geometric",
                space="cleo",
                **mode_kwargs,
            )

            return d1 + d2

        super().__init__(
            name=name,
            func=double_ln_normal_distribution,
            # cost_func=double_ln_normal_distribution_cost,
            x0=x0,
            bounds=bounds,
            t_train=t_train,
            y_train=y_train,
            fit_kwargs=fit_kwargs,
            plot_kwargs=plot_kwargs,
        )

        # choose the residual definition used by the optimizer
        if weighted_cost_use:
            self.cost_func = self.__weighted_cost_func__
        else:
            self.cost_func = self.__default_cost_func__

    def __default_cost_func__(self, x: np.ndarray, t: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
        """
        The cost function to minimize: plain residuals.

        Parameters:
            x (np.ndarray): The parameters to estimate.
            t (np.ndarray): The independent variable.
            y (np.ndarray): The dependent variable.
            **kwargs: Accepted and ignored.

        Returns:
            np.ndarray: The flattened difference ``y - func(t, *x)`` without NaN entries.
        """
        diff = np.ravel(y - self.func(t, *x))

        # only use the non-NaN values
        return diff[~np.isnan(diff)]

    def __weighted_cost_func__(
        self, x: np.ndarray, t: np.ndarray, y: np.ndarray, **kwargs
    ) -> np.ndarray:
        """
        The cost function to minimize: residuals weighted by ``t**3``.

        Parameters:
            x (np.ndarray): The parameters to estimate.
            t (np.ndarray): The independent variable.
            y (np.ndarray): The dependent variable.
            **kwargs: Accepted and ignored.

        Returns:
            np.ndarray: The flattened ``t**3 * (y - func(t, *x))`` without NaN entries.
        """
        diff = np.ravel(t**3 * (y - self.func(t, *x)))

        # only use the non-NaN values
        return diff[~np.isnan(diff)]


class BasicDoubleLnNormalLeastSquare(smodels.LeastSquareFit):
    """
    Least-squares fit of ``smodels.double_ln_normal_distribution``.

    The fitting machinery comes from ``smodels.LeastSquareFit``; this class
    supplies a thin wrapper around the model and an unweighted residual function.

    Attributes:
        cost_func (Callable): Always ``__default_cost_func__``.
    """

    def __init__(
        self,
        name: str,
        x0: np.ndarray,
        bounds: Bounds,
        t_train: Union[xr.DataArray, np.ndarray],
        y_train: Union[xr.DataArray, np.ndarray],
        fit_kwargs: Dict = dict(),
        plot_kwargs: Dict = dict(),
    ):
        """
        Initialize the BasicDoubleLnNormalLeastSquare instance.

        Parameters:
            name (str): The name of the fitting instance.
            x0 (np.ndarray): Initial guess for the parameters.
            bounds (Bounds): Bounds on the parameters.
            t_train (np.ndarray): Training data for the independent variable.
            y_train (np.ndarray): Training data for the dependent variable.
            fit_kwargs (Dict): Forwarded to ``smodels.LeastSquareFit``.
            plot_kwargs (Dict): Forwarded to ``smodels.LeastSquareFit``.
        """

        def this_func(
            t: np.ndarray,
            mu1: float,
            sigma1: float,
            scale_factor1: float,
            mu2: float,
            sigma2: float,
            scale_factor2: float,
        ) -> np.ndarray:
            """Forward all arguments by keyword to the library model."""
            model_arguments = dict(
                t=t,
                mu1=mu1,
                sigma1=sigma1,
                scale_factor1=scale_factor1,
                mu2=mu2,
                sigma2=sigma2,
                scale_factor2=scale_factor2,
            )
            return smodels.double_ln_normal_distribution(**model_arguments)

        base_arguments = dict(
            name=name,
            func=this_func,
            x0=x0,
            bounds=bounds,
            t_train=t_train,
            y_train=y_train,
            fit_kwargs=fit_kwargs,
            plot_kwargs=plot_kwargs,
        )
        super().__init__(**base_arguments)

        self.cost_func = self.__default_cost_func__

    def __default_cost_func__(self, x: np.ndarray, t: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
        """
        The cost function to minimize: plain residuals.

        Parameters:
            x (np.ndarray): The parameters to estimate.
            t (np.ndarray): The independent variable.
            y (np.ndarray): The dependent variable.
            **kwargs: Accepted and ignored.

        Returns:
            np.ndarray: The flattened difference ``y - func(t, *x)`` without NaN entries.
        """
        residuals = np.ravel(y - self.func(t, *x))

        # keep the finite-or-inf entries, drop NaN
        is_valid = ~np.isnan(residuals)
        return residuals[is_valid]


class WeightedDoubleLnNormalLeastSquare(smodels.LeastSquareFit):
    """
    Least-squares fit of a double log-normal distribution with ``t**3``-weighted residuals.

    The model is the sum of two modes, each computed by
    ``smodels.log_normal_distribution_all`` with ``parameter_space="geometric"``
    and ``space="cleo"``. The fitting machinery comes from
    ``smodels.LeastSquareFit``.

    Attributes:
        cost_func (Callable): Always ``__default_cost_func__``, which in this
            class is the ``t**3``-weighted residual.
    """

    def __init__(
        self,
        name: str,
        x0: np.ndarray,
        bounds: Bounds,
        t_train: Union[xr.DataArray, np.ndarray],
        y_train: Union[xr.DataArray, np.ndarray],
        fit_kwargs: Dict = dict(),
        plot_kwargs: Dict = dict(),
    ):
        """
        Initialize the WeightedDoubleLnNormalLeastSquare instance.

        Parameters:
            name (str): The name of the fitting instance.
            x0 (np.ndarray): Initial guess for the parameters.
            bounds (Bounds): Bounds on the parameters.
            t_train (np.ndarray): Training data for the independent variable.
            y_train (np.ndarray): Training data for the dependent variable.
            fit_kwargs (Dict): Forwarded to ``smodels.LeastSquareFit``.
            plot_kwargs (Dict): Forwarded to ``smodels.LeastSquareFit``.
        """

        def single_mode(t, mu, sigma, scale):
            """One log-normal mode in geometric parameters and "cleo" space."""
            return smodels.log_normal_distribution_all(
                x=t,
                mu=mu,
                sigma=sigma,
                scale=scale,
                parameter_space="geometric",
                space="cleo",
            )

        def this_func(
            t: np.ndarray,
            mu1: float,
            sigma1: float,
            scale_factor1: float,
            mu2: float,
            sigma2: float,
            scale_factor2: float,
        ) -> np.ndarray:
            """Sum of the two log-normal modes."""
            first_mode = single_mode(t, mu1, sigma1, scale_factor1)
            second_mode = single_mode(t, mu2, sigma2, scale_factor2)
            return first_mode + second_mode

        base_arguments = dict(
            name=name,
            func=this_func,
            x0=x0,
            bounds=bounds,
            t_train=t_train,
            y_train=y_train,
            fit_kwargs=fit_kwargs,
            plot_kwargs=plot_kwargs,
        )
        super().__init__(**base_arguments)

        self.cost_func = self.__default_cost_func__

    def __default_cost_func__(self, x: np.ndarray, t: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
        """
        The cost function to minimize: residuals weighted by ``t**3``.

        Parameters:
            x (np.ndarray): The parameters to estimate.
            t (np.ndarray): The independent variable.
            y (np.ndarray): The dependent variable.
            **kwargs: Accepted and ignored.

        Returns:
            np.ndarray: The flattened ``t**3 * (y - func(t, *x))`` without NaN entries.
        """
        residuals = np.ravel(t**3 * (y - self.func(t, *x)))

        # keep the finite-or-inf entries, drop NaN
        is_valid = ~np.isnan(residuals)
        return residuals[is_valid]

# New approach:

- limit the radius range to 15 µm and above (no upper limit)

In [None]:
# Per-cloud statistics collected in lists, one entry per cloud_id:
# mean and standard error of the mean (over time) of
#   - the liquid water content,
#   - the liquid water content contained in radii >= 50e-6,
#   - the radius-integrated particle size distribution.
list_lwc = []
list_lwc_sem = []
list_lwc_50um = []
list_lwc_50um_sem = []

list_nbc = []
list_nbc_sem = []

for cloud_id in tqdm(identified_clusters["cloud_id"]):
    # cloud composite data belonging to this cloud
    cc = match_clouds_and_cloudcomposite(
        ds_clouds=identified_clusters.sel(cloud_id=cloud_id),
        ds_cloudcomposite=coarse_composite,
    )

    lwc = cc["liquid_water_content"]

    lwc_mean, lwc_sem = mean_and_stderror_of_mean(lwc, dims=("time",))

    list_lwc.append(lwc_mean)
    list_lwc_sem.append(lwc_sem)

    # integrate the mass size distribution over all radius bins from 50e-6 upwards
    lwc_above_50um = (
        (cc["mass_size_distribution"] * cc["bin_width"]).sel(radius=slice(50e-6, None)).sum("radius")
    )
    lwc_above_50um_mean, lwc_above_50um_sem = mean_and_stderror_of_mean(lwc_above_50um, dims=("time",))
    list_lwc_50um.append(lwc_above_50um_mean)
    list_lwc_50um_sem.append(lwc_above_50um_sem)

    # integrate the particle size distribution over all radius bins
    nbc = (cc["particle_size_distribution"] * cc["bin_width"]).sum("radius")

    nbc_mean, nbc_sem = mean_and_stderror_of_mean(nbc, dims=("time",))
    list_nbc.append(nbc_mean)
    list_nbc_sem.append(nbc_sem)


# Stack the per-cloud values along a new "cloud_id" dimension and label them.
# NOTE(review): the attrs say "g m^{-3}" while the input file name says SI units —
# confirm the unit of the source variables.
da_lwc = xr.concat(
    list_lwc,
    dim="cloud_id",
)
da_lwc.attrs = dict(
    long_name="Liquid water content",
    units="g m^{-3}",
)

da_lwc_sem = xr.concat(
    list_lwc_sem,
    dim="cloud_id",
)
da_lwc_sem.attrs = dict(
    long_name="Standard error of the mean of the liquid water content",
    units="g m^{-3}",
)

da_lwc_50um = xr.concat(
    list_lwc_50um,
    dim="cloud_id",
)
da_lwc_50um.attrs = dict(
    long_name="Liquid water content above 50 µm",
    units="g m^{-3}",
)

da_lwc_50um_sem = xr.concat(
    list_lwc_50um_sem,
    dim="cloud_id",
)
da_lwc_50um_sem.attrs = dict(
    long_name="Standard error of the mean of the liquid water content above 50 µm",
    units="g m^{-3}",
)

da_nbc = xr.concat(
    list_nbc,
    dim="cloud_id",
)
da_nbc.attrs = dict(
    long_name="Number concentration",
    units="m^{-3}",
)

da_nbc_sem = xr.concat(
    list_nbc_sem,
    dim="cloud_id",
)
da_nbc_sem.attrs = dict(
    long_name="Standard error of the mean of the number concentration",
    units="m^{-3}",
)

# Collect everything in one dataset.
# NOTE(review): the radius-integrated number concentration is stored under the
# name "particle_size_distribution" — confirm that this name is intended.
ds_observations = xr.Dataset(
    dict(
        liquid_water_content=da_lwc,
        liquid_water_content_sem=da_lwc_sem,
        liquid_water_content_50um=da_lwc_50um,
        liquid_water_content_50um_sem=da_lwc_50um_sem,
        particle_size_distribution=da_nbc,
        particle_size_distribution_sem=da_nbc_sem,
    )
)

100%|██████████| 154/154 [00:01<00:00, 80.97it/s]


### Fit all clouds

In [None]:
# Earlier variant of the prediction grid (disabled): finer grid, half centred
# difference as width, end points dropped.
# start = 1e-6
# end = 1.5e-3
# r = np.geomspace(start, end, 10000)
# t_test = xr.DataArray(data=r, coords={"radius": r}, dims=["radius"])
# w_test = 0.5 * (t_test - t_test.shift(radius=2)).shift(radius=-1)
# t_test = t_test.isel(radius=slice(1, -1))
# w_test = w_test.isel(radius=slice(1, -1))

# Radius grid on which the fitted distributions are evaluated:
# 100 geometrically spaced points from 0.1e-6 to 3e-3.
r = np.geomspace(0.1e-6, 3e-3, 100)
t_test = xr.DataArray(data=r, coords={"radius": r}, dims=["radius"])
# Width per grid point: t[i + 1] - t[i - 1]; the first and last entries are NaN.
# NOTE(review): unlike the disabled variant above there is no factor 0.5 here —
# confirm which bin-width definition is intended.
w_test = (t_test - t_test.shift(radius=2)).shift(radius=-1)
# Fill the two NaN end points by linear extrapolation along radius
w_test = w_test.interpolate_na("radius", method="linear", fill_value="extrapolate")

#### Fit in number concentration space

In [None]:
# Fit the double log-normal model to the time-mean particle size distribution
# of every cloud and evaluate each fit on `t_test`.
particle_size_distribution_parameters = dict()
fitted_data = []


for cloud_id in tqdm(identified_clusters["cloud_id"]):

    # extract the cloud composite data for the cloud
    cc = match_clouds_and_cloudcomposite(
        ds_clouds=identified_clusters.sel(cloud_id=cloud_id),
        ds_cloudcomposite=coarse_composite,
    )

    # use the particle size distribution and the radius
    psd = cc["particle_size_distribution"]  # .sel(radius = slice(15e-6, None))
    radius = cc["radius"].expand_dims(time=cc["time"])  # .sel(radius = slice(15e-6, None))

    # make sure to have the same order of dimensions
    psd = psd.transpose("time", "radius")
    radius = radius.transpose("time", "radius")

    # fit the double log-normal distribution to the time-mean values;
    # `kwargs=dict(variance=1)` is part of fit_kwargs and is handed on unchanged
    double_ln = CleoDoubleLnNormal(
        name="PSD",
        x0=PSDBounds.x0(),
        bounds=PSDBounds.bounds(),
        t_train=radius.mean("time", skipna=True),
        y_train=psd.mean("time", skipna=True),
        fit_kwargs=dict(loss="linear", kwargs=dict(variance=1)),
    )
    # NOTE(review): the meaning of the argument 5 is defined by
    # smodels.LeastSquareFit.fit — presumably a repetition count; confirm.
    double_ln.fit(5)

    # save the parameters
    # NOTE(review): iterating a DataArray yields 0-d DataArrays, so str(cloud_id)
    # is the DataArray repr, not the bare id — confirm how the keys are consumed.
    particle_size_distribution_parameters[str(cloud_id)] = double_ln.parameters

    # predict the number concentration
    dimension, prediction = double_ln.predict(t_test)

    # # we can also use only the radii where we have data:
    # radii_measured = cc["particle_size_distribution"].mean("time") > 0
    # radius_end = cc["radius"].sel(radius=radii_measured).max().values
    # radius_start = cc["radius"].sel(radius=radii_measured).min().values
    # radius_end = 1.5 * radius_end
    # prediction = prediction.where(dimension >= radius_start, other = np.nan)
    # prediction = prediction.where(dimension <= radius_end, other = np.nan)

    fitted_data.append(prediction)


# create a data array with the fitted number concentration
particle_size_distribution_fitted_data = xr.concat(
    fitted_data,
    dim="cloud_id",
)
# label the new dimension with the cloud ids (same order as the loop above)
particle_size_distribution_fitted_data["cloud_id"] = identified_clusters["cloud_id"]

100%|██████████| 154/154 [00:06<00:00, 24.44it/s]


In [None]:
# Same fit as for the particle size distribution, but with
# ``weighted_cost_use=True`` passed to the model.
weighted_particle_size_distribution_parameters = {}
fitted_data = []

for cloud_id in tqdm(identified_clusters["cloud_id"]):

    # cloud composite measurements that belong to this cloud
    cc = match_clouds_and_cloudcomposite(
        ds_clouds=identified_clusters.sel(cloud_id=cloud_id),
        ds_cloudcomposite=coarse_composite,
    )

    # radius expanded along time; both arrays ordered as (time, radius)
    radius = cc["radius"].expand_dims(time=cc["time"]).transpose("time", "radius")
    psd = cc["particle_size_distribution"].transpose("time", "radius")

    # train on the time means, using the weighted cost
    double_ln = CleoDoubleLnNormal(
        name="PSD weighted",
        x0=PSDBounds.x0(),
        bounds=PSDBounds.bounds(),
        t_train=radius.mean("time", skipna=True),
        y_train=psd.mean("time", skipna=True),
        fit_kwargs={"loss": "linear", "kwargs": {"variance": 1}},
        weighted_cost_use=True,
    )
    # NOTE(review): the meaning of the argument 5 is defined by
    # CleoDoubleLnNormal.fit, which is not visible here — TODO confirm
    double_ln.fit(5)

    # keep the fitted parameters
    weighted_particle_size_distribution_parameters[str(cloud_id)] = double_ln.parameters

    # evaluate the fit on the test radii
    # (a restriction of the prediction to the measured radius range was
    # considered but is not applied)
    dimension, prediction = double_ln.predict(t_test)
    fitted_data.append(prediction)


# stack the predictions of all clouds along "cloud_id"
weighted_particle_size_distribution_fitted_data = xr.concat(fitted_data, dim="cloud_id")
weighted_particle_size_distribution_fitted_data["cloud_id"] = identified_clusters["cloud_id"]

100%|██████████| 154/154 [00:03<00:00, 40.18it/s]


#### Fit in mass concentration space

In [None]:
# Fit a double log-normal to the time-mean mass size distribution of every
# cloud and evaluate each fit on the common radius grid ``t_test``.
mass_size_distribution_parameters = {}
fitted_data = []

for cloud_id in tqdm(identified_clusters["cloud_id"]):

    # cloud composite measurements that belong to this cloud
    cc = match_clouds_and_cloudcomposite(
        ds_clouds=identified_clusters.sel(cloud_id=cloud_id),
        ds_cloudcomposite=coarse_composite,
    )

    # radius expanded along time; both arrays ordered as (time, radius)
    radius = cc["radius"].expand_dims(time=cc["time"]).transpose("time", "radius")
    msd = cc["mass_size_distribution"].transpose("time", "radius")

    # train on the time means, with the MSD start values and bounds
    double_ln = CleoDoubleLnNormal(
        name="MSD",
        x0=MSDBounds.x0(),
        bounds=MSDBounds.bounds(),
        t_train=radius.mean("time", skipna=True),
        y_train=msd.mean("time", skipna=True),
        fit_kwargs={"loss": "linear", "kwargs": {"variance": 1}},
    )
    # NOTE(review): the meaning of the argument 5 is defined by
    # CleoDoubleLnNormal.fit, which is not visible here — TODO confirm
    double_ln.fit(5)

    # keep the fitted parameters
    mass_size_distribution_parameters[str(cloud_id)] = double_ln.parameters

    # evaluate the fit on the test radii
    # (a restriction of the prediction to the measured radius range was
    # considered but is not applied)
    dimension, prediction = double_ln.predict(t_test)
    fitted_data.append(prediction)

# stack the predictions of all clouds along "cloud_id"
mass_size_distribution_fitted_data = xr.concat(fitted_data, dim="cloud_id")
mass_size_distribution_fitted_data["cloud_id"] = identified_clusters["cloud_id"]

100%|██████████| 154/154 [00:07<00:00, 19.96it/s]


#### Combine the data into one dataset

In [None]:
# Gather the three fitted distributions in a single dataset.
dataset_fitted = xr.Dataset(
    {
        "particle_size_distribution": particle_size_distribution_fitted_data,
        "weighted_particle_size_distribution": weighted_particle_size_distribution_fitted_data,
        "mass_size_distribution": mass_size_distribution_fitted_data,
    }
)

# Bin width of the evaluation grid.
dataset_fitted["bin_width"] = w_test
dataset_fitted["bin_width"].attrs = {"long_name": "Bin width", "unit": "m"}

# --- Fit to the number concentration -----------------------------------------
dataset_fitted["particle_size_distribution"].attrs = {
    "long_name": "Number concentration",
    "unit": "m^{-3} m^{-1}",
    "comment": "Fit to the number concentration",
}

# The density is multiplied by the bin width before the conversion and the
# result is divided by the bin width again afterwards.
dataset_fitted["mass_size_distribution_from_nc"] = (
    msd_from_psd_dataarray(
        da=dataset_fitted["particle_size_distribution"] * dataset_fitted["bin_width"]
    )
    / dataset_fitted["bin_width"]
)
dataset_fitted["mass_size_distribution_from_nc"].attrs = {
    "long_name": "Mass concentration from number concentration",
    "unit": "kg m^{-3} m^{-1}",
    "comment": "Fit to the number concentration",
}

# --- Weighted fit to the number concentration --------------------------------
dataset_fitted["weighted_particle_size_distribution"].attrs = {
    "long_name": "Weighted number concentration",
    "unit": "m^{-3} m^{-1}",
    "comment": "Fit to the number concentration\nWeighted by the cube of the radius",
}

dataset_fitted["mass_size_distribution_from_wnc"] = (
    msd_from_psd_dataarray(
        da=dataset_fitted["weighted_particle_size_distribution"] * dataset_fitted["bin_width"]
    )
    / dataset_fitted["bin_width"]
)
dataset_fitted["mass_size_distribution_from_wnc"].attrs = {
    "long_name": "Mass concentration from weighted number concentration",
    "unit": "kg m^{-3} m^{-1}",
    "comment": "Fit to the number concentration\nWeighted by the cube of the radius",
}

# --- Fit to the mass concentration -------------------------------------------
dataset_fitted["mass_size_distribution"].attrs = {
    "long_name": "Mass concentration",
    "unit": "kg m^{-3} m^{-1}",
    "comment": "Fit to the mass concentration",
}

# Reverse conversion: mass density -> number density, same bin-width handling.
dataset_fitted["particle_size_distribution_from_mc"] = (
    psd_from_msd_dataarray(
        da=dataset_fitted["mass_size_distribution"] * dataset_fitted["bin_width"]
    )
    / dataset_fitted["bin_width"]
)
dataset_fitted["particle_size_distribution_from_mc"].attrs = {
    "long_name": "Number concentration from mass concentration",
    "unit": "m^{-3} m^{-1}",
    "comment": "Fit to the mass concentration",
}

# --- Radius coordinate ---------------------------------------------------------
# Take over the radius attributes of the observations.
dataset_fitted["radius"].attrs.update(coarse_composite["radius"].attrs)

# Radius scaled by 1e6, used as x-axis of the plots.
dataset_fitted["radius_micrometer"] = dataset_fitted["radius"] * 1e6
dataset_fitted["radius_micrometer"].attrs = {"long_name": "Radius", "unit": "µm"}

# One colour per fit; variables derived from the same fit share a colour.
colors = {
    "particle_size_distribution": "orange",
    "mass_size_distribution_from_nc": "orange",
    "weighted_particle_size_distribution": "blue",
    "mass_size_distribution_from_wnc": "blue",
    "particle_size_distribution_from_mc": "purple",
    "mass_size_distribution": "purple",
}

In [None]:
# Overview of all fits: number densities on the left, mass densities on the right.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(13, 5))

# left panel: fitted number densities of all clouds
for key in (
    "particle_size_distribution",
    "particle_size_distribution_from_mc",
    "weighted_particle_size_distribution",
):
    axs[0].plot(
        dataset_fitted["radius_micrometer"],
        dataset_fitted[key].T,
        color=colors[key],
        alpha=0.2,
    )

axs[0].set_xscale("log")
axs[0].set_yscale("symlog", linthresh=1e-1, linscale=0.1)
axs[0].set_yticks((1e0, 1e4, 1e8, 1e12, 1e16))
axs[0].set_ylim(0, 1e14)

axs[0].set_xlabel(label_from_attrs(dataset_fitted["radius_micrometer"]))
axs[0].set_ylabel(label_from_attrs(dataset_fitted["particle_size_distribution"]))

# right panel: fitted mass densities of all clouds
for key in (
    "mass_size_distribution_from_nc",
    "mass_size_distribution_from_wnc",
    "mass_size_distribution",
):
    axs[1].plot(
        dataset_fitted["radius_micrometer"],
        dataset_fitted[key].T,
        color=colors[key],
        alpha=0.2,
    )

axs[1].set_xscale("log")
axs[1].set_yscale("symlog", linthresh=1e-10, linscale=0.1)
axs[1].set_yticks((1e-8, 1e-6, 1e-4, 1e-2, 1e0, 1e2, 1e4))
axs[1].set_ylim(0, 1e4)

axs[1].set_xlabel(label_from_attrs(dataset_fitted["radius_micrometer"]))
axs[1].set_ylabel(label_from_attrs(dataset_fitted["mass_size_distribution"]))

fig.suptitle("Comparison of the three different fits")
fig.tight_layout()
fig.savefig("107/fit_comparison.png", dpi=300)

## Analysis of the fits

### Compare the LWC from the fits to the observations

Use the full radius range.

In [None]:
# One panel per fit: observed LWC (x) against the LWC summed from the fit (y).
fig, axs = plt.subplots(ncols=3, figsize=(12, 4.5), sharex=True, sharey=True)

for i, key in enumerate(
    (
        "mass_size_distribution_from_nc",
        "mass_size_distribution_from_wnc",
        "mass_size_distribution",
    )
):
    # fitted density times bin width, summed over all radii and scaled by 1e3
    mass_per_bin = dataset_fitted[key] * dataset_fitted["bin_width"]
    lwc = 1e3 * mass_per_bin.sum("radius")

    fit_comment = dataset_fitted[key].attrs["comment"]

    # NOTE(review): the observed values are used as stored here, whereas the
    # 50 µm comparison scales its observations by 1e3 — confirm the units
    axs[i].errorbar(
        x=ds_observations["liquid_water_content"],
        xerr=ds_observations["liquid_water_content_sem"],
        y=lwc,
        yerr=0,
        marker=".",
        linestyle="None",
        label=fit_comment,
        color=colors[key],
        alpha=0.5,
    )

    # panel title in an adjusted lightness of the fit colour
    title_color = adjust_lightness_array([colors[key]], 0.75)[0]
    axs[i].set_title(fit_comment, color=title_color)


# reference line and axis labels on every panel
for _ax in axs:
    _ax.plot([0, 3], [0, 3], color="black", linestyle="--")
    _ax.set_xlabel("Observed LWC [g m$^{-3}$]")
    _ax.set_ylabel("Fitted LWC [g m$^{-3}$]")

fig.suptitle("Comparison of fitted and observed LWC for multiple double Log-Normal fits")

fig.tight_layout()

# The panels share both axes, so setting the limits on the last one is enough.
axs[-1].set_xlim(0, 5)
axs[-1].set_ylim(0, 5)

fig.savefig("107/LWC_fit_comparison.png", dpi=400)

# second version of the same figure, zoomed to small LWC
axs[-1].set_xlim(0, 0.7)
axs[-1].set_ylim(0, 0.7)

fig.savefig("107/LWC_fit_comparison_zoom.png", dpi=400)

Use only radii larger than 50 µm.

In [None]:
# Same comparison as for the full radius range, restricted to radii >= 50 µm.
fig, axs = plt.subplots(ncols=3, figsize=(12, 4.5), sharex=True, sharey=True)

for i, key in enumerate(
    (
        "mass_size_distribution_from_nc",
        "mass_size_distribution_from_wnc",
        "mass_size_distribution",
    )
):
    # fitted density times bin width, summed over radii from 50e-6 upwards
    mass_per_bin = dataset_fitted[key] * dataset_fitted["bin_width"]
    lwc = 1e3 * mass_per_bin.sel(radius=slice(50e-6, None)).sum("radius")

    fit_comment = dataset_fitted[key].attrs["comment"]

    # observations and their standard error are scaled by 1e3 in this cell
    axs[i].errorbar(
        x=1e3 * ds_observations["liquid_water_content_50um"],
        xerr=1e3 * ds_observations["liquid_water_content_50um_sem"],
        y=lwc,
        yerr=0,
        marker=".",
        linestyle="None",
        label=fit_comment,
        color=colors[key],
        alpha=0.5,
    )

    # panel title in an adjusted lightness of the fit colour
    title_color = adjust_lightness_array([colors[key]], 0.75)[0]
    axs[i].set_title(fit_comment, color=title_color)

# reference line and axis labels on every panel
for _ax in axs:
    _ax.plot([0, 3], [0, 3], color="black", linestyle="--")
    _ax.set_xlabel("Observed LWC [g m$^{-3}$]")
    _ax.set_ylabel("Fitted LWC [g m$^{-3}$]")

fig.suptitle(
    "Comparison of fitted and observed LWC for multiple double Log-Normal fits\n Only Radii above 50 µm"
)

fig.tight_layout()

# The panels share both axes, so setting the limits on the last one is enough.
axs[-1].set_xlim(0, 5)
axs[-1].set_ylim(0, 5)

fig.savefig("107/LWC_fit_comparison-50um.png", dpi=400)

# second version of the same figure, zoomed to small LWC
axs[-1].set_xlim(0, 0.7)
axs[-1].set_ylim(0, 0.7)

fig.savefig("107/LWC_fit_comparison-50um-zoom.png", dpi=400)

### Plot distributions of some random clouds

In [None]:
# Seeded generator so that "Restart Kernel -> Run All" always selects the same
# clouds and the saved figures are reproducible.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

# three clouds drawn at random from all identified clouds
small_cloud_ids = rng.choice(identified_clusters["cloud_id"].values, 3, replace=False)

# clouds whose liquid water content divided by their duration in seconds
# exceeds 0.5 ...
large_cloud_ids = identified_clusters["cloud_id"].where(
    identified_clusters["liquid_water_content"] / identified_clusters["duration"].dt.seconds > 0.5,
    drop=True,
)
# ... of which three are drawn at random
large_cloud_ids = rng.choice(large_cloud_ids.values, 3, replace=False)

cloud_ids = np.concatenate([small_cloud_ids, large_cloud_ids])

# subplot layout (keyword arguments for plt.subplots) for the selected clouds
ncols_nrows = ncols_nrows_from_N(len(cloud_ids))

Particle size distribution

In [None]:
# Observed and fitted particle size distributions, one panel per selected cloud.
fig, axs = plt.subplots(figsize=(10, 7), sharex=True, sharey=True, **ncols_nrows)
flat_axes = axs.flatten()

for i, cloud_id in enumerate(cloud_ids):
    _ax = flat_axes[i]

    # cloud composite measurements that belong to this cloud
    cc = match_clouds_and_cloudcomposite(
        ds_clouds=identified_clusters.sel(cloud_id=cloud_id),
        ds_cloudcomposite=coarse_composite,
    )

    # mean LWC and its standard error over time, shown in the panel title
    lwc_mean, lwc_sem = mean_and_stderror_of_mean(cc["liquid_water_content"], dims=("time",))
    panel_title = (
        f"Cloud ID: {cloud_id:.0f}\n{lwc_mean.values:.2f} ± {lwc_sem.values:.2f} g" + "$m^{-3}$"
    )
    _ax.set_title(panel_title)

    # observations: time mean as black dots, radius scaled by 1e6
    observations = cc["particle_size_distribution"]
    _ax.plot(
        1e6 * observations["radius"],
        observations.mean("time"),
        marker=".",
        linestyle="None",
        color="black",
    )

    # the three fitted number densities for this cloud
    for key in (
        "particle_size_distribution",
        "weighted_particle_size_distribution",
        "particle_size_distribution_from_mc",
    ):
        fit = dataset_fitted[key].sel(cloud_id=cloud_id)
        _ax.plot(
            dataset_fitted["radius_micrometer"],
            fit,
            color=colors[key],
            linestyle="-",
            label=fit.attrs["comment"],
        )

    _ax.set_xscale("log")
    _ax.set_yscale("symlog", linthresh=1e4, linscale=0.1)
    _ax.set_yticks([0, 1e6, 1e9, 1e12])

# y-axes are shared, so the lower limit set on the last panel applies to all
_ax.set_ylim(bottom=0)

fig.supxlabel(label_from_attrs(dataset_fitted["radius_micrometer"]))
fig.supylabel(label_from_attrs(dataset_fitted["particle_size_distribution"]))

fig.suptitle("Comparison of fitted and observed PSDs for multiple double Log-Normal fits")
fig.tight_layout()

fig.savefig("107/PSD_fit_comparison.png", dpi=400)

In [None]:
# Observed and fitted mass size distributions, one panel per selected cloud.
fig, axs = plt.subplots(figsize=(10, 7), sharex=True, sharey=True, **ncols_nrows)
flat_axes = axs.flatten()

for i, cloud_id in enumerate(cloud_ids):
    _ax = flat_axes[i]

    # cloud composite measurements that belong to this cloud
    cc = match_clouds_and_cloudcomposite(
        ds_clouds=identified_clusters.sel(cloud_id=cloud_id),
        ds_cloudcomposite=coarse_composite,
    )

    # mean LWC and its standard error over time, shown in the panel title
    lwc_mean, lwc_sem = mean_and_stderror_of_mean(cc["liquid_water_content"], dims=("time",))
    panel_title = (
        f"Cloud ID: {cloud_id:.0f}\n{lwc_mean.values:.2f} ± {lwc_sem.values:.2f} g" + "$m^{-3}$"
    )
    _ax.set_title(panel_title)

    # observations: time mean as black dots, radius scaled by 1e6
    observations = cc["mass_size_distribution"]
    _ax.plot(
        1e6 * cc["radius"],
        observations.mean("time"),
        marker=".",
        linestyle="None",
        color="black",
    )

    # the three fitted mass densities for this cloud
    for key in (
        "mass_size_distribution",
        "mass_size_distribution_from_wnc",
        "mass_size_distribution_from_nc",
    ):
        fit = dataset_fitted[key].sel(cloud_id=cloud_id)
        _ax.plot(
            dataset_fitted["radius_micrometer"],
            fit,
            color=colors[key],
            linestyle="-",
            alpha=0.8,
        )

    _ax.set_xscale("log")
    _ax.set_yscale("symlog", linthresh=1e-6, linscale=0.1)
    _ax.set_yticks([0, 1e-3, 1e0, 1e3])
    _ax.set_xlim(1, 3e3)

# y-axes are shared, so the lower limit set on the last panel applies to all
_ax.set_ylim(bottom=0)

fig.supxlabel("Radius [$µm$]")
fig.supylabel(label_from_attrs(dataset_fitted["mass_size_distribution"]))

fig.suptitle("Comparison of fitted and observed MSDs for multiple double Log-Normal fits")
fig.tight_layout()

fig.savefig("107/MSD_fit_comparison.png", dpi=400)

In [None]:
# Observed and fitted mass per radius bin on linear axes, one panel per cloud.
fig, axs = plt.subplots(figsize=(10, 7), sharex=True, sharey=False, **ncols_nrows)
flat_axes = axs.flatten()

for i, cloud_id in enumerate(cloud_ids):
    _ax = flat_axes[i]

    # cloud composite measurements that belong to this cloud
    cc = match_clouds_and_cloudcomposite(
        ds_clouds=identified_clusters.sel(cloud_id=cloud_id),
        ds_cloudcomposite=coarse_composite,
    )

    # mean LWC and its standard error over time, shown in the panel title
    lwc_mean, lwc_sem = mean_and_stderror_of_mean(cc["liquid_water_content"], dims=("time",))
    panel_title = (
        f"Cloud ID: {cloud_id:.0f}\n{lwc_mean.values:.2f} ± {lwc_sem.values:.2f} g" + "$m^{-3}$"
    )
    _ax.set_title(panel_title)

    # observations: density times the observation bin width;
    # radius and values are both scaled by 1e3 for plotting
    observations = cc["mass_size_distribution"] * cc["bin_width"]
    _ax.plot(
        1e3 * observations["radius"],
        1e3 * observations.mean("time"),
        marker=".",
        linestyle="None",
        color="black",
    )

    # the three fitted mass densities, multiplied by the bin width of the fit grid
    for key in (
        "mass_size_distribution_from_nc",
        "mass_size_distribution_from_wnc",
        "mass_size_distribution",
    ):
        fit = dataset_fitted[key].sel(cloud_id=cloud_id)
        _ax.plot(
            1e3 * dataset_fitted["radius"],
            1e3 * fit * dataset_fitted["bin_width"],
            color=colors[key],
            linestyle="-",
            label=fit.attrs["comment"],
            alpha=0.8,
        )

    _ax.set_xlim([0, 3])

# y-axes are not shared: each of the first two rows gets its own fixed y-range
for _ax in axs[0]:
    _ax.set_ylim(0, 0.008)

for _ax in axs[1]:
    _ax.set_ylim(0, 0.15)


fig.supxlabel("Radius [$mm$]")
fig.supylabel("Mass concentration [$g m^{-3}$]")

fig.suptitle("Comparison of fitted and observed MSDs for multiple double Log-Normal fits")
fig.tight_layout()

fig.savefig("107/MSD_fit_comparison_linear.png", dpi=400)