In [None]:
import os

from pathlib import Path

import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import xarray as xr

from sdm_eurec4a.visulization import set_custom_rcParams, symlog_from_array
from sdm_eurec4a.identifications import (
    select_individual_cloud,
    match_clouds_and_cloudcomposite,
    match_clouds_and_dropsondes,
)
from sdm_eurec4a.reductions import x_y_flatten
from sdm_eurec4a.pySD import probdists

from sdm_eurec4a.conversions import msd_from_psd

In [None]:
# Plot configuration: start from matplotlib's defaults, then apply the project style.
plt.style.use("default")
default_colors = set_custom_rcParams()

# Disable rendering of text through an external LaTeX installation.
# ``mpl`` is already imported in the import cell, so no extra mid-notebook import is needed.
mpl.rc("text", usetex=False)

# THE PATH TO THE SCRIPT DIRECTORY
# The location can be overridden with the SDM_EUREC4A_SCRIPT_DIR environment variable so
# that the notebook also runs on other machines. Without the variable (or with an empty
# value) the original hard-coded location is used.
script_dir = os.path.abspath(
    os.environ.get("SDM_EUREC4A_SCRIPT_DIR")
    or "/home/m/m301096/repositories/sdm-eurec4a/scripts/CLEO/initalize"
)
print(script_dir)

# The repository root lies three directory levels above the script directory.
REPOSITORY_ROOT = Path(script_dir).parents[2]
print(REPOSITORY_ROOT)

# All figures of this notebook are stored below this directory.
fig_path = REPOSITORY_ROOT / Path("results/CLEO/initilization/fitting_psd")
fig_path.mkdir(parents=True, exist_ok=True)

/home/m/m301096/repositories/sdm-eurec4a/scripts/CLEO/initalize
/home/m/m301096/repositories/sdm-eurec4a


### Load datasets

In [None]:
# --- Which cloud to analyse ---------------------------------------------------
mask_name = "cloud_mask"
chosen_id = 1421
# Alternative selection for the rain mask case:
# mask_name = "rain_mask"
# chosen_id = 77

# Figures of the chosen cloud get their own sub-directory.
subfig_path = fig_path / Path(f"{mask_name}_{chosen_id}")
subfig_path.mkdir(parents=True, exist_ok=True)

# --- Input files ---------------------------------------------------------------
identified_clouds_file = REPOSITORY_ROOT / Path(
    f"data/observation/cloud_composite/processed/identified_clouds/identified_clouds_{mask_name}.nc"
)
distance_file = REPOSITORY_ROOT / Path(
    f"data/observation/combined/distance/distance_dropsondes_clouds_{mask_name}.nc"
)
cloud_composite_file = REPOSITORY_ROOT / Path(
    "data/observation/cloud_composite/processed/cloud_composite.nc"
)
drop_sondes_file = REPOSITORY_ROOT / Path(
    "data/observation/dropsonde/Level_3/EUREC4A_JOANNE_Dropsonde-RD41_Level_3_v2.0.0.nc"
)

# --- Identified clouds -----------------------------------------------------------
identified_clouds = xr.open_dataset(identified_clouds_file)
# select only clouds which are between 800 and 1100 m
altitude = identified_clouds.alt
in_altitude_range = (altitude >= 800) & (altitude <= 1100)
identified_clouds = identified_clouds.where(in_altitude_range, drop=True)

# --- Distances between identified clouds and dropsondes -------------------------
distance_IC_DS = xr.open_dataset(distance_file)

# --- Cloud composite, opened in chunks of 1000 time steps ------------------------
cloud_composite = xr.open_dataset(cloud_composite_file, chunks={"time": 1000})

# --- Dropsondes: index by launch time, sort in time and keep time in one chunk ----
drop_sondes = (
    xr.open_dataset(drop_sondes_file)
    .rename({"launch_time": "time"})
    .swap_dims({"sonde_id": "time"})
    .sortby("time")
    .chunk({"time": -1})
)

### Use Total number concentration

Choose an individual cloud to handle.

- Use ``chosen_id = 77`` for the rain_mask case
- Use ``chosen_id = 1421`` for the cloud_mask case

In [None]:
# Restrict the analysis to the single cloud given by ``chosen_id``
ds_cloud = select_individual_cloud(identified_clouds, chosen_id)

# Dropsondes matched to this cloud, limited by the temporal and spatial thresholds below
ds_sonde = match_clouds_and_dropsondes(
    ds_cloud=ds_cloud,
    ds_sonde=drop_sondes,
    ds_distance=distance_IC_DS,
    max_temporal_distance=np.timedelta64(1, "h"),
    max_spatial_distance=100,
)

# Cloud composite measurements matched to this cloud
ds_cloudcomposite = match_clouds_and_cloudcomposite(ds_cloud=ds_cloud, ds_cloudcomposite=cloud_composite)

# Make sure to have the total number of particles in the cloud. See also #28 on GitHub.
# The stored PSD is multiplied by the bin width, so every bin afterwards holds a plain count.
attrs = ds_cloudcomposite["particle_size_distribution"].attrs
attrs.update(
    unit="#/L",
    comment="histogram: each bin gives the number of droplets per liter of air, NOT normalized by the bin width",
)
psd_per_bin = ds_cloudcomposite["particle_size_distribution"] * ds_cloudcomposite["bin_width"]
ds_cloudcomposite["particle_size_distribution"] = psd_per_bin
# Re-attach the updated metadata to the newly assigned variable.
ds_cloudcomposite["particle_size_distribution"].attrs = attrs

#### Plot the distributions in linear and lognormal space

In [None]:
# Marker style shared by all panels
style = {"marker": ".", "linestyle": "none", "color": "k", "alpha": 0.5}

psd = ds_cloudcomposite["particle_size_distribution"]
# y-scale derived from the data by the project helper; applied to the right column below
symlog = symlog_from_array(psd)

fig, axss = plt.subplots(3, 2, figsize=(10, 7), layout="constrained")
fig.suptitle(f"Cloud ID: {chosen_id} - Particle Size Distribution - Different xscales")

# One entry per row: (x values, x-scale to set or None, x-axis label)
diameter = psd["diameter"]
row_layout = (
    (diameter, None, "Diameter [µm]"),  # linear x
    (diameter, "log", "Diameter [µm]"),  # linear x shown on a log axis
    (np.log(diameter), None, "Ln(Diameter) [Ln(µm)]"),  # Ln(x) on a linear axis
)

# Both columns show the same three panels
for column_axes in axss.T:
    for ax, (x_values, x_scale, x_label) in zip(column_axes, row_layout):
        ax.plot(x_values, psd, **style)
        if x_scale is not None:
            ax.set_xscale(x_scale)
        ax.set_xlabel(x_label)
        ax.set_ylabel("Counts [#/l]")

# The right column uses the symlog y-scale
for ax in axss[:, 1]:
    ax.set_yscale(symlog)

# Final y-label and lower y-limit for every panel
for ax in axss.flat:
    ax.set_ylabel("#/l")
    ax.set_ylim(0, None)

fig.savefig(subfig_path / f"all_scales_psd_{mask_name}_cloud_{chosen_id}.png", dpi=300)
fig.savefig(subfig_path / f"all_scales_psd_{mask_name}_cloud_{chosen_id}.svg")

# Ideas on how to fit a normal distribution

#### Use scipy curve fitting 
https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html#scipy.optimize.curve_fit

https://github.com/nilsnevertree/sdm-eurec4a/blob/c990ed160365230515fb583505a0514630339ef4/src/sdm_eurec4a/pySD/probdists.py#L224

# Combination of fitted cloud droplet distribution and aerosol distribution

## Make sure we use the same units

At the moment we have
- PSD in $\# l^{-1}$
- Diameter in $µm$

We want
- PSD in $\# m^{-3}$
- Radius in $m$
- Total number concentration $N_a$ in $\#$ (this is also the ``scalefacs``)

So the PSD is in #/dm^3, we want it in #/m^3

In [None]:
# The converted variables are written into a copy of ds_cloudcomposite.
ds_SI_Units = ds_cloudcomposite.copy()

# Convert from #/l to #/m^3 -> 1e3
psd_per_cubic_metre = ds_cloudcomposite["particle_size_distribution"] * 1e3
ds_SI_Units["particle_size_distribution"] = psd_per_cubic_metre
ds_SI_Units["particle_size_distribution"].attrs.update(
    {
        "unit": "#/m^3",
        "comment": "histogram: each bin gives the number of droplets per cubic meter of air, NOT normalized by the bin width",
    }
)

# Diameter -> radius (divide by 2) and µm -> m (multiply by 1e-6)
ds_SI_Units["radius"] = ds_SI_Units["diameter"] / 2 * 1e-6
ds_SI_Units["radius"].attrs.update(
    {"long_name": "Radius", "unit": "m", "comment": "radius of the droplets"}
)

# Use radius as new dimension
ds_SI_Units = ds_SI_Units.swap_dims({"diameter": "radius"})

Same plot as before but in SI units

In [None]:
# Marker style shared by all panels
style = {"marker": ".", "linestyle": "none", "color": "k", "alpha": 0.5}

psd = ds_SI_Units["particle_size_distribution"]
# y-scale derived from the data by the project helper; applied to the right column below
symlog = symlog_from_array(psd)

fig, axss = plt.subplots(3, 2, figsize=(10, 7), layout="constrained")
fig.suptitle(f"Cloud ID: {chosen_id} - Particle Size Distribution - Different xscales")

# One entry per row: (x values, x-scale to set or None, x-axis label)
radius = psd["radius"]
row_layout = (
    (radius, None, "Radius [m]"),  # linear x
    (radius, "log", "Radius [m]"),  # linear x shown on a log axis
    (np.log(radius), None, "Ln(Radius) [Ln(m)]"),  # Ln(x) on a linear axis
)

# Both columns show the same three panels
for column_axes in axss.T:
    for ax, (x_values, x_scale, x_label) in zip(column_axes, row_layout):
        ax.plot(x_values, psd, **style)
        if x_scale is not None:
            ax.set_xscale(x_scale)
        ax.set_xlabel(x_label)
        ax.set_ylabel("Counts [#/m^3]")

# The right column uses the symlog y-scale
for ax in axss[:, 1]:
    ax.set_yscale(symlog)

# Final y-label and lower y-limit for every panel
for ax in axss.flat:
    ax.set_ylabel("#/m^3")
    ax.set_ylim(0, None)

fig.savefig(subfig_path / f"SI_all_scales_psd_{mask_name}_cloud_{chosen_id}.png", dpi=300)
fig.savefig(subfig_path / f"SI_all_scales_psd_{mask_name}_cloud_{chosen_id}.svg")

Let's compute the total number of droplets per timestep to obtain $N_a$, as in 5.2 from Lohmann et al.

The values of the PSD are NOT normalized by the bin width, thus we do NOT need to multiply by the bin width again!

We can use the median of the $N_a$ as a scaling factor later during the fitting.

In [None]:
# Total number of droplets per time step: sum of the per-bin counts over all radius bins.
N_a = ds_SI_Units["particle_size_distribution"].sum(dim="radius")
N_a.attrs.update(
    long_name="N_a total number of particles",
    unit="#",
    comment="total number of particles per cubic meter of air",
)
N_a.name = "N_a"

# Median over time; used as first guess and scaling factor in the fitting cells below.
N_a_median = N_a.median(dim="time").data

# Draw the histogram on its own, explicitly created figure. Previously ``fig`` still
# referred to the figure of the preceding cell, so the files written below contained
# the PSD overview instead of this histogram.
fig, ax = plt.subplots()
N_a.plot.hist(ax=ax)
ax.axvline(
    N_a_median,
    label=f"Median {N_a_median:.2e}",
    color="k",
)
ax.legend()

ax.set_ylabel("Counts")
ax.set_title("Histogram of total number of particles N_a for all ATR measurments.")
fig.savefig(subfig_path / Path(f"SI_N_a_hist_{mask_name}_cloud_{chosen_id}.png"), dpi=300)
fig.savefig(subfig_path / Path(f"SI_N_a_hist_{mask_name}_cloud_{chosen_id}.svg"))

DON'T DO THIS: ``Because the distribution is normalized to have sum 1, we need to make sure to use $PSD/N_a$ for the fitting.``

In [None]:
# The fit uses the per-bin counts in SI units as they are; the PSD is NOT divided by N_a here.
input_psd = ds_SI_Units["particle_size_distribution"]

Prepare the input for the fitting

In [None]:
# Extract the radius coordinate and the counts from the data
# (presumably as matching flat arrays - see x_y_flatten to confirm).
x, y = x_y_flatten(input_psd, "radius")

# ``input_psd`` already uses the radius coordinate of the SI dataset, so no
# diameter -> radius conversion is applied here.
# TODO: confirm that no further unit conversion is required before fitting.
xdata = x
ydata = y

# make sure no nans in the dataset (replaced in place by 0)
np.nan_to_num(xdata, copy=False, nan=0)
np.nan_to_num(ydata, copy=False, nan=0)

# Use some default uncertainties for the data.
# Tell the function, that 0 values are very uncertain:
# zero counts get sigma = 1e5 + 1e-10, all other points get sigma = 1e-10.
sigma = ydata == 0
sigma = sigma.astype(float)
sigma = sigma * 1e5 + 1e-10

# Preview of the data that goes into the fit. No fitted curve exists yet, so the title
# says so, and the figure gets its own file name: "fitting_psd.png" is written again by
# the fitting cell, which used to overwrite this preview.
fig, ax = plt.subplots(figsize=(5, 3.5), layout="constrained")

ax.plot(xdata, ydata, "k.", alpha=0.5, label="ATR data")
ax.set_xscale("log")
ax.legend()
ax.set_xlabel("Radius [m]")
ax.set_ylabel("Counts [#/m3]")
ax.set_title("Cloud PSD used as input for the fit")

fig.savefig(subfig_path / Path("fitting_psd_input.png"), dpi=300)

# First guess for the fit parameters.
# NOTE(review): presumably ordered as (scale factor, geometric mean radius, geometric
# standard deviation) - confirm against probdists.LnNormal.fit_parameters.
p0 = np.asarray([N_a_median, 2e-6, 3e0])
print(f"First estimate of p0 : {p0}")

First estimate of p0 : [2.72522848e+08 2.00000000e-06 3.00000000e+00]


Give a first estimate of the fitting

In [None]:
# Radius grid on which the analytical distributions are evaluated
radii = np.logspace(-8, -3, 100)

# Same Aerosol distribution as given by CLEO example
aerosol_modes = {
    "geomeans": [0.02e-6, 0.2e-6],
    "geosigs": [1.55, 2.3],
    "scalefacs": [1e9, 0.3e9],
}
dist_aerosol = probdists.LnNormal(**aerosol_modes)

# The cloud distribution starts with all parameters set to 1.0; the fit replaces them
dist_cloud = probdists.LnNormal(geomeans=[1.0], geosigs=[1.0], scalefacs=[1.0])

# Fit the parameters
dist_cloud.fit_parameters(xdata, ydata, p0=p0, sigma=sigma)

# Make sure to set the scaling factor of the cloud distribution to the total number of particles.
# Here we use the median of the total number of particles from the ATR measurments.
dist_cloud.scalefacs = [N_a_median]
print(dist_cloud)

# Compare the original data to the fit
fig, ax = plt.subplots(figsize=(5, 3.5), layout="constrained")
ax.plot(ds_SI_Units.radius, ds_SI_Units["particle_size_distribution"], "k.", alpha=0.5)

# The evaluated distribution is multiplied by the sum of its scaling factors for plotting
fitted_counts = dist_cloud(radii) * np.sum(dist_cloud.scalefacs)
ax.plot(radii, fitted_counts, label="Fitted distribution")

ax.set(
    xscale="log",
    xlabel="Radius [m]",
    ylabel="Counts [#/m3]",
    title="Cloud PSD and fitted distribution",
)
ax.legend()

fig.savefig(subfig_path / "fitting_psd.png", dpi=300)
fig.savefig(subfig_path / "fitting_psd.svg")

nmodes = 1.00e+00
geomeans = [3.77e-06, ]
geosigs = [1.38e+00, ]
scalefacs = [2.73e+08, ]
numconc = 2.73e+08


### Important for correct plotting
Multiply by the scaling factor. If ``scalefacs`` is an array, then use its sum!

In [None]:
def _distribution_to_dataset(distribution, radii):
    """Evaluate ``distribution`` on ``radii`` and wrap the result in a dataset.

    Parameters
    ----------
    distribution
        Callable distribution object with a ``scalefacs`` attribute
        (here: ``probdists.LnNormal`` instances or their sum).
    radii
        One-dimensional array of radii at which the distribution is evaluated.

    Returns
    -------
    xr.Dataset
        Dataset with the variable ``particle_size_distribution`` on the
        coordinate ``radius`` and the attribute ``name = "atr"``.
    """
    # The evaluated distribution is multiplied by the sum of its scaling factors.
    scaled_values = distribution(radii) * np.sum(distribution.scalefacs)
    return xr.Dataset(
        data_vars={"particle_size_distribution": ("radius", scaled_values)},
        coords={"radius": radii},
        attrs={"name": "atr"},
    )


# COMBINE THE DISTRIBUTIONS
dist_combined = dist_aerosol + dist_cloud

# Create datasets
# NOTE: ``ds_cloud`` is rebound here. From this point on it holds the fitted cloud
# distribution and no longer the identified cloud selected earlier in the notebook.
ds_combined = _distribution_to_dataset(dist_combined, radii)
ds_cloud = _distribution_to_dataset(dist_cloud, radii)
ds_aerosol = _distribution_to_dataset(dist_aerosol, radii)


def add_msd(ds):
    """Add the variable ``mass_size_distribution`` to ``ds`` in place.

    The values come from ``msd_from_psd``, applied to the variable
    ``particle_size_distribution`` with ``radius`` as the size coordinate.

    NOTE(review): the meaning of ``psd_factor=1e6`` and ``scale_factor=1e0`` is
    defined by ``msd_from_psd`` - confirm they match the units used in ``ds``.

    Parameters
    ----------
    ds
        Dataset containing ``particle_size_distribution`` on ``radius``.

    Returns
    -------
    None
        ``ds`` is modified in place.
    """
    mass_size_distribution = msd_from_psd(
        ds=ds,
        psd_name="particle_size_distribution",
        psd_factor=1e6,
        scale_name="radius",
        radius_given=True,
        scale_factor=1e0,
    )
    ds["mass_size_distribution"] = mass_size_distribution


# Attach the mass size distribution to the observations and to all analytical datasets
for dataset in (ds_SI_Units, ds_combined, ds_cloud, ds_aerosol):
    add_msd(dataset)



In [None]:
# Line style of the analytical distributions and marker style of the observations
style = dict(linewidth=3.5, alpha=1)
observation_style = dict(linestyle="none", marker=".", color="k", alpha=0.5)

fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(7, 7), layout="constrained", sharex=True)
ax_psd = axs[0]
ax_msd = axs[1]

# Both panels share the same layout and differ only in the plotted variable. The legend
# labels now name that variable: the mass panel used to be labelled "PSD" as well, and
# "Aerosol" was misspelled.
panels = (
    (ax_psd, "particle_size_distribution", "PSD"),
    (ax_msd, "mass_size_distribution", "MSD"),
)
for ax, variable, short_name in panels:
    ax.set_xscale("log")
    ax.set_xlabel("Radius [m]")

    # Observations
    ax.plot(ds_SI_Units.radius, ds_SI_Units[variable], **observation_style)
    # Analytical distributions
    ax.plot(ds_aerosol.radius, ds_aerosol[variable], label=f"Aerosol {short_name}", **style)
    ax.plot(ds_cloud.radius, ds_cloud[variable], label=f"Cloud {short_name}", **style)
    ax.plot(
        ds_combined.radius,
        ds_combined[variable],
        label=f"Combined {short_name}",
        linestyle=":",
        color="k",
        **style,
    )

ax_psd.set_ylabel("Counts [#/m3]")
ax_msd.set_ylabel("Mass [kg/m3]")
ax_psd.set_title("Particle Size Distribution")
ax_msd.set_title("Mass Size Distribution")

for ax in axs.flatten():
    ax.legend()

fig.suptitle(
    f"Aerosol distribution as in Lohmann et al. 2016 (Fig. 5.5) \n ATR measurment from Cloud {chosen_id}"
)
fig.savefig(subfig_path / Path("psd_msd_cloud_and_aerosol.png"), dpi=300)
fig.savefig(subfig_path / Path("psd_msd_cloud_and_aerosol.svg"))

In [None]:
# Print the text representation of the combined (aerosol + cloud) distribution
print(dist_combined)

nmodes = 3.00e+00
geomeans = [2.00e-08, 2.00e-07, 3.77e-06, ]
geosigs = [1.55e+00, 2.30e+00, 1.38e+00, ]
scalefacs = [1.00e+09, 3.00e+08, 2.73e+08, ]
numconc = 1.57e+09
