In [None]:
import numpy as np
from scipy.optimize import least_squares
import matplotlib.pyplot as plt
from numpy.random import default_rng
from scipy.optimize import Bounds

import warnings
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt

from sdm_eurec4a.visulization import (
    set_custom_rcParams,
)
from sdm_eurec4a.identifications import match_clouds_and_cloudcomposite, select_individual_cloud_by_id


# Suppress every warning for the remainder of the session.
# NOTE(review): a blanket "ignore" also hides deprecation warnings raised by
# the libraries used below - consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

# Apply the project's matplotlib settings and keep whatever the helper returns
# (presumably the default colour list - verify against sdm_eurec4a.visulization).
default_colors = set_custom_rcParams()

In [None]:
def fun_rosenbrock(x):
    """Return the two Rosenbrock residuals for the point ``x``.

    Parameters
    ----------
    x : sequence of length 2
        Point ``(x[0], x[1])`` at which the residuals are evaluated.

    Returns
    -------
    np.ndarray
        ``[10 * (x[1] - x[0]**2), 1 - x[0]]``.
    """
    first, second = x[0], x[1]
    curvature_residual = 10 * (second - first**2)
    offset_residual = 1 - first
    return np.array([curvature_residual, offset_residual])

In [None]:
# Starting point for the Rosenbrock least-squares demo.
x0_rosenbrock = np.array([1.2, 1.2])

# Unconstrained solve; verbose=2 prints the per-iteration progress table.
res_1 = least_squares(fun=fun_rosenbrock, x0=x0_rosenbrock, verbose=2)

# Report solution, final cost, first-order optimality and number of
# function evaluations, one per line.
for field in ("x", "cost", "optimality", "nfev"):
    print(getattr(res_1, field))

   Iteration     Total nfev        Cost      Cost reduction    Step norm     Optimality   
       0              1         2.9000e+00                                    5.78e+01    
       1              2         8.0000e-02      2.82e+00       3.12e-01       8.00e+00    
       2              3         0.0000e+00      8.00e-02       4.00e-02       0.00e+00    
`gtol` termination condition is satisfied.
Function evaluations 3, initial cost 2.9000e+00, final cost 0.0000e+00, first-order optimality 0.00e+00.
[1. 1.]
0.0
0.0
3


In [None]:
# Unseeded module-level generator. gen_data below creates its own local
# generator from its ``seed`` argument and does not read this one.
rng = default_rng()


def analytic(t, a, b, c):
    """Exponential model ``a + b * exp(c * t)``.

    ``t`` may be a scalar or a NumPy array; the result has the same shape.
    """
    exponential_term = np.exp(c * t)
    return a + b * exponential_term


def gen_data(t, a, b, c, noise=0.0, n_outliers=0, seed=None):
    """Sample the exponential model at ``t`` and add Gaussian noise with outliers.

    Parameters
    ----------
    t : np.ndarray
        Sample positions.
    a, b, c : float
        Parameters forwarded to ``analytic``.
    noise : float
        Standard deviation of the additive Gaussian noise.
    n_outliers : int
        Number of indices drawn (with replacement) whose noise is multiplied by 10.
    seed : optional
        Seed forwarded to ``default_rng``.

    Returns
    -------
    np.ndarray
        Model values plus noise.
    """
    generator = default_rng(seed)

    # Draw order matters for reproducibility: noise first, then outlier indices.
    perturbation = noise * generator.standard_normal(t.size)
    outlier_idx = generator.integers(0, t.size, n_outliers)
    perturbation[outlier_idx] *= 10

    return analytic(t, a, b, c) + perturbation


# True parameters of the exponential model a + b * exp(c * t).
a, b, c = 0.5, 2.0, -1

# Sampling grid: n_points equally spaced values on [t_min, t_max].
t_min, t_max = 0, 10
n_points = 15

t_train = np.linspace(t_min, t_max, n_points)
# Noisy training samples; three indices are drawn as outliers.
y_train = gen_data(t_train, a, b, c, noise=0.1, n_outliers=3)


def fun(x, t, y):
    """Residuals (model minus data) of the exponential model.

    ``x`` holds the parameters in the order ``(a, b, c)``; ``t`` and ``y``
    are the sample positions and observed values.
    """
    a_est, b_est, c_est = x[0], x[1], x[2]
    return analytic(t, a_est, b_est, c_est) - y


# Parameter vector in the (a, b, c) order that fun reads as x[0], x[1], x[2].
# NOTE(review): the next cell reassigns x0 before any fit shown here uses this value.
x0 = np.array([1.0, 1.0, 0.0])

In [None]:
def double_ln_normal_distribution(
    t: np.ndarray,
    mu1: float,
    sigma1: float,
    scale_factor1: float,
    mu2: float,
    sigma2: float,
    scale_factor2: float,
) -> np.ndarray:
    """Evaluate the sum of two log-normal modes at ``t``.

    Each mode contributes::

        scale_factor / (sqrt(2 * pi) * ln(sigma))
            * exp(-(ln(t) - ln(mu))**2 / (2 * ln(sigma)**2))

    (eq. 5.8, Lohmann, intro 2 clouds).

    Parameters
    ----------
    t : array_like
        Positions at which to evaluate the distribution. Any shape is
        accepted (scalar, 1-D, N-D); values must be positive for ``ln(t)``
        to be finite.
    mu1, mu2 : float
        Location of each mode; ``ln(mu)`` is the mode centre in ``ln(t)``.
    sigma1, sigma2 : float
        Width parameter of each mode; ``ln(sigma)`` is the width in
        ``ln(t)``. ``sigma == 1`` makes ``ln(sigma)`` zero and the
        expression singular.
    scale_factor1, scale_factor2 : float
        Multiplier of each normalized mode.

    Returns
    -------
    np.ndarray
        Array with the same shape as ``t``.
    """
    t = np.asarray(t, dtype=float)

    # Accumulate in an array shaped like ``t`` (not a flat ``t.size`` vector)
    # so that multi-dimensional inputs can be added in place.
    result = np.zeros(t.shape)
    log_t = np.log(t)  # identical for both modes, so computed once

    for mu, sigma, scale_factor in zip(
        (mu1, mu2),
        (sigma1, sigma2),
        (scale_factor1, scale_factor2),
    ):
        sigtilda = np.log(sigma)
        mutilda = np.log(mu)

        norm = scale_factor / (np.sqrt(2 * np.pi) * sigtilda)
        exponent = -((log_t - mutilda) ** 2) / (2 * sigtilda**2)

        result += norm * np.exp(exponent)  # eq.5.8 [lohmann intro 2 clouds]

    return result


# Unseeded module-level generator. gen_data below creates its own local
# generator from its ``seed`` argument and does not read this one.
rng = default_rng()


def gen_data(
    t: np.ndarray,
    mu1: float,
    sigma1: float,
    scale_factor1: float,
    mu2: float,
    sigma2: float,
    scale_factor2: float,
    noise=0.0,
    n_outliers=0,
    seed=None,
):
    """Sample the double log-normal model at ``t`` and add noise.

    The six distribution parameters are forwarded unchanged to
    ``double_ln_normal_distribution``. Gaussian noise with standard
    deviation ``noise`` is added; at ``n_outliers`` indices (drawn with
    replacement) the noise is additionally multiplied by ``sqrt(t)``.
    ``seed`` is forwarded to ``default_rng``.
    """
    generator = default_rng(seed)

    clean = double_ln_normal_distribution(
        t, mu1, sigma1, scale_factor1, mu2, sigma2, scale_factor2
    )

    # Draw order matters for reproducibility: noise first, then outlier indices.
    perturbation = noise * generator.standard_normal(t.size)
    picked = generator.integers(0, t.size, n_outliers)
    perturbation[picked] = np.sqrt(t[picked]) * perturbation[picked]

    return clean + perturbation


# Ground-truth parameters of the two synthetic log-normal modes.
mu1, sigma1, scale_factor1 = 1e-2, 2, 5
mu2, sigma2, scale_factor2 = 0.5e1, 3, 1

# Sampling configuration. t_min / t_max are only referenced by the
# commented-out linear grid below.
t_min, t_max = 0.1, 10
n_points, n_outliers = 40, 5
t_train = np.logspace(-3, 2, n_points)
# t_train = np.linspace(t_min, t_max, n_points)

# Noisy samples of the true distribution on the logarithmic grid.
y_train = gen_data(
    t_train,
    mu1,
    sigma1,
    scale_factor1,
    mu2,
    sigma2,
    scale_factor2,
    noise=0.1,
    n_outliers=n_outliers,
)


def fun(x, t, y):
    """Residuals (model minus data) of the double log-normal model.

    ``x`` holds ``(mu1, sigma1, scale_factor1, mu2, sigma2, scale_factor2)``;
    ``t`` and ``y`` are the sample positions and observed values.
    """
    params = [x[i] for i in range(6)]
    return double_ln_normal_distribution(t, *params) - y


# Starting point, ordered (mu1, sigma1, scale_factor1, mu2, sigma2, scale_factor2).
x0 = np.array([1e-1, 2.0, 1.0, 10.0, 2.0, 1.0])

# The model divides by ln(sigma): sigma == 1 is singular and sigma < 1 flips
# the sign of the mode. Keep both sigmas above 1, using the same 1.1 floor as
# the fit to the observations further down.
bounds = Bounds(
    lb=[1e-10, 1.1, -np.inf, 2e-2, 1.1, -np.inf],
    ub=[5e-1, np.inf, np.inf, np.inf, np.inf, np.inf],
    keep_feasible=[True, True, True, False, True, True],
)

# Same problem solved with three loss functions to compare outlier robustness.
res_lsq = least_squares(fun, x0, bounds=bounds, args=(t_train, y_train))
res_soft_l1 = least_squares(fun, x0, loss="soft_l1", f_scale=0.1, bounds=bounds, args=(t_train, y_train))
res_log = least_squares(fun, x0, loss="cauchy", f_scale=0.1, bounds=bounds, args=(t_train, y_train))

# Dense evaluation grid. The curves are evaluated with the model itself
# rather than through gen_data, which exists to add noise.
t_test = np.logspace(-5, 2, n_points * 10)

y_true = double_ln_normal_distribution(
    t_test, mu1, sigma1, scale_factor1, mu2, sigma2, scale_factor2
)
y_lsq = double_ln_normal_distribution(t_test, *res_lsq.x)
y_soft_l1 = double_ln_normal_distribution(t_test, *res_soft_l1.x)
y_log = double_ln_normal_distribution(t_test, *res_log.x)

plt.plot(t_train, y_train, "o")
plt.plot(t_test, y_true, "k", linewidth=2, label="true")
plt.plot(t_test, y_lsq, label="linear loss")
plt.plot(t_test, y_soft_l1, label="soft_l1 loss")
plt.plot(t_test, y_log, label="cauchy loss")
plt.xlabel("t")
plt.ylabel("y")
plt.legend()
plt.xscale("log")

In [None]:
# NOTE(review): absolute, user-specific paths - prefer a configurable data
# directory so the notebook runs on other machines.
cloud_composite = xr.open_dataset(
    "/home/m/m301096/repositories/sdm-eurec4a/data/observation/cloud_composite/processed/cloud_composite_si_units.nc"
)
identified_clouds = xr.open_dataset(
    "/home/m/m301096/repositories/sdm-eurec4a/data/observation/cloud_composite/processed/identified_clusters/identified_clusters_rain_mask_5.nc"
)

# Add a micrometre copy of the radius coordinate. The "µm" unit belongs to
# this scaled copy; ``radius`` itself is not rescaled and therefore keeps its
# original attributes.
attrs = cloud_composite["radius"].attrs.copy()
attrs.update({"units": "µm"})
cloud_composite["radius_micro"] = 1e6 * cloud_composite["radius"]
cloud_composite["radius_micro"].attrs = attrs

# cloud_composite = cloud_composite.sel(radius = slice(10, None))

# Keep only identified clouds lasting longer than 50 s with 500 < alt < 1300.
identified_clouds = identified_clouds.where(
    (
        (identified_clouds.duration.dt.total_seconds() > 50)
        & (identified_clouds.alt < 1300)
        & (identified_clouds.alt > 500)
    ),
    drop=True,
)

# Restrict the composite to the selected clouds (project helper).
cloud_composite = match_clouds_and_cloudcomposite(identified_clouds, cloud_composite)

In [None]:
# Write the cloud-matched composite to a NetCDF file; the relative path
# resolves against the kernel's current working directory.
cloud_composite.to_netcdf("train_cc.nc")

In [None]:
# Time-averaged mass size distribution against radius on log-log axes.
mean_mass_distribution = cloud_composite["mass_size_distribution"].mean("time")
mean_mass_distribution.plot(x="radius", yscale="log", aspect=2, size=4)
plt.xscale("log")

In [None]:
# Merge neighbouring radius bins pairwise by summing along ``radius``.
coarse_composite = cloud_composite.coarsen(radius=2).sum()
coarse_composite["diameter"] = 2 * coarse_composite["radius"]

# Normalize the particle size distribution by the bin width.
# NOTE(review): the extra division by 2 is applied to both distributions -
# confirm it matches the definition of ``bin_width`` in the source dataset.
attrs = {
    **coarse_composite["particle_size_distribution"].attrs,
    "units": "m^-3 m^-1",
    "long_name": "Particle size distribution",
    "comment": "Each bin gives the number of droplets per cubic meter of air per meter of radius",
}
psd_normalized = coarse_composite["particle_size_distribution"] / coarse_composite["bin_width"] / 2
coarse_composite["particle_size_distribution"] = psd_normalized
coarse_composite["particle_size_distribution"].attrs = attrs

# Normalize the mass size distribution in the same way.
attrs_mass = {
    **coarse_composite["mass_size_distribution"].attrs,
    "units": "kg m^-3 m^-1",
    "long_name": "Mass size distribution",
    "comment": "Each bin gives the mass of droplets per cubic meter of air per meter of radius",
}
msd_normalized = coarse_composite["mass_size_distribution"] / coarse_composite["bin_width"] / 2
coarse_composite["mass_size_distribution"] = msd_normalized
coarse_composite["mass_size_distribution"].attrs = attrs_mass

# Time-averaged normalized mass size distribution on log-log axes.
mean_mass_distribution = coarse_composite["mass_size_distribution"].mean("time")
mean_mass_distribution.plot(x="radius", yscale="log", aspect=2, size=4)
plt.xscale("log")

In [None]:
# Integer index per time step (only referenced by the commented-out plot below).
t = np.arange(len(coarse_composite["time"]))
# plt.plot(
#     t,
#     coarse_composite['radius'],
#     coarse_composite['mass_size_distribution'],
#     shading='auto',
#     cmap = 'Blues', vmax = 1e-1, vmin = 1e-3)

radius_axis = coarse_composite["radius"]
psd = coarse_composite["particle_size_distribution"]

# All distributions in faint black, their time mean in red on top.
plt.plot(radius_axis, psd, color="k", alpha=0.1)
plt.plot(radius_axis, psd.mean("time"), color="r", alpha=1)
plt.yscale("log")
plt.xscale("log")

In [None]:
# Broadcast the radius coordinate along time so that every
# (radius, time) sample carries its own radius value.
time_axis = coarse_composite["time"]
coarse_composite["radius2D"] = coarse_composite["radius"].expand_dims(time=time_axis)

# Put ``radius`` first; the remaining dimensions keep their order.
coarse_composite = coarse_composite.transpose("radius", ...)

In [None]:
# Choose one identified cloud at random and extract its measurements.
#
# ``np.random.random_integers`` is deprecated; ``Generator.integers`` replaces
# it. Its upper bound is exclusive, so ``len(...)`` is passed instead of
# ``len(...) - 1`` to cover the same index range.
cloud_seed = None  # set to an int (e.g. 42) to reproduce the same selection
cloud_index = int(default_rng(cloud_seed).integers(0, len(identified_clouds.time)))

train_data = match_clouds_and_cloudcomposite(
    identified_clouds.isel(time=cloud_index),
    coarse_composite,
)

t_train = train_data["radius2D"]  # .mean('time')
y_train = train_data["particle_size_distribution"]  # .mean('time')


def double_ln_normal_distribution(
    t: np.ndarray,
    mu1: float,
    sigma1: float,
    scale_factor1: float,
    mu2: float,
    sigma2: float,
    scale_factor2: float,
) -> np.ndarray:
    """Evaluate the sum of two log-normal modes at ``t``.

    NOTE(review): this function is also defined in the synthetic-data cell
    earlier in the notebook; consider keeping a single definition.

    Each mode contributes::

        scale_factor / (sqrt(2 * pi) * ln(sigma))
            * exp(-(ln(t) - ln(mu))**2 / (2 * ln(sigma)**2))

    (eq. 5.8, Lohmann, intro 2 clouds).

    Parameters
    ----------
    t : array_like
        Positions at which to evaluate the distribution. Any shape is
        accepted (scalar, 1-D, N-D); values must be positive for ``ln(t)``
        to be finite.
    mu1, mu2 : float
        Location of each mode; ``ln(mu)`` is the mode centre in ``ln(t)``.
    sigma1, sigma2 : float
        Width parameter of each mode; ``ln(sigma)`` is the width in
        ``ln(t)``. ``sigma == 1`` makes ``ln(sigma)`` zero and the
        expression singular.
    scale_factor1, scale_factor2 : float
        Multiplier of each normalized mode.

    Returns
    -------
    np.ndarray
        Array with the same shape as ``t``.
    """
    t = np.asarray(t, dtype=float)

    # Accumulate in an array shaped like ``t`` (not a flat ``t.size`` vector)
    # so that multi-dimensional inputs can be added in place.
    result = np.zeros(t.shape)
    log_t = np.log(t)  # identical for both modes, so computed once

    modes = (
        (mu1, sigma1, scale_factor1),
        (mu2, sigma2, scale_factor2),
    )
    for mu, sigma, scale_factor in modes:
        sigtilda = np.log(sigma)
        mutilda = np.log(mu)

        norm = scale_factor / (np.sqrt(2 * np.pi) * sigtilda)
        exponent = -((log_t - mutilda) ** 2) / (2 * sigtilda**2)

        result += norm * np.exp(exponent)  # eq.5.8 [lohmann intro 2 clouds]

    return result


def fun(x, t, y):
    """Residuals (model minus data) of the double log-normal model.

    ``x`` holds ``(mu1, sigma1, scale_factor1, mu2, sigma2, scale_factor2)``;
    ``t`` and ``y`` are the sample positions and observed values.
    """
    mode_one = (x[0], x[1], x[2])
    mode_two = (x[3], x[4], x[5])
    prediction = double_ln_normal_distribution(t, *mode_one, *mode_two)
    return prediction - y


# Starting point and box constraints, ordered
# (mu1, sigma1, scale_factor1, mu2, sigma2, scale_factor2).
x0 = np.array([3e-6, 2, 1e10, 100e-6, 2, 1e6])
bounds = Bounds(
    lb=[1e-10, 1.1, 1e7, 50e-6, 1.1, 1e0],
    ub=[50e-6, 3, 1e13, 5e-3, 3, 1e13],
)

# Robust fit with the soft_l1 loss on the flattened (radius, time) samples.
# NOTE(review): the parameters span roughly 20 orders of magnitude; in the
# recorded run the solver stopped after two evaluations with several
# parameters close to their bounds. Consider ``x_scale="jac"`` (or fitting
# log-transformed parameters) and an ``f_scale`` matched to the residual size.
res_soft_l1 = least_squares(
    fun,
    x0,
    loss="soft_l1",
    f_scale=0.5,
    bounds=bounds,
    verbose=2,
    args=(t_train.values.flatten(), y_train.values.flatten()),
)

t_test = np.logspace(-6, -2.5, 1000)

# Only the soft_l1 fit is computed in this cell. The linear and cauchy fits
# are not run here, so evaluating ``res_lsq`` / ``res_log`` would silently
# reuse the results of the synthetic-data cell (or fail on a fresh kernel).
# The curve is evaluated with the model itself rather than through gen_data.
y_soft_l1 = double_ln_normal_distribution(t_test, *res_soft_l1.x)

plt.scatter(t_train, y_train, marker="o", color="grey")
plt.plot(t_test, y_soft_l1, label="soft_l1 loss")
plt.xlabel("t")
plt.ylabel("y")
plt.legend()
plt.xscale("log")
plt.yscale("symlog")
plt.ylim(0, 1e11)
print(res_soft_l1.x)

   Iteration     Total nfev        Cost      Cost reduction    Step norm     Optimality   
       0              1         3.0502e+12                                    3.71e+14    
       1              2         2.5881e+12      4.62e+11       3.14e+11       1.81e-57    
`gtol` termination condition is satisfied.
Function evaluations 2, initial cost 3.0502e+12, final cost 2.5881e+12, first-order optimality 1.81e-57.
[1.68558226e-08 1.10502562e+00 9.61009977e+09 4.96913450e-03
 1.10450000e+00 3.14378431e+11]
