In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import xarray as xr

from sdm_eurec4a.visulization import (
    set_custom_rcParams,
    adjust_lightness_array,
    ncols_nrows_from_N,
    handler_map_alpha,
)


from sdm_eurec4a import RepositoryPath

In [None]:
def adjust_spines(ax, visible_spines, position=("outward", 5)):
    """Keep only the requested spines of ``ax`` and reposition them.

    Parameters
    ----------
    ax
        Axes whose spines are modified in place. ``ax.label_outer`` is called
        first with ``remove_inner_ticks=False``.
    visible_spines
        Container of spine names (keys of ``ax.spines``). Spines named here
        are repositioned; all other spines are hidden.
    position
        Passed unchanged to ``set_position`` of every kept spine.
        Defaults to ``("outward", 5)``.

    Returns
    -------
    None
    """
    ax.label_outer(remove_inner_ticks=False)

    for spine_name, spine in ax.spines.items():
        if spine_name not in visible_spines:
            # Not requested: hide it and leave its position untouched.
            spine.set_visible(False)
            continue
        spine.set_position(position)

In [None]:
# Start from matplotlib's stock style; the value returned by
# set_custom_rcParams() is used as the base color sequence in the plots below.
plt.style.use("default")
default_colors = set_custom_rcParams()
# Second color sequence derived from the first with a factor of 0.75.
darker_colors = adjust_lightness_array(default_colors, 0.75)

REPOSITORY_ROOT = RepositoryPath("levante").get_repo_dir()
script_dir = REPOSITORY_ROOT / "scripts" / "others" / "compare_cluster_and_clouds"

# Figures go to results/<script path relative to the repository>/cluster-properties.
fig_path = (
    REPOSITORY_ROOT
    / "results"
    / script_dir.relative_to(REPOSITORY_ROOT)
    / "cluster-properties"
)
fig_path.mkdir(parents=True, exist_ok=True)

# Alternative output directory, currently disabled:
# fig_path = REPOSITORY_ROOT / Path("results/compare_cluster_and_clouds/shallow_only")
# fig_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Identification settings. They select which pre-computed files are loaded,
# including the dropsonde distance files, so that all inputs stay consistent
# when either value is changed.
mask_name = "rain_mask"
min_holes = 5

observation_dir = REPOSITORY_ROOT / "data" / "observation"
identified_dir = observation_dir / "cloud_composite" / "processed" / "identified_clouds"
distance_dir = observation_dir / "combined" / "distance"

clusters = xr.open_dataset(identified_dir / f"identified_clusters_{mask_name}_{min_holes}.nc")
clouds = xr.open_dataset(identified_dir / f"identified_clouds_{mask_name}.nc")

# Optional altitude filter (keeps entries at or below 1500 m), currently disabled:
# clouds = clouds.where((clouds.alt <= 1500), drop=True)
# clusters = clusters.where((clusters.alt <= 1500), drop=True)

distance_clouds = xr.open_dataset(
    distance_dir / f"distance_dropsondes_identified_clouds_{mask_name}.nc"
)

distance_clusters = xr.open_dataset(
    distance_dir / f"distance_dropsondes_identified_clusters_{mask_name}_{min_holes}.nc"
)

# Opened with chunks along "time" (1000 entries per chunk).
cloud_composite = xr.open_dataset(
    observation_dir / "cloud_composite" / "processed" / "cloud_composite_si_units.nc",
    chunks={"time": 1000},
)

drop_sondes = xr.open_dataset(observation_dir / "dropsonde" / "processed" / "drop_sondes.nc")

In [None]:
fig, ax = plt.subplots(1, 1)

# Both data sets are drawn with identical styling; only color and label differ.
# The third positional argument of ``scatter`` is the marker size ``s``, so the
# markers are sized by the liquid water content values.
for dataset, color, label in (
    (clouds, default_colors[0], "Clouds"),
    (clusters, default_colors[1], "Clusters"),
):
    ax.scatter(
        dataset["time"],
        dataset["alt"],
        dataset["liquid_water_content"],
        marker="o",
        linestyle="None",
        color=color,
        alpha=0.75,
        label=label,
    )

ax.set_xlabel("Time")
ax.set_ylabel("Altitude in m")
# Rotate the time tick labels; the trailing semicolon suppresses the cell output.
plt.setp(ax.xaxis.get_majorticklabels(), rotation=-45, ha="left");
# # fig.savefig(fig_path / Path("cloud_properties.png"))

Plot the altitude of clouds and cloud clusters in the ATR dataset

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(9, 4.5))

# Overlay the altitude histograms of clouds and clusters. Each call derives its
# own 100 bins from its data. The labels feed the legend below, without which
# the two overlaid histograms cannot be told apart.
for dataset, label in ((clouds, "Clouds"), (clusters, "Clusters")):
    ax.hist(dataset["alt"], bins=100, orientation="horizontal", alpha=0.7, label=label)

ax.set_ylabel("Altitude [m]")
ax.set_xlabel("Frequency")
ax.legend(loc="upper right")

fig.suptitle(f"Altitude distribution of identified clouds and clusters based on {mask_name}")

fig.savefig(fig_path / Path(f"altitude_distribution_{mask_name}.png"), dpi=300)

Create histograms of the following properties of clouds and clusters:
- duration
- horizontal extent
- vertical extent

In [None]:
from matplotlib.legend_handler import HandlerLine2D, HandlerTuple

In [None]:
# Histograms of duration, horizontal extent and vertical extent for clouds and
# clusters. One row of axes per variable: left = full range, right = zoomed in.
fig, axs = plt.subplots(nrows=3, ncols=2, figsize=np.array([8, 4.5]) * 1.5)

# NOTE(review): gridspec is not referenced in this cell; the import is kept in
# case other cells rely on it.
from matplotlib import gridspec

axs_top = axs[0]
axs_mid = axs[1]
axs_bot = axs[2]

variables = ["duration", "horizontal_extent", "vertical_extent"]
axs_vari = {"duration": axs_top, "horizontal_extent": axs_mid, "vertical_extent": axs_bot}
# Fine bins used for the zoomed-in (right) panels.
small_bins_by_variables = {
    "duration": np.arange(0, 71, 1),
    "horizontal_extent": np.arange(0, 7001, 100),
    "vertical_extent": np.arange(0, 41, 1),
}
# Coarse bins used for the full-range (left) panels.
bins_by_variables = {
    "duration": np.arange(0, 351, 5),
    "horizontal_extent": np.arange(0, 35001, 1000),
    "vertical_extent": np.arange(0, 1001, 20),
}
xlabels = {
    "duration": "ATR flight duration [s]",
    "horizontal_extent": "Horizontal extent [m]",
    "vertical_extent": "Vertical extent [m]",
}

ylabels = {
    "duration": "Frequency",
    "horizontal_extent": "Frequency",
    "vertical_extent": "Frequency",
}

# NOTE(review): not referenced in this cell.
accuracy = {
    "duration": 1,
    "horizontal_extent": 0,
    "vertical_extent": 1,
}

axs_top[0].set_title("Full dataset")
axs_top[1].set_title("Zoomed in")

one_second = np.timedelta64(1, "s")
# Dash pattern of the legend proxy line that stands for the three quantile lines.
quantile_proxy_style = (0, (5, 1, 1, 1, 1, 1, 1, 1, 1, 1))

for var in variables:
    ax_full, ax_zoom = axs_vari[var]

    if np.issubdtype(clouds[var].dtype, np.timedelta64):
        # Convert timedeltas to total seconds as floats. ``.dt.seconds`` would
        # return only the whole-seconds component (no days, no fractions).
        cloud_data = clouds[var] / one_second
        cluster_data = clusters[var] / one_second
    else:
        cloud_data = clouds[var]
        cluster_data = clusters[var]

    # Full-range distribution of clouds and clusters (left panel).
    count_clouds = ax_full.hist(
        cloud_data, bins=bins_by_variables[var], alpha=0.5, color=default_colors[0], label="Clouds"
    )[0]
    count_clusters = ax_full.hist(
        cluster_data, bins=bins_by_variables[var], alpha=0.5, color=default_colors[1], label="Clusters"
    )[0]

    # Dotted outline on the left panel marking the x-range shown on the right panel.
    ax_full.fill_betweenx(
        [0, np.max([count_clouds, count_clusters])],
        np.min(small_bins_by_variables[var]),
        np.max(small_bins_by_variables[var]),
        color="None",
        edgecolor="k",
        alpha=0.5,
        linewidth=2,
        linestyle=":",
        label=r"Zoomed in $\rightarrow$",
    )

    # Zoomed-in distribution (right panel); unlabeled, so the legend of this
    # panel only lists the quantile entries added below.
    ax_zoom.hist(cloud_data, bins=small_bins_by_variables[var], alpha=0.5, color=default_colors[0])
    ax_zoom.hist(cluster_data, bins=small_bins_by_variables[var], alpha=0.5, color=default_colors[1])

    quantile_lines = []
    quantile_labels = []
    handles, labels = ax_zoom.get_legend_handles_labels()
    # NOTE: the loop variable must stay ``name``; the clouds-per-cluster
    # histogram cell further down reads it after this loop has finished.
    for data, name, color in zip(
        [cloud_data, cluster_data],
        ["Clouds", "Clusters"],
        [darker_colors[0], darker_colors[1]],
    ):
        q5, q50, q95 = data.quantile([0.05, 0.5, 0.95])

        # Dashed lines for the 5 % / 95 % quantiles, solid line for the median.
        ax_zoom.axvline(q5, color=color, linestyle="--", alpha=0.85)
        ax_zoom.axvline(q50, color=color, linestyle="-", alpha=0.85)
        ax_zoom.axvline(q95, color=color, linestyle="--", alpha=0.85)

        line = plt.Line2D([0], [0], color=color, linewidth=2, linestyle=quantile_proxy_style)
        label = f"{name}: 5, 50, 95%:\n{q5:.1f},  {q50:.1f},  {q95:.1f}"
        quantile_lines.append(line)
        quantile_labels.append(label)

    handles.extend(quantile_lines)
    labels.extend(quantile_labels)

    ax_full.legend(loc="upper right")
    ax_zoom.legend(
        handles=handles,
        labels=labels,
        loc="upper right",
        handler_map={tuple: HandlerTuple(ndivide=None)},
    )

    for ax in (ax_full, ax_zoom):
        ax.set_xlabel(xlabels[var])
        ax.set_ylabel(ylabels[var])
        ax.set_yscale("log")

fig.suptitle(
    f"Distribution of extent of clouds and cloud clusters. Identification mask: '{mask_name}'\nFor clusters: Holes of {min_holes} seconds are ignored. Flight speed of ATR ca. 100 m/s."
)
fig.tight_layout()
fig.savefig(fig_path / Path(f"extent_distribution_{mask_name}.png"), dpi=300)

In [None]:
# Day-of-year values on which clouds were identified; one panel per day.
research_days = np.unique(clouds["time"].dt.dayofyear)

fig, axs = plt.subplots(
    sharex=True, sharey=True, **ncols_nrows_from_N(len(research_days)), figsize=(16, 9)
)
axs_flat = axs.flatten()

bins = np.arange(0, 30000, 1000)
series = (
    ("Clouds", clouds, default_colors[0]),
    ("Clusters", clusters, default_colors[1]),
)
for ax, day in zip(axs_flat, research_days):
    day_subsets = {}
    for label, dataset, color in series:
        # Restrict the data set to the time stamps that fall on this day.
        on_day = dataset["time"].dt.dayofyear == day
        day_subsets[label] = dataset.sel(time=dataset["time"].where(on_day, drop=True))
        ax.hist(
            day_subsets[label]["horizontal_extent"], bins=bins, alpha=0.5, color=color, label=label
        )

    # Panel title: calendar date of the first cloud of the day.
    ax.set_title(day_subsets["Clouds"].time[0].dt.strftime("%Y-%m-%d").data)
    ax.legend()
    ax.set_yscale("log")

# Axis labels only on the bottom row and the left column.
for ax in axs[-1, :]:
    ax.set_xlabel("Horizontal extent [m]")
for ax in axs[:, 0]:
    ax.set_ylabel("Frequency")

fig.suptitle(
    f"Horizontal extent of clouds. Clouds and clusters per day. Based on '{mask_name}'\nFor clusters: Holes of {min_holes} seconds are ignored."
)

fig.tight_layout()

fig.savefig(fig_path / f"horizontal_extent_{mask_name}_per_day_full_range.png", dpi=300)

In [None]:
# Per-day histograms of the horizontal extent, restricted to extents below
# 1000 m (50 m bins, linear y-axis). One panel per day-of-year with clouds.
research_days = np.unique(clouds["time"].dt.dayofyear)

fig, axs = plt.subplots(
    sharex=True, sharey=True, **ncols_nrows_from_N(len(research_days)), figsize=(16, 9)
)
axs_flat = axs.flatten()

bins = np.arange(0, 1000, 50)
series = (
    ("Clouds", clouds, default_colors[0]),
    ("Clusters", clusters, default_colors[1]),
)
for ax, day in zip(axs_flat, research_days):
    day_subsets = {}
    for label, dataset, color in series:
        # Restrict the data set to the time stamps that fall on this day.
        on_day = dataset["time"].dt.dayofyear == day
        day_subsets[label] = dataset.sel(time=dataset["time"].where(on_day, drop=True))
        ax.hist(
            day_subsets[label]["horizontal_extent"], bins=bins, alpha=0.5, color=color, label=label
        )

    # Panel title: calendar date of the first cloud of the day.
    ax.set_title(day_subsets["Clouds"].time[0].dt.strftime("%Y-%m-%d").data)
    ax.legend()

# Axis labels only on the bottom row and the left column.
for ax in axs[-1, :]:
    ax.set_xlabel("Horizontal extent [m]")
for ax in axs[:, 0]:
    ax.set_ylabel("Frequency")

fig.suptitle(
    f"Horizontal extent of clouds of less than 1000 m. Clouds and clusters per day. Based on '{mask_name}'\nFor clusters: Holes of {min_holes} seconds are ignored."
)

fig.tight_layout()

fig.savefig(fig_path / f"horizontal_extent_{mask_name}_per_day_less_1000m.png", dpi=300)

## Identify number of clouds per cluster

In [None]:
# For every cluster, count the identified clouds whose time stamp lies inside
# the cluster's window [time - duration / 2, time + duration / 2] (inclusive).
# Assumes "duration" and "cloud_id" are one-dimensional along "time" - TODO confirm.
cluster_times = clusters["time"].values
half_durations = (0.5 * clusters["duration"]).values
window_starts = cluster_times - half_durations
window_stops = cluster_times + half_durations

# Only clouds with a non-missing cloud_id are counted. Sorting their time
# stamps lets both window edges be located with a binary search instead of
# scanning every cloud for every cluster.
has_cloud_id = clouds["cloud_id"].notnull().values
cloud_times = np.sort(clouds["time"].values[has_cloud_id])

first_inside = np.searchsorted(cloud_times, window_starts, side="left")
first_after = np.searchsorted(cloud_times, window_stops, side="right")

# float64 array with one count per cluster; an inverted window yields 0.
result = np.maximum(first_after - first_inside, 0).astype(np.float64)

In [None]:
# Store the per-cluster cloud count on the clusters dataset along "time".
clusters["clouds_in_cluster"] = ("time", result)

In [None]:
# Histogram of the number of clouds per cluster with 5 %, 50 % and 95 % quantiles.
fig, ax = plt.subplots(nrows=1, ncols=1)

var = "clouds_in_cluster"
# Name shown in the legend. Spelled out here so the cell does not depend on a
# loop variable left behind by an earlier cell.
series_name = "Clusters"
cluster_data = clusters[var]

# Unit-wide bins centered on the integer counts. The range covers at least
# 1..32 and grows when a cluster contains more clouds, so nothing is dropped.
largest_count = max(32, int(cluster_data.max().item()))
bins_by_variables = {
    "clouds_in_cluster": np.arange(0.5, largest_count + 1, 1),
}
xlabels = {
    "clouds_in_cluster": "Clouds part of a cloud cluster",
}

ylabels = {
    "clouds_in_cluster": "Frequency",
}

ax.hist(cluster_data, bins=bins_by_variables[var], alpha=0.5, color=default_colors[1], align="mid")

handles, labels = ax.get_legend_handles_labels()
q5, q50, q95 = cluster_data.quantile([0.05, 0.5, 0.95])

color = darker_colors[1]

# Dashed lines for the 5 % / 95 % quantiles, solid line for the median.
ax.axvline(q5, color=color, linestyle="--", alpha=1)
ax.axvline(q50, color=color, linestyle="-", alpha=1)
ax.axvline(q95, color=color, linestyle="--", alpha=1)

# A single proxy line stands for the three quantile lines in the legend.
line = plt.Line2D([0], [0], color=color, linewidth=2, linestyle=(0, (5, 1, 1, 1, 1, 1, 1, 1, 1, 1)))
label = f"{series_name}: 5, 50, 95%:\n{q5:.1f},  {q50:.1f},  {q95:.1f}"

handles.append(line)
labels.append(label)

ax.legend(
    handles=handles, labels=labels, loc="upper right", handler_map={tuple: HandlerTuple(ndivide=None)}
)

ax.set_xlabel(xlabels[var])
ax.set_ylabel(ylabels[var])
ax.set_yscale("log")
fig.suptitle(
    f"Distribution of number of clouds per cloud clusters.\nIdentification mask: '{mask_name}'\nFor clusters: Holes of {min_holes} seconds are ignored."
)
fig.tight_layout()
fig.savefig(fig_path / Path(f"clouds_per_cluster_{mask_name}.png"), dpi=300)

In [None]:
# Liquid water content against the number of clouds per cluster: raw points in
# grey, plus one violin per cluster size that occurs in the data.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.scatter(
    clusters["clouds_in_cluster"], clusters["liquid_water_content"], alpha=0.5, color="grey", marker="."
)

# The range end is inclusive so the largest cluster size also gets a violin.
largest_count = int(clusters["clouds_in_cluster"].max().item())
for n in range(1, largest_count + 1):
    data = clusters.where(clusters["clouds_in_cluster"] == n, drop=True)
    if data["time"].count() == 0:
        # No cluster with exactly n clouds: nothing to draw at this position.
        continue
    ax.violinplot(
        data["liquid_water_content"],
        positions=[n],
        showmeans=False,
        showmedians=True,
    )

ax.set_xlabel("Clouds per cluster")
ax.set_ylabel("Liquid water content [g/m^3]")

fig.suptitle(
    f"Clouds per cluster vs liquid water content.\nIdentification mask: '{mask_name}'\nFor clusters: Holes of {min_holes} seconds are ignored."
)
fig.tight_layout()
fig.savefig(fig_path / Path(f"clouds_per_cluster_vs_liquid_water_content_{mask_name}.png"), dpi=300)

In [None]:
# Liquid water content against horizontal extent (log x-axis) for clouds and
# clusters. No color is passed, so each call takes the next color of the cycle.
fig, ax = plt.subplots(1, 1)
for dataset, label in ((clouds, "Clouds"), (clusters, "Clusters")):
    ax.scatter(
        dataset["horizontal_extent"],
        dataset["liquid_water_content"],
        alpha=0.5,
        marker=".",
        label=label,
    )

ax.set_xscale("log")
ax.set_xlabel("Horizontal extent [m]")
ax.set_ylabel("Liquid water content [g/m^3]")
ax.legend(loc="upper left")

fig.suptitle(
    f"Horizontal extent vs liquid water content.\nIdentification mask: '{mask_name}'\nFor clusters: Holes of {min_holes} seconds are ignored."
)
fig.tight_layout()
plt.savefig(fig_path / Path(f"horizontal_extent_vs_liquid_water_content_{mask_name}.png"), dpi=300)