In [None]:
import numpy as np
import xarray as xr
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from sdm_eurec4a import RepositoryPath
from sdm_eurec4a.conversions import relative_humidity_partial_density, relative_humidity_dewpoint
from sdm_eurec4a.visulization import set_custom_rcParams, label_from_attrs
from sdm_eurec4a.reductions import mean_and_stderror_of_mean
from sdm_eurec4a.identifications import match_clouds_and_dropsondes, match_clouds_and_cloudcomposite

# Apply the project's custom matplotlib rcParams. The return value is kept
# because later cells index it (default_colors[0] ... default_colors[3]) to
# pick line colours.
default_colors = set_custom_rcParams()
# Root directory of the repository; every data path below is built relative to it.
# NOTE(review): "levante" presumably selects a machine-specific location — confirm.
repo_dir = RepositoryPath("levante").get_repo_dir()

In [None]:
# Location of every input file, relative to the repository root.
observation_files = {
    "drop_sondes": "data/observation/dropsonde/processed/drop_sondes.nc",
    "distance": "data/observation/combined/distance/distance_dropsondes_identified_clusters_rain_mask_5.nc",
    "safire": "data/observation/safire_core/processed/safire_core.nc",
    "cloud_composite": "data/observation/cloud_composite/processed/cloud_composite_SI_units_20241025.nc",
    "identified_clusters": "data/observation/cloud_composite/processed/identified_clusters/identified_clusters_rain_mask_5.nc",
}

# Open all files in one pass (same order as listed above), then bind the
# names that the rest of the notebook uses.
opened_datasets = {
    dataset_name: xr.open_dataset(repo_dir / relative_path)
    for dataset_name, relative_path in observation_files.items()
}

drop_sondes = opened_datasets["drop_sondes"]
distance = opened_datasets["distance"]
safire = opened_datasets["safire"]
cloud_composite = opened_datasets["cloud_composite"]
identified_clusters = opened_datasets["identified_clusters"]


def select_subset(
    ds: "xr.Dataset",
    altitude_name: str = "altitude",
    drop: bool = False,
    min_altitude: float = 500,
    max_altitude: float = 1200,
) -> "xr.Dataset":
    """Keep only the samples whose altitude lies inside a closed altitude band.

    Parameters
    ----------
    ds
        Dataset that contains the altitude variable.
    altitude_name
        Name of the altitude variable in ``ds``.
    drop
        Forwarded to ``ds.where``: if True, masked-out samples are removed,
        otherwise they are kept and filled with missing values.
    min_altitude, max_altitude
        Inclusive lower and upper bound of the band, expressed in the same
        units as ``ds[altitude_name]``. The defaults reproduce the band
        (500 to 1200) that was previously hard-coded.

    Returns
    -------
    The result of ``ds.where(mask, drop=drop)``.
    """
    altitude = ds[altitude_name]
    in_band = (altitude >= min_altitude) & (altitude <= max_altitude)
    return ds.where(in_band, drop=drop)


# Restrict the aircraft datasets to the altitude band chosen in select_subset.
# Only the identified clusters actually drop the samples outside the band.
identified_clusters = select_subset(identified_clusters, altitude_name="alt", drop=True)
cloud_composite = select_subset(cloud_composite, altitude_name="alt")
safire = select_subset(safire, altitude_name="altitude")

# Derive one relative humidity variable per absolute humidity variable.
# `relative_humidity` stays bound to the most recent result after the loop,
# i.e. the one computed from "absolute_humidity_2".
for sensor_suffix in ("1", "2"):
    relative_humidity = relative_humidity_partial_density(
        temperature=safire["temperature"],
        partial_density=safire["absolute_humidity_" + sensor_suffix],
        # presumably the specific gas constant of water vapour in J kg^-1 K^-1 — confirm
        specific_gas_constant=461.5,
    )
    safire["relative_humidity_" + sensor_suffix] = relative_humidity

# Use the SAFIRE-CORE relative humidity values

In [None]:
# Timestamps that occur in both datasets, so that the two instruments can be
# compared sample by sample.
shared_times = np.intersect1d(safire["time"], cloud_composite["time"])

safire_shared, cloud_composite_shared = (
    dataset.sel(time=shared_times) for dataset in (safire, cloud_composite)
)

### Liquid water content

The liquid water content measured by SAFIRE-CORE appears to be unreliable: it contains negative values!

In [None]:
# Sample-by-sample comparison of the two liquid water content (LWC)
# measurements on their shared timestamps.
lwc_safire = safire_shared["liquid_water_content"]
lwc_composite = cloud_composite_shared["liquid_water_content"]

fig, ax = plt.subplots()
ax.scatter(x=lwc_safire, y=lwc_composite, marker=".", alpha=0.2)

ax.set_xlabel("SAFIRE-CORE " + label_from_attrs(lwc_safire))
# The y-axis shows the composite data, so its label must come from the
# composite variable (it was previously built from the SAFIRE-CORE one).
ax.set_ylabel("COMPOSITE " + label_from_attrs(lwc_composite))

Text(0, 0.5, 'COMPOSITE Liquid Water Content $\\left[  g m^{-3}  \\right]$')

In [None]:
# 2D histogram of SAFIRE-CORE vs. cloud composite liquid water content (LWC)
# on the shared timestamps, annotated with their correlation coefficient.
lwc_safire = safire_shared["liquid_water_content"]
lwc_composite = cloud_composite_shared["liquid_water_content"]

fig = plt.figure()
gs = fig.add_gridspec(1, 11, hspace=1, wspace=1)

# Wide panel for the histogram, narrow last column for the colorbar.
ax_cc = fig.add_subplot(gs[0:9])
cax = fig.add_subplot(gs[-1])

# Deliberately not called `range`: a notebook-level `range = [...]` would
# shadow the builtin for every cell executed afterwards.
lwc_range = [[-0.05, 3], [-0.05, 3]]

*_, mappable = ax_cc.hist2d(
    lwc_safire,
    lwc_composite,
    range=lwc_range,
    bins=100,
    norm=mcolors.LogNorm(vmax=100, vmin=1),
    cmap="plasma",
)

fig.colorbar(mappable=mappable, cax=cax, label="Occurence")

ax_cc.set_xlabel("SAFIRE-CORE " + label_from_attrs(lwc_safire))
ax_cc.set_ylabel("COMPOSITE " + label_from_attrs(lwc_composite))

corr = xr.corr(lwc_safire, lwc_composite)

fig.suptitle(f"Liquid Water Content comparison with R = {corr.data:.2f}", fontsize="medium")

Text(0.5, 0.98, 'Liquid Water Content comparison with R = 0.31')

As can be seen, applying a rolling mean does not resolve the problem.

In [None]:
# Same comparison as the unsmoothed 2D histogram, but on rolling means over
# 10 consecutive samples along `time`.
rolling_window = 10
lwc_safire_smooth = (
    safire_shared["liquid_water_content"].rolling(time=rolling_window).mean(skipna=True)
)
lwc_composite_smooth = (
    cloud_composite_shared["liquid_water_content"].rolling(time=rolling_window).mean(skipna=True)
)

fig = plt.figure()
gs = fig.add_gridspec(1, 11, hspace=1, wspace=1)

# Wide panel for the histogram, narrow last column for the colorbar.
ax_cc = fig.add_subplot(gs[0:9])
cax = fig.add_subplot(gs[-1])

# Deliberately not called `range`: a notebook-level `range = [...]` would
# shadow the builtin for every cell executed afterwards.
lwc_range = [[-0.05, 3], [-0.05, 3]]

*_, mappable = ax_cc.hist2d(
    lwc_safire_smooth,
    lwc_composite_smooth,
    range=lwc_range,
    bins=100,
    norm=mcolors.LogNorm(vmax=100, vmin=1),
    cmap="plasma",
)

fig.colorbar(mappable=mappable, cax=cax, label="Occurence")

# Axis labels are taken from the unsmoothed variables, as before.
ax_cc.set_xlabel("SAFIRE-CORE " + label_from_attrs(safire_shared["liquid_water_content"]))
ax_cc.set_ylabel("COMPOSITE " + label_from_attrs(cloud_composite_shared["liquid_water_content"]))

# Correlate the smoothed series that are actually plotted. Previously the
# unsmoothed series were correlated here, so the title merely repeated the
# value of the unsmoothed comparison.
corr = xr.corr(lwc_safire_smooth, lwc_composite_smooth)

fig.suptitle(f"Liquid Water Content comparison with R = {corr.data:.2f}", fontsize="medium")

Text(0.5, 0.98, 'Liquid Water Content comparison with R = 0.31')

In [None]:
# Lagged correlation of the smoothed liquid water content (LWC) series.
# A lag is a shift by that many positions along `time` (DataArray.shift).
lags = np.arange(-20, 21, 1)

ds1 = safire_shared["liquid_water_content"].rolling(time=10).mean(skipna=True)
ds2 = cloud_composite_shared["liquid_water_content"].rolling(time=10).mean(skipna=True)

# Cross-correlation: shifted SAFIRE-CORE series against the composite series.
corr = [xr.corr(ds1.shift(time=lag), ds2) for lag in lags]
# Autocorrelation of each series, for reference.
corr_cc = [xr.corr(ds2.shift(time=lag), ds2) for lag in lags]
corr_ds = [xr.corr(ds1.shift(time=lag), ds1) for lag in lags]

fig, ax = plt.subplots()
ax.scatter(lags, corr, label="SAFIRE-CORE (shifted) vs. composite")
ax.scatter(lags, corr_cc, marker="x", label="Composite autocorrelation")
ax.scatter(lags, corr_ds, marker="+", label="SAFIRE-CORE autocorrelation")

# Without labels the three series cannot be told apart in the figure.
ax.set_xlabel("Lag [time steps]")
ax.set_ylabel("Correlation coefficient")
ax.legend()

<matplotlib.collections.PathCollection at 0x7ffebc132d80>

#### Relation of the Relative humidity to the LWCs

In [None]:
mask = safire_shared["relative_humidity_1"] > 100

ds = safire_shared  # .where(mask)
cc = cloud_composite_shared  # .sel(time = ds.time)

# Share of the finite relative humidity samples above 100 % and above 102 %.
rh_1 = ds["relative_humidity_1"]
finite_count = np.isfinite(rh_1).sum()
above_100 = (rh_1 > 100).sum()
above_102 = (rh_1 > 102).sum()
fraction_100 = 100 * above_100 / finite_count
fraction_102 = 100 * above_102 / finite_count

# Layout: two histogram panels side by side plus a narrow colorbar column;
# the first grid row is left free for the multi-line figure title.
fig = plt.figure()
gs = fig.add_gridspec(11, 21, hspace=1, wspace=1)
ax_cc = fig.add_subplot(gs[1:, 0:9])
ax_ds = fig.add_subplot(gs[1:, 10:19], sharey=ax_cc, sharex=ax_cc)
cax = fig.add_subplot(gs[1:, -1])

# Both histograms share binning and colour normalisation so they are comparable.
hist2d_kwargs = {
    "range": [[10, 130], [-0.55, 2.5]],
    "bins": [30, 50],
    "norm": mcolors.LogNorm(vmax=1e3, vmin=1),
    "cmap": "plasma",
}

ax_cc.hist2d(rh_1, cc["liquid_water_content"], **hist2d_kwargs)
*_, mappable = ax_ds.hist2d(rh_1, ds["liquid_water_content"], **hist2d_kwargs)

fig.colorbar(mappable=mappable, cax=cax, label="Occurence")

# Reference lines: saturation (RH = 100) and zero liquid water content.
for panel in (ax_cc, ax_ds):
    panel.axvline(100, color="black", linestyle="--")
    panel.axhline(0.0, color="black", linestyle="--", alpha=0.5)

rh_label = label_from_attrs(rh_1)
ax_cc.set_xlabel(rh_label)
# The y-axis is shared, so only the left panel carries the label.
ax_cc.set_ylabel(label_from_attrs(ds["liquid_water_content"]))
ax_ds.set_xlabel(rh_label)
ax_cc.set_title("Cloud Composite LWC")
ax_ds.set_title("SAFIRE-CORE LWC")

fig.suptitle(
    f"Relative Humidity vs Liquid Water Content\n{fraction_100.data:.2f}% of measurements has RH > 100%\n{fraction_102.data:.2f}% of measurements has RH > 102%",
    fontsize="medium",
)

Text(0.5, 0.98, 'Relative Humidity vs Liquid Water Content\n3.50% of measurements has RH > 100%\n1.45% of measurements has RH > 102%')

### Single flight track

We can see very high relative humidity values, exceeding 110%, which should not be the case.
There can be multiple explanations for this:
1. The calculations we use are not accurate enough. Maybe we need to consider the temperature dependency of $L_V$ more carefully. But we already use the most accurate estimation, by Murphy and Koop!
2. The temperature and absolute humidity values are not well related to each other. BUT the values exceed 100% for a consistent time period.

In [None]:
# Time window of the single flight leg examined in the following cells.
single_flight_time_slice = slice("2020-02-13T08:45:00", "2020-02-13T11:00")

# Cut the same window out of each dataset:
# ds -> SAFIRE-CORE, cc -> cloud composite, ic -> identified clusters.
ds, cc, ic = (
    dataset.sel(time=single_flight_time_slice)
    for dataset in (safire, cloud_composite, identified_clusters)
)

Liquid water content

In [None]:
# Horizontal flight track of the selected time window, with the positions of
# the identified clusters overlaid.
fig, ax = plt.subplots(figsize=(3, 3))

ax.plot(ds["longitude"], ds["latitude"], marker="None", label="Flight track")
ax.scatter(ic.lon, ic.lat, color="k", label="identified clusters")

ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
ax.set_title("Flight track and identified clusters")

Text(0.5, 1.0, 'Flight track and identified clusters')

In [None]:
# Liquid water content time series of both sources for the selected window.
fig, ax = plt.subplots()

for dataset, source_label in ((ds, "SAFIRE-CORE"), (cc, "Composite")):
    ax.plot(dataset["time"], dataset["liquid_water_content"], label=source_label)

ax.set_xlabel("Time")
ax.set_ylabel(label_from_attrs(ds["liquid_water_content"]))
ax.legend()
# Only the lower limit is fixed; the upper limit stays automatic.
ax.set_ylim(bottom=3e-2)

(0.03, 0.5614138745723619)

In [None]:
# Relative humidity (left y-axis) and absolute humidity (right y-axis) for the
# two SAFIRE-CORE humidity variables: one panel per variable, with the
# identified cluster intervals shaded in the background.
fig, axs = plt.subplots(nrows=2, sharex=True, figsize=(9, 6))

# Shade every identified cluster interval. Only the first span of each panel
# is labelled, so that a legend shows a single "Rain Clouds" entry.
for ax in axs:
    for span_index, (start, end) in enumerate(zip(ic["start"], ic["end"])):
        ax.axvspan(
            start.data,
            end.data,
            color="grey",
            alpha=0.5,
            label="Rain Clouds" if span_index == 0 else None,
        )
twinx_axs = [ax.twinx() for ax in axs]

for sensor_suffix, ax_rh, ax_ah in zip(("1", "2"), axs, twinx_axs):
    rh = ds["relative_humidity_" + sensor_suffix]
    is_supersaturated = rh > 100

    rh.plot(
        ax=ax_rh,
        linestyle="-",
        marker="None",
        label="RH",
        color=default_colors[0],
    )
    # Highlight the samples above saturation.
    rh.where(is_supersaturated).plot(
        ax=ax_rh,
        linestyle="None",
        marker=".",
        label="RH > 100 %",
        color=default_colors[1],
    )

    # Rescale by 1e3 and relabel as g/m^3 (the source is presumably in kg/m^3 — confirm).
    ah = 1e3 * ds["absolute_humidity_" + sensor_suffix]
    ah.attrs.update(units="g/m^3", long_name="Absolute humidity")

    ah.plot(
        ax=ax_ah,
        linestyle="-",
        marker="None",
        label="AH",
        color=default_colors[2],
    )
    ah.where(is_supersaturated).plot(
        ax=ax_ah,
        linestyle="None",
        marker=".",
        label="AH for RH > 100 %",
        color=default_colors[3],
    )

    # The left axis of every panel shows relative humidity. The second panel
    # was previously labelled with the absolute-humidity label, which belongs
    # to the right (twin) axis and is set there by the xarray plot call.
    ax_rh.set_ylabel(label_from_attrs(rh))
    ax_rh.set_title(ds["absolute_humidity_" + sensor_suffix].attrs["comment"])

fig.suptitle(
    f"SAFIRE-CORE relative humidity and absolute humidity\nFlight number {ds.flight_number.mean().data}",
    fontsize="medium",
)
axs[1].set_xlabel("Time")
axs[0].legend(loc="lower left")
twinx_axs[0].legend(loc="lower right")

fig.tight_layout()

In [None]:
# Single-panel version of the humidity plot: relative humidity on the left
# axis, absolute humidity on the right (twin) axis, first humidity variable only.
fig, ax_rh = plt.subplots(nrows=1, sharex=True, figsize=(9, 4.5))
ax_ah = ax_rh.twinx()
axs = [ax_rh]
twinx_axs = [ax_ah]

rh = ds["relative_humidity_1"]
is_supersaturated = rh > 100

rh.plot(ax=ax_rh, linestyle="-", marker="None", label="RH", color=default_colors[0])
# Mark the samples above saturation.
rh.where(is_supersaturated).plot(
    ax=ax_rh, linestyle="None", marker=".", label="RH > 100 %", color=default_colors[1]
)

# Rescale by 1e3 and relabel as g/m^3 (the source is presumably in kg/m^3 — confirm).
ah = 1e3 * ds["absolute_humidity_1"]
ah.attrs.update(units="g/m^3", long_name="Absolute humidity")

ah.plot(ax=ax_ah, linestyle="-", marker="None", label="AH", color=default_colors[2])
ah.where(is_supersaturated).plot(
    ax=ax_ah, linestyle="None", marker=".", label="AH for RH > 100 %", color=default_colors[3]
)

fig.suptitle(
    f"SAFIRE-CORE relative humidity and absolute humidity\nFlight number {ds.flight_number.mean().data}",
    fontsize="medium",
)
ax_rh.set_ylabel(label_from_attrs(rh))
ax_rh.set_xlabel("Time")
ax_rh.legend(loc="lower left")
ax_ah.legend(loc="lower right")

ax_rh.set_title(ds["absolute_humidity_1"].attrs["comment"])

fig.tight_layout()

In [None]:
# Plot the trajectory of the ATR aircraft in 3D, coloured by the relative
# humidity (rolling mean over 10 samples). Points whose smoothed relative
# humidity reaches 100 are additionally marked in red.
longitude = ds["longitude"]
latitude = ds["latitude"]
altitude = ds["altitude"]
rh_sensor_1 = ds["relative_humidity_1"]
color = rh_sensor_1.rolling(time=10).mean()
is_saturated = color >= 100

fig = plt.figure(figsize=(8, 6))
gs = fig.add_gridspec(10, 100)

# The 3D axes span the whole grid; the colorbar sits in a thin strip at the right edge.
ax = fig.add_subplot(gs[:, :], projection="3d")
cax = fig.add_subplot(gs[2:-2, -3:])

sc = ax.scatter(longitude, latitude, altitude, c=color, cmap="Blues", marker=".")
ax.scatter(
    longitude.where(is_saturated),
    latitude.where(is_saturated),
    altitude.where(is_saturated),
    color="red",
    marker="X",
    zorder=10,
)
# Label the colorbar from the variable that is actually shown. The previous
# code used the notebook-level `relative_humidity`, a leftover of the cell
# that computed the humidity variables (the "absolute_humidity_2" result).
fig.colorbar(cax=cax, mappable=sc, label=label_from_attrs(rh_sensor_1))

ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
ax.set_zlabel("Altitude")

# Unique, non-NaN flight numbers inside the selected time window.
flight_number = ds["flight_number"]
flight_Numbers = np.unique(flight_number[~np.isnan(flight_number)])
ax.set_title(f"ATR trajectory with relative humidity for flight NUM {flight_Numbers}")

Text(0.5, 0.92, 'ATR trajectory with relative humidity for flight NUM [19.]')

# Optimize the linear fits of the thermodynamic profiles

identified_clusters

In [None]:
# Pick one identified cluster and compare the relative humidity profiles of
# nearby dropsondes with the in-situ SAFIRE-CORE humidity at flight level.
# NOTE(review): the cluster is drawn at random without a fixed seed, so every
# run analyses a different cluster and may hit the ValueError below. Consider
# a seeded generator once a cluster with matching dropsondes is known.
cloud_id = np.random.choice(identified_clusters["cloud_id"].values)
ic = identified_clusters.swap_dims({"time": "cloud_id"}).sel(cloud_id=cloud_id)

# Two dropsonde selections: a tight one and a more permissive one.
drop_sondes_selected = match_clouds_and_dropsondes(
    ds_clouds=ic,
    ds_sonde=drop_sondes,
    ds_distance=distance,
    max_temporal_distance=np.timedelta64(2, "h"),
    max_spatial_distance=0.5e2,
)
drop_sondes_selected_large = match_clouds_and_dropsondes(
    ds_clouds=ic,
    ds_sonde=drop_sondes,
    ds_distance=distance,
    max_temporal_distance=np.timedelta64(12, "h"),
    max_spatial_distance=1e2,
)

if drop_sondes_selected["time"].size == 0:
    raise ValueError("No dropsondes found for the selected cloud")

cloud_composite_selected = match_clouds_and_cloudcomposite(
    ds_clouds=ic,
    ds_cloudcomposite=cloud_composite,
)

# SAFIRE-CORE samples at the timestamps of the matched cloud composite data.
safire_selected = safire.sel(time=cloud_composite_selected["time"])

# Dropsonde profiles: the tight selection is drawn more opaque than the wide one.
# The factor 1e2 presumably converts a fraction to percent — confirm.
for sondes, line_alpha in ((drop_sondes_selected, 0.5), (drop_sondes_selected_large, 0.1)):
    plt.plot(
        1e2 * sondes["relative_humidity"].T,
        sondes["alt"],
        color="grey",
        alpha=line_alpha,
    )

# Mean altitude of the matched cloud composite samples.
plt.axhline(cloud_composite_selected["alt"].mean(), color="grey", linestyle="-", alpha=0.3, zorder=0)

# In-situ relative humidity at flight level: mean with the standard deviation
# as horizontal error bar. The earlier calls to mean_and_stderror_of_mean were
# dead code, because their results were overwritten before being used; the
# error bar has always been the standard deviation, not the standard error.
flight_level = safire_selected["altitude"].mean()
for rh_name, marker_color in (("relative_humidity_1", "red"), ("relative_humidity_2", "blue")):
    rh_selected = safire_selected[rh_name]
    plt.errorbar(
        x=rh_selected.mean(skipna=True),
        xerr=rh_selected.std(skipna=True),
        y=flight_level,
        marker=".",
        alpha=0.3,
        color=marker_color,
    )

plt.ylim(0, 1200)

(0.0, 1200.0)

In [None]:
# Display the dropsonde dataset matched to the selected cloud for inspection.
drop_sondes_selected