In [None]:
# External packages

import calendar
import os
import warnings

import cartopy.crs as ccrs
import intake
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
from easygems import healpix as egh

warnings.filterwarnings("ignore", category=FutureWarning)

In [None]:
# Functions from our repo

import tools.utils as sc

In [None]:
# Analysis period as (start, end) date strings; used below to slice the time axis
time = ("2020-04-01", "2020-04-30")
# Alternative period:
# time = ('2020-08-01','2020-08-31')

# Map extent, indexed further down as [lon_min, lon_max, lat_min, lat_max]
map_domain = sc.domains10x10["namibian"]

In [None]:
# Open the hackathon catalog and load one simulation as a dask-backed dataset

hknode = "EU"            # catalog node to read from
sim_id = "icon_d3hp003"  # simulation entry; also used in plot labels and titles
sim_opt = dict(zoom=11, time="PT6H", time_method="inst")  # parameters of the catalog entry

catalog_url = "https://digital-earths-global-hackathon.github.io/catalog/catalog.yaml"
cat = intake.open_catalog(catalog_url)[hknode]

# attach_coords is applied right after loading so that the cell coordinates
# are available to the regional selection and remapping cells
ds = egh.attach_coords(cat[sim_id](**sim_opt).to_dask())

In [None]:
# Load the EarthCare reference statistics (CSV file sitting next to the notebook)

ecfile = './stats_earthcare_April2025.csv'

# English month name of the first day of the analysis period
month = calendar.month_name[np.datetime64(time[0]).item().month]

# The CSV is only read when the period starts in April; for any other month
# `earthcare` is None and the plotting cells skip the EarthCare overlay.
earthcare = sc.read_earthcare_csv(ecfile) if month == 'April' else None

In [None]:
# Restrict the dataset to the analysis period and to the map domain.
# The domain is widened by 0.1 on every side so that the later remapping
# into lon-lat has source cells right up to the domain edges.

margin = np.array([-1, 1, -1, 1]) * 0.1
cells = egh.isel_extent(ds, map_domain + margin)

period = slice(*time)
ds = ds.sel(time=period).isel(cell=cells)

In [None]:
%%time

# Vertically integrate qall (helper from tools.utils) and store it as 'lwp'

ds['lwp'] = (
    sc.integrate_wrt_pressure(ds['qall'])
    .assign_attrs(long_name='Column condensate', units='kg/m^2')
)

In [None]:
# Basic statistics of 'lwp' via the repo helper.
# NOTE(review): presumably this adds the lwp_mean / lwp_std / lwp_skw variables
# that the time-series cell looks up in `ds` — confirm in tools.utils.

ds = sc.basic_stats(ds, 'lwp')

In [None]:
# Nearest-neighbour remapping of 'lwp' onto a regular lon-lat grid

# Extent of the domain along longitude and latitude
dlon = map_domain[1] - map_domain[0]
dlat = map_domain[3] - map_domain[2]

# 2**zoom; also the first argument of the nearest-neighbour index helper
# (presumably the HEALPix nside — confirm in tools.utils)
nside = 2**sim_opt['zoom']

# Estimated number of target grid points along each axis
nlon = np.ceil(dlon/360 * 4*nside * np.sqrt(2)).astype(int)
nlat = np.ceil(nlon * dlat/dlon).astype(int)

# Approximate target grid spacing in km (111 km per degree of latitude)
res = dlat*111/nlat

# Sample a 4x finer grid first and block-average afterwards to avoid aliasing
# (following easygems)
supersampling = {"lon": 4, "lat": 4}

lon_fine = np.linspace(map_domain[0], map_domain[1], supersampling["lon"] * nlon)
lat_fine = np.linspace(map_domain[2], map_domain[3], supersampling["lat"] * nlat)
idx = sc.get_nn_lon_lat_index(nside, lon_fine, lat_fine)

# Pick the source cell for every fine-grid point, then average blocks of
# supersampling["lon"] x supersampling["lat"] points down to the target grid
lwp_fine = (
    ds['lwp']
    .drop_vars(('lon', 'lat'))
    .sel(cell=idx)
    .drop_vars(('crs', 'cell'))
)
lwp_coarse = lwp_fine.coarsen(supersampling).mean()

# New dataset on the lon-lat grid; the grid spacing is kept as an attribute
remap = xr.Dataset({'lwp': lwp_coarse}).assign_attrs(resolution=res)

In [None]:
# Binary cloud mask and domain-mean cloud cover from an 'lwp' threshold

lwp_threshold = 1e-1  # same units as remap['lwp']

remap['cloud_mask'] = (
    (remap['lwp'] > lwp_threshold)
    .assign_attrs(long_name='Cloud mask')
)

# Fraction of cloudy grid points over the whole lon-lat domain
remap['cloud_cover'] = (
    remap['cloud_mask']
    .mean(dim=('lat', 'lon'))
    .assign_attrs(long_name='Cloud cover')
)

In [None]:
%%time

# LvL score on the remapped dataset (repo helper).
# NOTE(review): presumably this adds the ks_cloud / ks_void variables that the
# plotting cells read from `remap` — confirm in tools.utils.

remap = sc.LvL2dataset(remap)

In [None]:
# Plot timeseries: one panel per entry of `variables`, simulation vs EarthCare

plot_path = "./figures/"
os.makedirs(plot_path, exist_ok=True)  # savefig does not create missing folders

# One entry per panel; a list entry draws several variables in the same panel
variables = ['lwp_mean','lwp_std','lwp_skw',
             'cloud_cover',['ks_cloud','ks_void']]
labels = ['LWP [kg/m2] mean', 'LWP [kg/m2] std', 'LWP skw',
          'Cloud cover','Deviation from randomness']

Npanel = len(variables)
fig, axs = plt.subplots(Npanel,1, figsize=(12,3*Npanel),
                        sharex=True, constrained_layout=True)

for ax, var, lab in zip(axs,variables,labels):
    # Normalise to a list of names; multi-variable panels get the variable
    # name as legend prefix so the curves can be told apart
    if isinstance(var, str):
        names = [var]
        prefixes = ['']
    else:
        names = var
        prefixes = [v+' ' for v in var]
    for v, prefix in zip(names, prefixes):
        # A variable lives either in the healpix dataset or in the remapped one;
        # each is drawn against its own time coordinate
        if v in ds:
            ax.plot(ds.time, ds[v], label=prefix+sim_id)
        elif v in remap:
            ax.plot(remap.time, remap[v], label=prefix+sim_id)
        # EarthCare overlay only when the reference file was loaded
        if isinstance(earthcare, xr.Dataset) and v in earthcare:
            ax.plot(earthcare.time, earthcare[v], label=prefix+'EarthCare',marker='o')
    ax.set_ylabel(lab)
    ax.legend()
    ax.grid()
    ax.autoscale(tight=True)

fig.savefig(plot_path+'lvl_timeseries_'+month,bbox_inches='tight',dpi=300)

In [None]:
# Plot scatter KS_cloud vs KS_void, one marker series per hour of the day

plot_path = "./figures/"
os.makedirs(plot_path, exist_ok=True)  # savefig does not create missing folders

gr = remap.groupby('time.hour')

fig, ax = plt.subplots()
for h in gr.groups:
    hourly = gr[h]
    ax.plot(hourly['ks_cloud'],hourly['ks_void'],'o',label=f"{h:02d}:00")
# EarthCare overlay only when the reference file was loaded
if isinstance(earthcare, xr.Dataset):
    ax.plot(earthcare['ks_cloud'],earthcare['ks_void'],'o',label='EarthCare',color='black')
ax.grid()
ax.legend()
ax.set_xlabel('Deviation from randomness: cloud chord lengths')
ax.set_ylabel('Deviation from randomness: void chord lengths')
ax.set_title(sim_id+' in '+month)
fig.savefig(plot_path+'lvl_scatter_'+month,bbox_inches='tight',dpi=300)

In [None]:
# Accumulate the distributions per hour of the day and over the whole period

hourly_groups = remap.groupby('time.hour')
lvl_hour = sc.LvL2groupby(hourly_groups)

# The overall result gets the artificial hour 25 so that it can be merged with
# the hourly results; the plotting cell labels any hour > 24 as "all"
lvl_all = sc.LvL2groupby(remap).expand_dims(hour=np.array([25]))

In [None]:
# Plot accumulated distributions: one panel per hour plus one for all times

# Set locally so the output folder does not depend on which plot cell ran last
plot_path = "./figures/"
os.makedirs(plot_path, exist_ok=True)  # savefig does not create missing folders

lvl = xr.merge((lvl_hour,lvl_all))

# Plain numpy hour labels: safe to compare and to format as integers
hours = lvl['hour'].values

Ncol = 5
# Ceiling division, so every panel gets an axis for any number of hours
Nrow = -(-len(hours) // Ncol)
fig, axs = plt.subplots(Nrow,Ncol, figsize=(4*Ncol,4*Nrow),
                        sharey=True, sharex=True, constrained_layout=True,
                        squeeze=False)
panels = axs.ravel()

for ax, h in zip(panels,hours):
    sc.plot_LvL_dist(ax,lvl.sel(hour=h))
    ax.set_ylim(1e-6,1)
    ax.set_xlim(lvl.attrs['resolution'],1e3)
    # Hours above 24 are the placeholder label of the all-times accumulation
    if h>24:
        ax.set_title("all "+ax.get_title())
    else:
        ax.set_title(f"{int(h):d}:00 "+ax.get_title())

# Hide grid slots that have no distribution to show
for ax in panels[len(hours):]:
    ax.set_visible(False)

panels[0].set_ylabel('Probability')
panels[0].legend()

fig.suptitle(sim_id+' in '+month)
fig.savefig(plot_path+'lvl_distributions_'+month,bbox_inches='tight',dpi=300)

In [None]:
# Plot example maps and distributions: one three-panel figure every `time_step`

plot_path = "./figures/lvl/"
os.makedirs(plot_path, exist_ok=True)  # savefig does not create missing folders

time_step = np.timedelta64(5,'D')

# np.arange excludes the stop value, so the last time stamp itself is not plotted
for t in np.arange(remap.time[0].values,remap.time[-1].values,time_step):

    fig, axs = plt.subplots(1,3,figsize=(4*3,4),
                            subplot_kw={"projection": ccrs.PlateCarree()},
                            constrained_layout=True)

    # LWP in original healpix grid
    da = ds['lwp'].sel(time=t)
    ax = axs[0]
    sc.draw_map(ax,map_domain)
    im = egh.healpix_show(da, ax=ax, cmap='Blues_r',vmin=0,vmax=1)
    sc.annotate_map(ax,da,im)

    # Cloudmask in lon-lat
    da = remap['cloud_mask'].sel(time=t)
    ax = axs[1]
    sc.draw_map(ax,map_domain)
    im = ax.imshow(da, extent=ax.get_xlim() + ax.get_ylim(), origin="lower",
                   cmap='Blues_r',vmin=0,vmax=1)
    sc.annotate_map(ax,da,im)

    # Cloud/void chord length distribution.
    # The third axis was created with a map projection; swap it for a plain one.
    axs[-1].remove()
    ax = fig.add_subplot(1,3,3)
    sc.plot_LvL_dist(ax,remap.sel(time=t))
    ax.set_ylim(1e-3,1)
    ax.set_xlim(remap.attrs['resolution'],1e3)

    fig.suptitle(sim_id)
    # File name is the time stamp truncated to the hour
    datestr = t.astype('datetime64[h]').item().strftime('%Y-%m-%dT%H')
    fig.savefig(plot_path+datestr,bbox_inches='tight',dpi=300)
    print(datestr)
    # Figures are left open so they still render inline; enable the next line
    # to free memory when many time steps are plotted.
    # plt.close(fig)