In [None]:
import os

import cmasher as cmr
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from mpl_toolkits.basemap import Basemap

In [None]:
# Root of the data tree and the folder that holds the processed parquet bundles.
PATH_DATA = os.path.join('..', 'data')
PATH_PROCESSED = os.path.join(PATH_DATA, "processed_data")

# Every entry whose name starts with 'dataparquet', in descending lexicographic
# order, so the entry with the greatest name comes first.
FOLDERS = sorted(
    (folder for folder in os.listdir(PATH_PROCESSED) if folder.startswith('dataparquet')),
    reverse=True,
)

# Fail with an actionable message instead of a bare IndexError on FOLDERS[0].
if not FOLDERS:
    raise FileNotFoundError(
        f"No entry starting with 'dataparquet' found in {PATH_PROCESSED!r}"
    )

DIR_DATABUNDLE = FOLDERS[0]

# Plots and CSV exports go to a subfolder named after the selected bundle.
PATH_OUTPUT = os.path.join(PATH_DATA, "hail_eda_plots", DIR_DATABUNDLE)

In [None]:
# Column of df_full that all analyses in this notebook read. If the name contains
# "mehs", the load cell divides the column by 10 (annotated there as a conversion to cm).
target_col = "data_mehs_orig"

In [None]:
# Create the output folder (including missing parents); no error if it already exists.
os.makedirs(PATH_OUTPUT, exist_ok=True)

In [None]:
# Read the selected data bundle into one DataFrame.
df_full = pd.read_parquet(os.path.join(PATH_PROCESSED, DIR_DATABUNDLE))

# Scale both coordinate columns down by a factor of one million.
# NOTE(review): presumably lon/lat are stored as integer micro-degrees - confirm.
for coord in ("lon", "lat"):
    df_full[coord] = df_full[coord] / 1000000

if "mehs" in target_col:
    df_full[target_col] = df_full[target_col] / 10  # convert to cm

In [None]:
# Colormap shared by all plots: a sub-range (0.05 to 0.9) of 'plasma'. Values above
# vmax are drawn in the colour found at the very end (1.0) of the full 'plasma' map.
plasma_full = plt.colormaps.get_cmap('plasma')
cmap = cmr.get_sub_cmap('plasma', 0.05, 0.9)
cmap.set_extremes(over=plasma_full(1.0))

# Number of hail days

In [None]:
# Lower colour limit; the upper limit is taken from the data further down.
vmin = 1

print(f"Computing plots for column {target_col}", flush=True)

## Prepare dataframe
# One row per input record, flagged True when the target value is strictly positive.
df_speccolumn = df_full[["lon", "lat", target_col]].copy()
df_speccolumn["sees_hail"] = df_speccolumn[target_col].gt(0)

# Count the flagged records per (lon, lat) location.
df_agg = (
    df_speccolumn
    .groupby(['lon', 'lat'])['sees_hail']
    .sum()
    .rename('hail_events')
    .reset_index()
)

filename = f"hailriskat_{target_col}_nr_observations"

df_agg.to_csv(os.path.join(PATH_OUTPUT, f"{filename}.csv"), index=False)

vmax = df_agg['hail_events'].max()

## Plot
plt.rcParams.update({'font.size': 18})
# NOTE: despite its name, `ax` holds the Figure returned by plt.figure.
ax = plt.figure(figsize=(15, 10))

m = Basemap(projection='lcc', resolution='f', lat_0=47.7, lon_0=13.3, width=6.0E5, height=3.35E5)
m.drawmapboundary()
m.drawcountries(linewidth=2)

m.scatter(
    df_agg['lon'],
    df_agg['lat'],
    c=df_agg['hail_events'],
    cmap=cmap,
    s=2,
    latlon=True,
    vmin=vmin,
    vmax=vmax,
)

# Save the map as raster and as vector graphic.
for extension in ("png", "pdf"):
    plt.savefig(os.path.join(PATH_OUTPUT, f"{filename}.{extension}"), bbox_inches="tight")

plt.close()

## Stand-alone colour bar, saved as its own figure
plt.rcParams.update({'font.size': 30})
fig = plt.figure(figsize=(3, 8))
ax1 = fig.add_axes([0.05, 0.80, 0.2, 0.9])

colour_norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)
cb1 = mpl.colorbar.ColorbarBase(ax1, cmap=cmap, norm=colour_norm, orientation='vertical')

cb1.set_label('hail days')

for extension in ("png", "pdf"):
    plt.savefig(os.path.join(PATH_OUTPUT, f"{filename}_cmap.{extension}"), bbox_inches="tight")

plt.close()

print("Fin.", flush=True)

# Hail frequency maps

In [None]:
# Thresholds to evaluate, in the unit of target_col after loading.
hailsizes_to_plot = [0, 1, 2, 3, 4, 5]
# Fixed colour limits shared by all threshold maps so they are directly comparable.
vmin = 0
vmax = 2
# Divisor that turns event counts into a per-year frequency.
# NOTE(review): presumably the number of observed years; 14 equals the length of
# np.arange(2009, 2023) used by the return-level cell - confirm and keep in sync.
n_years_observed = 14.0

os.makedirs(PATH_OUTPUT, exist_ok=True)

# The column selection does not depend on the threshold, so copy it once up front
# instead of once per loop iteration.
df_speccolumn = df_full[["lon", "lat", target_col]].copy()

for hailsize in hailsizes_to_plot:
    print(f"Computing plots for hailsize {hailsize}", flush=True)

    ## Prepare dataframe
    # Threshold 0 means "any hail" (strictly positive); every other threshold is inclusive.
    if hailsize > 0:
        df_speccolumn["hail_larger_threshold"] = df_speccolumn[target_col] >= hailsize
    else:
        df_speccolumn["hail_larger_threshold"] = df_speccolumn[target_col] > hailsize

    # Count the qualifying records per location and drop locations without any.
    df_agg = (
        df_speccolumn
        .groupby(['lon', 'lat'])['hail_larger_threshold']
        .sum()
        .rename('hail_events_larger_threshold')
        .reset_index()
    )
    df_agg = df_agg[df_agg['hail_events_larger_threshold'] > 0]

    filename = f"hailriskat_{hailsize}_{target_col}"

    df_agg.to_csv(os.path.join(PATH_OUTPUT, f"{filename}.csv"), index=False)

    ## Plot
    plt.rcParams.update({'font.size': 35})

    fig = plt.figure(figsize=(15, 10))

    m = Basemap(projection='lcc', resolution='f', lat_0=47.7, lon_0=13.3, width=6.0E5, height=3.35E5)
    m.drawmapboundary()
    m.drawcountries(linewidth=2)

    m.scatter(
        df_agg['lon'],
        df_agg['lat'],
        c=(df_agg['hail_events_larger_threshold'] / n_years_observed),
        cmap=cmap,
        s=2,
        latlon=True,
        vmin=vmin,
        vmax=vmax,
    )

    plt.savefig(os.path.join(PATH_OUTPUT, f"{filename}.png"), bbox_inches="tight")
    plt.savefig(os.path.join(PATH_OUTPUT, f"{filename}.pdf"), bbox_inches="tight")

    plt.close()

    ## Stand-alone horizontal colour bar, saved next to each map
    plt.rcParams.update({'font.size': 8})
    fig = plt.figure(figsize=(8, 3))
    ax1 = fig.add_axes([0.05, 0.80, 0.9, 0.05])

    cb1 = mpl.colorbar.ColorbarBase(
        ax1,
        cmap=cmap,
        extend='max',
        norm=mpl.colors.Normalize(vmin=vmin, vmax=vmax),
        orientation='horizontal',
    )

    cb1.set_label('Frequency per year')

    plt.savefig(os.path.join(PATH_OUTPUT, f"{filename}_cmap.png"), bbox_inches="tight")
    plt.savefig(os.path.join(PATH_OUTPUT, f"{filename}_cmap.pdf"), bbox_inches="tight")

    plt.close()

print("Fin.", flush=True)

# Maximum hailstone size in observation period

In [None]:
# Fixed colour limits for the maximum-size map.
vmin, vmax = 0, 5

print("Computing maximum estimated hailstone size plot", flush=True)

## Prepare dataframe
df_speccolumn = df_full[["lon", "lat", target_col]].copy()

# Largest target value recorded at each (lon, lat) location.
df_agg = (
    df_speccolumn
    .groupby(['lon', 'lat'])[target_col]
    .max()
    .rename('hail_max_observed')
    .reset_index()
)

filename = f"hailriskat_max_{target_col}"

df_agg.to_csv(os.path.join(PATH_OUTPUT, f"{filename}.csv"), index=False)

## Plot
plt.rcParams.update({'font.size': 18})
# NOTE: despite its name, `ax` holds the Figure returned by plt.figure.
ax = plt.figure(figsize=(15, 10))

m = Basemap(projection='lcc', resolution='f', lat_0=47.7, lon_0=13.3, width=6.0E5, height=3.35E5)
m.drawmapboundary()
m.drawcountries(linewidth=2)

m.scatter(
    df_agg['lon'],
    df_agg['lat'],
    c=df_agg['hail_max_observed'],
    cmap=cmap,
    s=2,
    latlon=True,
    vmin=vmin,
    vmax=vmax,
)

# Save the map as raster and as vector graphic.
for extension in ("png", "pdf"):
    plt.savefig(os.path.join(PATH_OUTPUT, f"{filename}.{extension}"), bbox_inches="tight")

plt.close()

## Stand-alone colour bar, saved as its own figure
plt.rcParams.update({'font.size': 30})
fig = plt.figure(figsize=(3, 8))
ax1 = fig.add_axes([0.05, 0.80, 0.2, 0.9])

colour_norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)
cb1 = mpl.colorbar.ColorbarBase(
    ax1,
    cmap=cmap,
    extend='max',
    norm=colour_norm,
    orientation='vertical',
)

cb1.set_label('hailstone size [cm]')

for extension in ("png", "pdf"):
    plt.savefig(os.path.join(PATH_OUTPUT, f"{filename}_cmap.{extension}"), bbox_inches="tight")

plt.close()

print("Fin.", flush=True)

# Computing sampled return level plots (bootstrapping)

In [None]:
plt.rcParams.update({'font.size': 18})

# Number of random year subsets drawn per sample size.
n_iterations = 10
# Calendar years that may be drawn (2009 up to and including 2022).
years = np.arange(2009, 2023)
# Sizes of the year subsets to evaluate; each must not exceed len(years) because
# the years are drawn without replacement.
n_years_samples = [10]
# Fixed seed so that a fresh "Restart & Run All" reproduces the same figures and CSVs.
random_seed = 42
rng = np.random.default_rng(random_seed)

## Prepare dataframe
# The yearly maxima per location do not depend on the sample size, so they are
# computed once instead of once per entry of n_years_samples.
df_speccolumn = df_full[["lon", "lat", target_col]].copy()
df_speccolumn["year"] = df_full["date"].dt.year

df_agg = (
    df_speccolumn
    .groupby(['lon', 'lat', 'year'])[target_col]
    .max()
    .rename('hail_max_observed')
    .reset_index()
)

for n_years_sample in n_years_samples:
    print(f"Computing sampled return levels for {n_years_sample} years", flush=True)

    # For every iteration: draw a subset of years and keep, per location, the
    # largest yearly maximum that falls into the drawn years.
    bootstrap_results = []

    for _ in range(n_iterations):
        sampled_years = rng.choice(years, size=n_years_sample, replace=False)
        df_sampled = df_agg[df_agg['year'].isin(sampled_years)]
        df_grouped = (
            df_sampled
            .groupby(['lon', 'lat'])['hail_max_observed']
            .max()
            .reset_index()
        )
        bootstrap_results.append(df_grouped)

    print("Summarizing results", flush=True)
    df_bootstrap = pd.concat(bootstrap_results)
    # Median and standard deviation across the iterations, per location. The frame
    # keeps its historical name although it stores the median, not the mean.
    df_bootstrap_mean = (
        df_bootstrap
        .groupby(['lon', 'lat'])['hail_max_observed']
        .agg(['median', 'std'])
        .reset_index()
    )

    print("Computing statistics", flush=True)
    # Bands of one and two standard deviations around the median; lower bounds are
    # clipped at zero.
    df_bootstrap_mean["2std"] = 2 * df_bootstrap_mean['std']
    df_bootstrap_mean["median_1lc"] = np.maximum(df_bootstrap_mean['median'] - df_bootstrap_mean['std'], 0)
    df_bootstrap_mean["median_1uc"] = df_bootstrap_mean['median'] + df_bootstrap_mean['std']
    df_bootstrap_mean["median_2lc"] = np.maximum(df_bootstrap_mean['median'] - df_bootstrap_mean['2std'], 0)
    df_bootstrap_mean["median_2uc"] = df_bootstrap_mean['median'] + df_bootstrap_mean['2std']

    print("Plotting", flush=True)
    # Coordinates and the plain 'std' column are not plotted.
    columns_to_plot = [
        column for column in df_bootstrap_mean.columns
        if column not in ('lon', 'lat', 'std')
    ]

    for column in columns_to_plot:
        # Spread columns (only '2std' reaches this point) get data-driven colour
        # limits; all size columns share fixed limits.
        is_spread_column = 'std' in column

        if is_spread_column:
            vmin = 0
            vmax = df_bootstrap_mean[column].max()
        else:
            vmin = 1
            vmax = 5

        filename = f"hailriskat_sampled_return_level_{n_years_sample}_{target_col}_{column}"

        # The complete statistics table is exported next to every figure (same
        # content under each per-column file name), as before.
        df_bootstrap_mean.to_csv(os.path.join(PATH_OUTPUT, f"{filename}.csv"), index=False)

        ## Plot
        plt.rcParams.update({'font.size': 18})
        fig = plt.figure(figsize=(15, 10))

        m = Basemap(projection='lcc', resolution='f', lat_0=47.7, lon_0=13.3, width=6.0E5, height=3.35E5)
        m.drawmapboundary()
        m.drawcountries(linewidth=2)

        m.scatter(
            df_bootstrap_mean['lon'],
            df_bootstrap_mean['lat'],
            c=df_bootstrap_mean[column],
            cmap=cmap,
            s=2,
            latlon=True,
            vmin=vmin,
            vmax=vmax,
        )

        plt.savefig(os.path.join(PATH_OUTPUT, f"{filename}.png"), bbox_inches="tight")
        plt.savefig(os.path.join(PATH_OUTPUT, f"{filename}.pdf"), bbox_inches="tight")

        plt.close()

        ## Stand-alone colour bar, saved next to each map
        plt.rcParams.update({'font.size': 24})
        fig = plt.figure(figsize=(3, 8))
        ax1 = fig.add_axes([0.05, 0.80, 0.2, 0.9])

        cb1 = mpl.colorbar.ColorbarBase(
            ax1,
            cmap=cmap,
            extend='max',
            norm=mpl.colors.Normalize(vmin=vmin, vmax=vmax),
            orientation='vertical',
        )

        # Only the size columns get an axis label; the spread colour bar stays unlabeled.
        if not is_spread_column:
            cb1.set_label('hailstone size [cm]')

        plt.savefig(os.path.join(PATH_OUTPUT, f"{filename}_cmap.png"), bbox_inches="tight")
        plt.savefig(os.path.join(PATH_OUTPUT, f"{filename}_cmap.pdf"), bbox_inches="tight")

        plt.close()

print("Fin.", flush=True)