In [None]:
import os
import random

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd

from mpl_toolkits.basemap import Basemap
from shapely.geometry import Point

In [None]:
# --- Locations (relative to the notebook's working directory) ---
PATH_DATA = os.path.join("..", "data")
PATH_DATA_SOURCE = os.path.join(PATH_DATA, "processed_data")
PATH_SHAPEFILE = os.path.join(
    PATH_DATA, "raw_data", "geodata", "AUT_adm0.shp"
)

# --- Sampling setup ---
# Each return level is the number of distinct years drawn for one sample.
RETURNLEVELS = [2, 3, 5, 10, 14]
# Number of random draws per return level. Only 14 years of data exist, so
# for some return-level / sample-count combinations the draws will largely
# (or entirely) repeat the same set of years.
NR_SAMPLES = 10

In [None]:
# Candidate data bundles: every entry of PATH_DATA_SOURCE whose name contains
# "dataparquet", in reverse lexicographic order.
FOLDERS = sorted(
    (d for d in os.listdir(PATH_DATA_SOURCE) if "dataparquet" in d),
    reverse=True,
)

# Fail with an actionable message instead of an opaque IndexError below.
if not FOLDERS:
    raise FileNotFoundError(
        f"No entry containing 'dataparquet' found in {PATH_DATA_SOURCE}"
    )

# The highest-sorting name is used; presumably the names carry a date stamp,
# which would make this the most recent bundle — TODO confirm.
DIR_DATABUNDLE = FOLDERS[0]
PATH_DATABUNDLE = os.path.join(PATH_DATA_SOURCE, DIR_DATABUNDLE)

print(PATH_DATABUNDLE)

In [None]:
# Results go into a sub-folder named after the data bundle they were derived from.
PATH_OUTPUT = os.path.join(
    PATH_DATA,
    "empirical_mesh_returnlevels",
    DIR_DATABUNDLE,
)

print(PATH_OUTPUT)

In [None]:
# Load the selected data bundle into a single frame.
df = pd.read_parquet(PATH_DATABUNDLE)

# Coordinates are scaled down by 1e6 (presumably stored as micro-degrees — TODO confirm).
df["lat"] = df["lat"] / 1000000
df["lon"] = df["lon"] / 1000000

# Calendar year of each record; the return-level sampling draws whole years.
df["year"] = df["date"].dt.year

# Grid cells are identified by their coordinate pair.
groupvars = ["lon", "lat"]

# Columns whose name contains "data_mehs_", plus two extra columns to evaluate.
meshvars = [column for column in df.columns if "data_mehs_" in column]
targetvars = [*meshvars, "data_mehs2poh", "data_poh_max"]

avail_years = list(df["year"].unique())

# Only the "data_mehs_" columns are rescaled (convert to cm).
for meshvar in meshvars:
    df[meshvar] = df[meshvar] / 10

In [None]:
# Shapefile geometry used later to clip the grid points to Austria.
austria = gpd.read_file(PATH_SHAPEFILE)

In [None]:
# For every target column and return level: draw NR_SAMPLES random sets of
# years, take the per-grid-cell maximum within each set, average those maxima
# over the samples, then export a CSV and a PNG map — once for all grid cells
# and once clipped to Austria.
os.makedirs(PATH_OUTPUT, exist_ok=True)

for mehs_column in targetvars:
    print(f"Computing plots for column {mehs_column}", flush=True)

    for years in RETURNLEVELS:
        print(f"Computing plots for {years} years return level", flush=True)

        # random.sample() raises ValueError when asked for more distinct years
        # than are available, so such return levels are skipped explicitly.
        if years > len(avail_years):
            print(
                f"Skipping: only {len(avail_years)} years of data available",
                flush=True,
            )
            continue

        dfs_max = []
        for sample_idx in range(NR_SAMPLES):
            print("-", flush=True, end="")
            filter_years = random.sample(avail_years, years)
            dfs_max.append(
                df[df["year"].isin(filter_years)].groupby(groupvars)[mehs_column].max()
            )

        print("\n Preparing and saving plot", flush=True)

        # Mean over the samples of the per-cell maxima. The aggregated column
        # has no name after reset_index(), so it is named explicitly here.
        df_max = pd.concat(dfs_max, axis=1).agg("mean", axis=1).reset_index()
        df_max.columns = list(df_max.columns)[:2] + ["max_cal_hs"]
        value_columns = list(df_max.columns)

        for clip_to_austria in [False, True]:
            print(f"Clipping to Austria: {clip_to_austria}", flush=True)

            # df_max itself is never modified inside this loop, so both passes
            # start from the same values in the same unit.
            if clip_to_austria:
                gdf = gpd.GeoDataFrame(
                    df_max,
                    geometry=gpd.points_from_xy(df_max["lon"], df_max["lat"]),
                    crs="EPSG:4326",
                )
                df_current = gpd.sjoin(gdf, austria, how="inner", predicate="within")[value_columns]
            else:
                df_current = df_max

            infix_clipped = "_clipped_AUT" if clip_to_austria else ""
            filename = f"hailriskat_y{years}_s{NR_SAMPLES}_{mehs_column}{infix_clipped}"

            # CSV export: values x10 (cm -> mm for the hail-size columns),
            # rounded and stored as integers where possible.
            # NOTE(review): the x10 is applied to every target column, including
            # "data_mehs2poh" and "data_poh_max", which this notebook does not
            # divide by 10 on load — confirm that this is intended.
            df_export = df_current.copy()
            df_export["max_cal_hs"] *= 10
            df_export["max_cal_hs"] = pd.to_numeric(
                df_export["max_cal_hs"].round(), downcast="integer"
            )
            df_export.to_csv(os.path.join(PATH_OUTPUT, f"{filename}.csv"), index=False)

            fig = plt.figure(figsize=(15, 10))

            # Initialize the Basemap
            m = Basemap(projection="lcc", resolution="f", lat_0=47.5, lon_0=13.3, width=0.6E6, height=3.7E5)
            m.drawmapboundary()
            m.drawcountries(linewidth=2)

            # NOTE(review): the colour scale is fixed to 1..6 for every target
            # column — confirm it also suits the two "poh" columns.
            m.scatter(
                df_current["lon"],
                df_current["lat"],
                c=df_current["max_cal_hs"],
                cmap="jet",
                s=0.5,
                latlon=True,
                vmin=1,
                vmax=6,
            )

            plt.colorbar(label=mehs_column, extend="max")

            fig.savefig(os.path.join(PATH_OUTPUT, f"{filename}.png"), bbox_inches="tight")

            # Close this specific figure so figures do not pile up across iterations.
            plt.close(fig)

print("FIN.")