In [1]:
import os
import numpy as np
import xarray as xr
import pandas as pd
import seaborn as sns
import geopandas as gpd

import cartopy.crs as ccrs
import scipy.stats as stats

from matplotlib import patches
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.colors as mcolors
from matplotlib.cm import ScalarMappable

import shapely.geometry as sg
from shapely.geometry import Point

import sys
sys.path.append(os.path.abspath(".."))
from function import DOWN_raw
from function import ART_preprocessing as ART_pre

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Satellite product to evaluate: keep exactly one assignment active.

# Ensemble products (only `product` is set):
# product = 'ENSEMBLE_mean'
# product = 'ENSEMBLE_median'
product = "ENSEMBLE_weighted"
# product = 'ENSEMBLE_trimean'

# Single products (these also set the temporal resolution `time_reso`):
# product, time_reso = 'IMERG', '1dy'
# product, time_reso = 'CMORPH', '3h'
# product, time_reso = 'ERA5', '3h'
# product, time_reso = 'MSWEP', '3h'
# product, time_reso = 'CHIRPS', '1dy'
# product, time_reso = 'GSMaP', '3h'
# product, time_reso = 'GSMaP_NoCorrection', '3h'

In [3]:
# Name of the quality-control variant; used as a sub-folder when reading the
# station statistics.
QC_method = "QCv1_Flag1"

In [4]:
# Bounding box of the study area, its label and a tolerance value.
lon_min = 6.5
lon_max = 19
lat_min = 36.5
lat_max = 48
area = 'ITALY'
toll = 0.002

# Return periods (years) and the matching non-exceedance probabilities
# Fi = 1 - 1/Tr.
Tr = [5, 10, 20, 50, 100, 200]
Fi = 1 - 1 / np.array(Tr)

In [5]:
# Folder with the Italian administrative boundaries. The variable name is kept
# as-is even though the folder is the national one.
veneto_dir = os.path.join('/','media','arturo','T9','Data','shapes','Europa','Italy')
regions_file = os.path.join(veneto_dir, 'Italy_regions.geojson')

# Test the file that is actually read (not only its parent folder), so a
# missing GeoJSON stops the notebook with a message naming the right path.
if not os.path.isfile(regions_file):
    raise SystemExit(f"File not found: {regions_file}")

REGIONS = gpd.read_file(regions_file)

In [6]:
# Base folder of the rain-gauge station data (QC_MIX sub-folder).
obs_base = os.path.join(
    '/', 'media', 'arturo', 'T9', 'Data',
    'Italy', 'stations', 'Italy_raingauges', 'QC_MIX',
)

# Base folder of the satellite files that are opened later on.
sat_base = os.path.join(
    '/', 'media', 'arturo', 'T9', 'Data',
    'Italy', 'Satellite', '5_DOWN',
)

In [7]:
# Station metadata table; coordinates are rounded to 6 decimals.
METADATA = pd.read_csv(
    os.path.join(obs_base, 'METADATA', 'METADATA_INTENSE_UPDATE.csv')
)
METADATA["Lat"] = METADATA["Lat"].round(6)
METADATA["Lon"] = METADATA["Lon"].round(6)

In [8]:
# Table of region ISO codes and their names (columns 'ISO' and 'Region' are
# the ones used further down).
ISO_names = pd.read_csv(
    os.path.join(
        '/', 'media', 'arturo', 'T9', 'Data',
        'Italy', 'stations', 'Italy_raingauges', 'ISO_IT_REGION_EN.csv',
    )
)

In [9]:
# Resolve the NetCDF file that belongs to the selected product and open it.
print(f'Load {product}')

# Part of the file name shared by every product.
name_tail = '2002_2023_npix_2_thr_1_acf_mar_genetic_pearson'
ensemble_products = ('ENSEMBLE_mean', 'ENSEMBLE_median', 'ENSEMBLE_weighted', 'ENSEMBLE_trimean')

if product in ensemble_products:
    # Ensemble files use the '1dy' tag and end with the statistic that
    # follows 'ENSEMBLE_' in the product name (mean, median, ...).
    ensemble_stat = product.split('_', 1)[1]
    file_name = f'ITALY_ENSEMBLE_1dy_{name_tail}_{ensemble_stat}.nc'
elif product == 'GSMaP_NoCorrection':
    # The 'NoCorrection' tag goes at the end of the name, not after 'GSMaP'.
    file_name = f'ITALY_DOWN_GSMaP_{time_reso}_{name_tail}_NoCorrection.nc'
else:
    # Single products: `time_reso` must have been set together with `product`.
    file_name = f'ITALY_DOWN_{product}_{time_reso}_{name_tail}.nc'

dir_in = os.path.join(sat_base, file_name)

# os.path.basename avoids nesting the same quote type inside the f-string,
# which is a SyntaxError on Python versions older than 3.12.
print(f'Reading: {os.path.basename(dir_in)}')
data = xr.open_dataset(dir_in)

Load ENSEMBLE_weighted
Reading: ITALY_ENSEMBLE_1dy_2002_2023_npix_2_thr_1_acf_mar_genetic_pearson_weighted.nc


In [10]:
# Position inside `Tr` of the return period of interest.
Tr_index = 3
print('Tr: {} years'.format(Tr[Tr_index]))

Tr: 50 years


In [11]:
# Order the regions by the numeric part of their ISO code ("IT-210" -> 210);
# the helper column only exists while sorting.
ISO_names = (
    ISO_names
    .assign(ISO_num=ISO_names["ISO"].str.split("-").str[1].astype(int))
    .sort_values("ISO_num")
    .drop(columns="ISO_num")
    .reset_index(drop=True)
)

In [12]:
# For every region and every rain gauge, collect:
#   INFO      - gauge coordinates/elevation and the matched grid-cell coordinates
#   WEIBULL   - yearly N, C, W of the gauge, the raw and the downscaled product
#   QUANTILES - MEV quantiles for the return periods in `Tr` and relative errors
# Results are stored as {region_ISO: {station_file: DataFrame}}.
YEAR_START, YEAR_END = 2002, 2023   # years kept from the station statistics
MIN_VALID_YEARS = 8                 # minimum number of gauge years with a valid N


def _mev_quantiles(N, C, W):
    """Return the MEV quantiles for the probabilities in ``Fi``.

    The mean of ``C`` is passed as the ``x0`` argument of
    ``DOWN_raw.mev_quant_update`` (presumably a starting value - confirm in
    DOWN_raw). Quantiles whose returned flag is falsy are replaced by NaN.
    """
    x0 = np.nanmean(C)
    quant, flag = DOWN_raw.mev_quant_update(Fi, x0, N, C, W, thresh=1)
    return np.where(flag, quant, np.nan)


def _satellite_yearly(cell, suffix, years):
    """Return the yearly N/C/W table of one grid cell restricted to ``years``.

    ``suffix`` selects the variable family: 's' reads NYs/CYs/WYs (used as the
    raw product), 'd' reads NYd/CYd/WYd (used as the downscaled product).
    Rows come back in the order of ``years``.
    """
    table = pd.DataFrame({'Year': cell.year.values,
                          'N': cell[f'NY{suffix}'].values,
                          'C': cell[f'CY{suffix}'].values,
                          'W': cell[f'WY{suffix}'].values})
    return table.set_index('Year').loc[years].reset_index()


INFO_region = {}
WEIBULL_region = {}
QUANTILES_region = {}

for rr in range(len(ISO_names)):
    region_ISO = ISO_names.iloc[rr]['ISO']
    region_label = ISO_names[ISO_names['ISO']==region_ISO]
    region_name = region_label['Region'].values[0]

    INFO_dict = {}
    WEIBULL_dict = {}
    QUANTILES_dict = {}

    print(f'{rr+1}: {region_name} ({region_ISO})')

    METADATA_clear = METADATA[METADATA['ISO']==region_ISO].reset_index()

    for nn in range(len(METADATA_clear)):
        filename = str(METADATA_clear['File'].values[nn])
        lat_obs = METADATA_clear['Lat'][nn]
        lon_obs = METADATA_clear['Lon'][nn]
        elev_obs = METADATA_clear['Elevation_update'][nn]

        OBS_pd = pd.read_csv(os.path.join(obs_base, 'DATA_1dy', 'statistics', QC_method, region_ISO, filename))
        OBS_pd = OBS_pd[(OBS_pd['Year']>=YEAR_START)&(OBS_pd['Year']<=YEAR_END)].reset_index(drop=True)

        if len(OBS_pd) == 0:
            continue

        # Years without a valid N are excluded from every quantile fit.
        mask = ~np.isnan(OBS_pd['N'].values)

        OBS_N = OBS_pd['N'].values[mask]
        OBS_C = OBS_pd['C'].values[mask]
        OBS_W = OBS_pd['W'].values[mask]
        OBS_Y = OBS_pd['Year'].values[mask]

        # Skip gauges with fewer than MIN_VALID_YEARS valid years.
        if len(OBS_Y) < MIN_VALID_YEARS:
            continue

        OBS_Q2 = _mev_quantiles(OBS_N, OBS_C, OBS_W)

        # Grid cell closest to the gauge.
        PREC_SAT = data.sel(lat=lat_obs, lon=lon_obs, method='nearest')
        lat_ref = float(PREC_SAT.lat.values)
        lon_ref = float(PREC_SAT.lon.values)

        INFO = pd.DataFrame({'lat_obs':[lat_obs], 'lon_obs':[lon_obs], 'elev_obs':[elev_obs], 'lat_ref':[lat_ref], 'lon_ref':[lon_ref]})

        # Satellite tables are aligned row by row with OBS_pd (all gauge years),
        # so `mask` can be applied to them directly.
        Sat_raw_pd = _satellite_yearly(PREC_SAT, 's', OBS_pd['Year'])
        Sat_down_pd = _satellite_yearly(PREC_SAT, 'd', OBS_pd['Year'])

        # Fit the satellite quantiles on the same valid years as the gauge;
        # otherwise gauge and satellite are compared over different samples.
        SAT_raw_Q2 = _mev_quantiles(Sat_raw_pd.N.values[mask], Sat_raw_pd.C.values[mask], Sat_raw_pd.W.values[mask])
        SAT_down_Q2 = _mev_quantiles(Sat_down_pd.N.values[mask], Sat_down_pd.C.values[mask], Sat_down_pd.W.values[mask])

        # The yearly table keeps every gauge year, including rows with NaN N.
        WEIBULL = pd.DataFrame({'Year':OBS_pd.Year,
                    'N_obs':OBS_pd.N, 'C_obs':OBS_pd.C, 'W_obs':OBS_pd.W,
                    'N_raw':Sat_raw_pd.N, 'C_raw':Sat_raw_pd.C, 'W_raw':Sat_raw_pd.W,
                    'N_down':Sat_down_pd.N, 'C_down':Sat_down_pd.C, 'W_down':Sat_down_pd.W})

        # Relative error of each product with respect to the gauge quantiles.
        re_raw = (SAT_raw_Q2 - OBS_Q2)/OBS_Q2
        re_down = (SAT_down_Q2 - OBS_Q2)/OBS_Q2

        QUANTILES = pd.DataFrame({'Tr':Tr, 'OBS':OBS_Q2, 'SAT_raw':SAT_raw_Q2, 'SAT_down':SAT_down_Q2, 'RE_raw':re_raw, 'RE_down':re_down})

        INFO_dict[filename] = INFO
        WEIBULL_dict[filename] = WEIBULL
        QUANTILES_dict[filename] = QUANTILES

    INFO_region[region_ISO] = INFO_dict
    WEIBULL_region[region_ISO] = WEIBULL_dict
    QUANTILES_region[region_ISO] = QUANTILES_dict

1: Piedmont (IT-210)
2: Aosta Valley (IT-230)
3: Lombardy (IT-250)
4: Trentino (IT-321)
5: Alto Adige (IT-322)
6: Veneto (IT-340)
7: Friuli-Venezia Giulia (IT-360)
8: Liguria (IT-420)
9: Emilia-Romagna (IT-450)
10: Tuscany (IT-520)
11: Umbria (IT-550)
12: Marche (IT-570)
13: Lazio (IT-620)
14: Abruzzo (IT-650)
15: Molise (IT-670)
16: Campania (IT-720)
17: Apulia (IT-750)
18: Basilicata (IT-770)
19: Calabria (IT-780)
20: Sicily (IT-820)
21: Sardinia (IT-880)


In [13]:
# if product == 'GSMaP_NoCorrection':
#     Mev_d_OLD = np.where(Mev_d_OLD>=700, 500, Mev_d_OLD)

In [14]:
# Output file of the per-station statistics. The former special case for
# 'GSMaP_NoCorrection' produced exactly this same name, so one template
# covers every product.
hdf5_file = f"/media/arturo/T9/Data/Italy/statistics/statistics_obs_{product}.h5"

print(f'Export as: {hdf5_file}')

# One group per region and station, each holding the INFO, WEIBULL and
# QUANTILES tables. mode='w' starts from a new file on every run.
with pd.HDFStore(hdf5_file, mode='w') as store:

    for region_ISO, region_info in INFO_region.items():

        # stations of the region
        for station, info_df in region_info.items():

            weibull_df = WEIBULL_region[region_ISO][station]
            quantiles_df = QUANTILES_region[region_ISO][station]

            store[f"/{region_ISO}/{station}/INFO"] = info_df
            store[f"/{region_ISO}/{station}/WEIBULL"] = weibull_df
            store[f"/{region_ISO}/{station}/QUANTILES"] = quantiles_df

Export as: /media/arturo/T9/Data/Italy/statistics/statistics_obs_ENSEMBLE_weighted.h5
