In [12]:
import os
import numpy as np
import xarray as xr
import pandas as pd
import seaborn as sns
import geopandas as gpd

import cartopy.crs as ccrs
import scipy.stats as stats

from matplotlib import patches
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.colors as mcolors
from matplotlib.cm import ScalarMappable

import shapely.geometry as sg
from shapely.geometry import Point

import sys
sys.path.append(os.path.abspath(".."))
from function import DOWN_raw
from function import ART_preprocessing as ART_pre

from playsound import playsound

import warnings
warnings.filterwarnings('ignore')

In [13]:
# product, time_reso = 'IMERG', '1dy'
product, time_reso = 'CMORPH', '3h'
# product, time_reso = 'ERA5', '3h'
# product, time_reso = 'MSWEP', '3h'
# product, time_reso = 'CHIRPS', '1dy'
# product, time_reso = 'GSMaP', '3h'
# product, time_reso = 'ENSEMBLE_median', '1dy'

In [14]:
def get_parameters(product, dir_base, val_max=1.1, corrected=False, corr_method=None):
    # The list bellow is the rain gauges with suspect data
    list_remove = [
            'IT-820_1424_FTS_1440_QCv4.csv', 'IT-250_602781_FTS_1440_QCv4.csv', 
            'IT-250_602779_FTS_1440_QCv4.csv', 'IT-780_2370_FTS_1440_QCv4.csv', 
            'IT-750_450_FTS_1440_QCv4.csv', 'IT-520_TOS11000099_FTS_1440_QCv4.csv',
            'IT-520_TOS11000080_FTS_1440_QCv4.csv', 'IT-520_TOS11000072_FTS_1440_QCv4.csv',
            'IT-520_TOS11000060_FTS_1440_QCv4.csv', 'IT-520_TOS11000025_FTS_1440_QCv4.csv',
            'IT-520_TOS09001200_FTS_1440_QCv4.csv', 'IT-520_TOS02000237_FTS_1440_QCv4.csv',
            'IT-230_1200_FTS_1440_QCv4.csv'
            ]

    if corrected == True:
        if corr_method == 'QQc':
            print(f"Loading {product} corrected statistics...")
            hdf5_file = os.path.join(dir_base,'statistics','QQc',f'statistics_obs_{product}_corrected_{corr_method}.h5')
        else:
            raise ValueError("corr_method must be specified between 'QQc' or 'LRC' when corrected=True")
    else:
        hdf5_file = os.path.join(dir_base,'statistics',f'statistics_obs_{product}.h5')
    data = pd.HDFStore(hdf5_file, mode='r')

    keys = data.keys()
    keys_QUANTILES = [k for k in keys if k.endswith("/QUANTILES")]
    keys_INFO = [k for k in keys if k.endswith('/INFO')]

    stations = []
    lats, lons, elevs = [], [], []
    Nobs, Cobs, Wobs = [], [], []
    OBS, MEVd = [], []
    for nn in range(len(keys_INFO)):
        station = keys_INFO[nn].split('/')[2]
        
        if station in list_remove:
            continue
        else:
            lat = data[keys_INFO[nn]]['lat_obs'].values[0]
            lon = data[keys_INFO[nn]]['lon_obs'].values[0]
            elev = data[keys_INFO[nn]]['elev_obs'].values[0]
            Obs_ = data[keys_QUANTILES[nn]].OBS.values[3]
            Down_ = data[keys_QUANTILES[nn]].SAT_down.values[3] 

            stations.append(station)
            lats.append(lat)
            lons.append(lon)
            elevs.append(elev)
            OBS.append(Obs_)
            MEVd.append(Down_)

    DF_DATA = pd.DataFrame({'STATION':stations, 'LON':lons, 'LAT':lats, 'ELEV':elevs, 'OBS':OBS, 'MEVd':MEVd})

    return DF_DATA

In [None]:
frac = 0.7
seeds_list = [7, 19, 31, 53, 89, 127, 211, 307, 401, 509, 613, 727, 839, 947, 1051]

for seed in seeds_list:
    print(f'Seed: {seed}')
    
    dir_ = os.path.join('/','media','arturo','T9','Data','Italy')
    DF_INPUT = get_parameters('ENSEMBLE_ALL_MEDIAN',dir_)

    obs_base = os.path.join('/','media','arturo','T9','Data','Italy','Rain_Gauges_QC')
    METADATA = pd.read_csv(os.path.join(obs_base, 'data', 'METADATA', 'METADATA_FTS_QCv4_Case1_wAIRHO_v3_1dy.csv'))
    METADATA_CLEAR = METADATA[METADATA['File'].isin(DF_INPUT['STATION'])].reset_index(drop=True)

    Q_train_list = []
    Q_val_list = []

    for iso in METADATA_CLEAR['ISO'].unique():

        META_iso = METADATA_CLEAR[METADATA_CLEAR['ISO'] == iso]

        # Si una regi贸n tiene muy pocas estaciones, evita errores
        if len(META_iso) < 2:
            Q_train_list.append(META_iso)
            continue

        META_80 = META_iso.sample(frac=frac, random_state=seed)
        META_20 = META_iso.drop(META_80.index)

        Q_train_list.append(META_80)
        Q_val_list.append(META_20)

    Q_train = pd.concat(Q_train_list, ignore_index=True)
    Q_val = pd.concat(Q_val_list, ignore_index=True)
    list_train = Q_train.File.values

    dir_base = os.path.join('/','media','arturo','T9','Data','Italy','Satellite','6_DOWN_BCorrected','QUANTILE')
    dir_input = os.path.join(os.path.join(dir_base, f'ITALY_DOWN_{product}_{time_reso}_2002_2023_npix_2_thr_1_acf_mar_genetic_pearson_QQc_LLc_{str(seed).zfill(4)}.nc'))
    DATA = xr.open_dataset(dir_input)


    Tr_index = 3
    # print(f'Tr: {Tr[Tr_index]} years')
    Tr = [5,  10,  20,  50, 100, 200]
    Fi = 1 - 1/np.array(Tr)
    ISO_names = np.unique(METADATA.ISO.values)

    INFO_region = {}
    WEIBULL_region = {}
    QUANTILES_region = {}

    for rr in range(len(ISO_names)):
        region_ISO = ISO_names[rr]

        INFO_dict = {}
        QUANTILES_dict = {}

        # print(f'{rr+1}: {region_ISO}')

        METADATA_clear = Q_val[Q_val['ISO']==region_ISO].reset_index(inplace=False) # only validation METADATA

        for nn in range(len(METADATA_clear)):#len(METADATA_clear)
            filename = f'{METADATA_clear['File'].values[nn]}'
            lat_obs = METADATA_clear['Lat'][nn]
            lon_obs = METADATA_clear['Lon'][nn]
            elev_obs = METADATA_clear['DEM_Elevation'][nn]

            OBS_pd = pd.read_csv(os.path.join(obs_base, 'Weibull', '1dy', region_ISO, filename))
            OBS_pd = OBS_pd[(OBS_pd['Year']>=2002)&(OBS_pd['Year']<=2023)].reset_index(drop=True)
            
            if len(OBS_pd) == 0:
                    continue

            else:
                OBS_N = OBS_pd['N'].values
                OBS_C = OBS_pd['C'].values
                OBS_W = OBS_pd['W'].values
                OBS_Y = OBS_pd['Year'].values

                mask = ~np.isnan(OBS_N)

                OBS_N = OBS_N[mask]
                OBS_C = OBS_C[mask]
                OBS_W = OBS_W[mask]
                OBS_Y = OBS_Y[mask]

                if len(OBS_Y) >= 8: # greather than 8 years

                    x0 = np.nanmean(OBS_C)
                    OBS_Q, flag = DOWN_raw.mev_quant_update(Fi, x0, OBS_N, OBS_C, OBS_W, thresh=1)
                    OBS_Q2 = np.where(flag, OBS_Q, np.nan)

                    PREC_SAT = DATA.sel(lat=lat_obs, lon=lon_obs, method='nearest')
                    lat_ref = float(PREC_SAT.lat.values)
                    lon_ref = float(PREC_SAT.lon.values)

                    INFO = pd.DataFrame({'lat_obs':[lat_obs], 'lon_obs':[lon_obs], 'elev_obs':[elev_obs], 'lat_ref':[lat_ref], 'lon_ref':[lon_ref]})
                    
                    Mevd_tmp = PREC_SAT.MEVd_Down.data
                    Mevd_LLc_tmp = PREC_SAT.MEVd_LLc.data
                    Mevd_QQc_tmp = PREC_SAT.MEVd_QQc.data
                    
                    QUANTILES = pd.DataFrame({'Tr':[50], 'OBS':OBS_Q2[3], 'DOWN':Mevd_tmp, 'LLc':Mevd_LLc_tmp, 'QQc':Mevd_QQc_tmp})
                    
                    INFO_dict[filename] = INFO
                    QUANTILES_dict[filename] = QUANTILES
        
        INFO_region[region_ISO] = INFO_dict
        QUANTILES_region[region_ISO] = QUANTILES_dict

    dir_out = os.path.join('/','media','arturo','T9','Data','Italy','statistics','QUANTILE')
    hdf5_file = os.path.join(dir_out, f'statistics_obs_{product}_corrected_QQc_LLc_{str(seed).zfill(4)}.h5')

    print(f'Export as: {hdf5_file}')

    with pd.HDFStore(hdf5_file, mode='w') as store:

        for region_ISO in INFO_region.keys():
            stations = INFO_region[region_ISO].keys()  # las estaciones de la regi贸n

            for station in stations:

                info_df = INFO_region[region_ISO][station]
                quantiles_df = QUANTILES_region[region_ISO][station]

                store[f"/{region_ISO}/{station}/INFO"] = info_df
                store[f"/{region_ISO}/{station}/QUANTILES"] = quantiles_df
    print()

Seed: 7
Export as: /media/arturo/T9/Data/Italy/statistics/QUANTILE/statistics_obs_CMORPH_corrected_QQc_LLc_0007.h5

Seed: 19
Export as: /media/arturo/T9/Data/Italy/statistics/QUANTILE/statistics_obs_CMORPH_corrected_QQc_LLc_0019.h5

Seed: 31
Export as: /media/arturo/T9/Data/Italy/statistics/QUANTILE/statistics_obs_CMORPH_corrected_QQc_LLc_0031.h5

Seed: 53
Export as: /media/arturo/T9/Data/Italy/statistics/QUANTILE/statistics_obs_CMORPH_corrected_QQc_LLc_0053.h5

Seed: 89
Export as: /media/arturo/T9/Data/Italy/statistics/QUANTILE/statistics_obs_CMORPH_corrected_QQc_LLc_0089.h5

Seed: 127


In [None]:
playsound("../sound/HOMER_DOH.mp3")

In [None]:
sys.exit()

SystemExit: 

## For Individual seed

In [None]:
frac = 0.7
seeds_list = [7, 19, 31, 53, 89, 127, 211, 307, 401, 509, 613, 727, 839, 947, 1051]
# seed = seeds_list[0]

In [None]:
def get_parameters(product, dir_base, val_max=1.1, corrected=False, corr_method=None):
    # The list bellow is the rain gauges with suspect data
    list_remove = [
            'IT-820_1424_FTS_1440_QCv4.csv', 'IT-250_602781_FTS_1440_QCv4.csv', 
            'IT-250_602779_FTS_1440_QCv4.csv', 'IT-780_2370_FTS_1440_QCv4.csv', 
            'IT-750_450_FTS_1440_QCv4.csv', 'IT-520_TOS11000099_FTS_1440_QCv4.csv',
            'IT-520_TOS11000080_FTS_1440_QCv4.csv', 'IT-520_TOS11000072_FTS_1440_QCv4.csv',
            'IT-520_TOS11000060_FTS_1440_QCv4.csv', 'IT-520_TOS11000025_FTS_1440_QCv4.csv',
            'IT-520_TOS09001200_FTS_1440_QCv4.csv', 'IT-520_TOS02000237_FTS_1440_QCv4.csv',
            'IT-230_1200_FTS_1440_QCv4.csv'
            ]

    if corrected == True:
        if corr_method == 'QQc':
            print(f"Loading {product} corrected statistics...")
            hdf5_file = os.path.join(dir_base,'statistics','QQc',f'statistics_obs_{product}_corrected_{corr_method}.h5')
        else:
            raise ValueError("corr_method must be specified between 'QQc' or 'LRC' when corrected=True")
    else:
        hdf5_file = os.path.join(dir_base,'statistics',f'statistics_obs_{product}.h5')
    data = pd.HDFStore(hdf5_file, mode='r')

    keys = data.keys()
    keys_QUANTILES = [k for k in keys if k.endswith("/QUANTILES")]
    keys_INFO = [k for k in keys if k.endswith('/INFO')]

    stations = []
    lats, lons, elevs = [], [], []
    Nobs, Cobs, Wobs = [], [], []
    OBS, MEVd = [], []
    for nn in range(len(keys_INFO)):
        station = keys_INFO[nn].split('/')[2]
        
        if station in list_remove:
            continue
        else:
            lat = data[keys_INFO[nn]]['lat_obs'].values[0]
            lon = data[keys_INFO[nn]]['lon_obs'].values[0]
            elev = data[keys_INFO[nn]]['elev_obs'].values[0]
            Obs_ = data[keys_QUANTILES[nn]].OBS.values[3]
            Down_ = data[keys_QUANTILES[nn]].SAT_down.values[3] 

            stations.append(station)
            lats.append(lat)
            lons.append(lon)
            elevs.append(elev)
            OBS.append(Obs_)
            MEVd.append(Down_)

    DF_DATA = pd.DataFrame({'STATION':stations, 'LON':lons, 'LAT':lats, 'ELEV':elevs, 'OBS':OBS, 'MEVd':MEVd})

    return DF_DATA

In [None]:
dir_ = os.path.join('/','media','arturo','T9','Data','Italy')
DF_INPUT = get_parameters('ENSEMBLE_ALL_MEDIAN',dir_)

In [None]:
obs_base = os.path.join('/','media','arturo','T9','Data','Italy','Rain_Gauges_QC')
METADATA = pd.read_csv(os.path.join(obs_base, 'data', 'METADATA', 'METADATA_FTS_QCv4_Case1_wAIRHO_v3_1dy.csv'))
METADATA_CLEAR = METADATA[METADATA['File'].isin(DF_INPUT['STATION'])].reset_index(drop=True)

Q_train_list = []
Q_val_list = []

for iso in METADATA_CLEAR['ISO'].unique():

    META_iso = METADATA_CLEAR[METADATA_CLEAR['ISO'] == iso]

    # Si una regi贸n tiene muy pocas estaciones, evita errores
    if len(META_iso) < 2:
        Q_train_list.append(META_iso)
        continue

    META_80 = META_iso.sample(frac=frac, random_state=seed)
    META_20 = META_iso.drop(META_80.index)

    Q_train_list.append(META_80)
    Q_val_list.append(META_20)

Q_train = pd.concat(Q_train_list, ignore_index=True)
Q_val = pd.concat(Q_val_list, ignore_index=True)
list_train = Q_train.File.values

In [None]:
dir_base = os.path.join('/','media','arturo','T9','Data','Italy','Satellite','6_DOWN_BCorrected','QUANTILE')
dir_input = os.path.join(os.path.join(dir_base, f'ITALY_DOWN_{product}_{time_reso}_2002_2023_npix_2_thr_1_acf_mar_genetic_pearson_QQc_LLc_{str(seed).zfill(4)}.nc'))
DATA = xr.open_dataset(dir_input)

In [None]:

Tr_index = 3
# print(f'Tr: {Tr[Tr_index]} years')
Tr = [5,  10,  20,  50, 100, 200]
Fi = 1 - 1/np.array(Tr)
ISO_names = np.unique(METADATA.ISO.values)

INFO_region = {}
WEIBULL_region = {}
QUANTILES_region = {}

for rr in range(len(ISO_names)):
    region_ISO = ISO_names[rr]

    INFO_dict = {}
    QUANTILES_dict = {}

    # print(f'{rr+1}: {region_ISO}')

    METADATA_clear = Q_val[Q_val['ISO']==region_ISO].reset_index(inplace=False) # only validation METADATA

    for nn in range(len(METADATA_clear)):#len(METADATA_clear)
        filename = f'{METADATA_clear['File'].values[nn]}'
        lat_obs = METADATA_clear['Lat'][nn]
        lon_obs = METADATA_clear['Lon'][nn]
        elev_obs = METADATA_clear['DEM_Elevation'][nn]

        OBS_pd = pd.read_csv(os.path.join(obs_base, 'Weibull', '1dy', region_ISO, filename))
        OBS_pd = OBS_pd[(OBS_pd['Year']>=2002)&(OBS_pd['Year']<=2023)].reset_index(drop=True)
        
        if len(OBS_pd) == 0:
                continue

        else:
            OBS_N = OBS_pd['N'].values
            OBS_C = OBS_pd['C'].values
            OBS_W = OBS_pd['W'].values
            OBS_Y = OBS_pd['Year'].values

            mask = ~np.isnan(OBS_N)

            OBS_N = OBS_N[mask]
            OBS_C = OBS_C[mask]
            OBS_W = OBS_W[mask]
            OBS_Y = OBS_Y[mask]

            if len(OBS_Y) >= 8: # greather than 8 years

                x0 = np.nanmean(OBS_C)
                OBS_Q, flag = DOWN_raw.mev_quant_update(Fi, x0, OBS_N, OBS_C, OBS_W, thresh=1)
                OBS_Q2 = np.where(flag, OBS_Q, np.nan)

                PREC_SAT = DATA.sel(lat=lat_obs, lon=lon_obs, method='nearest')
                lat_ref = float(PREC_SAT.lat.values)
                lon_ref = float(PREC_SAT.lon.values)

                INFO = pd.DataFrame({'lat_obs':[lat_obs], 'lon_obs':[lon_obs], 'elev_obs':[elev_obs], 'lat_ref':[lat_ref], 'lon_ref':[lon_ref]})
                
                Mevd_tmp = PREC_SAT.MEVd_Down.data
                Mevd_LLc_tmp = PREC_SAT.MEVd_LLc.data
                Mevd_QQc_tmp = PREC_SAT.MEVd_QQc.data
                
                QUANTILES = pd.DataFrame({'Tr':[50], 'OBS':OBS_Q2[3], 'DOWN':Mevd_tmp, 'LLc':Mevd_LLc_tmp, 'QQc':Mevd_QQc_tmp})
                
                INFO_dict[filename] = INFO
                QUANTILES_dict[filename] = QUANTILES
    
    INFO_region[region_ISO] = INFO_dict
    QUANTILES_region[region_ISO] = QUANTILES_dict

In [None]:
dir_out = os.path.join('/','media','arturo','T9','Data','Italy','statistics','QUANTILE')
hdf5_file = os.path.join(dir_out, f'statistics_obs_{product}_corrected_QQc_LLc_{str(seed).zfill(4)}.h5')

print(f'Export as: {hdf5_file}')

with pd.HDFStore(hdf5_file, mode='w') as store:

    for region_ISO in INFO_region.keys():
        stations = INFO_region[region_ISO].keys()  # las estaciones de la regi贸n

        for station in stations:

            info_df = INFO_region[region_ISO][station]
            quantiles_df = QUANTILES_region[region_ISO][station]

            store[f"/{region_ISO}/{station}/INFO"] = info_df
            store[f"/{region_ISO}/{station}/QUANTILES"] = quantiles_df
print()

Export as: /media/arturo/T9/Data/Italy/statistics/QUANTILE/statistics_obs_IMERG_corrected_QQc_LLc_0008.h5

