In [1]:
import os
import numpy as np
import xarray as xr
import pandas as pd
import geopandas as gpd

import shapely.vectorized as sv

import cartopy.crs as ccrs
import cartopy.feature as cfeature

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from playsound import playsound

import sys
sys.path.append(os.path.abspath(".."))
from function import DOWN_raw
from function import ART_statistic as ART_sta
from function import ART_downscale as ART_down

import warnings
warnings.filterwarnings('ignore')

playsound is relying on another python subprocess. Please use `pip install pygobject` if you want playsound to run more efficiently.


In [2]:
def _data_root():
    """Return the root directory under which the data drive is mounted."""
    # COMPUTERNAME is not defined on every system; fall back to '/' instead of
    # failing with a KeyError when it is missing.
    if os.environ.get('COMPUTERNAME', '') == 'BR_DELL':
        return os.path.join('/', 'run')
    return os.path.join('/')


def _training_stations(METADATA, frac, seed):
    """Draw the training stations: a `frac` sample of the stations of every ISO group.

    Groups with fewer than two stations are kept whole, so that sampling never
    leaves such a group without any training station. The stations that are not
    drawn form the validation set; it is not needed here and can be rebuilt
    from the same `seed`.
    """
    train_parts = []
    for iso in METADATA['ISO'].unique():
        META_iso = METADATA[METADATA['ISO'] == iso]
        if len(META_iso) < 2:
            train_parts.append(META_iso)
        else:
            train_parts.append(META_iso.sample(frac=frac, random_state=seed))
    return pd.concat(train_parts, ignore_index=True)


def _observed_parameters(Q_train, weibul_base, year_min=2002, year_max=2023):
    """Pool the yearly N, C and W values of all training stations.

    Each station file is read from `weibul_base/<ISO>/<File>` and restricted to
    the years `year_min`..`year_max` (both inclusive).

    Returns
    -------
    tuple of three lists: (N_obs, C_obs, W_obs)
    """
    N_obs, C_obs, W_obs = [], [], []
    for iso, file_name in zip(Q_train['ISO'], Q_train['File']):
        statistic = pd.read_csv(os.path.join(weibul_base, iso, file_name))
        statistic = statistic[statistic['Year'].between(year_min, year_max)]
        N_obs.extend(statistic['N'].values)
        C_obs.extend(statistic['C'].values)
        W_obs.extend(statistic['W'].values)
    return N_obs, C_obs, W_obs


def _qq_correct_field(obs_values, field):
    """Bias-correct the non-NaN cells of `field` and put them back in place.

    The validity mask is computed from `field` itself. Sharing one mask between
    different fields either fails with a shape error or writes values into the
    wrong cells as soon as their NaN patterns differ.

    Returns a NumPy array with the shape of `field`, NaN where `field` is NaN.
    """
    values = field.values
    valid = ~np.isnan(values)
    corrected = np.full(values.shape, np.nan)
    # Relies on ISIMIP_QM returning one corrected value per input value.
    corrected[valid] = ART_sta.ISIMIP_QM(obs_values, values[valid])
    return corrected


def _set_variable_attrs(dataset):
    """Attach units / long_name / origname metadata to the output variables."""
    var_attrs = {
        "NYs": ("# days", "Corrected Raw Number of Wet Days", "Raw Wet Days"),
        "CYs": ("nondimensional", "Corrected Raw Scale Parameter", "Raw Scale"),
        "WYs": ("nondimensional", "Corrected Raw Shape Parameter", "Raw Shape"),
        "Mev_s": ("mm/day", "Corrected Raw Extreme Quantiles", "Raw Ext-Quant"),
        "NYd": ("# days", "Corrected Downscaled Number of Wet Days", "Down Wet Days"),
        "CYd": ("nondimensional", "Corrected Downscaled Scale Parameter", "Down Scale"),
        "WYd": ("nondimensional", "Corrected Downscaled Shape Parameter", "Down Shape"),
        "Mev_d": ("mm/day", "Corrected Downscaled Extreme Quantiles", "Down Ext-Quant"),
    }
    for name, (units, long_name, origname) in var_attrs.items():
        dataset[name].attrs["units"] = units
        dataset[name].attrs["long_name"] = long_name
        dataset[name].attrs["origname"] = origname

    dataset.lat.attrs["units"] = "degrees_north"
    dataset.lat.attrs["long_name"] = "Latitude"

    dataset.lon.attrs["units"] = "degrees_east"
    dataset.lon.attrs["long_name"] = "Longitude"


def correction_quantile_quantile(product, time_reso, seed, frac=0.7):
    """Bias-correct the yearly parameters of one satellite product and export them.

    Steps:
      1. Draw a `frac` sample of the rain-gauge stations of every ISO group
         (reproducible through `seed`) and pool their yearly N, C and W values
         for 2002-2023.
      2. Read the fields NYs, CYs, WYs, NYd, CYd and WYd of `product` and keep
         only the grid cells inside Italy.
      3. Correct every field against the pooled station values with
         `ART_sta.ISIMIP_QM`.
      4. Derive the extreme quantiles for the return periods 5-200 with
         `ART_down.pre_quantiles_array`.
      5. Write everything to a NetCDF file whose name is the input name with
         `_pearson` replaced by `_pearson_QQc`.

    Parameters
    ----------
    product : str
        Product name used in the input file name (e.g. 'IMERG'). For 'CHIRPS',
        zeros in the six fields are treated as missing values.
    time_reso : str
        Temporal-resolution tag used in the input file name (e.g. '1dy', '3h').
    seed : int
        Random state of the station sampling.
    frac : float, default 0.7
        Fraction of stations per ISO group used for the correction.

    Returns
    -------
    None
        The result is written to disk; two progress lines are printed.

    Raises
    ------
    SystemExit
        If the Italy GeoJSON file is not found.
    """
    dir_font = _data_root()

    # ==================================================================================================
    # LOAD ITALY SHAPEFILE
    veneto_dir = os.path.join(dir_font,'media','arturo','T9','Data','shapes','Europa','Italy')
    italy_file = os.path.join(veneto_dir, 'Italy_clear.geojson')
    # Check the file that is actually read, not only its parent directory.
    if not os.path.exists(italy_file):
        raise SystemExit(f"File not found: {italy_file}")
    ITALY = gpd.read_file(italy_file)

    obs_base = os.path.join(dir_font,'media','arturo','T9','Data','Italy','Rain_Gauges_QC')
    weibul_base = os.path.join(obs_base, 'Weibull', '1dy')

    # NOTE(review): the satellite input and the output directory are rooted at '/'
    # and ignore dir_font, unlike the paths above - confirm this is intended on BR_DELL.
    sat_base = os.path.join('/','media','arturo','T9','Data','Italy','Satellite','5_DOWN')
    dir_base = os.path.join('/','media','arturo','T9','Data','Italy','Satellite')

    # ==================================================================================================
    # READ OBS METADATA
    METADATA = pd.read_csv(os.path.join(obs_base, 'data', 'METADATA', 'METADATA_FTS_QCv4_Case1_wAIRHO_v3_1dy.csv'))
    METADATA["Lat"] = np.round(METADATA["Lat"], 6)
    METADATA["Lon"] = np.round(METADATA["Lon"], 6)

    # ==================================================================================================
    # SELECT TRAINING STATIONS AND POOL THEIR PARAMETERS
    Q_train = _training_stations(METADATA, frac, seed)
    N_obs, C_obs, W_obs = _observed_parameters(Q_train, weibul_base)
    obs_by_prefix = {'N': N_obs, 'C': C_obs, 'W': W_obs}

    # ==================================================================================================
    # LOAD RSR PRODUCT AND APPLY QQ BIAS CORRECTION
    dir_in = os.path.join(sat_base, f'ITALY_DOWN_{product}_{time_reso}_2002_2023_npix_2_thr_1_acf_mar_genetic_pearson.nc')
    file_in = os.path.basename(dir_in)
    # Computed outside the f-string: reusing the same quote type inside an
    # f-string expression is a SyntaxError before Python 3.12.
    print(f'Reading    : {file_in}')

    corrected = {}
    # The context manager closes the file handle once all values have been read.
    with xr.open_dataset(dir_in) as DATA:
        lats = DATA.lat.load()
        lons = DATA.lon.load()
        Sat_year = DATA.year.values

        lon2d, lat2d = np.meshgrid(lons, lats)
        italy_geom = ITALY.union_all()
        mask_italy = sv.contains(italy_geom, lon2d, lat2d)

        for name in ('NYs', 'CYs', 'WYs', 'NYd', 'CYd', 'WYd'):
            field = DATA[name]
            if product == 'CHIRPS':
                # Zeros are treated as missing values for this product.
                field = field.where(field != 0)
            # Keep only the grid cells inside Italy (2-D mask over lat/lon).
            field = field.where(mask_italy)
            # The first letter of the field name selects the matching station parameter.
            corrected[name] = _qq_correct_field(obs_by_prefix[name[0]], field)

    # ==================================================================================================
    # COMPUTE THE EXTREME QUANTILES
    Tr = [5,  10,  20,  50, 100, 200]

    # NOTE(review): the meaning of the trailing argument `1` is defined in
    # ART_downscale and is not visible here.
    Mevs_corrected = ART_down.pre_quantiles_array(
        corrected['NYs'], corrected['CYs'], corrected['WYs'],
        Tr,
        lats, lons,
        1)

    Mevd_corrected = ART_down.pre_quantiles_array(
        corrected['NYd'], corrected['CYd'], corrected['WYd'],
        Tr,
        lats, lons,
        1)

    # ==================================================================================================
    # CREATE THE NETCDF
    yearly_dims = ("year", "lat", "lon")
    quantile_dims = ("Tr", "lat", "lon")
    DOWN_corrected = xr.Dataset(
        data_vars={
            "NYs": (yearly_dims, corrected['NYs']),
            "CYs": (yearly_dims, corrected['CYs']),
            "WYs": (yearly_dims, corrected['WYs']),
            "NYd": (yearly_dims, corrected['NYd']),
            "CYd": (yearly_dims, corrected['CYd']),
            "WYd": (yearly_dims, corrected['WYd']),
            "Mev_d": (quantile_dims, Mevd_corrected),
            "Mev_s": (quantile_dims, Mevs_corrected),
        },
        coords={
            'year': Sat_year,
            'lat': lats,
            'lon': lons,
        },
        # Report the fraction that was actually used instead of a fixed "80%".
        attrs=dict(description=f"{product} Weibull parameters and MEV corrected applying ISIMIP_QM method using {frac:.0%} of stations in Italy",))

    _set_variable_attrs(DOWN_corrected)

    # ==================================================================================================
    # EXPORT AS NETCDF
    file_out = file_in.replace('_pearson', '_pearson_QQc')
    PRE_out = os.path.join(dir_base, '6_DOWN_BCorrected', file_out)
    print(f'Exportin as: {file_out}')
    DOWN_corrected.to_netcdf(PRE_out)
    print()

In [3]:
# Sampling configuration shared by every product: random state and the
# fraction of stations per ISO group used for the correction.
seed, frac = 23, 0.7

# (product, temporal resolution) pairs, processed in this order.
PRODUCTS = [
    ('IMERG', '1dy'),
    ('CMORPH', '3h'),
    ('ERA5', '3h'),
    ('MSWEP', '3h'),
    ('CHIRPS', '1dy'),
    ('GSMaP', '3h'),
]

for product, time_reso in PRODUCTS:
    # Pass the configured `frac` so that editing it above really takes effect
    # (the calls previously hard-coded 0.7 and ignored the variable).
    correction_quantile_quantile(product, time_reso, seed, frac=frac)

Reading    : ITALY_DOWN_IMERG_1dy_2002_2023_npix_2_thr_1_acf_mar_genetic_pearson.nc
Exportin as: ITALY_DOWN_IMERG_1dy_2002_2023_npix_2_thr_1_acf_mar_genetic_pearson_QQc.nc

Reading    : ITALY_DOWN_CMORPH_3h_2002_2023_npix_2_thr_1_acf_mar_genetic_pearson.nc
Exportin as: ITALY_DOWN_CMORPH_3h_2002_2023_npix_2_thr_1_acf_mar_genetic_pearson_QQc.nc

Reading    : ITALY_DOWN_ERA5_3h_2002_2023_npix_2_thr_1_acf_mar_genetic_pearson.nc
Exportin as: ITALY_DOWN_ERA5_3h_2002_2023_npix_2_thr_1_acf_mar_genetic_pearson_QQc.nc

Reading    : ITALY_DOWN_MSWEP_3h_2002_2023_npix_2_thr_1_acf_mar_genetic_pearson.nc
Exportin as: ITALY_DOWN_MSWEP_3h_2002_2023_npix_2_thr_1_acf_mar_genetic_pearson_QQc.nc

Reading    : ITALY_DOWN_CHIRPS_1dy_2002_2023_npix_2_thr_1_acf_mar_genetic_pearson.nc
Exportin as: ITALY_DOWN_CHIRPS_1dy_2002_2023_npix_2_thr_1_acf_mar_genetic_pearson_QQc.nc

Reading    : ITALY_DOWN_GSMaP_3h_2002_2023_npix_2_thr_1_acf_mar_genetic_pearson.nc
Exportin as: ITALY_DOWN_GSMaP_3h_2002_2023_npix_2_thr_1_

In [4]:
# Play a sound file; presumably an audible cue that the cells above have finished running.
playsound("../sound/HOMER_DOH.mp3")