In [1]:
import os
import numpy as np
import xarray as xr
import pandas as pd
import seaborn as sns
import geopandas as gpd

import cartopy.crs as ccrs
import cartopy.feature as cf
import cartopy.feature as cfeature

import scipy.stats as stats

from matplotlib import patches
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.colors as mcolors
import matplotlib.gridspec as gridspec

import shapely.geometry as sg
from shapely.geometry import Point

import sys
sys.path.append(os.path.abspath(".."))
from function import DOWN_raw

import warnings
warnings.filterwarnings('ignore')

## Export Weibull parameters and percentage of missing data per year

In [2]:
# First and last year of the study period, and how many years that is
# when both end points are counted.
yy_s = 2002
yy_e = 2023
years_num = (yy_e - yy_s) + 1

In [3]:
# Study-area settings: longitude/latitude limits of the bounding box,
# a label for the area, and a tolerance value.
# NOTE(review): presumably the limits are in decimal degrees and `toll` is a
# coordinate tolerance -- confirm against the cells that consume them.
lon_min, lon_max = 6.5, 19
lat_min, lat_max = 36.5, 48
area = 'ITALY'
toll = 0.002

In [4]:
# Folder that holds the Italian boundary shapes and the regions file inside it.
veneto_dir = os.path.join('/','media','arturo','T9','Data','shapes','Europa','Italy')
regions_file = os.path.join(veneto_dir, 'Italy_regions.geojson')

# Check the file that is actually read (not just its parent folder) so that a
# missing GeoJSON stops the notebook with a message naming the real culprit.
if not os.path.isfile(regions_file):
    raise SystemExit(f"File not found: {regions_file}")

REGIONS = gpd.read_file(regions_file)

In [5]:
# Base directory of the rain-gauge observations (absolute, machine-specific path).
obs_base = os.path.join(
    '/', 'media', 'arturo', 'T9',
    'Data', 'Italy', 'Rain_Gauges_QC',
)

In [6]:
# Station metadata table; the coordinates are rounded to 6 decimals.
metadata_csv = os.path.join(
    obs_base, 'data', 'METADATA',
    'METADATA_FTS_QCv4_Case1_wAIRHO_v3_1dy.csv',
)
METADATA = pd.read_csv(metadata_csv)
for coord_col in ("Lat", "Lon"):
    METADATA[coord_col] = np.round(METADATA[coord_col], 6)
METADATA.head(2)

Unnamed: 0,File,ISO,Region,Code,Name,Elevatn,Or_EPSG,CRS_E,CRS_N,Lat,...,TargetDist_H_NOQC,TargetElev_H_NOQC,SameRegion_H_NOQC,Rho_H_QC,AI_H_QC,Overlap_H_QC,TargetName_H_QC,TargetDist_H_QC,TargetElev_H_QC,SameRegion_H_QC
0,IT-210_PIE-001030-900_FTS_1440_QCv4.csv,IT-210,Piedmont,PIE-001030-900,BORGOFRANCODIVREA,337.0,32632,410005.0,5040731.0,45.514302,...,6857.364241,633.857117,1.0,0.647599,0.614767,36.363014,IT-210_PIE-001151-900_FTS_0060_QCv4.csv,6857.364241,633.857117,1.0
1,IT-210_PIE-001097-900_FTS_1440_QCv4.csv,IT-210,Piedmont,PIE-001097-900,CUMIANA,327.0,32632,373092.0,4980177.0,44.963696,...,20507.220598,474.270203,1.0,0.40523,0.501574,36.621918,IT-210_PIE-001139-900_FTS_0060_QCv4.csv,20507.220598,474.270203,1.0


In [7]:
# Folder with the raw '1dy' series; each sub-directory name is later matched
# against the METADATA 'ISO' column.
dir_read = os.path.join(obs_base, 'data', 'raw', '1dy')

# os.listdir() returns entries in arbitrary order, so sort the region folders
# to process them in the same sequence on every run and every machine.
regions_ISO = sorted(
    entry for entry in os.listdir(dir_read)
    if os.path.isdir(os.path.join(dir_read, entry))
)
print(f'Number of directories found: {len(regions_ISO)}')

Number of directories found: 21


In [8]:
# Make sure every region has its own output folder under Weibull/1dy.
for region_name in regions_ISO:
    dir_tmp = os.path.join(obs_base, 'Weibull', '1dy', region_name)
    # exist_ok=True replaces the check-then-create pattern: no race between the
    # test and the creation, and the cell can be re-run safely.
    os.makedirs(dir_tmp, exist_ok=True)

In [17]:
# For every station of every region: fit the yearly Weibull parameters and
# export one CSV per station holding Year, N, C, W and the percentage of
# missing daily values in that year (column 'NaN').
for region_ in regions_ISO:

    # Stations of the current region, matched on the ISO code.
    METADATA_clear = METADATA[METADATA['ISO'] == region_].reset_index()

    print(f'Region: {region_} has {len(METADATA_clear)} stations')

    for station_file in METADATA_clear['File'].values:

        # Plain str() instead of an f-string that reuses the same quote inside
        # the replacement field, which is a SyntaxError before Python 3.12.
        filename = str(station_file)

        RAIN_pd = pd.read_csv(os.path.join(obs_base, 'data', 'raw', '1dy', region_, filename))
        RAIN_pd['Datetime'] = pd.to_datetime(RAIN_pd['Datetime'].values)
        RAIN_pd['Year'] = RAIN_pd['Datetime'].dt.year

        # Complete daily calendar spanning whole years, from 1 Jan of the first
        # year on record to 31 Dec of the last one.
        ss, ee = RAIN_pd['Year'].min(), RAIN_pd['Year'].max()
        TIME_vector = pd.date_range(start=f'{ss}-01-01', end=f'{ee}-12-31', freq='1D')
        pd_TIME = pd.DataFrame({'Datetime': TIME_vector})
        pd_TIME['Year'] = pd_TIME['Datetime'].dt.year

        # Left merge: calendar days absent from the record get NaN in 'pre'.
        # 'Year' exists on both sides, so the calendar year ends up in 'Year_x'.
        RAIN_full = pd.merge(pd_TIME, RAIN_pd, on='Datetime', how='left')

        # Percentage of missing days per year. The mean of the NaN mask divides
        # by the real number of days in each year (365 or 366) instead of a
        # fixed 366, which under-reported missing data in non-leap years.
        nan_per_year = (
            RAIN_full['pre'].isna()
            .groupby(RAIN_full['Year_x'])
            .mean()
            .mul(100)
            .round(1)
        )

        # The fit is run on the series as read from file (not the gap-filled one).
        RAIN_xr = xr.DataArray(
            RAIN_pd['pre'].values,
            coords={'time': RAIN_pd['Datetime'].values},
            dims=('time',),
        )

        # NOTE(review): the meaning of the second argument (1) is defined by
        # DOWN_raw.fit_yearly_weibull, which is not visible here -- confirm.
        NCWy, YEARSy = DOWN_raw.fit_yearly_weibull(RAIN_xr, 1)

        # Columns 0, 1, 2 of NCWy are exported as N, C and W respectively.
        STATISTIC = pd.DataFrame({
            'Year': YEARSy.astype(int),
            'N': NCWy[:, 0],
            'C': NCWy[:, 1],
            'W': NCWy[:, 2],
        })
        # Attach the missing-data percentage of the matching year.
        STATISTIC['NaN'] = STATISTIC['Year'].map(nan_per_year)

        # The output keeps the input file name, inside the region's Weibull folder.
        file_out = os.path.join(obs_base, 'Weibull', '1dy', region_, filename)
        STATISTIC.to_csv(file_out, header=True, index=False)

Region: IT-210 has 38 stations
Region: IT-230 has 72 stations
Region: IT-250 has 236 stations
Region: IT-321 has 120 stations
Region: IT-322 has 57 stations
Region: IT-340 has 190 stations
Region: IT-360 has 206 stations
Region: IT-420 has 168 stations
Region: IT-450 has 320 stations
Region: IT-520 has 427 stations
Region: IT-550 has 66 stations
Region: IT-570 has 155 stations
Region: IT-620 has 213 stations
Region: IT-650 has 35 stations
Region: IT-670 has 22 stations
Region: IT-720 has 153 stations
Region: IT-750 has 39 stations
Region: IT-770 has 54 stations
Region: IT-780 has 159 stations
Region: IT-820 has 96 stations
Region: IT-880 has 117 stations


In [18]:
# Display the table left in STATISTIC once the export loop has finished,
# i.e. the one built for the last station file the loop processed.
STATISTIC

Unnamed: 0,Year,N,C,W,NaN
0,2017,18.0,4.770636,0.78233,0.0
1,2018,81.0,8.707291,0.746719,0.0
2,2019,58.0,5.958613,0.834282,0.0
3,2020,48.0,7.265827,0.609843,0.0
4,2021,62.0,6.416977,0.652689,0.0
5,2022,38.0,7.581254,0.792616,0.0
6,2023,50.0,6.765738,0.874391,0.0
