# calculates the seasonal anomalies and the seasonal tercile categories from the gridded VCSN monthly files, using the NZ 6 regions shapefiles 

In [1]:
# Parameters
#
# `var_name` selects the VCSN variable to process. Further down it is used to
# build the input directory and the input file name, to pick the variable out
# of the dataset, and to choose between a 3-month sum ('Rain_bc') and a
# 3-month mean (any other value). Leave exactly one assignment uncommented.

var_name = 'Rain_bc'
# var_name = 'Tmin_N'
# var_name = 'Tmax_N'
# var_name = 'Tmean_N'
# var_name = 'SoilM'
# var_name = 'Wind'
# var_name = 'Rad'

in ['Agent', 'Lat', 'Longt', 'Date', 'MSLP', 'PET', 'Rain', 'RH', 'SoilM',
       'ETmp', 'Rad', 'TMax', 'Tmin', 'VP', 'Wind', 'Rain_bc', 'Tmax_N',
       'Tmin_N']

In [2]:
import os
import sys
import pathlib

In [3]:
%matplotlib inline
from matplotlib import pyplot as plt

import numpy as np
import pandas as pd
from scipy.spatial import cKDTree
from itertools import product

In [4]:
import salem
import geopandas as gpd

In [5]:
import xarray as xr

### function to calculate the anomalies with respect to the 1981 - 2010 climatology 

In [6]:
def demean(x, start='1981', end='2010'):
    """
    Return the anomalies of `x` with respect to its mean over a reference period.

    Parameters
    ----------
    x : pandas Series or DataFrame
        Values to convert to anomalies. The index must support label slicing
        with `start` and `end` (e.g. a DatetimeIndex sliced by year strings).
    start, end : str, optional
        First and last label (both inclusive, as with any `.loc` label slice)
        of the reference period. Defaults are '1981' and '2010', i.e. the
        1981 - 2010 climatology.

    Returns
    -------
    Same type and index as `x`
        `x` minus the mean of `x` over `start`..`end`. If the reference period
        contains no data the mean is NaN, and so is the result.
    """
    # plain label slice: the original trailing comma built a 1-tuple indexer,
    # which selects the same rows but is needlessly fragile
    return x - x.loc[start:end].mean()

In [7]:
var_name

'Rain_bc'

In [8]:
# short upper-case tag used in the output folder and file names: the part of
# `var_name` before the first underscore (e.g. 'Rain_bc' -> 'RAIN')
big_var = var_name.partition('_')[0].upper()

In [9]:
# user's home directory: every input and output path below is built from it
HOME = pathlib.Path.home()

In [10]:
# directory holding the monthly VCSN file of the selected variable
dpath = HOME.joinpath('operational', 'VCSN', 'data', 'NC', 'MONTHLY', var_name.upper())

In [11]:
var_name.upper()

'RAIN_BC'

In [12]:
dpath

PosixPath('/home/nicolasf/operational/VCSN/data/NC/MONTHLY/RAIN_BC')

In [13]:
# monthly gridded file for `var_name`; note that the 1979-01 to 2019-12 span
# is hard-coded in the file name
vcsn_file = dpath / f'VCSN_gridded_{var_name}_1979-01_2019-12.nc'
dset = salem.open_xr_dataset(vcsn_file)

In [14]:
dset

### calculates the seasonal (3-month rolling) average, or the 3-month sum if the variable is Rain_bc

In [15]:
# 3-month rolling window along time: accumulated for 'Rain_bc', averaged for
# every other variable. min_periods=3 leaves incomplete windows as NaN.
# NOTE(review): a plain 'Rain' variable would be averaged here, not summed -
# confirm this is intended before using it.
aggregate = 'sum' if var_name == 'Rain_bc' else 'mean'
dset = getattr(dset.rolling(time=3, min_periods=3), aggregate)()

In [16]:
# discard the first two time steps, for which the 3-month window is incomplete
# (careful: re-running this cell on its own drops two more time steps)
dset = dset.isel({'time': slice(2, None)})

In [17]:
# shapefile with one (multi)polygon per NZ region, identified by `Location`
regions_shapefile = HOME.joinpath(
    'research', 'Smart_Ideas', 'data', 'shapefiles',
    'NZ_regions', 'NZ_6_regions', 'NZ_regions_corrected.shp',
)
nz_regions = gpd.read_file(regions_shapefile)

In [18]:
nz_regions

Unnamed: 0,OBJECTID,Id,gridcode,Shape_Leng,Shape_Area,Location,geometry
0,1,1,1,85.215338,5.032753,NNI,"MULTIPOLYGON (((174.70530 -38.17377, 174.70545..."
1,2,2,2,12.336015,2.994028,WNI,"MULTIPOLYGON (((175.13516 -41.37745, 175.13507..."
2,3,3,3,14.235493,3.775388,ENI,"MULTIPOLYGON (((175.85595 -41.35970, 175.85595..."
3,4,4,4,34.656463,3.06628,NSI,"MULTIPOLYGON (((171.32620 -42.12355, 171.32602..."
4,5,5,6,20.191504,4.827228,ESI,"MULTIPOLYGON (((170.21675 -46.05955, 170.21609..."
5,6,6,5,42.941379,9.05741,WSI,"MULTIPOLYGON (((169.20749 -46.66371, 169.20742..."


In [19]:
# root of the output tree; one sub-folder per variable and region is created below it
opath_root = HOME.joinpath('research', 'Smart_Ideas', 'outputs', 'targets', 'NZ_regions', 'NZ_6_regions')

In [20]:
# create the output root (and any missing parent); exist_ok makes the call a
# no-op when the folder is already there, without a separate exists() check
opath_root.mkdir(parents=True, exist_ok=True)

### defines the number of quantiles we want 

In [21]:
# number of quantile categories each season is classified into (3 -> terciles);
# also used in the `cat_<n>` column name and in the output file names
num_quantiles = 3

In [22]:
# num_quantiles + 1 evenly spaced probabilities from 0 to 1, both ends included
quant_values = np.linspace(0.0, 1.0, num=num_quantiles + 1)

In [23]:
# keep only the interior probabilities (drop the leading 0 and the trailing 1);
# careful: re-running this cell on its own trims the array again
quant_values = quant_values[1:-1]

In [24]:
quant_values

array([0.33333333, 0.66666667])

In [25]:
# column labels for the quantile thresholds: 'Q' + truncated percentage (e.g. 0.3333 -> 'Q33')
col_labs = ['Q' + str(int(pct)) for pct in quant_values * 100]

In [26]:
col_labs

['Q33', 'Q66']

In [27]:
# For each NZ region: average the seasonal grids over the region, derive the
# 1981 - 2010 climatological quantiles for each calendar month of the time
# index, classify every season into a quantile category, compute the
# anomalies, and save the time-series plus per-category statistics to csv.

# region name -> array of climatological quantiles, one row per calendar month
quantiles_dict = {}

for region_name in ['NNI','ENI','WNI','NSI','WSI','ESI']:

    shape = nz_regions.query(f"Location == '{region_name}'")

    # restrict the grid to the region: `subset` then `roi` with the region
    # geometry (see the salem documentation for the exact cropping / masking)
    region = dset.salem.subset(shape=shape)
    region = region.salem.roi(shape=shape, all_touched=True)

    # regional mean time-series
    ts = region.mean(dim=['lat','lon'])

    ts_df = ts[var_name].to_dataframe()

    ts_series = ts_df.loc[:,var_name]

    ts_series_cat = []

    quantiles_list = []

    for month in range(1, 13):

        ts_series_m = ts_series[ts_series.index.month == month]

        # the quantiles come from the 1981 - 2010 values only ...
        clim = ts_series_m.loc['1981':'2010']

        quantiles = [clim.quantile(q=q) for q in quant_values.tolist()]

        quantiles_list.append(quantiles)

        # ... but every year is classified; the open-ended outer bins catch
        # values below / above anything seen in the climatological period
        bins = [-np.inf, *quantiles, np.inf]

        ts_series_m_cats = pd.cut(ts_series_m, bins, labels=list(range(1, num_quantiles + 1)))

        ts_series_cat.append(ts_series_m_cats)

    quantiles_dict[region_name]  = np.array(quantiles_list)

    # back to one chronologically ordered series
    ts_series_cat = pd.concat(ts_series_cat, axis=0)

    ts_series_cat = ts_series_cat.sort_index()

    ts_df.loc[:,f'cat_{num_quantiles}'] = ts_series_cat

    # `transform` always returns a result indexed like its input; with
    # pandas >= 2.0 `apply` would prepend the group key (month) to the index
    # and the assignment below would no longer align with `ts_df`
    ts_df.loc[:,'anomalies'] = ts_df.loc[:,var_name].groupby(ts_df.index.month).transform(demean)

    # `opath` is kept as a module-level name: the last cell of the notebook reads it
    opath = opath_root / big_var / region_name

    opath.mkdir(parents=True, exist_ok=True)

    ts_df.to_csv(opath / f'TS_NZ_region_{region_name}_{big_var}_{num_quantiles}_quantiles_anoms.csv')

    # descriptive statistics per quantile category
    # (the double underscore in the file name is kept as is, so existing readers of these files keep working)
    ts_df.groupby(ts_df.loc[:,f'cat_{num_quantiles}']).describe().to_csv(opath / f'descriptive_stats_{region_name}__{big_var}_{num_quantiles}.csv')

    print(f"region {region_name} processed for variable {big_var}")
    

region NNI processed for variable RAIN
region ENI processed for variable RAIN
region WNI processed for variable RAIN
region NSI processed for variable RAIN
region WSI processed for variable RAIN
region ESI processed for variable RAIN


### saves the climatological terciles calculated from the VCSN regional aggregates

In [28]:
# one table per region: rows are the calendar months 1..12 (index named
# 'season'), columns are (region, quantile label) pairs
quantiles_list = []
season_index = pd.RangeIndex(1, 13, name='season')
for region_name in ['NNI','ENI','WNI','NSI','WSI','ESI']:
    df = pd.DataFrame(
        quantiles_dict[region_name],
        index=season_index,
        columns=pd.MultiIndex.from_product([[region_name],col_labs]),
    )
    quantiles_list.append(df)

In [29]:
# regions side by side, sharing the 1..12 'season' index
quantiles_df = pd.concat(quantiles_list, axis='columns')

In [30]:
quantiles_df

Unnamed: 0_level_0,NNI,NNI,ENI,ENI,WNI,WNI,NSI,NSI,WSI,WSI,ESI,ESI
Unnamed: 0_level_1,Q33,Q66,Q33,Q66,Q33,Q66,Q33,Q66,Q33,Q66,Q33,Q66
season,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
1,240.299546,278.529469,265.411789,307.37686,330.177509,377.324029,414.024039,501.015876,622.472699,773.931314,152.919959,180.243479
2,208.414508,279.729167,235.260423,284.496569,280.65353,343.018331,371.357113,432.859094,589.754267,707.41307,146.467965,175.910272
3,217.093619,277.925908,240.17305,302.892519,242.881341,314.612834,331.816089,411.656688,530.911738,644.961253,137.784267,167.271257
4,235.801031,300.832563,248.861859,328.553242,251.985956,311.712658,336.380184,421.582449,531.844084,599.625668,138.677794,171.876955
5,260.802915,321.998845,278.948414,355.041013,303.589914,345.130635,361.74284,467.804978,571.61863,683.189705,140.414387,176.716967
6,331.203163,384.842877,310.147596,398.181329,369.926425,425.016314,443.447936,517.646237,559.705826,671.157257,136.499035,166.867917
7,359.751829,436.828122,365.192579,439.14634,406.718835,479.052824,442.162067,543.142652,560.58864,655.220968,138.438272,177.386182
8,372.300646,454.474422,374.852164,474.410912,403.485174,507.590066,467.729087,547.045742,539.477812,618.242451,139.523552,175.198259
9,353.006064,429.441543,360.959266,432.599541,397.529148,483.466389,440.142368,586.402972,536.917401,644.072579,128.072254,174.52151
10,308.492464,372.854084,307.653262,368.606942,408.646672,478.449823,502.794527,593.916678,589.867466,701.939899,134.134995,174.366936


In [31]:
# Written next to the per-region folders, i.e. in <opath_root>/<big_var>/.
# The folder is built explicitly instead of going through `opath.parent`,
# which only worked because `opath` leaked out of the region loop.
quantiles_dir = opath_root / big_var
quantiles_dir.mkdir(parents=True, exist_ok=True)
quantiles_df.to_csv(quantiles_dir / f'Climatological_quantiles_{num_quantiles}_cat_{big_var}.csv')