# Calculates the seasonal anomalies and the seasonal tercile categories from the gridded VCSN monthly files, using the shapefile of the 6 NZ regions

In [1]:
# Parameters

# Name of the VCSN variable to process; it selects the input directory and file,
# the data variable read from the dataset, and the output folder / file names.
# NOTE(review): the saved cell outputs in this notebook show 'Tmin_N', so the
# value was presumably overridden at run time or edited after the last run — confirm.
var_name = 'Tmax_N'

In [2]:
import os
import sys
import pathlib

In [3]:
%matplotlib inline
from matplotlib import pyplot as plt

import numpy as np
import pandas as pd
from scipy.spatial import cKDTree
from itertools import product

In [4]:
import salem
import geopandas as gpd

In [5]:
import xarray as xr

### function to calculate the anomalies with respect to the 1981 - 2010 climatology 

In [6]:
def demean(x):
    """Return `x` as anomalies with respect to its 1981 - 2010 mean.

    `x` must be label-sliceable on its index with the strings '1981' and
    '2010' (e.g. a pandas Series with a DatetimeIndex). The mean of the
    values falling in that period is subtracted from every element of `x`.
    """
    baseline = x.loc['1981':'2010']
    climatological_mean = baseline.mean()
    return x - climatological_mean

In [7]:
var_name

'Tmin_N'

In [8]:
# upper-cased part of var_name before the first underscore;
# used to build the output directory and output file names
big_var = var_name.split('_')[0].upper()

In [9]:
HOME = pathlib.Path.home()

In [10]:
# directory from which the monthly gridded file for this variable is read
dpath = HOME / 'operational/VCSN/data/NC/MONTHLY/' / var_name.upper()

In [11]:
var_name.upper()

'TMIN_N'

In [12]:
# open the gridded monthly file through salem
# NOTE(review): the 1979-01 to 2019-12 period is hard-coded in the file name
dset = salem.open_xr_dataset(dpath / f'VCSN_gridded_{var_name}_1979-01_2019-12.nc') 

In [13]:
dset

### calculates the seasonal average (or the seasonal sum if the variable is Rain_bc)

In [14]:
# 3-step rolling window along time, requiring all 3 steps to be present:
# running total for 'Rain_bc', running mean for every other variable
seasonal_window = dset.rolling(time=3, min_periods=3)
dset = seasonal_window.sum() if var_name == 'Rain_bc' else seasonal_window.mean()

In [15]:
# drop the first two time steps: with a 3-step window and min_periods=3
# they do not have a complete window
dset = dset.isel(time=slice(2,None))

In [16]:
# polygons of the NZ regions; the 'Location' column holds the region codes
# (NNI, WNI, ENI, NSI, ESI, WSI) used to select each region
nz_regions = gpd.read_file(HOME / 'research' / 'Smart_Ideas' / 'data' / 'shapefiles' / 'NZ_regions' / 'NZ_regions_corrected.shp') 

In [17]:
nz_regions

Unnamed: 0,OBJECTID,Id,gridcode,Shape_Leng,Shape_Area,Location,geometry
0,1,1,1,85.215338,5.032753,NNI,"MULTIPOLYGON (((174.70530 -38.17377, 174.70545..."
1,2,2,2,12.336015,2.994028,WNI,"MULTIPOLYGON (((175.13516 -41.37745, 175.13507..."
2,3,3,3,14.235493,3.775388,ENI,"MULTIPOLYGON (((175.85595 -41.35970, 175.85595..."
3,4,4,4,34.656463,3.06628,NSI,"MULTIPOLYGON (((171.32620 -42.12355, 171.32602..."
4,5,5,6,20.191504,4.827228,ESI,"MULTIPOLYGON (((170.21675 -46.05955, 170.21609..."
5,6,6,5,42.941379,9.05741,WSI,"MULTIPOLYGON (((169.20749 -46.66371, 169.20742..."


In [18]:
opath_root = HOME / 'research' / 'Smart_Ideas' / 'outputs' / 'targets' / 'NZ_regions'

### defines the number of quantiles we want 

In [19]:
num_quantiles = 3

In [20]:
# num_quantiles + 1 evenly spaced probabilities from 0 to 1 (both included)
quant_values = np.linspace(0, 1, num_quantiles + 1, endpoint=True)

In [21]:
# keep only the interior probabilities (drop 0 and 1): these are the category boundaries
quant_values = quant_values[1:-1]

In [22]:
quant_values

array([0.33333333, 0.66666667])

In [23]:
# column labels built from the percentage; int() drops the fractional part,
# so 66.67 becomes 'Q66' (not 'Q67')
col_labs = [f"Q{int(x)}" for x in (quant_values*100)]

In [24]:
col_labs

['Q33', 'Q66']

In [25]:
# Per-region processing. For each NZ region:
#   1. build the regional mean time series from the gridded (seasonal) data,
#   2. classify every time step into a quantile category, using thresholds
#      computed per calendar month from the 1981 - 2010 period,
#   3. compute the anomalies with respect to the 1981 - 2010 monthly means,
#   4. save the series, categories and anomalies to a CSV file.

# region name -> array of shape (12, len(quant_values)) holding the
# climatological quantile thresholds, one row per calendar month
quantiles_dict = {}

for region_name in ['NNI','ENI','WNI','NSI','WSI','ESI']: 

    # geometry of the current region
    shape = nz_regions.query(f"Location == '{region_name}'")

    # restrict the grid to the region, then apply the region of interest
    # defined by the polygon (all_touched=True is passed to salem's roi)
    region = dset.salem.subset(shape=shape)
    region = region.salem.roi(shape=shape, all_touched=True)

    # unweighted mean over the lat / lon dimensions -> one value per time step
    ts = region.mean(dim=['lat','lon'])

    ts_df = ts[var_name].to_dataframe()

    ts_series = ts_df.loc[:, var_name]

    ts_series_cat = []

    quantiles_list = []

    # categories are labelled 1 .. num_quantiles, from lowest to highest values
    category_labels = list(range(1, num_quantiles + 1))

    for month in range(1, 13):

        # all the time steps whose time stamp falls in this calendar month
        ts_series_m = ts_series[ts_series.index.month == month]

        # reference period used to derive the thresholds
        clim = ts_series_m.loc['1981':'2010']

        quantiles = [clim.quantile(q=q) for q in quant_values.tolist()]

        quantiles_list.append(quantiles)

        # open-ended outer bins so that values outside the climatological
        # range still fall in the lowest / highest category
        bins = [-np.inf, *quantiles, np.inf]

        ts_series_cat.append(pd.cut(ts_series_m, bins, labels=category_labels))

    quantiles_dict[region_name] = np.array(quantiles_list)

    # put the 12 monthly pieces back together in chronological order
    ts_series_cat = pd.concat(ts_series_cat, axis=0).sort_index()

    ts_df.loc[:, f'cat_{num_quantiles}'] = ts_series_cat

    # transform() returns a result indexed like its input, so it aligns with
    # ts_df; groupby().apply() may prepend the group key (month) to the index
    # depending on the pandas version / group_keys setting, which breaks the
    # assignment below
    ts_df.loc[:, 'anomalies'] = ts_df.loc[:, var_name].groupby(ts_df.index.month).transform(demean)

    opath = opath_root / big_var / region_name 

    opath.mkdir(parents=True, exist_ok=True)

    ts_df.to_csv(opath / f'TS_NZ_region_{region_name}_{big_var}_{num_quantiles}_quantiles_anoms.csv')

In [26]:
opath

PosixPath('/home/nicolasf/research/Smart_Ideas/outputs/targets/NZ_regions/TMIN/ESI')

### descriptive statistics per quantile category 

In [28]:
# NOTE: ts_df is the frame left over from the last iteration of the region
# loop ('ESI'), so these statistics describe that region only
ts_df.groupby(ts_df.loc[:,f'cat_{num_quantiles}']).describe()

Unnamed: 0_level_0,Tmin_N,Tmin_N,Tmin_N,Tmin_N,Tmin_N,Tmin_N,Tmin_N,Tmin_N,anomalies,anomalies,anomalies,anomalies,anomalies,anomalies,anomalies,anomalies
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
cat_3,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
1,142.0,3.342237,3.064059,-1.292898,0.416693,3.262845,6.321358,8.097552,142.0,-0.638409,0.337881,-1.946637,-0.78942,-0.587705,-0.390996,-0.155974
2,159.0,3.952612,3.097816,-0.823625,1.608417,3.994755,6.855854,8.678777,159.0,0.000395,0.182356,-0.506556,-0.125788,-0.001974,0.151964,0.322817
3,189.0,4.553926,3.193595,-0.213385,1.22826,4.661782,7.382531,10.269038,189.0,0.711017,0.359114,0.191316,0.421492,0.632335,0.954141,2.283398


### saves the climatological terciles calculated from the VCSN regional aggregates

In [33]:
# One table per region: rows are the 12 'season' indices (1 .. 12), columns
# are a (region, quantile label) MultiIndex; tables are collected in a list
quantiles_list = []
for region_name in ['NNI','ENI','WNI','NSI','WSI','ESI']: 
    df = pd.DataFrame(
        quantiles_dict[region_name],
        index=pd.RangeIndex(start=1, stop=13, name='season'),
        columns=pd.MultiIndex.from_product([[region_name],col_labs]),
    )
    quantiles_list.append(df)

In [34]:
quantiles_df = pd.concat(quantiles_list, axis=1)

In [35]:
# NOTE(review): `opath` is the variable left over from the last iteration of
# the region loop (the 'ESI' directory), so this all-region table is written
# inside the ESI folder; opath_root / big_var may have been intended — confirm
# before changing, since other code may read the file from its current location.
quantiles_df.to_csv(opath / f'Climatological_quantiles_{num_quantiles}_cat_{big_var}.csv')

In [36]:
quantiles_df

Unnamed: 0_level_0,NNI,NNI,ENI,ENI,WNI,WNI,NSI,NSI,WSI,WSI,ESI,ESI
Unnamed: 0_level_1,Q33,Q66,Q33,Q66,Q33,Q66,Q33,Q66,Q33,Q66,Q33,Q66
season,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
1,11.516223,12.109048,9.856536,10.553417,9.533938,9.999897,7.621007,8.15433,6.145022,6.7313,6.944198,7.612741
2,12.639627,13.419392,11.190426,11.68155,10.824943,11.350316,8.829933,9.356988,7.248262,7.772681,8.200124,8.688083
3,12.585074,13.210424,10.904919,11.305773,10.366581,11.020865,8.547318,9.255278,7.072506,7.543045,7.902332,8.279801
4,11.388393,12.07622,9.608146,9.89246,9.068532,9.627204,7.255774,7.799666,5.709272,6.13874,6.267179,6.775882
5,9.475544,10.318611,7.339551,8.120237,7.161288,7.740512,5.309367,5.852381,3.807051,4.386944,3.980672,4.555353
6,7.652583,7.983185,5.425689,5.998115,5.361454,5.808841,3.100452,3.561086,1.665419,2.166541,1.590907,1.9917
7,5.804997,6.408428,3.891522,4.489663,3.649837,4.386715,1.191843,1.920403,-0.106561,0.411596,-0.284084,0.17896
8,5.172896,5.762249,3.312354,3.815831,3.230375,3.720983,0.589611,1.121071,-0.713333,-0.187846,-0.853196,-0.25672
9,5.645431,6.24143,3.866058,4.316897,3.673118,4.222692,1.305102,1.732145,-0.203557,0.45692,0.050943,0.546799
10,6.818699,7.382453,4.954365,5.39397,4.966552,5.406251,2.645582,3.076779,1.349065,1.742883,1.718465,2.138675
