# Calculates the seasonal anomalies and the seasonal tercile categories from the gridded VCSN monthly files, using the shapefile of the 6 NZ regions

In [1]:
# Parameters
# name of the VCSN variable to process: it selects the input folder / file and the column that is analysed

var_name = 'Tmin_N'

In [2]:
import os
import sys
import pathlib

In [3]:
%matplotlib inline
from matplotlib import pyplot as plt

import numpy as np
import pandas as pd
from scipy.spatial import cKDTree
from itertools import product

In [4]:
import salem
import geopandas as gpd

In [5]:
import xarray as xr

### function to calculate the anomalies with respect to the 1981 - 2010 climatology 

In [6]:
def demean(x, clim_start='1981', clim_end='2010'): 
    """
    Return the anomalies of `x` with respect to its mean over a climatological period.

    Parameters
    ----------
    x : pandas.Series or pandas.DataFrame
        Values indexed by a sorted DatetimeIndex (the climatological period
        is selected by label slicing with year strings).
    clim_start : str, optional
        First year (inclusive) of the climatological period, by default '1981'.
    clim_end : str, optional
        Last year (inclusive) of the climatological period, by default '2010'.

    Returns
    -------
    same type as `x`
        `x` minus its mean over the climatological period, on the same index as `x`.
    """
    return x - x.loc[clim_start:clim_end].mean()

In [7]:
# sanity check of the variable actually being processed
# NOTE(review): the recorded output below shows 'Rain_bc' whereas the parameters cell assigns 'Tmin_N' --
# presumably the value was overridden when the notebook was executed; confirm
var_name

'Rain_bc'

In [8]:
# upper-cased part of the variable name that precedes the first underscore, used to name the output folder and files
big_var = var_name.partition('_')[0].upper()

In [9]:
# home directory of the current user: the input and output paths below are all built from it
HOME = pathlib.Path.home()

In [10]:
# directory holding the monthly gridded NetCDF file of the selected variable
dpath = HOME.joinpath('operational/VCSN/data/NC/MONTHLY/', var_name.upper())

In [11]:
# upper-cased variable name, i.e. the last component of the input directory `dpath`
var_name.upper()

'RAIN_BC'

In [12]:
# open the monthly gridded VCSN file of the selected variable through salem,
# so that the `.salem` accessor used further down is available on the dataset
vcsn_file = dpath / f'VCSN_gridded_{var_name}_1979-01_2019-12.nc'
dset = salem.open_xr_dataset(vcsn_file)

In [13]:
# quick look at the content of the dataset
dset

### calculates the seasonal (3-month rolling) average, or the seasonal sum if the variable is Rain_bc

In [14]:
# 3-month rolling window over time; a value is only produced when all 3 months are available.
# Rain_bc is accumulated over the window, every other variable is averaged.
# NB: `dset` is overwritten, so run this cell only once after (re)loading the data.
rolling_3m = dset.rolling(time=3, min_periods=3)
dset = rolling_3m.sum() if var_name == 'Rain_bc' else rolling_3m.mean()

In [15]:
# drop the first 2 time steps: with a 3-month window and min_periods=3 they do not hold a complete season
dset = dset.isel(time=slice(2,None))

In [16]:
# read the shapefile of the NZ regions into a GeoDataFrame
regions_shapefile = HOME.joinpath('research', 'Smart_Ideas', 'data', 'shapefiles', 'NZ_regions', 'NZ_regions_corrected.shp')
nz_regions = gpd.read_file(regions_shapefile)

In [17]:
# display the attribute table of the regions (the `Location` column holds the region codes queried below)
nz_regions

Unnamed: 0,OBJECTID,Id,gridcode,Shape_Leng,Shape_Area,Location,geometry
0,1,1,1,85.215338,5.032753,NNI,"MULTIPOLYGON (((174.70530 -38.17377, 174.70545..."
1,2,2,2,12.336015,2.994028,WNI,"MULTIPOLYGON (((175.13516 -41.37745, 175.13507..."
2,3,3,3,14.235493,3.775388,ENI,"MULTIPOLYGON (((175.85595 -41.35970, 175.85595..."
3,4,4,4,34.656463,3.06628,NSI,"MULTIPOLYGON (((171.32620 -42.12355, 171.32602..."
4,5,5,6,20.191504,4.827228,ESI,"MULTIPOLYGON (((170.21675 -46.05955, 170.21609..."
5,6,6,5,42.941379,9.05741,WSI,"MULTIPOLYGON (((169.20749 -46.66371, 169.20742..."


In [18]:
# root directory of the outputs; one sub-folder per variable and per region is created below it
opath_root = HOME.joinpath('research', 'Smart_Ideas', 'outputs', 'targets', 'NZ_regions')

In [19]:
# For each NZ region:
#   1. extract the grid points of the region and average them into one seasonal time series
#   2. for each calendar month, derive the tercile thresholds from the 1981 - 2010 values
#      and classify every season as -1 (lower), 0 (middle) or 1 (upper tercile)
#   3. compute the anomalies with respect to the 1981 - 2010 monthly climatology
#   4. save the values, categories and anomalies to CSV
#
# `quantiles_dict`, and the `ts_df` / `opath` left over from the last iteration,
# are read by the cells further down.

quantiles_dict = {}

for region_name in ['NNI','ENI','WNI','NSI','WSI','ESI']: 

    shape = nz_regions.query(f"Location == '{region_name}'")

    # restrict the dataset to the region (salem `subset`), then build the
    # region of interest from the polygon(s) (salem `roi`) before averaging
    region = dset.salem.subset(shape=shape)
    region = region.salem.roi(shape=shape, all_touched=True)

    # regional (unweighted) mean time series
    ts = region.mean(dim=['lat','lon'])

    ts_df = ts[var_name].to_dataframe()

    ts_series = ts_df.loc[:,var_name]

    ts_series_cat = []

    quantiles_list = []

    for month in range(1, 13):

        ts_series_m = ts_series[ts_series.index.month == month]

        # tercile thresholds of this calendar month over the climatological period
        clim = ts_series_m.loc['1981':'2010']

        quantiles = [clim.quantile(q=q) for q in [0.33333,0.66666]]

        quantiles_list.append(quantiles)

        # open-ended outer bins so that values outside the climatological range are still classified
        bins = [-np.inf, *quantiles, np.inf]

        ts_series_cat.append(pd.cut(ts_series_m, bins, labels=[-1,0,1]))

    # (12, 2) array: lower and upper tercile thresholds for each calendar month
    quantiles_dict[region_name]  = np.array(quantiles_list)

    # put the 12 monthly series back together, in chronological order
    ts_series_cat = pd.concat(ts_series_cat, axis=0).sort_index()

    ts_df.loc[:,'terciles_cat'] = ts_series_cat

    # `transform` (rather than `apply`) guarantees a result on the original time index:
    # with recent versions of pandas `apply` prepends the group key (the month) to the
    # index, which breaks the alignment with `ts_df`
    ts_df.loc[:,'anomalies'] = ts_df.loc[:,var_name].groupby(ts_df.index.month).transform(demean)

    opath = opath_root / big_var / region_name 

    opath.mkdir(parents=True, exist_ok=True)

    ts_df.to_csv(opath / f'TS_NZ_region_{region_name}_{big_var}_terciles_anoms.csv')

In [20]:
# summary statistics per tercile category; `ts_df` is the table left over from the
# last iteration of the loop over the regions, so this only describes that region
ts_df.groupby(ts_df.terciles_cat).describe()

Unnamed: 0_level_0,Rain_bc,Rain_bc,Rain_bc,Rain_bc,Rain_bc,Rain_bc,Rain_bc,Rain_bc,anomalies,anomalies,anomalies,anomalies,anomalies,anomalies,anomalies,anomalies
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
terciles_cat,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
-1,154.0,117.402219,18.972699,42.879867,105.607942,119.507194,130.302896,153.223789,154.0,-47.754742,17.661567,-112.664598,-57.063392,-46.257072,-34.132219,-12.486988
0,164.0,157.883271,13.001586,128.443637,149.219148,156.757716,167.226769,196.318566,164.0,-7.416635,10.972425,-32.08578,-14.525184,-7.766492,-0.163727,16.036227
1,172.0,220.963094,36.672879,169.135755,191.163877,211.639625,243.095145,326.268424,172.0,56.062165,36.552519,7.492502,26.852295,47.438019,79.099687,170.723959


### saves the climatological terciles calculated from the VCSN regional aggregates

In [45]:
# one (12 months x 2 thresholds) table per region, with (region, 'T1' / 'T2') column labels
quantiles_list = []
for region_name in ['NNI','ENI','WNI','NSI','WSI','ESI']: 
    df = pd.DataFrame(
        quantiles_dict[region_name],
        index=pd.Index(range(1, 13), name='month'),
        columns=pd.MultiIndex.from_product([[region_name], ['T1', 'T2']]),
    )
    quantiles_list.append(df)

In [46]:
# place the regional tables side by side: rows are the months, columns the (region, tercile) pairs
quantiles_df = pd.concat(quantiles_list, axis='columns')

In [51]:
# NOTE(review): `opath` is the variable left over from the last iteration of the loop over the regions,
# so this file (which covers all the regions) is written in the folder of the last region processed
# rather than in a region-independent location -- confirm this is intended
quantiles_df.to_csv(opath / f'Climatological_terciles_{big_var}.csv')