In [1]:
!pwd

/home/nicolasf/research/Smart_Ideas/code/processors/CDS/notebooks


In [2]:
%matplotlib inline

### os 
import os 
import sys

### datetimes 
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

### scipy 
import numpy as np 
import pandas as pd
import xarray as xr

### plotting 
from matplotlib import pyplot as plt


In [3]:
import pathlib

# Home directory of the current user; the CSV exports further down are written beneath it.
HOME = pathlib.Path.home()
# Current working directory of the kernel process.
CWD = pathlib.Path.cwd() 

In [4]:
# Maps the upper-case variable tag used in the input directory and file names
# to the name of the matching data variable inside the opened dataset.
dict_vars = {
    'PRECIP': 'precip',
    'SST': 'sst',
    'T2M': 't2m',
    'Z850': 'z',
}

In [5]:
# Run configuration: these values select the input files and the step to extract.
GCM = 'ECMWF'  # model name; part of the input directory and of the file-name pattern
varname = 'Z850'  # key into dict_vars; also the input sub-directory and a file-name component
period = 'seasonal'  # period tag found in both the predictor and the target file names
clim = '1981_2010'  # tag placed before `_clim` in the anomalies file names
step = 3  # value selected on the `step` coordinate, and number of month-begin offsets added to init_time

In [6]:
# Root directory of the processed files for the selected GCM.
# NOTE(review): hard-coded absolute path; when this location is unavailable the
# glob further down silently returns an empty list.
dpath = pathlib.Path(f'/media/nicolasf/END19101/data/GCMs/processed/CDS/{GCM}')

In [7]:
# Collect the anomaly files of the selected variable. The `????-??` wildcard
# matches the per-file date tag (presumably YYYY-MM of the initialisation — TODO confirm).
fname_pattern = f"CDS_{GCM}_{varname}_????-??_{period}_anomalies_{clim}_clim.nc"

# `glob` yields paths in arbitrary order, so sort them to get a deterministic list.
lfiles = sorted(dpath.joinpath(varname).glob(fname_pattern))

# Fail here with an explicit message instead of an IndexError (or an obscure
# open_mfdataset failure) in a later cell when nothing matched.
if not lfiles:
    raise FileNotFoundError(
        f"no file matching {fname_pattern!r} found in {dpath.joinpath(varname)}"
    )

In [8]:
# Sort the paths in place so the files are later opened in a deterministic
# (lexicographic) order; the order returned by glob is not guaranteed.
lfiles.sort()

In [9]:
len(lfiles) 

0

In [10]:
lfiles[0]

IndexError: list index out of range

In [11]:
lfiles[-1]

IndexError: list index out of range

In [None]:
def preprocess(dset):
    """Give `dset` a leading, length-one `init_time` dimension.

    Used as the `preprocess` callback of `xr.open_mfdataset`, so that the
    individual datasets can be concatenated along `init_time`.

    Parameters
    ----------
    dset : object exposing `init_time.data` and `expand_dims`
        (an xarray Dataset in this notebook).

    Returns
    -------
    The result of `dset.expand_dims`, with `init_time` inserted at axis 0 and
    labelled with the dataset's own `init_time` value.
    """
    init_value = dset.init_time.data
    new_dim = {'init_time': [init_value]}
    return dset.expand_dims(dim=new_dim, axis=0)

In [None]:
# Open every file as one dataset, concatenated along `init_time` (each file is
# first given a length-one `init_time` dimension by `preprocess`).
# `combine='nested'` is needed for `concat_dim` to be honoured: with the default
# `combine='by_coords'`, current xarray versions reject an explicit `concat_dim`.
# With nested combining the files are concatenated in the order of `lfiles`.
dset = xr.open_mfdataset(
    lfiles,
    preprocess=preprocess,
    combine='nested',
    concat_dim='init_time',
    parallel=True,
)

In [None]:
dset

In [None]:
# Keep only the entries whose `step` coordinate equals the configured `step` value.
# NOTE(review): `dset` is overwritten, so this cell is not meant to be run twice.
dset = dset.sel(step=step)

In [None]:
dset

In [None]:
# Pandas index built from the `init_time` coordinate.
index = dset['init_time'].to_index()

In [None]:
# Move every timestamp forward by `step` month-begin offsets (exactly `step` months
# when the timestamps already fall on the first of a month — TODO confirm for init_time).
index = index + pd.offsets.MonthBegin(n=step)

In [None]:
index

In [None]:
# Rename `init_time` to `time` before its values are replaced by the shifted index.
dset = dset.rename({"init_time":"time"})

In [None]:
# Replace the `time` values with the shifted index, so `time` no longer holds
# the original `init_time` values.
dset['time'] = index

In [None]:
dset

In [None]:
# Average over the `member` dimension (presumably the ensemble members — TODO confirm).
dset_m = dset.mean('member')

In [None]:
dset_m

In [None]:
# Convert the data variable named by dict_vars[varname] to a pandas DataFrame.
df = dset_m[dict_vars[varname]].to_dataframe()

In [None]:
df.head()

In [None]:
# Keep only the data column, as a Series; any other column produced by
# to_dataframe (e.g. leftover scalar coordinates — TODO confirm) is discarded.
df = df.loc[:,dict_vars[varname]]

In [None]:
# Pivot the `lon` and `lat` index levels into columns: one row per remaining
# index value, one column per (lon, lat) pair.
df_unstacked = df.unstack(level=['lon','lat'])

In [None]:
df_unstacked.shape

In [None]:
df_unstacked.head()

In [None]:
# Drop every column that contains at least one missing value.
df_unstacked = df_unstacked.dropna(axis=1)

In [None]:
df_unstacked.head()

In [None]:
# Set the row index from the `time` coordinate of the member-mean dataset.
df_unstacked.index = dset_m.time.to_index()

In [None]:
# Name the index so it is written as a `time` column header in the CSV export.
df_unstacked.index.name = 'time'

In [None]:
df_unstacked.head()

In [None]:
df_unstacked.tail()

### keep only the data up to (and including) the end of 2019

In [None]:
# Keep all rows up to and including the end of 2019 (label slicing with a
# partial date string is inclusive of the whole year).
df_unstacked = df_unstacked.loc[:'2019',:]

In [None]:
df_unstacked.head()

In [None]:
df_unstacked.tail()

In [None]:
# Write the predictor table to CSV under the home directory.
# NOTE(review): the `1981_2019` range in the file name is hard-coded and is not
# derived from the actual index of `df_unstacked`.
out_csv = HOME.joinpath(f"tmp/NIWA_project/new_outputs/{varname}_1981_2019_{GCM}.csv")

# `to_csv` does not create missing parent directories, so make sure the output
# directory exists before writing.
out_csv.parent.mkdir(parents=True, exist_ok=True)

df_unstacked.to_csv(out_csv)

In [None]:
print(f"conversion done for GCM {GCM}, variable {varname}\n")

### read the target variables 

In [None]:
def _counts_as_text(categories):
    """Return the per-class counts of `categories` (unsorted) as a list of strings."""
    return [str(count) for count in categories.value_counts(sort=False).values.tolist()]


# For each target variable: load the regional series, align them with the time
# range of the predictor table, then export, summarise and plot every region.
for target_var in ['RAIN','TMEAN']:

    dpath_target = pathlib.Path(f'/media/nicolasf/END19101/outputs/targets/NZ_regions/NZ_6_regions/{target_var}')
    filename = f'NZ_6_regions_{period}_{target_var}_terciles_and_quintiles.nc'
    dset_target = xr.open_dataset(dpath_target.joinpath(filename))

    # move the time stamps back by one month-begin offset
    shifted_time = dset_target.time.to_index() - pd.offsets.MonthBegin(1)
    dset_target['time'] = shifted_time

    # keep only the period covered by the predictor table
    first_time, last_time = df_unstacked.index[0], df_unstacked.index[-1]
    dset_target = dset_target.sel(time=slice(first_time, last_time))

    for region in dset_target.coords['region']:
        region_name = str(region.values)
        print(f"extracting {region_name}\n")

        # one table per region, with the columns re-ordered by position
        dset_region = dset_target.sel(region=region)
        df_region = dset_region.to_dataframe().iloc[:,[0,1,4,2,3]]

        # the output file name depends on the target variable
        if target_var == 'RAIN':
            csv_name = f"tmp/NIWA_project/new_outputs/Target_Rain_{region_name}_regional_ave_time_series.csv"
        elif target_var == 'TMEAN':
            csv_name = f"tmp/NIWA_project/new_outputs/Target_TMean_{region_name}_regional_ave_time_series.csv"
        else:
            csv_name = None
        if csv_name is not None:
            df_region.to_csv(HOME.joinpath(csv_name))

        ### number of labels in each class, for the tercile then the quintile categories
        terciles_counts = _counts_as_text(df_region.cat_3)
        print(f"cat_3 counts for region {region_name}: {', '.join(terciles_counts)}\n")

        quintiles_counts = _counts_as_text(df_region.cat_5)
        print(f"cat_5 counts for region {region_name}: {', '.join(quintiles_counts)}\n")

        ### area plot of the anomalies, one figure per region
        f, ax = plt.subplots()
        df_region['anomalies'].plot(kind='area', stacked=False, color='0.2', ax=ax, title=f"{region_name}: {target_var}")
        ax.grid(ls=':')