### Calculate climatologies and anomalies from the dataset of interpolated, seasonally averaged IRI-DL GCMs
-------------------------------------------------------------------------------------------------------

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
from datetime import datetime, timedelta

In [3]:
import numpy as np
import pandas as pd

In [4]:
import xarray as xr; print(xr.__version__)

0.14.1


In [5]:
import dask

In [6]:
# `import dask` on its own does not load the `dask.distributed` submodule, so
# `dask.distributed.Client` only resolves if some other package happened to
# import it first. Import the class explicitly so this cell works on a fresh kernel.
from dask.distributed import Client

# start a local cluster with default settings and connect to it
client = Client()

In [7]:
from dask.diagnostics import ProgressBar

In [8]:
client

0,1
Client  Scheduler: tcp://127.0.0.1:46795  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 4  Cores: 12  Memory: 134.95 GB


In [9]:
import pathlib

In [10]:
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

In [11]:
HOME = pathlib.Path.home()

In [12]:
provider = 'IRI'

In [13]:
var_name = 'SST'

In [14]:
# GCM = 'GEM_NEMO'
GCM = 'CanCM4i'

In [15]:
dpath = HOME / 'research' / 'Smart_Ideas' / 'data' / 'GCMs' / 'NC' / GCM / var_name

In [16]:
dpath

PosixPath('/home/nicolasf/research/Smart_Ideas/data/GCMs/NC/CanCM4i/SST')

In [17]:
# collect every seasonally averaged, interpolated file for this GCM / variable
lfiles = list(dpath.glob("*seasonal_interp.nc"))

# fail here with an explicit message rather than later with an IndexError
# (lfiles[0]) or an opaque open_mfdataset error when the folder is empty or wrong
if not lfiles:
    raise FileNotFoundError(f"no '*seasonal_interp.nc' files found in {dpath}")

In [18]:
def sf(x):
    """Return an integer sort key of the form YYYYMM01 for a file path.

    `x` must expose a `.name` attribute (e.g. a `pathlib.Path`). The year and
    month are taken from the third and fourth underscore-separated fields of
    the file name; the month is zero-padded to two digits and '01' is appended.
    """
    fields = x.name.split('_')
    year, month = fields[2], fields[3]
    # zero-padding keeps the integer keys in chronological order
    return int(f"{year}{month.zfill(2)}01")

In [19]:
def sfdates(x):
    """Return the date (first day of the month) encoded in a file name.

    `x` must expose a `.name` attribute (e.g. a `pathlib.Path`).

    The year and month are read from the third and fourth underscore-separated
    fields of the name, i.e. the same positions `sf` uses. The previous
    implementation read the last two fields, which are 'seasonal' and
    'interp.nc' for files matching '*seasonal_interp.nc', so it always raised
    ValueError on those files. Names that still follow the older
    '..._YYYY_M.nc' layout are handled by a fallback.

    Raises
    ------
    ValueError, IndexError
        If neither layout yields a valid year and month.
    """
    from datetime import datetime

    fields = x.name.split('_')
    try:
        # current layout: <var>_hindcasts_<YYYY>_<M>_..._seasonal_interp.nc
        return datetime(int(fields[2]), int(fields[3]), 1)
    except (IndexError, ValueError):
        # legacy layout: ..._<YYYY>_<M>.nc
        return datetime(int(fields[-2]), int(fields[-1].split('.nc')[0]), 1)

In [20]:
lfiles.sort(key=sf)

In [21]:
len(lfiles)

0

In [None]:
print(lfiles[0])

In [None]:
print(lfiles[-1])

In [None]:
# open all the files as one dataset, letting xarray order and combine them
# according to their coordinate values
dset = xr.open_mfdataset(lfiles, combine='by_coords')

In [None]:
dset.time

In [None]:
dset

In [None]:
dset.time[0]

In [None]:
dset.time[-1]

In [None]:
# restrict to the climatological period: label-based slicing on `time`,
# so both 1993 and 2016 are included
clim = dset.sel(time=slice('1993','2016'))

In [None]:
len(clim.time)

In [None]:
# monthly climatology: group the time steps by calendar month and average each
# group over `time`; the result is indexed by `month` instead of `time`.
# NOTE: this overwrites `clim`, so the cell cannot be re-run without first
# re-running the selection cell.
clim = clim.groupby(clim.time.dt.month).mean('time')

In [None]:
clim

In [None]:
# number of months in 1993-2016 inclusive, for comparison with len(clim.time)
# (assumes one time step per calendar month — TODO confirm)
(2016 - 1993 + 1) * 12

In [None]:
clim_path = (dpath / 'CLIMATOLOGY')

In [None]:
# create the output directory and any missing parents; a no-op when it already
# exists, with no window between an existence check and the creation
clim_path.mkdir(parents=True, exist_ok=True)

In [None]:
var_name

In [None]:
clim

In [None]:
clim.to_netcdf(clim_path / f"CLIMATOLOGY_{var_name}_{GCM}_1993_2016.nc")

### Calculate the anomalies with respect to the above climatology

In [None]:
# for every time step, subtract the climatological mean of its calendar month
anoms = dset.groupby(dset.time.dt.month) - clim

In [None]:
anoms

### Drop the `month` variable

In [None]:
# `drop` is pending deprecation since xarray 0.14; `drop_vars` is the
# dedicated method for removing a variable or coordinate by name
anoms = anoms.drop_vars('month')

### Add back the forecast valid time variable

In [None]:
# copy `forecast_valid_time` over from the original dataset
# NOTE(review): presumably the groupby arithmetic above alters or loses this
# variable — confirm
anoms['forecast_valid_time'] = dset['forecast_valid_time']

### Quick look at one anomaly field, then save the anomalies in the same folder

In [None]:
# plot one 2-D field: index 0 along the first dimension and index 2 along the second
# (presumably time and forecast lead/step respectively — TODO confirm the dimension order)
anoms[var_name.lower()][0,2,:,:].plot()

## Save to netCDF, one file per time step, with the pattern:

```
{var_name}_hindcasts_{YYYY}_{M}_{GCM}_seasonal_anomalies_interp.nc
```

In [None]:
anoms

In [None]:
dpath

In [None]:
# write one netCDF file per time step into `dpath`, named after the year and
# (unpadded) month of that step
for step in range(anoms.sizes['time']):
    sub = anoms.isel(time=step)
    stamp = sub.time.dt
    year = stamp.year.data.flatten()[0]
    month = stamp.month.data.flatten()[0]
    # build the file name once so the printed name and the written file cannot differ
    fname = f"{var_name}_hindcasts_{year}_{month}_{GCM}_seasonal_anomalies_interp.nc"
    print(fname)
    sub.to_netcdf(dpath / fname)
    sub.close()