In [1]:
%matplotlib inline

In [2]:
import pathlib

In [3]:
import numpy as np

In [4]:
import pandas as pd

In [5]:
from matplotlib import pyplot as plt

In [6]:
import numpy as np
import xarray as xr
from cartopy import crs as ccrs

In [7]:
def regrid(ds_in, target_ds,  method='bilinear'):
    """Convenience function for one-time regridding.

    Builds an xESMF regridder from ``ds_in`` to ``target_ds`` (created with
    ``periodic=True``), applies it to ``ds_in`` and then removes the weight
    file that the regridder wrote to disk.

    Parameters
    ----------
    ds_in
        Object to regrid; it is used both to build the regridder and as
        the regridder's input.
    target_ds
        Object describing the destination grid.
    method : str, optional
        Regridding method handed to ``xe.Regridder`` (default ``'bilinear'``).

    Returns
    -------
    Whatever the regridder returns when called on ``ds_in``.
    """
    # function-local import, as in the original: xesmf is only needed here
    import xesmf as xe

    regridder = xe.Regridder(ds_in, target_ds, method, periodic=True)
    try:
        ds_out = regridder(ds_in)
    finally:
        # always remove the weight file, even if applying the regridder fails
        regridder.clean_weight_file()
    return ds_out

In [8]:
def make_forecast_valid_time(dset, nsteps): 
    """Attach a ``forecast_valid_time`` variable with dims ``('time', 'step')``.

    For every entry of ``dset.time`` and every step ``m`` in ``1..nsteps`` the
    value stored is ``time + (m + 2)`` calendar months.
    NOTE(review): the reason for the extra 2-month offset is not visible in
    this notebook -- confirm it matches the intended lead-time convention.

    Parameters
    ----------
    dset
        Object exposing ``dset.time.to_series()`` and item assignment
        (presumably an ``xarray.Dataset`` -- verify against callers).
    nsteps : int
        Number of steps (columns) to generate.

    Returns
    -------
    The same ``dset`` object; it is modified in place.
    """
    from dateutil.relativedelta import relativedelta
    import numpy as np

    time = dset.time.to_series()
    # Build the values time-major (outer loop over times, inner loop over
    # steps) so that the flat list reshapes correctly into (time, step).
    # Iterating steps first and then reshaping scrambles rows and columns as
    # soon as there is more than one time value.
    tv = [x + relativedelta(months=m + 2) for x in time for m in range(1, nsteps + 1)]
    # use the known sizes rather than len(tv) // nsteps (no division by nsteps)
    tv = np.array(tv).reshape((len(time), nsteps))
    dset['forecast_valid_time'] = (('time','step'), tv)
    return dset

In [9]:
xr.set_options(display_style="html")

<xarray.core.options.set_options at 0x7faf43396450>

In [10]:
HOME = pathlib.Path.home()

In [11]:
HOME

PosixPath('/home/nicolasf')

### read the grid for interpolation 

In [12]:
out_grid = xr.open_dataset(HOME / 'drives' / 'auck_projects' / 'END19101' / 'Working' / 'data' / 'GCMs' / 'target_grid' / 'target_grid_2.5x2.5.nc') 

In [13]:
out_grid

### parameter definitions

In [14]:
# Parameters 
stage_in = 'pre_processed'
stage_out = 'processed'
period = 'hindcasts'
provider = 'CDS'
GCM = 'ECMWF'
var_name = 'T2M'

In [15]:
# Parameters
stage_in = "pre_processed"
stage_out = "processed"
period = "hindcasts"
provider = "CDS"
GCM = "ECMWF"
var_name = "PRECIP"


----- 

In [16]:
ipath = HOME / 'drives' / 'auck_projects' / 'END19101' / 'Working' / 'data' / 'GCMs' / stage_in / period / provider / GCM / var_name

In [17]:
opath = HOME / 'drives' / 'auck_projects' / 'END19101' / 'Working' / 'data' / 'GCMs' / stage_out / period / provider / GCM / var_name

In [18]:
clim_path = opath / 'CLIMATOLOGY' 

In [19]:
# Create the climatology output directory (and any missing parents).
# exist_ok=True makes this safe to re-run and avoids the check-then-create race;
# it still raises if the path exists but is not a directory.
clim_path.mkdir(parents=True, exist_ok=True)

In [20]:
lfiles = list(ipath.glob("*.nc"))

In [21]:
lfiles.sort()

In [22]:
lfiles[0]

PosixPath('/home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/pre_processed/hindcasts/CDS/ECMWF/PRECIP/ECMWF_PRECIP_1993_01.nc')

In [23]:
lfiles[-1]

PosixPath('/home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/pre_processed/hindcasts/CDS/ECMWF/PRECIP/ECMWF_PRECIP_2016_12.nc')

In [24]:
len(lfiles)

288

In [25]:
dset = xr.open_dataset(lfiles[0])

In [26]:
dset

In [27]:
dset_m = xr.open_mfdataset(lfiles, combine='nested', concat_dim='time')

In [28]:
dset_m

Unnamed: 0,Array,Chunk
Bytes,11.52 kB,40 B
Shape,"(288, 5)","(1, 5)"
Count,1148 Tasks,288 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 11.52 kB 40 B Shape (288, 5) (1, 5) Count 1148 Tasks 288 Chunks Type datetime64[ns] numpy.ndarray",5  288,

Unnamed: 0,Array,Chunk
Bytes,11.52 kB,40 B
Shape,"(288, 5)","(1, 5)"
Count,1148 Tasks,288 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.38 GB,32.58 MB
Shape,"(288, 25, 5, 181, 360)","(1, 25, 5, 181, 360)"
Count,1152 Tasks,288 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 9.38 GB 32.58 MB Shape (288, 25, 5, 181, 360) (1, 25, 5, 181, 360) Count 1152 Tasks 288 Chunks Type float32 numpy.ndarray",25  288  360  181  5,

Unnamed: 0,Array,Chunk
Bytes,9.38 GB,32.58 MB
Shape,"(288, 25, 5, 181, 360)","(1, 25, 5, 181, 360)"
Count,1152 Tasks,288 Chunks
Type,float32,numpy.ndarray


In [29]:
dset_m.nbytes / 1e9

9.3830584

In [30]:
# UKMO only: label-based selection of `lat` starting at -89.5.
# NOTE(review): presumably this trims a grid row that the other GCMs do not have,
# and assumes `lat` is stored in ascending order -- confirm against the UKMO files.
if GCM == 'UKMO': 
    dset_m = dset_m.sel(lat=slice(-89.5, None))

### calculate the monthly climatology 

In [31]:
clim_m = dset_m.sel(time=slice('1993','2016'))

### the lead-time dependent climatology can be calculated from the ensemble mean

In [32]:
clim_m = clim_m.mean('member')

In [33]:
clim_m = clim_m.groupby(clim_m.time.dt.month).mean() 

In [34]:
clim_m

Unnamed: 0,Array,Chunk
Bytes,15.64 MB,1.30 MB
Shape,"(12, 5, 181, 360)","(1, 5, 181, 360)"
Count,2436 Tasks,12 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 15.64 MB 1.30 MB Shape (12, 5, 181, 360) (1, 5, 181, 360) Count 2436 Tasks 12 Chunks Type float32 numpy.ndarray",12  1  360  181  5,

Unnamed: 0,Array,Chunk
Bytes,15.64 MB,1.30 MB
Shape,"(12, 5, 181, 360)","(1, 5, 181, 360)"
Count,2436 Tasks,12 Chunks
Type,float32,numpy.ndarray


### save to disk on the native grid

In [35]:
clim_m.to_netcdf(clim_path / f'monthly_climatology_1993_2016_{GCM}_{var_name}.nc') 

### now interpolate 

In [36]:
clim_m_regrid = regrid(clim_m, out_grid)

Create weight file: bilinear_181x360_73x144_peri.nc


using dimensions ('lat', 'lon') from data variable precip as the horizontal dimensions for this dataset.
Remove file bilinear_181x360_73x144_peri.nc


In [37]:
clim_m_regrid

Unnamed: 0,Array,Chunk
Bytes,5.05 MB,420.48 kB
Shape,"(12, 5, 73, 144)","(1, 5, 73, 144)"
Count,2448 Tasks,12 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 5.05 MB 420.48 kB Shape (12, 5, 73, 144) (1, 5, 73, 144) Count 2448 Tasks 12 Chunks Type float64 numpy.ndarray",12  1  144  73  5,

Unnamed: 0,Array,Chunk
Bytes,5.05 MB,420.48 kB
Shape,"(12, 5, 73, 144)","(1, 5, 73, 144)"
Count,2448 Tasks,12 Chunks
Type,float64,numpy.ndarray


In [38]:
# Write the regridded monthly climatology: the "_interp" file must hold
# `clim_m_regrid`, not the native-grid `clim_m` that was saved just before.
clim_m_regrid.to_netcdf(clim_path / f'monthly_climatology_1993_2016_{GCM}_{var_name}_interp.nc') 

### now calculate the seasonal averages (3-step rolling mean along `step`)

In [39]:
# Rolling mean over windows of 3 along the `step` dimension; with min_periods=3
# a window holding fewer than 3 values yields NaN.
dset_s = dset_m.rolling(step=3, min_periods=3).mean()

In [40]:
# Label-based selection: keep `step` labels from 3 onwards.
# NOTE(review): presumably this drops exactly the steps whose rolling window was
# incomplete -- that assumes the `step` labels start at 1; confirm.
dset_s = dset_s.sel(step=slice(3, None))

In [41]:
dset_s.nbytes / 1e9

5.629837776

In [42]:
dset_s.valid_time.load()

### now calculate the seasonal climatology 

In [43]:
clim_s = dset_s.sel(time=slice('1993','2016')).mean('member')

In [44]:
clim_s = clim_s.groupby(clim_s.time.dt.month).mean()

In [45]:
clim_s

Unnamed: 0,Array,Chunk
Bytes,9.38 MB,781.92 kB
Shape,"(12, 3, 181, 360)","(1, 3, 181, 360)"
Count,22884 Tasks,12 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 9.38 MB 781.92 kB Shape (12, 3, 181, 360) (1, 3, 181, 360) Count 22884 Tasks 12 Chunks Type float32 numpy.ndarray",12  1  360  181  3,

Unnamed: 0,Array,Chunk
Bytes,9.38 MB,781.92 kB
Shape,"(12, 3, 181, 360)","(1, 3, 181, 360)"
Count,22884 Tasks,12 Chunks
Type,float32,numpy.ndarray


In [46]:
clim_s

Unnamed: 0,Array,Chunk
Bytes,9.38 MB,781.92 kB
Shape,"(12, 3, 181, 360)","(1, 3, 181, 360)"
Count,22884 Tasks,12 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 9.38 MB 781.92 kB Shape (12, 3, 181, 360) (1, 3, 181, 360) Count 22884 Tasks 12 Chunks Type float32 numpy.ndarray",12  1  360  181  3,

Unnamed: 0,Array,Chunk
Bytes,9.38 MB,781.92 kB
Shape,"(12, 3, 181, 360)","(1, 3, 181, 360)"
Count,22884 Tasks,12 Chunks
Type,float32,numpy.ndarray


### saves to disk 

In [47]:
clim_s.to_netcdf(clim_path / f'seasonal_climatology_1993_2016_{GCM}_{var_name}.nc') 

### interpolate 

In [48]:
clim_s_regrid = regrid(clim_s, out_grid)

Create weight file: bilinear_181x360_73x144_peri.nc


using dimensions ('lat', 'lon') from data variable precip as the horizontal dimensions for this dataset.
Remove file bilinear_181x360_73x144_peri.nc


In [49]:
clim_s_regrid.to_netcdf(clim_path / f'seasonal_climatology_1993_2016_{GCM}_{var_name}_interp.nc') 

### now calculate the anomalies

### monthly anomalies 

In [50]:
anoms_monthly = dset_m.groupby(dset_m.time.dt.month) - clim_m

In [51]:
anoms_monthly

Unnamed: 0,Array,Chunk
Bytes,11.52 kB,40 B
Shape,"(288, 5)","(1, 5)"
Count,2012 Tasks,288 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 11.52 kB 40 B Shape (288, 5) (1, 5) Count 2012 Tasks 288 Chunks Type datetime64[ns] numpy.ndarray",5  288,

Unnamed: 0,Array,Chunk
Bytes,11.52 kB,40 B
Shape,"(288, 5)","(1, 5)"
Count,2012 Tasks,288 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.38 GB,32.58 MB
Shape,"(288, 25, 5, 181, 360)","(1, 25, 5, 181, 360)"
Count,3612 Tasks,288 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 9.38 GB 32.58 MB Shape (288, 25, 5, 181, 360) (1, 25, 5, 181, 360) Count 3612 Tasks 288 Chunks Type float32 numpy.ndarray",25  288  360  181  5,

Unnamed: 0,Array,Chunk
Bytes,9.38 GB,32.58 MB
Shape,"(288, 25, 5, 181, 360)","(1, 25, 5, 181, 360)"
Count,3612 Tasks,288 Chunks
Type,float32,numpy.ndarray


### seasonal anomalies 

In [52]:
anoms_seasonal = dset_s.groupby(dset_s.time.dt.month) - clim_s

### regrid 

In [53]:
# Regrid only the data variable named after `var_name` (lower-cased); the double
# brackets pass that single variable on as a sub-dataset rather than as a bare array.
anoms_monthly_regrid = regrid(anoms_monthly[[var_name.lower()]], out_grid)

Create weight file: bilinear_181x360_73x144_peri.nc


using dimensions ('lat', 'lon') from data variable precip as the horizontal dimensions for this dataset.
Remove file bilinear_181x360_73x144_peri.nc


In [54]:
# Regrid only the data variable named after `var_name` (lower-cased); the double
# brackets pass that single variable on as a sub-dataset rather than as a bare array.
anoms_seasonal_regrid = regrid(anoms_seasonal[[var_name.lower()]], out_grid)

Create weight file: bilinear_181x360_73x144_peri.nc


using dimensions ('lat', 'lon') from data variable precip as the horizontal dimensions for this dataset.
Remove file bilinear_181x360_73x144_peri.nc


In [55]:
anoms_monthly

Unnamed: 0,Array,Chunk
Bytes,11.52 kB,40 B
Shape,"(288, 5)","(1, 5)"
Count,2012 Tasks,288 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 11.52 kB 40 B Shape (288, 5) (1, 5) Count 2012 Tasks 288 Chunks Type datetime64[ns] numpy.ndarray",5  288,

Unnamed: 0,Array,Chunk
Bytes,11.52 kB,40 B
Shape,"(288, 5)","(1, 5)"
Count,2012 Tasks,288 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.38 GB,32.58 MB
Shape,"(288, 25, 5, 181, 360)","(1, 25, 5, 181, 360)"
Count,3612 Tasks,288 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 9.38 GB 32.58 MB Shape (288, 25, 5, 181, 360) (1, 25, 5, 181, 360) Count 3612 Tasks 288 Chunks Type float32 numpy.ndarray",25  288  360  181  5,

Unnamed: 0,Array,Chunk
Bytes,9.38 GB,32.58 MB
Shape,"(288, 25, 5, 181, 360)","(1, 25, 5, 181, 360)"
Count,3612 Tasks,288 Chunks
Type,float32,numpy.ndarray


### add the valid time to the anomalies datasets

In [56]:
anoms_monthly_regrid['valid_time'] = anoms_monthly['valid_time']

In [57]:
anoms_seasonal_regrid['valid_time'] = anoms_seasonal['valid_time']

### now save to disk (one file per date, native and interpolated grids)

In [58]:
opath

PosixPath('/home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/processed/hindcasts/CDS/ECMWF/PRECIP')

In [59]:
# For every entry of the `time` coordinate, write two NetCDF files into `opath`:
# the monthly anomalies as computed, and their regridded ("_interp") counterpart.
for date in pd.to_datetime(anoms_monthly.time.data): 
    print(f"processing monthly dataset for date {date:%Y_%m}")
    # anomalies for this time stamp, taken from `anoms_monthly`
    sub = anoms_monthly.sel(time=date)
    sub.to_netcdf(opath / f"{GCM}_{var_name}_monthly_anomalies_{date:%Y_%m}.nc")
    sub.close() 
    # same time stamp, taken from the regridded anomalies
    sub_regrid = anoms_monthly_regrid.sel(time=date)
    sub_regrid.to_netcdf(opath / f"{GCM}_{var_name}_monthly_anomalies_interp_{date:%Y_%m}.nc")
    sub_regrid.close()

processing monthly dataset for date 1993_01


processing monthly dataset for date 1993_02


processing monthly dataset for date 1993_03


processing monthly dataset for date 1993_04


processing monthly dataset for date 1993_05


processing monthly dataset for date 1993_06


processing monthly dataset for date 1993_07


processing monthly dataset for date 1993_08


processing monthly dataset for date 1993_09


processing monthly dataset for date 1993_10


processing monthly dataset for date 1993_11


processing monthly dataset for date 1993_12


processing monthly dataset for date 1994_01


processing monthly dataset for date 1994_02


processing monthly dataset for date 1994_03


processing monthly dataset for date 1994_04


processing monthly dataset for date 1994_05


processing monthly dataset for date 1994_06


processing monthly dataset for date 1994_07


processing monthly dataset for date 1994_08


processing monthly dataset for date 1994_09


processing monthly dataset for date 1994_10


processing monthly dataset for date 1994_11


processing monthly dataset for date 1994_12


processing monthly dataset for date 1995_01


processing monthly dataset for date 1995_02


processing monthly dataset for date 1995_03


processing monthly dataset for date 1995_04


processing monthly dataset for date 1995_05


processing monthly dataset for date 1995_06


processing monthly dataset for date 1995_07


processing monthly dataset for date 1995_08


processing monthly dataset for date 1995_09


processing monthly dataset for date 1995_10


processing monthly dataset for date 1995_11


processing monthly dataset for date 1995_12


processing monthly dataset for date 1996_01


processing monthly dataset for date 1996_02


processing monthly dataset for date 1996_03


processing monthly dataset for date 1996_04


processing monthly dataset for date 1996_05


processing monthly dataset for date 1996_06


processing monthly dataset for date 1996_07


processing monthly dataset for date 1996_08


processing monthly dataset for date 1996_09


processing monthly dataset for date 1996_10


processing monthly dataset for date 1996_11


processing monthly dataset for date 1996_12


processing monthly dataset for date 1997_01


processing monthly dataset for date 1997_02


processing monthly dataset for date 1997_03


processing monthly dataset for date 1997_04


processing monthly dataset for date 1997_05


processing monthly dataset for date 1997_06


processing monthly dataset for date 1997_07


processing monthly dataset for date 1997_08


processing monthly dataset for date 1997_09


processing monthly dataset for date 1997_10


processing monthly dataset for date 1997_11


processing monthly dataset for date 1997_12


processing monthly dataset for date 1998_01


processing monthly dataset for date 1998_02


processing monthly dataset for date 1998_03


processing monthly dataset for date 1998_04


processing monthly dataset for date 1998_05


processing monthly dataset for date 1998_06


processing monthly dataset for date 1998_07


processing monthly dataset for date 1998_08


processing monthly dataset for date 1998_09


processing monthly dataset for date 1998_10


processing monthly dataset for date 1998_11


processing monthly dataset for date 1998_12


processing monthly dataset for date 1999_01


processing monthly dataset for date 1999_02


processing monthly dataset for date 1999_03


processing monthly dataset for date 1999_04


processing monthly dataset for date 1999_05


processing monthly dataset for date 1999_06


processing monthly dataset for date 1999_07


processing monthly dataset for date 1999_08


processing monthly dataset for date 1999_09


processing monthly dataset for date 1999_10


processing monthly dataset for date 1999_11


processing monthly dataset for date 1999_12


processing monthly dataset for date 2000_01


processing monthly dataset for date 2000_02


processing monthly dataset for date 2000_03


processing monthly dataset for date 2000_04


processing monthly dataset for date 2000_05


processing monthly dataset for date 2000_06


processing monthly dataset for date 2000_07


processing monthly dataset for date 2000_08


processing monthly dataset for date 2000_09


processing monthly dataset for date 2000_10


processing monthly dataset for date 2000_11


processing monthly dataset for date 2000_12


processing monthly dataset for date 2001_01


processing monthly dataset for date 2001_02


processing monthly dataset for date 2001_03


processing monthly dataset for date 2001_04


processing monthly dataset for date 2001_05


processing monthly dataset for date 2001_06


processing monthly dataset for date 2001_07


processing monthly dataset for date 2001_08


processing monthly dataset for date 2001_09


processing monthly dataset for date 2001_10


processing monthly dataset for date 2001_11


processing monthly dataset for date 2001_12


processing monthly dataset for date 2002_01


processing monthly dataset for date 2002_02


processing monthly dataset for date 2002_03


processing monthly dataset for date 2002_04


processing monthly dataset for date 2002_05


processing monthly dataset for date 2002_06


processing monthly dataset for date 2002_07


processing monthly dataset for date 2002_08


processing monthly dataset for date 2002_09


processing monthly dataset for date 2002_10


processing monthly dataset for date 2002_11


processing monthly dataset for date 2002_12


processing monthly dataset for date 2003_01


processing monthly dataset for date 2003_02


processing monthly dataset for date 2003_03


processing monthly dataset for date 2003_04


processing monthly dataset for date 2003_05


processing monthly dataset for date 2003_06


processing monthly dataset for date 2003_07


processing monthly dataset for date 2003_08


processing monthly dataset for date 2003_09


processing monthly dataset for date 2003_10


processing monthly dataset for date 2003_11


processing monthly dataset for date 2003_12


processing monthly dataset for date 2004_01


processing monthly dataset for date 2004_02


processing monthly dataset for date 2004_03


processing monthly dataset for date 2004_04


processing monthly dataset for date 2004_05


processing monthly dataset for date 2004_06


processing monthly dataset for date 2004_07


processing monthly dataset for date 2004_08


processing monthly dataset for date 2004_09


processing monthly dataset for date 2004_10


processing monthly dataset for date 2004_11


processing monthly dataset for date 2004_12


processing monthly dataset for date 2005_01


processing monthly dataset for date 2005_02


processing monthly dataset for date 2005_03


processing monthly dataset for date 2005_04


processing monthly dataset for date 2005_05


processing monthly dataset for date 2005_06


processing monthly dataset for date 2005_07


processing monthly dataset for date 2005_08


processing monthly dataset for date 2005_09


processing monthly dataset for date 2005_10


processing monthly dataset for date 2005_11


processing monthly dataset for date 2005_12


processing monthly dataset for date 2006_01


processing monthly dataset for date 2006_02


processing monthly dataset for date 2006_03


processing monthly dataset for date 2006_04


processing monthly dataset for date 2006_05


processing monthly dataset for date 2006_06


processing monthly dataset for date 2006_07


processing monthly dataset for date 2006_08


processing monthly dataset for date 2006_09


processing monthly dataset for date 2006_10


processing monthly dataset for date 2006_11


processing monthly dataset for date 2006_12


processing monthly dataset for date 2007_01


processing monthly dataset for date 2007_02


processing monthly dataset for date 2007_03


processing monthly dataset for date 2007_04


processing monthly dataset for date 2007_05


processing monthly dataset for date 2007_06


processing monthly dataset for date 2007_07


processing monthly dataset for date 2007_08


processing monthly dataset for date 2007_09


processing monthly dataset for date 2007_10


processing monthly dataset for date 2007_11


processing monthly dataset for date 2007_12


processing monthly dataset for date 2008_01


processing monthly dataset for date 2008_02


processing monthly dataset for date 2008_03


processing monthly dataset for date 2008_04


processing monthly dataset for date 2008_05


processing monthly dataset for date 2008_06


processing monthly dataset for date 2008_07


processing monthly dataset for date 2008_08


processing monthly dataset for date 2008_09


processing monthly dataset for date 2008_10


processing monthly dataset for date 2008_11


processing monthly dataset for date 2008_12


processing monthly dataset for date 2009_01


processing monthly dataset for date 2009_02


processing monthly dataset for date 2009_03


processing monthly dataset for date 2009_04


processing monthly dataset for date 2009_05


processing monthly dataset for date 2009_06


processing monthly dataset for date 2009_07


processing monthly dataset for date 2009_08


processing monthly dataset for date 2009_09


processing monthly dataset for date 2009_10


processing monthly dataset for date 2009_11


processing monthly dataset for date 2009_12


processing monthly dataset for date 2010_01


processing monthly dataset for date 2010_02


processing monthly dataset for date 2010_03


processing monthly dataset for date 2010_04


processing monthly dataset for date 2010_05


processing monthly dataset for date 2010_06


processing monthly dataset for date 2010_07


processing monthly dataset for date 2010_08


processing monthly dataset for date 2010_09


processing monthly dataset for date 2010_10


processing monthly dataset for date 2010_11


processing monthly dataset for date 2010_12


processing monthly dataset for date 2011_01


processing monthly dataset for date 2011_02


processing monthly dataset for date 2011_03


processing monthly dataset for date 2011_04


processing monthly dataset for date 2011_05


processing monthly dataset for date 2011_06


processing monthly dataset for date 2011_07


processing monthly dataset for date 2011_08


processing monthly dataset for date 2011_09


processing monthly dataset for date 2011_10


processing monthly dataset for date 2011_11


processing monthly dataset for date 2011_12


processing monthly dataset for date 2012_01


processing monthly dataset for date 2012_02


processing monthly dataset for date 2012_03


processing monthly dataset for date 2012_04


processing monthly dataset for date 2012_05


In [None]:
# For every entry of the `time` coordinate, write two NetCDF files into `opath`:
# the seasonal anomalies as computed, and their regridded ("_interp") counterpart.
for date in pd.to_datetime(anoms_seasonal.time.data): 
    print(f"processing seasonal dataset for date {date:%Y_%m}")
    # anomalies for this time stamp, taken from `anoms_seasonal`
    sub = anoms_seasonal.sel(time=date)
    sub.to_netcdf(opath / f"{GCM}_{var_name}_seasonal_anomalies_{date:%Y_%m}.nc")
    sub.close() 
    # same time stamp, taken from the regridded anomalies
    sub_regrid = anoms_seasonal_regrid.sel(time=date)
    sub_regrid.to_netcdf(opath / f"{GCM}_{var_name}_seasonal_anomalies_interp_{date:%Y_%m}.nc")
    sub_regrid.close()