In [1]:
import os
import pickle

# numpy
import numpy as np
import numpy.ma as ma

from skimage.transform import resize
from math import pi

# plotting
import matplotlib
import matplotlib.pyplot as plt
plt.style.use('default')

# netCDF
import netCDF4 as nc

from tqdm.notebook import tqdm
from tqdm.notebook import trange

import xarray as xr
import xesmf as xe



In [2]:
def get_timeperiod(f):
    """Extract the two period stamps from the last '_'-separated field of a file name.

    The last field has its final three characters removed (assumed to be a
    three-character suffix such as ``.nc``), is split on ``-``, and each piece
    has its final two characters removed (assumed to be a two-digit month,
    leaving the year).

    Parameters
    ----------
    f : str
        File name, e.g. ``..._185001-201412.nc``.

    Returns
    -------
    list of str
        One truncated stamp per ``-``-separated piece, e.g. ``['1850', '2014']``.
    """
    tail = f.rsplit('_', 1)[-1]
    span = tail[:-3]

    years = []
    for stamp in span.split('-'):
        years.append(stamp[:-2])
    return years

In [3]:
def model_info(x):
    """Return ``[model, ensemble]`` parsed from a seven-field, '_'-separated file name.

    The name must split into exactly seven fields on ``_``; the third field is
    taken as the model name and the fifth as the ensemble member label.

    Parameters
    ----------
    x : str
        File name such as
        ``pr_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc``.

    Returns
    -------
    list of str
        ``[model, ens]``.

    Raises
    ------
    ValueError
        If the name does not contain exactly seven '_'-separated fields.
    """
    fields = x.split('_')
    # Unpacking enforces the seven-field layout; only two fields are used.
    _f0, _f1, model, _f3, ens, _f5, _period = fields
    return [model, ens]

In [4]:
def process_pr(ncdata, var, nlat, nlon):
    """Read one variable from an open netCDF dataset and regrid it bilinearly.

    The target grid is rectilinear: ``nlon`` longitudes and ``nlat`` latitudes,
    evenly spaced between the minimum and maximum of the source ``lon`` /
    ``lat`` coordinates. (Previously ``nlat`` and ``nlon`` were ignored and the
    grid was hard-coded to 90 x 180; callers passing ``90, 180`` get the same
    result as before.)

    Parameters
    ----------
    ncdata : object exposing ``variables[name]`` (e.g. an open ``netCDF4.Dataset``)
        Must provide ``var`` as well as 1-D ``lat`` and ``lon`` variables.
    var : str
        Name of the variable to read.
    nlat : int
        Number of latitude points in the output grid.
    nlon : int
        Number of longitude points in the output grid.

    Returns
    -------
    numpy.ndarray
        Regridded data multiplied by 86400, with axis 1 reversed.
        Presumably shaped (time, nlat, nlon) -- the regridder puts lat/lon
        on the last two axes.
    """
    _data = np.array(ncdata.variables[var])
    _lats = np.array(ncdata.variables['lat'])
    _lons = np.array(ncdata.variables['lon'])

    grid_in = {"lon": _lons, "lat": _lats}
    grid_out = {"lon": np.linspace(np.min(_lons), np.max(_lons), nlon),
                "lat": np.linspace(np.min(_lats), np.max(_lats), nlat)}
    regridder = xe.Regridder(grid_in, grid_out, "bilinear")

    # NOTE(review): 86400 is the number of seconds per day; presumably this
    # converts a per-second flux to a per-day amount -- confirm the units.
    _data = np.array(regridder(_data)) * 86400

    # The original rotated axes (1, 2) by 180 degrees (reversing both axes) and
    # then reversed axis 2 again; the net effect is a reversal of axis 1 only.
    _data = np.flip(_data, axis=1)

    return _data

In [5]:
# _path = 'ncdata/hist_pr/'
# _files = sorted(os.listdir(_path))
# _files.reverse()
# _files

In [6]:
# Load every historical precipitation file, regrid it, and concatenate the
# files belonging to the same (model, ensemble member) along the first axis.
_path = 'ncdata/hist_pr/'

# Sorted order keeps all files of one model/ensemble run adjacent, which the
# grouping below relies on. Non-netCDF directory entries are skipped so they
# cannot break the file-name parsing in model_info.
_files = [name for name in sorted(os.listdir(_path)) if name.endswith('.nc')]
if not _files:
    raise FileNotFoundError(f"no .nc files found in {_path}")

nlat, nlon = 90, 180

xhist = []          # one array per (model, ensemble member) run
i = -1              # index of the run currently being filled
model_prev = None   # [model, ens] of the previously processed file

for _fname in _files:
    # Close each dataset as soon as its data has been read and regridded,
    # instead of leaving one open handle per file.
    with nc.Dataset(_path + _fname, "r") as ncdata:
        curr_data = process_pr(ncdata, 'pr', nlat, nlon)

    model_curr = model_info(_fname)
    if model_curr == model_prev:
        # Same run as the previous file: append along the first axis.
        xhist[i] = np.vstack([xhist[i], curr_data])
    else:
        # First file of a new run.
        xhist.append(curr_data)
        i += 1

    model_prev = model_curr
    print(i, model_curr)

1 ['ACCESS-ESM1-5', 'r2i1p1f1']
2 ['CAMS-CSM1-0', 'r1i1p1f1']
3 ['CAMS-CSM1-0', 'r2i1p1f1']


cannot be safely cast to variable data type
  _data = np.array(ncdata.variables[var])


4 ['CESM2-WACCM', 'r1i1p1f1']
5 ['CESM2-WACCM', 'r2i1p1f1']
6 ['CIESM', 'r1i1p1f1']
7 ['CMCC-CM2-HR4', 'r1i1p1f1']
8 ['CMCC-CM2-SR5', 'r1i1p1f1']
9 ['CNRM-ESM2-1', 'r1i1p1f2']
10 ['CNRM-ESM2-1', 'r2i1p1f2']
11 ['FGOALS-f3-L', 'r1i1p1f1']
12 ['FIO-ESM-2-0', 'r2i1p1f1']
13 ['GFDL-ESM4', 'r1i1p1f1']
13 ['GFDL-ESM4', 'r1i1p1f1']
14 ['GFDL-ESM4', 'r3i1p1f1']
14 ['GFDL-ESM4', 'r3i1p1f1']
15 ['IPSL-CM6A-LR', 'r1i1p1f1']
16 ['IPSL-CM6A-LR', 'r2i1p1f1']
17 ['IPSL-CM6A-LR', 'r3i1p1f1']
18 ['KACE-1-0-G', 'r1i1p1f1']
19 ['KACE-1-0-G', 'r3i1p1f1']
20 ['MIROC6', 'r1i1p1f1']
20 ['MIROC6', 'r1i1p1f1']
21 ['MIROC6', 'r3i1p1f1']
21 ['MIROC6', 'r3i1p1f1']
22 ['MPI-ESM1-2-LR', 'r1i1p1f1']
22 ['MPI-ESM1-2-LR', 'r1i1p1f1']
22 ['MPI-ESM1-2-LR', 'r1i1p1f1']
22 ['MPI-ESM1-2-LR', 'r1i1p1f1']
22 ['MPI-ESM1-2-LR', 'r1i1p1f1']
22 ['MPI-ESM1-2-LR', 'r1i1p1f1']
22 ['MPI-ESM1-2-LR', 'r1i1p1f1']
22 ['MPI-ESM1-2-LR', 'r1i1p1f1']
22 ['MPI-ESM1-2-LR', 'r1i1p1f1']
23 ['MPI-ESM1-2-LR', 'r2i1p1f1']
23 ['MPI-ESM1-2-LR', 'r2i

In [7]:
# Show (run index, array shape) for every historical run.
list(enumerate(run.shape for run in xhist))

[(0, (1980, 90, 180)),
 (1, (1980, 90, 180)),
 (2, (1980, 90, 180)),
 (3, (1980, 90, 180)),
 (4, (1980, 90, 180)),
 (5, (1980, 90, 180)),
 (6, (1980, 90, 180)),
 (7, (1980, 90, 180)),
 (8, (1980, 90, 180)),
 (9, (1980, 90, 180)),
 (10, (1980, 90, 180)),
 (11, (1980, 90, 180)),
 (12, (1980, 90, 180)),
 (13, (1980, 90, 180)),
 (14, (1980, 90, 180)),
 (15, (1980, 90, 180)),
 (16, (1980, 90, 180)),
 (17, (1980, 90, 180)),
 (18, (1980, 90, 180)),
 (19, (1980, 90, 180)),
 (20, (1980, 90, 180)),
 (21, (1980, 90, 180)),
 (22, (1980, 90, 180)),
 (23, (1980, 90, 180)),
 (24, (1980, 90, 180)),
 (25, (1980, 90, 180)),
 (26, (1980, 90, 180)),
 (27, (780, 90, 180)),
 (28, (1980, 90, 180)),
 (29, (1980, 90, 180)),
 (30, (1980, 90, 180))]

In [8]:
# Keep only the runs whose first axis has exactly 1980 entries; any other run
# is dropped.
xhist2 = [run for run in xhist if run.shape[0] == 1980]

In [9]:
# Shapes of the retained historical runs.
list(map(np.shape, xhist2))

[(1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180),
 (1980, 90, 180)]

In [10]:
# Combine the retained runs into one array, inserting the run axis at position 1.
# Note: this rebinds `xhist` from a list of arrays to a single array.
xhist = np.stack(xhist2, axis=1)

In [11]:
# Load every ssp245 precipitation file, regrid it, and concatenate the files
# belonging to the same (model, ensemble member) along the first axis.
# Uses nlat / nlon as defined by the historical loading cell.
_path = 'ncdata/ssp245_pr/'

# Sorted order keeps all files of one model/ensemble run adjacent, which the
# grouping below relies on. Non-netCDF directory entries are skipped so they
# cannot break the file-name parsing in model_info.
_files = [name for name in sorted(os.listdir(_path)) if name.endswith('.nc')]
if not _files:
    raise FileNotFoundError(f"no .nc files found in {_path}")

xrcp = []           # one array per (model, ensemble member) run
i = -1              # index of the run currently being filled
model_prev = None   # [model, ens] of the previously processed file

for _fname in _files:
    # Close each dataset as soon as its data has been read and regridded,
    # instead of leaving one open handle per file.
    with nc.Dataset(_path + _fname, "r") as ncdata:
        curr_data = process_pr(ncdata, 'pr', nlat, nlon)

    model_curr = model_info(_fname)
    if model_curr == model_prev:
        # Same run as the previous file: append along the first axis.
        xrcp[i] = np.vstack([xrcp[i], curr_data])
    else:
        # First file of a new run.
        xrcp.append(curr_data)
        i += 1

    model_prev = model_curr
    print(i, model_curr)

1 ['ACCESS-ESM1-5', 'r2i1p1f1']
2 ['CAMS-CSM1-0', 'r1i1p1f1']
3 ['CAMS-CSM1-0', 'r2i1p1f1']


cannot be safely cast to variable data type
  _data = np.array(ncdata.variables[var])


4 ['CESM2-WACCM', 'r1i1p1f1']
4 ['CESM2-WACCM', 'r1i1p1f1']
5 ['CESM2-WACCM', 'r2i1p1f1']
5 ['CESM2-WACCM', 'r2i1p1f1']
6 ['CIESM', 'r1i1p1f1']
7 ['CMCC-CM2-SR5', 'r1i1p1f1']
8 ['CMCC-ESM2', 'r1i1p1f1']
9 ['CNRM-ESM2-1', 'r1i1p1f2']
10 ['CNRM-ESM2-1', 'r2i1p1f2']
11 ['FGOALS-f3-L', 'r1i1p1f1']
12 ['FIO-ESM-2-0', 'r2i1p1f1']
13 ['GFDL-ESM4', 'r1i1p1f1']
14 ['GFDL-ESM4', 'r3i1p1f1']
15 ['IPSL-CM6A-LR', 'r1i1p1f1']
16 ['IPSL-CM6A-LR', 'r2i1p1f1']
17 ['IPSL-CM6A-LR', 'r3i1p1f1']
18 ['KACE-1-0-G', 'r1i1p1f1']
19 ['KACE-1-0-G', 'r3i1p1f1']
20 ['MIROC6', 'r1i1p1f1']
21 ['MIROC6', 'r3i1p1f1']
22 ['MPI-ESM1-2-LR', 'r1i1p1f1']
22 ['MPI-ESM1-2-LR', 'r1i1p1f1']
22 ['MPI-ESM1-2-LR', 'r1i1p1f1']
22 ['MPI-ESM1-2-LR', 'r1i1p1f1']
22 ['MPI-ESM1-2-LR', 'r1i1p1f1']
23 ['MPI-ESM1-2-LR', 'r2i1p1f1']
23 ['MPI-ESM1-2-LR', 'r2i1p1f1']
23 ['MPI-ESM1-2-LR', 'r2i1p1f1']
23 ['MPI-ESM1-2-LR', 'r2i1p1f1']
23 ['MPI-ESM1-2-LR', 'r2i1p1f1']
24 ['MRI-ESM2-0', 'r1i1p1f1']
25 ['MRI-ESM2-0', 'r2i1p1f1']
25 ['MRI-ESM2-0', 

In [12]:
# Shapes of all ssp245 runs, followed by the number of runs.
(list(map(np.shape, xrcp)), len(xrcp))

([(1032, 90, 180),
  (1032, 90, 180),
  (1020, 90, 180),
  (1020, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180),
  (1032, 90, 180)],
 32)

In [13]:
# Runs with 1032 entries on the first axis lose their last 12 entries;
# all other runs are passed through unchanged.
xrcp2 = [run[:-12] if run.shape[0] == 1032 else run for run in xrcp]

In [14]:
# Drop two runs and stack the rest along a new run axis at position 1.
#
# The original code stacked all runs and then called np.delete(..., 27) followed
# by np.delete(..., 28). Because the first deletion shifts the later runs down
# by one, the second call removed what was originally run 29. The indices below
# are the ORIGINAL positions in xrcp2, so the result is identical, but the runs
# are filtered before stacking to avoid two extra full-array copies.
#
# NOTE(review): the run lists printed while loading the two scenarios differ
# (e.g. historical lists CMCC-CM2-HR4 where ssp245 lists CMCC-ESM2), so the
# same index on axis 1 of xhist and xrcp may not refer to the same
# model/ensemble member -- confirm the intended pairing and these indices.
_dropped_runs = (27, 29)
xrcp = np.stack(
    [run for idx, run in enumerate(xrcp2) if idx not in _dropped_runs],
    axis=1,
)

In [15]:
# Final shape of the stacked historical array.
np.shape(xhist)

(1980, 30, 90, 180)

In [16]:
# Final shape of the stacked ssp245 array.
np.shape(xrcp)

(1020, 30, 90, 180)

In [17]:
# Make sure the output directory and both target files exist, using portable
# Python instead of the shell's `touch` (which fails when data/ is missing and
# is unavailable on some platforms).
os.makedirs("data", exist_ok=True)
for _pkl_path in ("data/xhist_pr_hr.pkl", "data/xrcp_pr_hr.pkl"):
    # Append mode creates a missing file but never truncates an existing one.
    with open(_pkl_path, "ab"):
        pass

In [18]:
# Write both arrays to disk. The context managers guarantee each file is
# flushed and closed even if pickling fails part-way; the original left the
# handles returned by open() unclosed.
# Protocol 4 is the default on Python >= 3.8 and supports objects larger than
# 4 GiB; pinning it keeps the dump working on older interpreters as well.
with open("data/xhist_pr_hr.pkl", "wb") as _fh:
    pickle.dump(xhist, _fh, protocol=4)

with open("data/xrcp_pr_hr.pkl", "wb") as _fh:
    pickle.dump(xrcp, _fh, protocol=4)