In [1]:
import sys
sys.path.append('../modules')

from datacube import read_data
from operators import *
from mask import *
from preprocessing import *
from eof import *
from interpolator import *
from gridding import *
from mapper import *
from scipy import interpolate

import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
import glob
import warnings

import numpy as np
import numpy.ma as ma

import xarray as xr

def check_keys(actual_keys, required_keys):
    """Verify that every required key is present among the actual keys.
    ------------------------------
    Args:
        actual_keys (iterable): keys found in the data
            (e.g. variable and dimension names)
        required_keys (iterable): keys that must all be present
    Returns:
        passed (bool): True when no required key is missing
    Raises:
        KeyError: if one or more required keys are missing
    """
    # Coerce both arguments to sets so lists/tuples are accepted; the
    # set difference used for the error message would otherwise raise a
    # TypeError and mask the real problem.
    missing_keys = set(required_keys) - set(actual_keys)
    if missing_keys:
        raise KeyError(
                    "CanRCM4 ensemble is missing keys {}"
                    .format(missing_keys)
            )
    return True

def check_path(data_path):
    """Validate the glob pattern used to locate ensemble member files.
    ------------------------------
    Args:
        data_path (Str): glob pattern expected to match the
            ensemble member files
    Returns:
        passed (bool): True when the pattern passes every check
    Raises:
        ValueError: if data_path is not a str, if it matches no
            files, or if it matches exactly one file
    Warns:
        UserWarning: if 20 or fewer files match the pattern
    """
    if not isinstance(data_path, str):
        raise ValueError('Path requires str got {}'.format(type(data_path)))

    # Expand the pattern once so every check below sees the same file
    # count and the filesystem is not scanned repeatedly.
    n_members = len(glob.glob(data_path))

    if n_members == 0:
        raise ValueError('Path provided has no files with \'.nc\' extension')
    if n_members == 1:
        raise ValueError('At least 2 ensemble members required to EOF analysis')
    if n_members <= 20:
        # NOTE(review): the condition includes exactly 20 members while the
        # message says "< 20" -- confirm which threshold is intended.
        warnings.warn("Path has a low ensemble size with < 20 members")

    return True

def check_time(len_time):
    """Confirm the time axis holds exactly one entry.
    ------------------------------
    Args:
        len_time (int): length of the time axis
    Returns:
        passed (bool): True when len_time equals 1
    Raises:
        ValueError: if len_time is anything other than 1
    """
    # Guard clause: the only accepted length is 1.
    if len_time == 1:
        return True
    raise ValueError('Climpyrical can not take inputs as time series. Must be uni-dimensional in the time axis.')

def read_data(data_path):
    """Load an ensemble of CanRCM4
    models into a single datacube.
    ------------------------------
    Args:
        data_path (Str): glob pattern matching the
            CanRCM4 ensemble member files
    Returns:
        ds (xarray Dataset): data cube of the ensemble members
            concatenated along a new 'level' dimension.
    Raises:
        ValueError: propagated from check_path when the pattern
            is not a str or matches fewer than two files
        KeyError: propagated from check_keys when the first file
            lacks any of 'rlat', 'rlon', 'lat', 'lon'
    """
    check_path(data_path)

    # glob returns matches in filesystem-dependent order; sort them so the
    # position of each member along 'level' is reproducible between runs.
    nc_list = sorted(glob.glob(data_path))

    # Only the first file is probed for the required grid keys. Open it in
    # a context manager so its file handle is released right away.
    with xr.open_dataset(nc_list[0]) as test_file:
        actual_keys = set(test_file.variables).union(set(test_file.dims))
    check_keys(actual_keys, {'rlat', 'rlon', 'lat', 'lon'})

    xr_list = [xr.open_dataset(path) for path in nc_list]
    ds = xr.concat(xr_list, 'level')

    # The step below is currently disabled; it is kept for reference.
    # find design value key so that it can
    # be renamed to dv for references throughout
    # climpyrical project
#     for var in list(ds.variables):

#         grids = ['rlon', 'rlat', 'lon', 'lat', 'time_bnds', 'bnds', 'time', 'rotated_pole']

#         if var not in grids and var in ds.variables:

#             ds = ds.rename({var: 'dv'})
#             if 'time' in ds.keys():
#                 #check_time(len(ds['time']))
#                 ds = ds.squeeze('time')

    return ds

In [6]:
# Enable the autoreload extension in mode 2 so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# NOTE(review): `%page` is invoked without an argument -- looks like
# leftover interactive debugging; confirm whether it is still needed.
%page
# Register `%t` / `%%t` as aliases for `%timeit` / `%%timeit`.
%alias_magic t timeit

# Glob pattern selecting the ensemble files to load. Earlier alternatives
# are kept commented out below.
# NOTE(review): hard-coded absolute path; only valid on the original machine.
#PATH = '/storage/data/projects/nrc/CanRCM4_large_ensemble/design_values/tas/hdd/*.nc'
#PATH = '/home/nannau/Desktop/ensembles/jan1.0p/*'
PATH = '/storage/data/projects/nrc/CanRCM4_large_ensemble/daily/processed/snw/*1951-2016_max.nc'

# Load every file matching PATH into a single Dataset.
ds = read_data(PATH)
#ds = ds[0].squeeze('time')
# Drop `time` and `time_bnds`, select the `snw` variable and keep index 0
# along its first axis. From here on `ds` is a DataArray, not a Dataset.
ds = ds.drop('time').drop('time_bnds').snw[0, ...]
# ds = ds.drop('time').drop('time_bnds')
# Relabel the `time` dimension as `level`.
ds = ds.rename({'time':'level'})
# Write the result to NetCDF.
# NOTE(review): hard-coded absolute output path in a user's home directory.
ds.to_netcdf('/home/nannau/Desktop/climpyrical/data/snw_CanRCM4-LE_ens1_1951-2016_max.nc')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Created `%t` as an alias for `%timeit`.
Created `%%t` as an alias for `%%timeit`.


<xarray.DataArray 'snw' (level: 66, rlat: 130, rlon: 155)>
array([[[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.],
        ...,
        [0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]],

       [[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.],
        ...,
        [0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]],

       ...,

       [[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.],
        ...,
        [0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]],

       [[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.],
        ...,
        [0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]]], dtype=float32)
Coordinates:
    lon      (rlat, rlon) float64 232.9 233.3 233.7 234.0 ... 335.5 335.9 336.4
    lat      (rlat, rlon) float64 12.36 12.52 12.68 12.84 ... 59.77 59.46 59.15
  * rlon     (rlon) float64 -33.88 -33.44 -33.0 -32.56 ... 33.0 33.44 33.88
  * rlat     (rlat) float64 -28.6 -28.16 -27.72 -27.28 ... 27.28 27.72 28.16
Dimensions without coordinates:

In [None]:
# Attach an explicit index coordinate to the `level` dimension, which has
# no coordinate after the rename in the previous cell.
ds = ds.assign_coords(level=ds.level)
# `ds` is already the `snw` DataArray here, so read its shape directly;
# indexing it with ['snw'] would look up a coordinate named 'snw' and fail.
ds.shape#.to_netcdf('snw_CanRCM4-LE_ens35_1951-2016_max.nc')

In [None]:
# Write `ds` to a NetCDF file.
# NOTE(review): the target path has no separator after '/storage', so this
# writes a file named 'storagesnw_...' at the filesystem root -- looks like
# a truncated path; confirm the intended output location before running.
ds.to_netcdf('/storagesnw_CanRCM4-LE_ens35_1951-2016_max.nc')