# Load and Save Files for CanRCM4 Downscaling Project
Load in and process HRDPS files here then export them as netcdf or numpy arrays

In [1]:
import datetime as dt
import pandas as pd
import numpy as np
import xarray as xr
from pathlib import Path

In [2]:
#HRDPS data for all of 2016 to end of 2019
path = Path("/home/rbeutel/analysis/eosc510/proj.ipynb").resolve().parents[2]
path2 = "/results/forcing/atmospheric/GEM2.5/operational/"
path = path/path2

hrdps = xr.open_mfdataset(sorted(path.glob("ops_y201*.nc")))

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the

In [3]:
#next we want the HRDPS data to be daily instead of hourly
hrdps = hrdps.resample(time_counter='D').mean(dim='time_counter')

In [4]:
#find index of extra day (leap day)
ind = list(hrdps.time_counter.values).index(np.datetime64('2016-02-29T00:00:00.000000000'))

#and then trim to be in a more relevant extent and remove leap day
#(dont need the HRDPS data to stretch as far as it does inland)
hrdps = hrdps.sel(x=slice(0., 480000.),time_counter=slice('2016-01-01 12:00:00', '2019-12-31 12:00:00'))
hrdps_P = np.concatenate((hrdps.atmpres.values[0:ind,:,:], hrdps.atmpres.values[(ind+1):,:,:]))
hrdps_U = np.concatenate((hrdps.u_wind.values[0:ind,:,:], hrdps.u_wind.values[(ind+1):,:,:]))
hrdps_V = np.concatenate((hrdps.v_wind.values[0:ind,:,:], hrdps.v_wind.values[(ind+1):,:,:]))
hrdps_T = np.concatenate((hrdps.tair.values[0:ind,:,:], hrdps.tair.values[(ind+1):,:,:]))

In [6]:
# set up titles
netcdf_title = 'HRDPSsubset.nc'
netcdf_comment = 'HRDPS2.5 datast used in CanRCM4 downscaling attempt for the Salish Sea'
notebook = 'LoadFiles.ipynb'

ds_attrs = {
        'creator_email':
            'rbeutel@eoas.ubc.ca',
        'institution_fullname': (
            'Earth, Ocean & Atmospheric Sciences,'
            ' University of British Columbia'
        ),
    'title': netcdf_title,
    'comment': netcdf_comment,
    'notebook': notebook,
    'summary': f'sea-level pressure, N/S wind, E/W wind, temperature',
    'history': (
            '[{}] File creation.'
            .format(dt.datetime.today().strftime('%Y-%m-%d'))
        )
}

coords_c = {
    'x' : np.arange(hrdps.x.shape[0]),
    'y' : np.arange(hrdps.y.shape[0]),
}

coords = {
    'x' : np.arange(hrdps.x.shape[0]),
    'y' : np.arange(hrdps.y.shape[0]),
    'time_counter' : hrdps.time_counter.values
}

data_c = {}
var_attrs_c = {} 

data_c['nav_lat'] = hrdps.nav_lat.values
var_attrs_c['nav_lat'] = {'units': 'degrees_north',
                       'long_name': 'latitude'}

data_c['nav_lon'] = hrdps.nav_lon.values
var_attrs_c['nav_lon'] = {'units': 'degrees_east',
                       'long_name': 'longitude'}

da = {}
for var in data_c:
    da[var] = xr.DataArray(
        data = data_c[var],
        name=var,
        dims=('y', 'x'),
        coords = coords_c,
        attrs = var_attrs_c[var])

data = {}
var_attrs = {}

var_attrs['slp'] = {'units': 'Pa',
                      'long_name': 'Pressure Reduced to MLS [Pa]'}
data['slp'] = hrdps_P

var_attrs['u_wind'] = {'units': 'm/s',
                      'long_name': 'U component of wind [m/s]'}
data['u_wind'] = hrdps_U

var_attrs['v_wind'] = {'units': 'm/s',
                      'long_name': 'V component of wind [m/s]'}
data['v_wind'] = hrdps_V

var_attrs['temp'] = {'units': 'oC',
                      'long_name': 'Surface temperature [oC]'}
data['temp'] = hrdps_T

for var in data:
    da[var] = xr.DataArray(
        data = data[var],
        name=var,
        dims=('time_counter', 'y', 'x'),
        coords = coords,
        attrs = var_attrs[var])
    

ds = xr.Dataset(
        data_vars={
            'nav_lat': da['nav_lat'],
            'nav_lon': da['nav_lon'],
            'slp': da['slp'],
            'u_wind': da['u_wind'],
            'v_wind': da['v_wind'],
            'temp': da['temp']},
        coords = coords,
        attrs = ds_attrs
)

ds

ValueError: different number of dimensions on data and dims: 3 vs 2