# Forcing data for SUMMA

In [1]:
# netcdf/numpy/xray/stats
import numpy as np
from datetime import datetime, timedelta
import pandas as pd
import xarray as xr
from scipy.stats.stats import pearsonr

# OS interaction
import sys, pickle, os

# import plotting
import seaborn as sns
import matplotlib
from matplotlib.pyplot import subplots
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.basemap import Basemap
import kgraph

# Offline Turbulence Package
import turbpy

# Customize
# Seaborn defaults applied to every figure made in this notebook
sns.set_style("whitegrid")
sns.set_context('paper')
# IPython magic: draw matplotlib figures inline in the cell output
%matplotlib inline

In [2]:
# --------------------------------------------------------------------------------------------------------------------
# Directory Lists
# Root of the gdrive directory, chosen from the operating system this kernel runs on.
if 'linux' in sys.platform:
    # Unix
    dirPre = '/home/lapok/gdrive/'
elif 'darwin' in sys.platform:
    # Mac
    dirPre = '/Users/karllapo/gdrive/'
else:
    # Without this branch dirPre is never bound and the next statement dies with an
    # unhelpful NameError; fail here with a message that names the actual problem.
    raise RuntimeError('No gdrive root is configured for platform: ' + sys.platform)

# Project directories, all derived from the platform-specific root above
dirProj = dirPre + 'SnowHydrology/proj/ModTsfc/'
dirPrint = dirProj + 'Graphics'
dirData = dirProj + 'data'



## Open data

In [3]:
# Move into the data directory so the site files can be opened by bare filename
os.chdir(dirData)
# One dataset per site file, bound to the site's short name
SWA, SNQ, CDP = [xr.open_dataset(siteFile)
                 for siteFile in ('SWA.ModTsfc.nc', 'SNQ.ModTsfc.nc', 'CDP.ModTsfc.nc')]

In [4]:
# --------------------------------------------------------------------------------------------
# Resample to daily
# Both branches below read or write files by bare filename. Change into the data
# directory up front so the load branch does not silently depend on an earlier cell
# having left the working directory there.
os.chdir(dirData)

# Set to True to recompute the daily means and overwrite the cached files;
# leave False to load the previously written daily files.
dailyResample = False
if dailyResample:
    # Daily means of every variable along the time dimension.
    # NOTE(review): `how=`/`freq=`/`dim=` is the legacy xarray resample signature; newer
    # xarray releases spell this `.resample(time='1D').mean()` -- confirm against the
    # installed xarray version before switching.
    SWA_daily = SWA.resample(how='mean', freq='d', dim='time')
    SNQ_daily = SNQ.resample(how='mean', freq='d', dim='time')
    CDP_daily = CDP.resample(how='mean', freq='d', dim='time')

    # Cache the daily means next to the source files
    SWA_daily.to_netcdf('SWA.ModTsfc.Daily.nc')
    SNQ_daily.to_netcdf('SNQ.ModTsfc.Daily.nc')
    CDP_daily.to_netcdf('CDP.ModTsfc.Daily.nc')

# Load daily resamples
else:
    SWA_daily = xr.open_dataset('SWA.ModTsfc.Daily.nc')
    SNQ_daily = xr.open_dataset('SNQ.ModTsfc.Daily.nc')
    CDP_daily = xr.open_dataset('CDP.ModTsfc.Daily.nc')

# --------------------------------------------------------------------------------------------
# Dictionaries for iterating
# Sub-daily datasets keyed by site name
met = {'SWA': SWA,
       'SNQ': SNQ,
       'CDP': CDP}

# Daily-mean datasets keyed by '<site>_daily'
metDaily = {'SWA_daily': SWA_daily,
            'SNQ_daily': SNQ_daily,
            'CDP_daily': CDP_daily}

# Examples
## Forcing file meta

```
Summa expects the following information in the forcing data file
pptrate        | 7                      ! precipitation rate              (kg m-2 s-1)
SWRadAtm       | 8                      ! downwelling shortwave radiation (W m-2)
LWRadAtm       | 9                      ! downwelling longwave radiation  (W m-2)
airtemp        | 10                     ! air temperature                 (K)
windspd        | 11                     ! windspeed                       (m/s)
airpres        | 12                     ! pressure                        (Pa)
spechum        | 13                     ! specific humidity               (g/g)
data_step      | 1800                   ! length of time step (seconds)
```

## Forcing data example

In [5]:
# Print the structure of a SUMMA test-case forcing file to use as a template.
cwd = os.getcwd()
# Build the path from the platform-aware project root instead of a Mac-only absolute path
os.chdir(dirProj + 'summa/summaTestCases/testCases_data/inputData/fieldData/reynolds')
try:
    # The dataset is only needed for printing, so close it as soon as that is done
    with xr.open_dataset('forcing_above_aspen.nc') as exampleForcing:
        print(exampleForcing)
finally:
    os.chdir(cwd)  # Return to where I was before, even if the file could not be opened

<xarray.Dataset>
Dimensions:    (hru: 1, time: 87672)
Coordinates:
  * time       (time) datetime64[ns] 1998-10-01T01:00:00.028800 ...
Dimensions without coordinates: hru
Data variables:
    hruId      (hru) int32 1001
    latitude   (hru) float64 43.2
    longitude  (hru) float64 243.2
    data_step  timedelta64[ns] 01:00:00
    LWRadAtm   (time, hru) float64 323.0 320.0 313.0 309.0 308.0 312.0 320.0 ...
    SWRadAtm   (time, hru) float64 0.0 0.0 0.0 0.0 0.0 0.0 4.0 88.0 206.0 ...
    airpres    (time, hru) float64 7.935e+04 7.935e+04 7.935e+04 7.935e+04 ...
    airtemp    (time, hru) float64 283.3 282.8 281.5 280.9 280.7 281.1 281.8 ...
    pptrate    (time, hru) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...
    spechum    (time, hru) float64 0.005053 0.00498 0.004664 0.00463 ...
    windspd    (time, hru) float64 2.3 2.5 2.4 2.3 2.6 2.4 3.5 4.5 4.1 4.5 ...
Attributes:
    dataset_orig_path: /d2/anewman/summa/summaTestCases/testCases_data/inputData/fieldData/reynolds
    histor

## Attributes file example

In [6]:
# Print the structure of a SUMMA test-case local attributes file to use as a template.
cwd = os.getcwd()
# Build the path from the platform-aware project root instead of a Mac-only absolute path
os.chdir(dirProj + 'summa/summaTestCases/settings/wrrPaperTestCases/figure01')
try:
    # The dataset is only needed for printing, so close it as soon as that is done
    with xr.open_dataset('summa_zLocalAttributes_riparianAspen.nc') as exampleAttrs:
        print(exampleAttrs)
finally:
    os.chdir(cwd)  # Return to where I was before, even if the file could not be opened

<xarray.Dataset>
Dimensions:         (gru: 1, hru: 1)
Dimensions without coordinates: gru, hru
Data variables:
    hru2gruId       (hru) int32 1001
    downHRUindex    (hru) int32 0
    slopeTypeIndex  (hru) int32 1
    soilTypeIndex   (hru) int32 8
    vegTypeIndex    (hru) int32 11
    mHeight         (hru) float64 19.25
    contourLength   (hru) float64 180.8
    tan_slope       (hru) float64 0.2083
    elevation       (hru) float64 2.097e+03
    longitude       (hru) float64 243.2
    latitude        (hru) float64 43.2
    HRUarea         (hru) float64 3.27e+04
    hruId           (hru) int32 1001
    gruId           (gru) int32 1001
Attributes:
    datasource: *zLocalAttributes*
    history: Tue Sep 27 21:51:50 2016: ncrename -d nhru,hru -d ngru,gru ./wrrPaperTestCases/figure01/summa_zLocalAttributes_riparianAspen.nc
    NCO: "4.6.0"


### Notes on working with summa and netcdfs
xarray and summa don't play well with the data_step variable. 
    - You have to write data_step as an int representing the timestep in number of seconds.
xarray is not smart about reading files
    - The expected shape for netcdf files in summa is [time, hru]. It does not smartly read labeled dimensions. Hence the `.T`
summa does not pay attention to the units description on the time variable. It needs time written as days since 1990-01-01.

## Swamp Angel

In [7]:
# Restrict Swamp Angel to 1 Oct 2006 through 30 Sep 2012 (water years 2007-2012)
swaPeriod = slice(datetime(2006, 10, 1), datetime(2012, 9, 30, 23, 0, 0))
SWA = SWA.loc[dict(time=swaPeriod)]

# Alternative numeric time axis: days since 1990-01-01, advancing 1/24 day per record.
# Nothing below uses it -- the dataset is built with datetime coordinates and an explicit
# time encoding instead -- but passing coords={'time': timeForNetcdf} is the other option.
ns = 1e-9  # number of seconds in a nanosecond
firstStampNs = SWA.time.values[0].astype(int)
startdate = datetime.utcfromtimestamp(firstStampNs * ns)
numdays = (startdate - datetime(1990, 1, 1)).days
recordIndex = np.arange(0, SWA.time.size)
timeForNetcdf = numdays + recordIndex * 1 / 24

# Small helpers shared by the two datasets built below
dimsTimeHru = ['time', 'hru']
dimsHru = ['hru']
asColumn = lambda series: np.atleast_2d(series).T  # transpose so time comes first, then hru
asInt32 = lambda value: np.atleast_1d(np.int32(value))

# Forcing dataset with the variable names summa expects.
# The only conversion applied is the +273.15 offset on air temperature
# (presumably deg C -> K; confirm the units of the source variables).
modelSWA = xr.Dataset(
    dict(
        pptrate=(dimsTimeHru, asColumn(SWA.precipSnow)),
        SWRadAtm=(dimsTimeHru, asColumn(SWA.SWdwn)),
        LWRadAtm=(dimsTimeHru, asColumn(SWA.LWdwn)),
        airtemp=(dimsTimeHru, asColumn(SWA.Tair) + 273.15),
        windspd=(dimsTimeHru, asColumn(SWA.WIND)),
        airpres=(dimsTimeHru, asColumn(SWA.Press)),
        spechum=(dimsTimeHru, asColumn(SWA.QS)),
        data_step=float(3600),
        hruId=(dimsHru, np.atleast_1d(1001)),
        latitude=(dimsHru, np.atleast_1d(37.906914)),
        longitude=(dimsHru, np.atleast_1d(-107.711322)),
    ),
    coords=dict(time=pd.to_datetime(SWA.time.values)),
)

# Ask xarray to write time as days since 1990-01-01 on a standard calendar
timeEncoding = modelSWA.time.encoding
timeEncoding['units'] = 'days since 1990-01-01'
timeEncoding['calendar'] = 'standard'
print(modelSWA.time)

# Write the forcing file into the data directory
os.chdir(dirData)
modelSWA.to_netcdf('SWA.ModTsfc.ModelForcing.nc')

# Local attributes for the single hru / gru of this site
attrSWA = xr.Dataset(
    dict(
        hru2gruId=(dimsHru, asInt32(1001)),
        gruId=(['gru'], asInt32(1001)),
        downHRUindex=(dimsHru, asInt32(0)),
        slopeTypeIndex=(dimsHru, asInt32(1)),
        soilTypeIndex=(dimsHru, asInt32(2)),
        vegTypeIndex=(dimsHru, asInt32(2)),
        mHeight=(dimsHru, np.atleast_1d(3.)),
        contourLength=(dimsHru, asInt32(1)),
        tan_slope=(dimsHru, asInt32(0)),
        elevation=(dimsHru, np.atleast_1d(3371.0)),
        latitude=(dimsHru, np.atleast_1d(37.906914)),
        longitude=(dimsHru, np.atleast_1d(-107.711322)),
        HRUarea=(dimsHru, np.atleast_1d(1.0)),
        hruId=(dimsHru, asInt32(1001)),
    )
)

# Write the attributes file alongside the forcing file, then show what was written
os.chdir(dirData)
attrSWA.to_netcdf('SWA.ModTsfc.Attributes.nc')
print(attrSWA)

<xarray.DataArray 'time' (time: 52608)>
array(['2006-10-01T00:00:00.000000000', '2006-10-01T01:00:00.000000000',
       '2006-10-01T02:00:00.000000000', ..., '2012-09-30T21:00:00.000000000',
       '2012-09-30T22:00:00.000000000', '2012-09-30T23:00:00.000000000'], dtype='datetime64[ns]')
Coordinates:
  * time     (time) datetime64[ns] 2006-10-01 2006-10-01T01:00:00 ...
<xarray.Dataset>
Dimensions:         (gru: 1, hru: 1)
Dimensions without coordinates: gru, hru
Data variables:
    downHRUindex    (hru) int32 0
    vegTypeIndex    (hru) int32 2
    elevation       (hru) float64 3.371e+03
    latitude        (hru) float64 37.91
    soilTypeIndex   (hru) int32 2
    hruId           (hru) int32 1001
    gruId           (gru) int32 1001
    hru2gruId       (hru) int32 1001
    mHeight         (hru) float64 3.0
    slopeTypeIndex  (hru) int32 1
    HRUarea         (hru) float64 1.0
    contourLength   (hru) int32 1
    longitude       (hru) float64 -107.7
    tan_slope       (hru) int32 0


In [8]:
# CDP causes summa to hang in 1999, so let's just do the 2000s
# (1 Oct 2002 through 30 Sep 2010)
CDP = CDP.loc[dict(time = slice(datetime(2002, 10, 1), datetime(2010, 9, 30, 23, 0, 0)))]

# Forcing dataset with the variable names summa expects.
# The only conversion applied is the +273.15 offset on air temperature
# (presumably deg C -> K; confirm the units of the source variables).
# NOTE(review): the printed dataset shows timestamps that are not exactly on the hour
# (e.g. 00:59:59.999987) and 70127 records, while data_step assumes a regular 3600 s
# step -- confirm the time axis is complete and regular enough for summa.
modelCDP = xr.Dataset({'pptrate': (['time', 'hru'], np.atleast_2d(CDP.precipSnow).T),
                       'SWRadAtm': (['time', 'hru'], np.atleast_2d(CDP.SWdwn).T),
                       'LWRadAtm': (['time', 'hru'], np.atleast_2d(CDP.LWdwn).T),
                       'airtemp': (['time', 'hru'], np.atleast_2d(CDP.Tair).T + 273.15),
                       'windspd': (['time', 'hru'], np.atleast_2d(CDP.WIND).T),
                       'airpres': (['time', 'hru'], np.atleast_2d(CDP.Press).T),
                       'spechum': (['time', 'hru'], np.atleast_2d(CDP.QS).T),
                       'data_step': 3600,
                       'hruId': (['hru'], np.atleast_1d(1001)),
                       'latitude': (['hru'], np.atleast_1d(45.300000)),
                       'longitude': (['hru'], np.atleast_1d(5.770000)),
                      },
                      coords={'time': pd.to_datetime(CDP.time.values)})
# summa needs time as days since 1990, so set the encoding explicitly rather than
# letting xarray pick its own reference date when the file is written.
modelCDP.time.encoding['units'] = 'days since 1990-01-01'
modelCDP.time.encoding['calendar'] = 'standard'

# Write the forcing file into the data directory
os.chdir(dirData)
modelCDP.to_netcdf('CDP.ModTsfc.ModelForcing.nc')
print(modelCDP)

# Local attributes for the single hru / gru of this site
attrCDP = xr.Dataset({'hru2gruId': (['hru'], np.atleast_1d(np.int32(1001))),
                      'gruId': (['gru'], np.atleast_1d(np.int32(1001))),
                      'downHRUindex': (['hru'], np.atleast_1d(np.int32(0))),
                      'slopeTypeIndex': (['hru'], np.atleast_1d(np.int32(1))),
                      'soilTypeIndex': (['hru'], np.atleast_1d(np.int32(2))),
                      'vegTypeIndex': (['hru'], np.atleast_1d(np.int32(2))),
                      'mHeight': (['hru'], np.atleast_1d(1.5)),
                      'contourLength': (['hru'], np.atleast_1d(np.int32(1))),
                      'tan_slope': (['hru'], np.atleast_1d(np.int32(0))),
                      'elevation': (['hru'], np.atleast_1d(1325.0)),
                      'longitude': (['hru'], np.atleast_1d(5.770000)),
                      'latitude': (['hru'], np.atleast_1d(45.300000)),
                      'HRUarea': (['hru'], np.atleast_1d(1.0)),
                      'hruId': (['hru'], np.atleast_1d(np.int32(1001))),
                     })
# Write the attributes file alongside the forcing file, then show what was written
os.chdir(dirData)
attrCDP.to_netcdf('CDP.ModTsfc.Attributes.nc')
print(attrCDP)

<xarray.Dataset>
Dimensions:    (hru: 1, time: 70127)
Coordinates:
  * time       (time) datetime64[ns] 2002-10-01 2002-10-01T00:59:59.999987 ...
Dimensions without coordinates: hru
Data variables:
    longitude  (hru) float64 5.77
    LWRadAtm   (time, hru) float64 328.1 326.7 326.1 324.4 321.1 323.9 322.2 ...
    airtemp    (time, hru) float64 280.1 279.9 279.6 279.5 279.0 279.2 279.2 ...
    data_step  int64 3600
    latitude   (hru) float64 45.3
    windspd    (time, hru) float64 0.4 0.2 0.2 0.4 0.1 0.2 0.3 0.1 0.3 0.7 ...
    airpres    (time, hru) float64 8.772e+04 8.769e+04 8.767e+04 8.765e+04 ...
    hruId      (hru) int64 1001
    pptrate    (time, hru) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...
    SWRadAtm   (time, hru) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 17.5 31.94 ...
    spechum    (time, hru) float64 0.007201 0.007107 0.006995 0.006923 ...
<xarray.Dataset>
Dimensions:         (gru: 1, hru: 1)
Dimensions without coordinates: gru, hru
Data variables:
    downHRUin

## Setup Initial Conditions 

My plan is to just use the example file, since I don't have a good sense of the initial conditions for the site

In [9]:
# Load the example initial-conditions file shipped with the SUMMA test cases.
cwd = os.getcwd()
# Build the path from the platform-aware project root instead of a Mac-only absolute path
os.chdir(dirProj + 'summa/summaTestCases/settings/wrrPaperTestCases/figure01')
try:
    # Kept open (no `with`) because summaInitCon is used again after this cell
    summaInitCon = xr.open_dataset('summa_zInitialCond.nc')
    print(summaInitCon)
finally:
    os.chdir(cwd)  # Return to where I was before, even if the file could not be opened

<xarray.Dataset>
Dimensions:               (hru: 5, ifcToto: 9, midSoil: 8, midToto: 8, scalarv: 1)
Dimensions without coordinates: hru, ifcToto, midSoil, midToto, scalarv
Data variables:
    mLayerVolFracIce      (midToto, hru) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...
    scalarCanairTemp      (scalarv, hru) float64 286.0 286.0 286.0 286.0 286.0
    nSnow                 (scalarv, hru) int64 0 0 0 0 0
    iLayerHeight          (ifcToto, hru) float64 0.0 0.0 0.0 0.0 0.0 0.01 ...
    mLayerMatricHead      (midSoil, hru) float64 -1.0 -1.0 -1.0 -1.0 -1.0 ...
    scalarSnowAlbedo      (scalarv, hru) float64 0.82 0.82 0.82 0.82 0.82
    dt_init               (scalarv, hru) float64 60.0 60.0 60.0 60.0 60.0
    mLayerTemp            (midToto, hru) float64 288.5 288.5 288.5 288.5 ...
    scalarSfcMeltPond     (scalarv, hru) float64 0.0 0.0 0.0 0.0 0.0
    scalarCanopyTemp      (scalarv, hru) float64 290.0 290.0 290.0 290.0 290.0
    scalarSnowDepth       (scalarv, hru) float64 0.0 0.0 0.0 0.0 0

In [10]:
# Save a single hru initial condition as the default in summapy
# Selecting hru=0 drops the hru dimension, so a second run of this cell would fail on
# the already-reduced dataset; only select while the dimension is still present.
if 'hru' in summaInitCon.dims:
    summaInitCon = summaInitCon.sel(hru=0)
print(summaInitCon)
# Build the target path from the platform-aware project root instead of a Mac-only
# absolute path. The working directory is left in this settings directory afterwards.
os.chdir(dirProj + 'summa/ModTsfc_summa/settings/summapyDevelop/CDP')
summaInitCon.to_netcdf('summa_zInitialCond.nc')

<xarray.Dataset>
Dimensions:               (ifcToto: 9, midSoil: 8, midToto: 8, scalarv: 1)
Dimensions without coordinates: ifcToto, midSoil, midToto, scalarv
Data variables:
    mLayerVolFracIce      (midToto) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    scalarCanairTemp      (scalarv) float64 286.0
    nSnow                 (scalarv) int64 0
    iLayerHeight          (ifcToto) float64 0.0 0.01 0.04 0.1 0.25 0.5 1.0 ...
    mLayerMatricHead      (midSoil) float64 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 ...
    scalarSnowAlbedo      (scalarv) float64 0.82
    dt_init               (scalarv) float64 60.0
    mLayerTemp            (midToto) float64 288.5 288.5 288.5 288.5 288.5 ...
    scalarSfcMeltPond     (scalarv) float64 0.0
    scalarCanopyTemp      (scalarv) float64 290.0
    scalarSnowDepth       (scalarv) float64 0.0
    nSoil                 (scalarv) int64 8
    scalarSWE             (scalarv) float64 0.0
    scalarCanopyLiq       (scalarv) float64 3.16
    mLayerVolFracLiq      (midToto)