In [1]:
import xarray as xr
import geopandas as gpd
import numpy as np
import datetime
import pandas as pd
from collections import OrderedDict
from paths import *
from SM_tools import *
from os import listdir
from os.path import isfile, join

### Function to save model ensemble SWE from .gdat files to a .nc file
Inputs: indir (directory containing the .gdat files), outdir (path of the output .nc file)

* consider having the ensemble script output a 'hoy' or change file naming convention 

In [3]:
#select the water year of interest 
water_year = 2019

#start date
st = str(water_year -1) +'-09-01'
#end date
ed = str(water_year)+'-09-30'

# snotel swe timeseries 
snotel_swe_all = pd.read_csv(assimPath + 'SNOTEL_data_SWEDmeters'+ str(water_year -1) +'-09-01_'+str(water_year)+'-09-30.csv', index_col=0, parse_dates=True)


In [4]:
#optional
# move SM swe w/o assim into dir with assim runs 
SM_noassim = SMpath + 'outputs/wo_assim/swed.gdat'
#path to directory with all SM swe output files
gdatPath = assimPath+'assim2021-02-12/'
nSWEpath = gdatPath + 'baseline_swed.gdat'
!mv $gdatSWEpath $nSWEpath

mv: missing file operand
Try 'mv --help' for more information.


In [63]:
# function to save SM swe outputs from each assim run into one .nc at the evaluation sites
def SMoutput_to_nc(gdatPath, gdf, outfilepath,st,ed,get_mod_output):
    """Collect modeled SWE at station grid cells from every run file and save it as netCDF.

    Parameters
    ----------
    gdatPath : str
        Directory with one model output file per assimilation run. Every
        regular file found in it is read.
    gdf : DataFrame-like
        Station table providing the columns ``x_idx``, ``y_idx`` and ``code``.
    outfilepath : str
        Path of the netCDF file to write.
    st, ed : str
        First and last simulation day as 'YYYY-MM-DD' (both inclusive).
    get_mod_output : callable
        Reader called as ``get_mod_output(path, num_timesteps, nx, ny)``; its
        result is indexed as ``[:, x_idx, y_idx]``.

    Returns
    -------
    xarray.Dataset
        Dataset with variable ``swe`` on dimensions
        ``(assim_run, station, date)``; the same dataset is written to
        ``outfilepath``.

    Notes
    -----
    ``nx`` and ``ny`` are not parameters: they are read from the enclosing
    namespace (presumably grid dimensions supplied by the star imports --
    TODO confirm).
    """
    #number of days in simulation (inclusive of both end points)
    date_fmt = '%Y-%m-%d'
    first_day = datetime.datetime.strptime(st, date_fmt)
    last_day = datetime.datetime.strptime(ed, date_fmt)
    num_timesteps = (last_day - first_day).days + 1

    # One directory listing, sorted once. Run labels are derived from this
    # same list so label k always describes data slice k (sorting the
    # stripped names separately can produce a different order).
    files = sorted(f for f in listdir(gdatPath) if isfile(join(gdatPath, f)))
    # run label = file name without its last 10 characters
    # (e.g. 'baseline_swed.gdat' -> 'baseline')
    filenams = [f[:-10] for f in files]

    # Read station columns positionally so a non-default index on gdf
    # (e.g. after filtering) cannot break or misalign the lookup.
    x_cells = [int(v) for v in gdf['x_idx']]
    y_cells = [int(v) for v in gdf['y_idx']]
    station = gdf['code'].values

    # [#ensemble_members #stations #timesteps]; every cell is filled below
    data = np.empty([len(files), len(station), num_timesteps])

    #for each SM output swe file
    for h, fname in enumerate(files):
        # join() works whether or not gdatPath ends with a path separator
        allswe = get_mod_output(join(gdatPath, fname), num_timesteps, nx, ny)
        for i, (x_idx, y_idx) in enumerate(zip(x_cells, y_cells)):
            data[h, i, :] = np.squeeze(allswe[:, x_idx, y_idx])

    #save output as netcdf
    date = pd.date_range(st,ed,freq='d')

    swe_at_stations = xr.DataArray(
        data,
        dims=('assim_run', 'station', 'date'),
        coords={'assim_run': filenams,
                'station': station, 'date': date})

    swe_at_stations.attrs['long_name']= 'Assimilation SWE at stations'
    swe_at_stations.attrs['standard_name']= 'assim_swe'

    d = OrderedDict()
    d['assim_run'] = ('assim_run', filenams)
    d['station'] = ('station', station)
    d['date'] = ('date', date)
    d['swe'] = swe_at_stations

    ds = xr.Dataset(d)
    ds.attrs['description'] = "SnowModel swe at stations"
    ds.attrs['model_output'] = "SWE [m]"

    ds.assim_run.attrs['standard_name'] = "assimilation_run"
    ds.assim_run.attrs['axis'] = "run"

    ds.station.attrs['long_name'] = "station_id"
    ds.station.attrs['axis'] = "station"

    ds.date.attrs['long_name'] = "date"
    ds.date.attrs['axis'] = "date"

    ds.to_netcdf(outfilepath, format='NETCDF4', engine='netcdf4')
    return ds

In [64]:
# path of the netCDF file that will hold modeled SWE at the evaluation sites
outfilepath = 'test.nc'
assim = SMoutput_to_nc(
    gdatPath=gdatPath,
    gdf=snotel_eval_sites,
    outfilepath=outfilepath,
    st=st,
    ed=ed,
    get_mod_output=get_mod_output,
)

### Function to calculate performance statistics from the above .nc file

In [2]:
# for testing: reopen the saved station SWE file and display its contents
assim = xr.open_dataset(filename_or_obj='test.nc')
assim

In [24]:
def SMensemble_skill_metrics(mod_swe_nc_path, stn_swe_df, outfilepath,calc_metrics):
    """Score every assimilation run against station observations and save the scores.

    Parameters
    ----------
    mod_swe_nc_path : str
        netCDF file holding variable ``swe`` on dimensions
        ``(assim_run, station, date)`` with coordinates ``assim_run`` and
        ``station``.
    stn_swe_df : pandas.DataFrame
        Observed series with one column per station, named by the values of
        the ``station`` coordinate.
    outfilepath : str
        Path of the netCDF file to write.
    calc_metrics : callable
        Called as ``calc_metrics(mod_swe, stn_swe)``; must return one value
        per entry of the metrics list (R2, MBE, RMSE, NSE, KGE).

    Returns
    -------
    xarray.Dataset
        Dataset with variable ``score`` on dimensions
        ``(assim_run, station, metrics)``; the same dataset is written to
        ``outfilepath``.

    Notes
    -----
    Observations are passed to ``calc_metrics`` exactly as stored in
    ``stn_swe_df``; no alignment to the model dates is done here.
    NOTE(review): confirm both series cover the same dates in the same order.
    """
    # Single source of truth for the metric axis (labels and array size).
    metrics = ['R2','MBE','RMSE','NSE','KGE']

    # Load what is needed into memory and close the file right away, so the
    # handle is not leaked and the file can be rewritten afterwards.
    with xr.open_dataset(mod_swe_nc_path) as assim:
        assim_run = assim.assim_run.values
        station = assim.station.values
        # fix the axis order explicitly so positional indexing below is safe
        mod_swe_all = assim['swe'].transpose('assim_run', 'station', 'date').values

    # observed series per station, looked up once instead of once per run
    stn_obs = [stn_swe_df[nam].values for nam in station]

    # [#ensemble_members #stations #metrics]; every cell is filled below
    data = np.empty([len(assim_run), len(station), len(metrics)])

    for h in range(len(assim_run)):
        for i in range(len(station)):
            data[h, i, :] = calc_metrics(mod_swe_all[h, i, :], stn_obs[i])

    scores = xr.DataArray(
        data,
        dims=('assim_run', 'station', 'metrics'),
        coords={'assim_run': assim_run,
                'station': station, 'metrics': metrics})

    scores.attrs['long_name']= 'Performance metrics at stations'
    scores.attrs['standard_name']= 'metrics'

    d = OrderedDict()
    d['assim_run'] = ('assim_run', assim_run)
    d['station'] = ('station', station)
    d['metrics'] = ('metrics', metrics)
    d['score'] = scores

    ds = xr.Dataset(d)
    ds.attrs['description'] = "Performance metrics at stations"
    ds.attrs['model_output'] = "R^2 MBE RMSE NSE KGE"

    ds.assim_run.attrs['standard_name'] = "assimilation_run"
    ds.assim_run.attrs['axis'] = "run"

    ds.station.attrs['long_name'] = "station_id"
    ds.station.attrs['axis'] = "station"

    ds.metrics.attrs['long_name'] = "performance_metrics"
    ds.metrics.attrs['axis'] = "metrics"

    ds.to_netcdf(outfilepath, format='NETCDF4', engine='netcdf4')
    return ds

In [25]:
# path of the netCDF file that will hold the per-run, per-station skill scores
outfilepath = 'skill_scores.nc'
SMensemble_skill_metrics(
    mod_swe_nc_path='test.nc',
    stn_swe_df=snotel_swe_all,
    outfilepath=outfilepath,
    calc_metrics=calc_metrics,
)