In [1]:
import xarray as xr
import geopandas as gpd
import numpy as np
import datetime
import pandas as pd
from collections import OrderedDict
from paths import *
from SM_tools import *
from os import listdir
from os.path import isfile, join

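### Function to save model ensemble SWE from `.gdat` files to a single `.nc` file
Inputs: the input directory of `.gdat` files (indir) and the output directory for the `.nc` file (outdir)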

* TODO: consider having the ensemble script output an 'hoy' or change the file naming convention

wy 2020 --> assim_two_2021-05-18/

wy 2019 --> assim_2021-05-07/

wy 2018 --> assim_2021-05-18/

In [2]:
# Water year to process.
water_year = 2019

# First and last day of the simulation as 'YYYY-MM-DD' strings.
st = f'{water_year - 1}-09-01'
ed = f'{water_year}-09-30'

# SNOTEL SWE time series; the CSV file name embeds the same start/end dates.
snotel_csv = dataPath + 'SNOTEL_data_SWEDmeters' + st + '_' + ed + '.csv'
snotel_swe_all = pd.read_csv(snotel_csv, index_col=0, parse_dates=True)

# Directory holding the assimilation SWE outputs.
gdatPath = '/nfs/attic/dfh/Aragon2/WY_gdat/assim2019/'
# Alternative location: gdatPath = gdat_out_path+ 'assim_2021-05-18/'

In [3]:
# ##optional
# ## move SM swe w/o assim into dir with assim runs 
# SM_noassim = '/scratch/Nina/WY_scratch_snotel/jan2021_snowmodel-dfhill_aspect/outputs/wo_assim/swed.gdat'
# #path to directory with all SM swe output files
# #gdatPath = assimPath+'assim2021-02-12/'
# nSWEpath = gdatPath + 'baseline_swed.gdat'
# !mv $SM_noassim $nSWEpath

In [3]:
# function to save SM swe outputs from each assim run into one .nc at the evaluation sites
def SMoutput_to_nc(gdatPath, gdf, st,ed,get_mod_output, nx, ny):
    """Gather SWE at the evaluation sites from every run file into one Dataset.

    Every regular file found in ``gdatPath`` is read as one assimilation run.
    The run label is the file name with its last 10 characters removed
    (the length of the ``_swed.gdat`` suffix).

    Parameters
    ----------
    gdatPath : str
        Directory containing one SWE output file per run. A trailing
        separator is optional.
    gdf : DataFrame-like
        Table with ``x_idx``, ``y_idx`` and ``code`` columns, one row per
        station. Rows are read positionally, so any index is accepted.
    st, ed : str
        First and last simulation day, formatted ``YYYY-MM-DD`` (inclusive).
    get_mod_output : callable
        Called as ``get_mod_output(path, num_timesteps, ny, nx)``; its result
        is indexed as ``[:, y_idx, x_idx]``.
    nx, ny : int
        Grid dimensions forwarded to ``get_mod_output``.

    Returns
    -------
    xarray.Dataset
        Variable ``swe`` with dims ``(assim_run, station, date)``.
    """
    # Number of days in the simulation, both end points included.
    day_fmt = '%Y-%m-%d'
    num_timesteps = (datetime.datetime.strptime(ed, day_fmt)
                     - datetime.datetime.strptime(st, day_fmt)).days + 1

    # List the directory once and derive the run labels from the *same*
    # ordered list. Sorting the truncated labels separately puts e.g.
    # 'both_M_1' before 'both_M_11' while the files sort the other way
    # round, which attaches labels to the wrong rows of data.
    files = sorted(f for f in listdir(gdatPath) if isfile(join(gdatPath, f)))
    filenams = [f[:-10] for f in files]

    # Positional station lookups (independent of the frame's index labels).
    x_idx = np.asarray(gdf['x_idx']).astype(int)
    y_idx = np.asarray(gdf['y_idx']).astype(int)
    station = gdf['code'].values

    # [#ensemble_members, #stations, #timesteps]; every cell is filled below.
    data = np.empty([len(files), len(station), num_timesteps])

    for h, fname in enumerate(files):
        # join() works whether or not gdatPath ends with a separator.
        path = join(gdatPath, fname)
        print(path)
        allswe = get_mod_output(path, num_timesteps, ny, nx)
        for i, (yi, xi) in enumerate(zip(y_idx, x_idx)):
            data[h, i, :] = np.squeeze(allswe[:, yi, xi])

    # Assemble the output dataset.
    date = pd.date_range(st, ed, freq='D')

    swe_da = xr.DataArray(
        data,
        dims=('assim_run', 'station', 'date'),
        coords={'assim_run': filenams,
                'station': station, 'date': date})

    swe_da.attrs['long_name']= 'Assimilation SWE at stations'
    swe_da.attrs['standard_name']= 'assim_swe'

    d = OrderedDict()
    d['assim_run'] = ('assim_run', filenams)
    d['station'] = ('station', station)
    d['date'] = ('date', date)
    d['swe'] = swe_da

    ds = xr.Dataset(d)
    ds.attrs['description'] = "SnowModel swe at stations"
    ds.attrs['model_output'] = "SWE [m]"

    ds.assim_run.attrs['standard_name'] = "assimilation_run"
    ds.assim_run.attrs['axis'] = "run"

    ds.station.attrs['long_name'] = "station_id"
    ds.station.attrs['axis'] = "station"

    ds.date.attrs['long_name'] = "date"
    ds.date.attrs['axis'] = "date"
    return ds

In [6]:
# Add grid indices to the SNOTEL evaluation sites.
insites = point_index_from_grid(snotel_eval_sites, dem_path)

# Pull SWE at those sites out of every run file found in gdatPath.
assim = SMoutput_to_nc(
    gdatPath=gdatPath, gdf=insites, st=st, ed=ed,
    get_mod_output=get_mod_output, nx=nx, ny=ny,
)

# Output .nc file name/path (the file itself is written in a later cell).
outfilepath = f'{assim_file_path}assim_swe_{water_year}.nc'

/nfs/attic/dfh/Aragon2/WY_gdat/assim2019/baseline_swed.gdat
/nfs/attic/dfh/Aragon2/WY_gdat/assim2019/both_M_11_swed.gdat
/nfs/attic/dfh/Aragon2/WY_gdat/assim2019/both_M_12_swed.gdat
/nfs/attic/dfh/Aragon2/WY_gdat/assim2019/both_M_1_swed.gdat
/nfs/attic/dfh/Aragon2/WY_gdat/assim2019/both_M_2_swed.gdat
/nfs/attic/dfh/Aragon2/WY_gdat/assim2019/both_all_swed.gdat
/nfs/attic/dfh/Aragon2/WY_gdat/assim2019/both_aspect_0.0_swed.gdat
/nfs/attic/dfh/Aragon2/WY_gdat/assim2019/both_aspect_2.0_swed.gdat
/nfs/attic/dfh/Aragon2/WY_gdat/assim2019/both_aspect_4.0_swed.gdat
/nfs/attic/dfh/Aragon2/WY_gdat/assim2019/both_aspect_6.0_swed.gdat
/nfs/attic/dfh/Aragon2/WY_gdat/assim2019/both_day_delta10_swed.gdat
/nfs/attic/dfh/Aragon2/WY_gdat/assim2019/both_day_delta3_swed.gdat
/nfs/attic/dfh/Aragon2/WY_gdat/assim2019/both_day_delta5_swed.gdat
/nfs/attic/dfh/Aragon2/WY_gdat/assim2019/both_day_delta7_swed.gdat
/nfs/attic/dfh/Aragon2/WY_gdat/assim2019/both_elev_1_swed.gdat
/nfs/attic/dfh/Aragon2/WY_gdat/assim20

In [7]:
# Write the station SWE dataset to outfilepath in NetCDF4 format.
assim.to_netcdf(outfilepath, format='NETCDF4', engine='netcdf4')

In [8]:
# Display the dataset built above.
assim

### Function to calculate performance statistics using the above .nc file

In [5]:
# For testing: reopen the saved station SWE file and display it.
saved_swe_path = f'{assim_file_path}assim_swe_{water_year}.nc'
assim = xr.open_dataset(saved_swe_path)
assim

In [10]:
def SMensemble_skill_metrics(mod_swe_nc_path, stn_swe_df,calc_metrics):
    """Score every assimilation run against station SWE over the full record.

    Parameters
    ----------
    mod_swe_nc_path : str
        Path to a NetCDF file with coordinates ``assim_run`` and ``station``
        and a variable ``swe`` whose dims are ordered
        ``(assim_run, station, date)``.
    stn_swe_df : pandas.DataFrame
        Observed SWE with one column per station name. The column values are
        passed as-is, so rows must already line up with the model dates.
    calc_metrics : callable
        Called as ``calc_metrics(mod_swe, stn_swe)``; must return five values
        in the order R2, MBE, RMSE, NSE, KGE.

    Returns
    -------
    xarray.Dataset
        Variable ``score`` with dims ``(assim_run, station, metrics)``.
    """
    metrics = ['R2','MBE','RMSE','NSE','KGE']

    # Read what is needed while the file is open, then release the handle so
    # the NetCDF file is not left locked for later writes.
    with xr.open_dataset(mod_swe_nc_path) as assim:
        assim_run = assim.assim_run.values
        station = assim.station.values
        # One bulk read instead of a label lookup per run/station pair.
        swe = assim.swe.values

    # [#ensemble_members, #stations, #metrics]
    data = np.empty([len(assim_run), len(station), len(metrics)])

    for h in range(len(assim_run)):
        for i, nam in enumerate(station):
            data[h, i, :] = calc_metrics(swe[h, i, :], stn_swe_df[nam].values)

    score_da = xr.DataArray(
        data,
        dims=('assim_run', 'station', 'metrics'),
        coords={'assim_run': assim_run,
                'station': station, 'metrics': metrics})

    score_da.attrs['long_name']= 'Performance metrics at stations'
    score_da.attrs['standard_name']= 'metrics'

    d = OrderedDict()
    d['assim_run'] = ('assim_run', assim_run)
    d['station'] = ('station', station)
    d['metrics'] = ('metrics', metrics)
    d['score'] = score_da

    ds = xr.Dataset(d)
    ds.attrs['description'] = "Performance metrics at stations"
    ds.attrs['model_output'] = "R^2 MBE RMSE NSE KGE"

    ds.assim_run.attrs['standard_name'] = "assimilation_run"
    ds.assim_run.attrs['axis'] = "run"

    ds.station.attrs['long_name'] = "station_id"
    ds.station.attrs['axis'] = "station"

    ds.metrics.attrs['long_name'] = "performance_metrics"
    ds.metrics.attrs['axis'] = "metrics"
    return ds

In [11]:
# Score every run over the full record.
mod_swe_nc_path = f'{assim_file_path}assim_swe_{water_year}.nc'
scores = SMensemble_skill_metrics(
    mod_swe_nc_path=mod_swe_nc_path,
    stn_swe_df=snotel_swe_all,
    calc_metrics=calc_metrics,
)

# Save the scores next to the SWE file.
outfilepath = f'{assim_file_path}skill_scores_{water_year}.nc'
scores.to_netcdf(outfilepath, format='NETCDF4', engine='netcdf4')

# calculate skill scores by season

In [12]:
# calculate skill scores by snow season
def SMensemble_skill_metrics_season(mod_swe_nc_path, stn_swe_df,calc_metrics, water_year=None):
    """Score every assimilation run separately for two parts of the water year.

    The first window runs from Sept 1 of ``water_year - 1`` up to (not
    including) April 1 of ``water_year``; the second from April 1 up to (not
    including) Oct 1 of ``water_year``.

    Parameters
    ----------
    mod_swe_nc_path : str
        Path to a NetCDF file with coordinates ``assim_run``, ``station`` and
        ``date`` and a variable ``swe`` whose dims are ordered
        ``(assim_run, station, date)``.
    stn_swe_df : pandas.DataFrame
        Observed SWE, indexed by date, one column per station name.
    calc_metrics : callable
        Called as ``calc_metrics(mod_swe, stn_swe)``; must return five values
        in the order R2, MBE, RMSE, NSE, KGE.
    water_year : int, optional
        Water year that defines the two windows. When omitted it is inferred
        from the last model date (that date's year, or the next year if the
        date falls in Oct-Dec), so the function no longer relies on a
        notebook-level ``water_year`` variable.

    Returns
    -------
    xarray.Dataset
        Variable ``score`` with dims ``(assim_run, station, season, metrics)``.
    """
    metrics = ['R2','MBE','RMSE','NSE','KGE']
    # NOTE(review): 'acumulation' (sic) is kept because it becomes a
    # coordinate label in the saved file that other code may select on.
    season = ['acumulation','ablation']

    # Read what is needed while the file is open, then release the handle.
    with xr.open_dataset(mod_swe_nc_path) as assim:
        assim_run = assim.assim_run.values
        station = assim.station.values
        mod_dates = pd.DatetimeIndex(assim.date.values)
        swe = assim.swe.values

    if water_year is None:
        last_day = mod_dates[-1]
        water_year = last_day.year + 1 if last_day.month >= 10 else last_day.year
    water_year = int(water_year)

    # Half-open [start, end) windows, in the same order as `season`.
    windows = [
        (pd.Timestamp(year=water_year - 1, month=9, day=1),
         pd.Timestamp(year=water_year, month=4, day=1)),
        (pd.Timestamp(year=water_year, month=4, day=1),
         pd.Timestamp(year=water_year, month=10, day=1)),
    ]

    # The date masks do not depend on run or station: build them once.
    stn_dates = stn_swe_df.index
    masks = [((mod_dates >= lo) & (mod_dates < hi),
              (stn_dates >= lo) & (stn_dates < hi)) for lo, hi in windows]

    # [#ensemble_members, #stations, #seasons, #metrics]
    data = np.empty([len(assim_run), len(station), len(season), len(metrics)])

    for h in range(len(assim_run)):
        for i, nam in enumerate(station):
            stn_vals = stn_swe_df[nam].values
            for s, (mod_mask, stn_mask) in enumerate(masks):
                data[h, i, s, :] = calc_metrics(swe[h, i, mod_mask], stn_vals[stn_mask])

    score_da = xr.DataArray(
        data,
        dims=('assim_run', 'station', 'season','metrics'),
        coords={'assim_run': assim_run,
                'station': station, 'season': season,'metrics': metrics})

    score_da.attrs['long_name']= 'Performance metrics at stations'
    score_da.attrs['standard_name']= 'metrics'

    d = OrderedDict()
    d['assim_run'] = ('assim_run', assim_run)
    d['station'] = ('station', station)
    d['metrics'] = ('metrics', metrics)
    d['season'] = ('season', season)
    d['score'] = score_da

    ds = xr.Dataset(d)
    ds.attrs['description'] = "Performance metrics at stations"
    ds.attrs['model_output'] = "R^2 MBE RMSE NSE KGE"

    ds.assim_run.attrs['standard_name'] = "assimilation_run"
    ds.assim_run.attrs['axis'] = "run"

    ds.station.attrs['long_name'] = "station_id"
    ds.station.attrs['axis'] = "station"

    ds.metrics.attrs['long_name'] = "performance_metrics"
    ds.metrics.attrs['axis'] = "metrics"

    ds.season.attrs['long_name'] = "swe curve season"
    ds.season.attrs['axis'] = "season"

    return ds

In [13]:
# Score every run separately for the two seasons and save the result.
mod_swe_nc_path = f'{assim_file_path}assim_swe_{water_year}.nc'
seasonal_scores = SMensemble_skill_metrics_season(
    mod_swe_nc_path=mod_swe_nc_path,
    stn_swe_df=snotel_swe_all,
    calc_metrics=calc_metrics,
)

outfilepath = f'{assim_file_path}seasonal_skill_scores_{water_year}.nc'
seasonal_scores.to_netcdf(outfilepath, format='NETCDF4', engine='netcdf4')

# calculate skill scores by month 

In [15]:
def SMensemble_skill_metrics_month(mod_swe_nc_path, stn_swe_df,calc_metrics):
    """Score every assimilation run against station SWE month by month.

    The first (year, month) present in the model dates is skipped; each later
    month gets one slot along the ``month`` axis, numbered from 1. A slot is
    left as NaN when neither the model nor the station has any SWE above
    zero in that month.

    Parameters
    ----------
    mod_swe_nc_path : str
        Path to a NetCDF file with coordinates ``assim_run``, ``station`` and
        ``date`` and a variable ``swe`` whose dims are ordered
        ``(assim_run, station, date)``.
    stn_swe_df : pandas.DataFrame
        Observed SWE, indexed by date, one column per station name.
    calc_metrics : callable
        Called as ``calc_metrics(mod_swe, stn_swe)``; must return five values
        in the order R2, MBE, RMSE, NSE, KGE.

    Returns
    -------
    xarray.Dataset
        Variable ``score`` with dims ``(assim_run, station, month, metrics)``.
    """
    metrics = ['R2','MBE','RMSE','NSE','KGE']

    # Read what is needed while the file is open, then release the handle.
    with xr.open_dataset(mod_swe_nc_path) as assim:
        assim_run = assim.assim_run.values
        station = assim.station.values
        mod_dates = pd.DatetimeIndex(assim.date.values)
        swe = assim.swe.values

    # Unique (year, month) pairs in chronological order. The first pair is
    # dropped: this is set up for runs starting Sept 1, whose first month is
    # ignored.
    uqdt = np.unique(np.stack([np.asarray(mod_dates.year),
                               np.asarray(mod_dates.month)], axis=1), axis=0)
    months = uqdt[1:]

    # The date masks do not depend on run or station: build them once.
    stn_dates = stn_swe_df.index
    month_masks = [((mod_dates.year == yr) & (mod_dates.month == mo),
                    (stn_dates.year == yr) & (stn_dates.month == mo))
                   for yr, mo in months]

    # [#ensemble_members, #stations, #months, #metrics], NaN until scored.
    data = np.full([len(assim_run), len(station), len(months), len(metrics)], np.nan)

    for h in range(len(assim_run)):
        for i, nam in enumerate(station):
            stn_vals = stn_swe_df[nam].values
            for m, (mod_mask, stn_mask) in enumerate(month_masks):
                mod_swe = swe[h, i, mod_mask]
                stn_swe = stn_vals[stn_mask]
                # np.any(... > 0) matches max(...) > 0 on ordinary data but
                # also copes with empty slices and NaN values.
                if np.any(mod_swe > 0) or np.any(stn_swe > 0):
                    data[h, i, m, :] = calc_metrics(mod_swe, stn_swe)

    # Size the coordinate from the data instead of assuming 12 months, so
    # records of any length produce a consistent dataset.
    month = np.arange(1, len(months) + 1)

    score_da = xr.DataArray(
        data,
        dims=('assim_run', 'station', 'month','metrics'),
        coords={'assim_run': assim_run,
                'station': station, 'month': month,'metrics': metrics})

    score_da.attrs['long_name']= 'Performance metrics at stations'
    score_da.attrs['standard_name']= 'metrics'

    d = OrderedDict()
    d['assim_run'] = ('assim_run', assim_run)
    d['station'] = ('station', station)
    d['metrics'] = ('metrics', metrics)
    d['month'] = ('month', month)
    d['score'] = score_da

    ds = xr.Dataset(d)
    ds.attrs['description'] = "Performance metrics at stations"
    ds.attrs['model_output'] = "R^2 MBE RMSE NSE KGE"

    ds.assim_run.attrs['standard_name'] = "assimilation_run"
    ds.assim_run.attrs['axis'] = "run"

    ds.station.attrs['long_name'] = "station_id"
    ds.station.attrs['axis'] = "station"

    ds.metrics.attrs['long_name'] = "performance_metrics"
    ds.metrics.attrs['axis'] = "metrics"

    ds.month.attrs['long_name'] = "month_of_water_year"
    ds.month.attrs['axis'] = "month"

    return ds

In [16]:
# Score every run month by month and save the result.
mod_swe_nc_path = f'{assim_file_path}assim_swe_{water_year}.nc'
monthly_scores = SMensemble_skill_metrics_month(
    mod_swe_nc_path=mod_swe_nc_path,
    stn_swe_df=snotel_swe_all,
    calc_metrics=calc_metrics,
)

outfilepath = f'{assim_file_path}monthly_skill_scores_{water_year}.nc'
monthly_scores.to_netcdf(outfilepath, format='NETCDF4', engine='netcdf4')

  c /= stddev[:, None]
  c /= stddev[None, :]
  nse = 1-(nse_top/nse_bot)
  kge_std = (np.std(mod_swe)/np.std(stn_swe))
  kge_mean = (np.mean(mod_swe)/np.mean(stn_swe))
  c = cov(x, y, rowvar)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  kge_std = (np.std(mod_swe)/np.std(stn_swe))


In [None]:
# Display the monthly skill-score dataset.
monthly_scores