# Examining VIC-WRF data and exporting

In [2]:
%matplotlib widget

import __init__
import scripts.config as config
import numpy as np
import pandas as pd
import tempfile
import datetime
from sklearn.svm import SVR
from natsort import natsorted
import geopandas as gpd
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from matplotlib.font_manager import FontProperties
import seaborn as sns
# import matplotlib as mpl
import matplotlib.pyplot as plt
import importlib
import HydroErr as he
import os
from pathlib import Path
from dateutil.relativedelta import relativedelta

In [3]:
# Plotting parameters
# Font sizes (in points) shared by every figure in this notebook.

XSMALL_SIZE = 6
SMALL_SIZE = 7
MEDIUM_SIZE = 9
BIGGER_SIZE = 12

plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=SMALL_SIZE)     # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE)    # legend fontsize
# The figure-level title (fig.suptitle) is controlled by the 'figure' group;
# this line previously repeated the 'axes' setting above and so had no effect.
plt.rc('figure', titlesize=SMALL_SIZE)   # fontsize of the figure title
plt.rcParams['figure.dpi'] = 140

## Averaging WRF simulations

Each WRF simulation has 5 runs with different parameter sets. Averaging these produces one ensemble data set for each simulation.

This only needs to be done once, because the averages are saved locally as compressed text files.

In [4]:
# Disabled one-time step: average the runs of each WRF simulation.
# For every simulation directory under wrf_dir, the flux file of each run
# (every sub-directory except an existing 'sim_avg') is loaded, the arrays are
# stacked along a third axis and averaged element-wise, and the result is
# written to <sim_dir>/sim_avg/<wrf_file>.gz.
# NOTE(review): the average is taken over every column, including the
# YEAR/MONTH/DAY/HOUR columns - presumably identical across runs; confirm.
# Uncomment the whole cell to regenerate the averages.
# wrf_dir = config.data_path / 'precip' / 'VIC_WRF_EllsworthCr'
# wrf_file = 'flux_46.40625_-123.90625'
# wrf_cols = ["YEAR","MONTH","DAY","HOUR","OUT_PREC","OUT_PET_SHORT",
#             "OUT_SWE","OUT_EVAP","OUT_RUNOFF","OUT_BASEFLOW",
#             "OUT_SOIL_MOIST0", "OUT_SOIL_MOIST1","OUT_SOIL_MOIST2"]

# for sim_dir in os.listdir(wrf_dir):
#     runs = os.listdir(wrf_dir / sim_dir)
#     try:
#         runs.remove('sim_avg')
#     except ValueError:
#         pass
#     arrs = []
#     for run in runs:
#         arr = np.loadtxt(wrf_dir / sim_dir / run / wrf_file)
#         arrs.append(arr)
#     stack = np.dstack(arrs)
#     averaged = np.mean(stack, axis=2)
#     out_dir = wrf_dir / sim_dir / 'sim_avg'
#     try:
#         out_dir.mkdir(parents=True)
#     except FileExistsError:
#         pass
#     np.savetxt(out_dir / '{}.gz'.format(wrf_file), averaged)

In [None]:
# # Save averaged temp file
# Disabled one-time step: collect the 'Temp(C)' column from the forcing file of
# every simulation directory under forc_dir (skipping 'pnnl_historical'), put
# the simulations side by side as columns after Year/Month/Day, and write the
# table to forc_dir/sim_avg/sim_avg_temp.gz.
# Note: the output holds one column per simulation; no averaging across
# simulations happens in this cell.
# forc_dir = config.data_path / 'precip' / 'WRF_frcs_EllsworthCr_forcings'
# forc_file = 'forc_46.40625_-123.90625'
# forc_cols = ['Year', 'Month', 'Day', 'Hour', 'Precip(mm)', 'Temp(C)', 
#              'Wind(m/s)', 'SWrad(W/m2)', 'LWrad(W/m2)', 'pressure(kPa)', 
#              'VaporPress(kPa)']

# cols = ['Temp(C)']
# inds = [forc_cols.index(x) for x in cols]
# arrs = []
# sim_dirs = []
# for sim_dir in os.listdir(forc_dir):
#     if sim_dir == 'pnnl_historical':
#         continue
#     sim_dirs.append(sim_dir)
#     arr = np.loadtxt(forc_dir / sim_dir / forc_file)
#     arrs.append(arr[:, inds])
# stack = np.column_stack(arrs)
# proj_sims_temp = pd.DataFrame(stack, columns=sim_dirs)
# date_arr = pd.DataFrame(arr, columns=forc_cols)
# (the DataFrame assigned to proj_sims_temp above is replaced by a plain array
#  on the next line; the dates come from the last simulation that was loaded)
# proj_sims_temp = np.column_stack([date_arr[['Year', 'Month', 'Day']], stack])

# # Export just the per-simulation temperature table to speed up imports in the future
# gcm_avg_forc_dir = forc_dir / 'sim_avg'
# try:
#     gcm_avg_forc_dir.mkdir(parents=True)
# except FileExistsError:
#     pass

# np.savetxt(gcm_avg_forc_dir / 'sim_avg_temp.gz', proj_sims_temp)

In [None]:
# # Save averaged precip file
# Disabled one-time step: collect the 'OUT_PREC' column from the run-averaged
# flux file (<sim_dir>/sim_avg/<wrf_file>.gz) of every simulation directory
# under wrf_dir (skipping 'pnnl_historical'), put the simulations side by side
# as columns after YEAR/MONTH/DAY, and write the table to
# wrf_dir/sim_avg/sim_avg_ppt.gz.
# wrf_dir = config.data_path / 'precip' / 'VIC_WRF_EllsworthCr'
# wrf_file = 'flux_46.40625_-123.90625'
# wrf_cols = ["YEAR","MONTH","DAY","HOUR","OUT_PREC","OUT_PET_SHORT",
#             "OUT_SWE","OUT_EVAP","OUT_RUNOFF","OUT_BASEFLOW",
#             "OUT_SOIL_MOIST0", "OUT_SOIL_MOIST1","OUT_SOIL_MOIST2"]

# cols = ['OUT_PREC']
# inds = [wrf_cols.index(x) for x in cols]
# arrs = []
# sim_dirs = []
# for sim_dir in os.listdir(wrf_dir):
#     if sim_dir == 'pnnl_historical':
#         continue
#     sim_dirs.append(sim_dir)
#     arr = np.loadtxt(wrf_dir / sim_dir / 'sim_avg' / '{}.gz'.format(wrf_file))
#     arrs.append(arr[:, inds])
# stack = np.column_stack(arrs)
# proj_sims_ppt = pd.DataFrame(stack, columns=sim_dirs)
# date_arr = pd.DataFrame(arr, columns=wrf_cols)
# (the DataFrame assigned to proj_sims_ppt above is replaced by a plain array
#  on the next line; the dates come from the last simulation that was loaded)
# proj_sims_ppt = np.column_stack([date_arr[['YEAR', 'MONTH', 'DAY']], stack])

# # Export just the run-averaged precipitation to speed up imports in the future
# gcm_avg_dir = wrf_dir / 'sim_avg'
# try:
#     gcm_avg_dir.mkdir(parents=True)
# except FileExistsError:
#     pass

# np.savetxt(gcm_avg_dir / 'sim_avg_ppt.gz', proj_sims_ppt)

In [7]:
# Import and format observed data (2003-2007 runoff)
# Loads the historical WRF output and forcings, the runoff observations and
# their quality codes, the precipitation and air temperature drivers and the
# VELMA daily results, then trims each one to [nse_start, nse_end].

input_dir = config.velma_data
results_dir = config.data_path.parents[0] / 'results' / 'ellsworth_baseline_03_07_21'

# Date range of the runoff file, and the window every series is trimmed to
runoff_start, runoff_end = pd.to_datetime('01-01-2003'), pd.to_datetime('12-31-2007')
nse_start, nse_end = pd.to_datetime('01-01-2004'), pd.to_datetime('12-31-2007')
# Date range of the precipitation and air temperature files
forcing_start, forcing_end = pd.to_datetime('01-01-2003'), pd.to_datetime('12-31-2019')


def _clip_to_nse_window(frame):
    """Return the rows of `frame` whose index lies in [nse_start, nse_end] (inclusive)."""
    inside = (frame.index >= nse_start) & (frame.index <= nse_end)
    return frame[inside]


# --- Historical WRF simulation: VIC output variables ---
wrf_dir = config.data_path / 'precip' / 'VIC_WRF_EllsworthCr'
wrf_file = 'flux_46.40625_-123.90625'
wrf_cols = ["YEAR","MONTH","DAY","HOUR","OUT_PREC","OUT_PET_SHORT",
            "OUT_SWE","OUT_EVAP","OUT_RUNOFF","OUT_BASEFLOW",
            "OUT_SOIL_MOIST0", "OUT_SOIL_MOIST1","OUT_SOIL_MOIST2"]
arr = np.loadtxt(wrf_dir / 'pnnl_historical' / 'sim_avg' / wrf_file)
pnnl_hist = pd.DataFrame(arr, columns=wrf_cols)
# The index is built from year/month/day only, so rows that differ only in
# HOUR share the same timestamp
pnnl_hist.index = pd.to_datetime(pnnl_hist[['YEAR', 'MONTH', 'DAY']])

# --- Historical WRF simulation: forcings ---
forc_dir = config.data_path / 'precip' / 'WRF_frcs_EllsworthCr_forcings'
forc_file = 'forc_46.40625_-123.90625'
forc_cols = ['Year', 'Month', 'Day', 'Hour', 'Precip(mm)', 'Temp(C)', 
             'Wind(m/s)', 'SWrad(W/m2)', 'LWrad(W/m2)', 'pressure(kPa)', 
             'VaporPress(kPa)']
arr = np.loadtxt(forc_dir / 'pnnl_historical' / forc_file)
pnnl_hist_forc = pd.DataFrame(arr, columns=forc_cols)
pnnl_hist_forc.index = pd.to_datetime(pnnl_hist_forc[['Year', 'Month', 'Day']])

# Forcings and output variables side by side, trimmed to the comparison window
pnnl_hist_join = _clip_to_nse_window(pd.concat([pnnl_hist_forc, pnnl_hist], axis=1))

# --- Runoff observations (one value per day, no header, no date column) ---
runoff_path = input_dir / 'runoff' / 'ellsworth_Q_2003_2007_dummy.csv'
runoff_obs = pd.read_csv(runoff_path, names=['runoff_obs'])
runoff_obs.index = pd.date_range(runoff_start, runoff_end)
runoff_obs = _clip_to_nse_window(runoff_obs)

# --- Runoff quality codes ---
flow_path = config.streamflow
quality = _clip_to_nse_window(
    pd.read_csv(flow_path, usecols=['Date', 'Quality'], parse_dates=True, index_col=0)
)

# --- Precipitation ---
precip_path = input_dir / 'precip' / 'PRISM_gauge_avg_ppt_2003_2019.csv'
precip = pd.read_csv(precip_path, names=['precip'])
precip.index = pd.date_range(forcing_start, forcing_end)
precip = _clip_to_nse_window(precip)

# --- Air temperature ---
temp_path = input_dir / 'temp' / 'ellsworth_temp_2003_2019.csv'
temp = pd.read_csv(temp_path, names=['temp'])
temp.index = pd.date_range(forcing_start, forcing_end)
temp = _clip_to_nse_window(temp)

# --- VELMA outputs ---
velma_results = pd.read_csv(results_dir / 'DailyResults.csv')

# Build a '<year><3-digit day of year>' key and parse it into the datetime index
jday_pad = velma_results['Day'].astype(str).str.zfill(3)
str_year = velma_results['Year'].astype(str)
velma_results['year_jday'] = str_year + jday_pad
velma_results.index = pd.to_datetime(velma_results['year_jday'], format='%Y%j')
velma_results = _clip_to_nse_window(velma_results)

In [8]:
# Aggregate hourly projections into daily

# One aggregation rule per variable, written as an explicit mapping so a column
# and its method cannot drift apart. The previous parallel lists had 16 columns
# but 17 methods, and zip() silently dropped the extra method.
# NOTE(review): OUT_SWE and OUT_SOIL_MOIST* are summed over the day like the
# flux variables; if they are state variables a mean may be intended - confirm.
daily_agg = {
    'Precip(mm)': 'sum',
    'Temp(C)': 'mean',
    'Wind(m/s)': 'mean',
    'SWrad(W/m2)': 'mean',
    'LWrad(W/m2)': 'mean',
    'pressure(kPa)': 'mean',
    'VaporPress(kPa)': 'mean',
    'OUT_PREC': 'sum',
    'OUT_PET_SHORT': 'sum',
    'OUT_SWE': 'sum',
    'OUT_EVAP': 'sum',
    'OUT_RUNOFF': 'sum',
    'OUT_BASEFLOW': 'sum',
    'OUT_SOIL_MOIST0': 'sum',
    'OUT_SOIL_MOIST1': 'sum',
    'OUT_SOIL_MOIST2': 'sum',
}
# Kept for the cells that still build the mapping with dict(zip(cols, methods))
cols = list(daily_agg)
methods = list(daily_agg.values())

# drop() returns a new frame, so it has to be part of the chain to take effect;
# pnnl_hist_join itself is left untouched, which keeps this cell re-runnable.
pnnl_hist_agg = (
    pnnl_hist_join
    .drop(columns=['Year', 'Month', 'Day', 'Hour', 'HOUR'])
    .groupby(pd.Grouper(freq='D'))
    .agg(daily_agg)
)


def _doy_by_year(frame, values):
    """Pivot a daily, datetime-indexed frame into day-of-year rows x year columns.

    `values` is passed straight to pd.pivot_table: a column name gives plain
    year columns, a list of names gives (name, year) MultiIndex columns.
    """
    return pd.pivot_table(frame, index=frame.index.dayofyear, columns=frame.index.year,
                          values=values, dropna=False)


# Make pivot tables
wrf_ppt_yearly = _doy_by_year(pnnl_hist_agg, 'OUT_PREC')
wrf_ppt2_yearly = _doy_by_year(pnnl_hist_agg, 'Precip(mm)')
wrf_temp_yearly = _doy_by_year(pnnl_hist_agg, 'Temp(C)')
# The list form is deliberate: the plotting cells read the years from level 1
# of runoff_obs_yearly's columns
runoff_obs_yearly = _doy_by_year(runoff_obs, ['runoff_obs'])
precip_yearly = _doy_by_year(precip, ['precip'])
temp_yearly = _doy_by_year(temp, ['temp'])


## Plotting

In [9]:
# Precips
# One panel per year comparing daily WRF precipitation (OUT_PREC) with the
# PRISM/gauge precipitation, plotted against day of year.
years = runoff_obs_yearly.columns.get_level_values(1)
fig, axes = plt.subplots(ncols=1, nrows=len(years), figsize=(6, 9))
for col, year in enumerate(years):
    # Columns are selected by position, which assumes every pivot table has the
    # same year columns in the same order as runoff_obs_yearly.
    # Uncomment to overlay the precipitation from the forcing file as well:
#     wrf_ppt2_yearly.iloc[:, col].plot(ax=axes[col], label='VIC-WRF', linewidth=1)
    wrf_ppt_yearly.iloc[:, col].plot(ax=axes[col], label='WRF', linewidth=1)
    precip_yearly.iloc[:, col].plot(ax=axes[col], label='PRISM/gauge', linewidth=1)
    axes[col].set_title(year)
    axes[col].set_ylim([0, 130])
axes[0].legend(loc='upper left', bbox_to_anchor=(0, 1.3), fancybox=True, ncol=2)
# This figure shows precipitation; the label previously read 'Runoff (mm/day)'
axes[0].set_ylabel('Precipitation (mm/day)')
fig.suptitle('Fig. 1: WRF vs. PRISM/gauge precipitation')
plt.tight_layout(rect=[0, 0, 1, 0.99])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [12]:
# Temps
# One panel per year comparing daily WRF air temperature with the PRISM air
# temperature, plotted against day of year.
years = runoff_obs_yearly.columns.get_level_values(1)
fig, axes = plt.subplots(ncols=1, nrows=len(years), figsize=(6, 9))
line_style = dict(linewidth=1)
for col, year in enumerate(years):
    panel = axes[col]
    # Columns are picked by position in each pivot table
    wrf_temp_yearly.iloc[:, col].plot(ax=panel, label='WRF', **line_style)
    temp_yearly.iloc[:, col].plot(ax=panel, label='PRISM', **line_style)
    panel.set_title(year)
    panel.set_ylim([0, 30])
# Legend and y-label are attached to the top panel only
top_panel = axes[0]
top_panel.legend(loc='upper left', bbox_to_anchor=(0, 1.3), fancybox=True, ncol=2)
top_panel.set_ylabel('Degrees (C)')
fig.suptitle('Fig. 2: WRF vs. PRISM/gauge air temperature')
plt.tight_layout(rect=[0, 0, 1, 0.99])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Exporting historical WRF forcings (2003-2015)

In [164]:
# Export daily historical WRF precipitation and air temperature as
# single-column CSV files (no header, no date column).
driver_start = pd.to_datetime('01-01-2003')
driver_end = pd.to_datetime('12-31-2015')

# Daily aggregation rule per variable. Spelled out here instead of being built
# from the `cols`/`methods` globals: cells further down rebind `cols` to a
# single column, so running this cell after them would aggregate only that
# column and fail on the lookups below.
export_agg = {
    'Precip(mm)': 'sum',
    'Temp(C)': 'mean',
    'Wind(m/s)': 'mean',
    'SWrad(W/m2)': 'mean',
    'LWrad(W/m2)': 'mean',
    'pressure(kPa)': 'mean',
    'VaporPress(kPa)': 'mean',
    'OUT_PREC': 'sum',
    'OUT_PET_SHORT': 'sum',
    'OUT_SWE': 'sum',
    'OUT_EVAP': 'sum',
    'OUT_RUNOFF': 'sum',
    'OUT_BASEFLOW': 'sum',
    'OUT_SOIL_MOIST0': 'sum',
    'OUT_SOIL_MOIST1': 'sum',
    'OUT_SOIL_MOIST2': 'sum',
}

pnnl_hist_join_export = pd.concat([pnnl_hist_forc, pnnl_hist], axis=1)
pnnl_hist_join_export = pnnl_hist_join_export[(pnnl_hist_join_export.index >= driver_start) & 
                                              (pnnl_hist_join_export.index <= driver_end)]
# drop() returns a new frame, so it is chained into the aggregation; the
# stand-alone call used before discarded its result.
pnnl_hist_agg_export = (
    pnnl_hist_join_export
    .drop(columns=['Year', 'Month', 'Day', 'Hour', 'HOUR'])
    .groupby(pd.Grouper(freq='D'))
    .agg(export_agg)
)

# Precipitation
outfile = input_dir / 'precip' / 'vic_wrf_ppt_03_15.csv'
ppt_out = pnnl_hist_agg_export.loc[:, 'OUT_PREC']
ppt_out.to_csv(outfile, header=False, index=False)

# Air temperature
outfile = input_dir / 'temp' / 'vic_wrf_temp_03_15.csv'
temp_out = pnnl_hist_agg_export.loc[:, 'Temp(C)']
temp_out.to_csv(outfile, header=False, index=False)

## Examining temperature and precip projections from WRF simulations

#### Precipitation

In [13]:
# Build proj_sims_ppt: one OUT_PREC column per projection simulation, indexed
# by date, read from each simulation's run-averaged flux file.
wrf_dir = config.data_path / 'precip' / 'VIC_WRF_EllsworthCr'
wrf_file = 'flux_46.40625_-123.90625'
wrf_cols = ["YEAR","MONTH","DAY","HOUR","OUT_PREC","OUT_PET_SHORT",
            "OUT_SWE","OUT_EVAP","OUT_RUNOFF","OUT_BASEFLOW",
            "OUT_SOIL_MOIST0", "OUT_SOIL_MOIST1","OUT_SOIL_MOIST2"]

# Get precipitation
# NOTE: this rebinds `cols`, which the daily-aggregation cell also defines
cols = ['OUT_PREC']
inds = [wrf_cols.index(x) for x in cols]
# Entries of wrf_dir that are not projection simulations: the historical run,
# and the 'sim_avg' export directory that the disabled averaged-precip cell
# creates directly under wrf_dir
skip_dirs = {'pnnl_historical', 'sim_avg'}
arrs = []
sim_dirs = []
# os.listdir() returns names in arbitrary order; sorting keeps the column
# order (and the colours assigned from it when plotting) reproducible
for sim_dir in sorted(os.listdir(wrf_dir)):
    if sim_dir in skip_dirs:
        continue
    sim_dirs.append(sim_dir)
    arr = np.loadtxt(wrf_dir / sim_dir / 'sim_avg' / '{}.gz'.format(wrf_file))
    arrs.append(arr[:, inds])
stack = np.column_stack(arrs)
proj_sims_ppt = pd.DataFrame(stack, columns=sim_dirs)
# Dates are taken from the last simulation loaded; this assumes every
# simulation covers the same dates in the same row order
date_arr = pd.DataFrame(arr, columns=wrf_cols)
proj_sims_ppt.index = pd.to_datetime(date_arr[['YEAR', 'MONTH', 'DAY']])

#### Temperature

In [14]:
# Build proj_sims_temp: one Temp(C) column per projection simulation, indexed
# by date, read from each simulation's forcing file.
forc_dir = config.data_path / 'precip' / 'WRF_frcs_EllsworthCr_forcings'
forc_file = 'forc_46.40625_-123.90625'
forc_cols = ['Year', 'Month', 'Day', 'Hour', 'Precip(mm)', 'Temp(C)', 
             'Wind(m/s)', 'SWrad(W/m2)', 'LWrad(W/m2)', 'pressure(kPa)', 
             'VaporPress(kPa)']

# Get temperature
# NOTE: this rebinds `cols`, which the daily-aggregation cell also defines
cols = ['Temp(C)']
inds = [forc_cols.index(x) for x in cols]
# Entries of forc_dir that are not projection simulations: the historical run,
# and the 'sim_avg' export directory that the disabled averaged-temp cell
# creates directly under forc_dir
skip_dirs = {'pnnl_historical', 'sim_avg'}
arrs = []
sim_dirs = []
# os.listdir() returns names in arbitrary order; sorting keeps the column
# order (and the colours assigned from it when plotting) reproducible
for sim_dir in sorted(os.listdir(forc_dir)):
    if sim_dir in skip_dirs:
        continue
    sim_dirs.append(sim_dir)
    arr = np.loadtxt(forc_dir / sim_dir / forc_file)
    arrs.append(arr[:, inds])
stack = np.column_stack(arrs)
proj_sims_temp = pd.DataFrame(stack, columns=sim_dirs)
# Dates are taken from the last simulation loaded; this assumes every
# simulation covers the same dates in the same row order
date_arr = pd.DataFrame(arr, columns=forc_cols)
proj_sims_temp.index = pd.to_datetime(date_arr[['Year', 'Month', 'Day']])

In [254]:
# 10-year mean air temperature for each projection simulation between
# sim_start and sim_end, with the across-simulation average drawn in black.
plt.close('all')
colors = sns.color_palette('Dark2', 13)
sim_start = pd.to_datetime('01-01-2025')
sim_end = pd.to_datetime('12-31-2099')
# Explicit Axes so every line lands on this figure regardless of pyplot's
# current-axes state
fig, ax = plt.subplots(figsize=(12, 5))
for i, sim in enumerate(proj_sims_temp):
    sim_data = proj_sims_temp.loc[:, sim]
    sim_subset = sim_data[(sim_data.index >= sim_start) & (sim_data.index <= sim_end)]
    sim_group = sim_subset.groupby(pd.Grouper(freq='10Y')).mean()
    # Wrap around the palette instead of raising IndexError when there are
    # more simulations than colours
    sim_group.plot(ax=ax, label=sim, color=colors[i % len(colors)])

# Average across simulations first, then apply the same window and 10-year mean
ensemble_data = proj_sims_temp.mean(axis=1)
ensemble_subset = ensemble_data[(ensemble_data.index >= sim_start) & (ensemble_data.index <= sim_end)]
ensemble_group = ensemble_subset.groupby(pd.Grouper(freq='10Y')).mean()
ensemble_group.plot(ax=ax, label='Average', color='black', linewidth=2.5)

ax.set_xlabel('Year')
ax.set_ylabel('Mean air temperature (C)')
# Trailing semicolon keeps the Legend repr out of the cell output
fig.legend(loc='upper left', bbox_to_anchor=(0, 1.01), fancybox=True, ncol=5);

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x19d9b5c2588>

In [16]:
# Same WRF vs. PRISM/gauge precipitation comparison as Fig. 1 earlier in the
# notebook: one panel per year, plotted against day of year.
years = runoff_obs_yearly.columns.get_level_values(1)
fig, axes = plt.subplots(ncols=1, nrows=len(years), figsize=(6, 9))
for col, year in enumerate(years):
    # Columns are selected by position, which assumes every pivot table has the
    # same year columns in the same order as runoff_obs_yearly.
    # Uncomment to overlay the precipitation from the forcing file as well:
#     wrf_ppt2_yearly.iloc[:, col].plot(ax=axes[col], label='VIC-WRF', linewidth=1)
    wrf_ppt_yearly.iloc[:, col].plot(ax=axes[col], label='WRF', linewidth=1)
    precip_yearly.iloc[:, col].plot(ax=axes[col], label='PRISM/gauge', linewidth=1)
    axes[col].set_title(year)
    axes[col].set_ylim([0, 130])
axes[0].legend(loc='upper left', bbox_to_anchor=(0, 1.3), fancybox=True, ncol=2)
# This figure shows precipitation; the label previously read 'Runoff (mm/day)'
axes[0].set_ylabel('Precipitation (mm/day)')
fig.suptitle('Fig. 1: WRF vs. PRISM/gauge precipitation')
plt.tight_layout(rect=[0, 0, 1, 0.99])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Exporting projected VIC-WRF forcings (2003 - 2099)

In [None]:
# # Projected climate variables
# wrf_dir = config.data_path / 'precip' / 'VIC_WRF_EllsworthCr'
# wrf_file = 'flux_46.40625_-123.90625'
# wrf_cols = ["YEAR","MONTH","DAY","HOUR","OUT_PREC","OUT_PET_SHORT",
#             "OUT_SWE","OUT_EVAP","OUT_RUNOFF","OUT_BASEFLOW",
#             "OUT_SOIL_MOIST0", "OUT_SOIL_MOIST1","OUT_SOIL_MOIST2"]
# pnnl_hist = pd.read_csv(wrf_dir / 'pnnl_historical' / 'sim0' / wrf_file, 
#                         names=wrf_cols, delimiter='\t', parse_dates={'DATE': ['YEAR', 'MONTH', 'DAY']},
#                         index_col=0)