## Accelerate Sensitivity Analysis & Model Calibration Using pyGSFLOW


<img src = "./figures/WorkFlow.PNG">





### Agenda
* Plot PRMS 2D data
* Change 2D data by a scalar factor
* Change zones in 2D maps using conditional masks
* Simple sensitivity analysis
* Comparing simulations and measurements
* Calibration


In [None]:
import os
from gsflow import GsflowModel
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
## The GSFLOW model is loaded as follows
control_file = os.path.join(".", "data", "sagehen", "gsflow", "saghen_new_cont.control")
gs = GsflowModel.load_from_file(control_file)


In [None]:
#gs.prms.Data.data_df.to_csv("temp.csv")
gs.prms.parameters.record_names

### (1) Visualize PRMS data

In [None]:
print(gs.prms.parameters.get_record("ssr2gw_rate"))

In [None]:
Params = gs.prms.parameters
gs.prms.parameters.get_values("ssr2gw_rate")

In [None]:
# Grid dimensions used to fold the flat parameter array into a 2D map.
# NOTE(review): 77 x 84 is hard-coded here; presumably the Sagehen model grid — confirm against the model discretization.
nrow = 77
ncol = 84
# Reshape the ssr2gw_rate values to (nrow, ncol) and show them as an image
ssr2gw_rate = Params.get_values("ssr2gw_rate").reshape(nrow, ncol)
plt.imshow(ssr2gw_rate)
plt.colorbar();

In [None]:
# 2D data
param = gs.prms.parameters.get_record('rain_adj')
print(param)

In [None]:
# Fold the flat rain_adj array into (month, row, col) so that each month is a 2D map
rainadj = gs.prms.parameters.get_values('rain_adj').reshape(12, nrow, ncol)

# One panel per month, laid out on a 2 x 6 grid, each with its own colorbar
fig = plt.figure(figsize=(15, 5))
for month_number, monthly_map in enumerate(rainadj, start=1):
    ax = plt.subplot(2, 6, month_number)
    ax.set_title("Mon {}".format(month_number))
    img = ax.imshow(monthly_map, cmap='jet')
    fig.colorbar(img, ax=ax)
    ax.set_aspect('auto')
plt.tight_layout(h_pad=1)



### (2) Scale parameter by a scalar value

In this exercise, let us change the model mode to "PRMS" because it is quick to run. The same exercise can be done using the GSFLOW model; however, running the model will take longer.

In [None]:
gs.control.set_values("model_mode", ['PRMS'])

In [None]:
gs.write_input(workspace=os.path.join(".", "data", "temp"))

In [None]:
gs.gsflow_exe = os.path.abspath(os.path.join("..", "bin", "gsflow.exe"))
gs.run_model()

In [None]:
gs.prms.get_StatVar()

In [None]:
gs.prms.stat.stat_df


In [None]:
basic_cfs_before = gs.prms.stat.stat_df['basin_cfs_1'].values

### Adjusting values for calibration

We can use pyGSFLOW to change parameter values and display the results of our adjustments.

We'll start by adjusting `rain_adj` and `snow_adj`.

In [None]:
## Save copies of rain_adj and snow_adj before we change them
rain_adj_backup = np.copy(gs.prms.parameters.get_values('rain_adj'))
snow_adj_backup = np.copy(gs.prms.parameters.get_values('snow_adj'))

In [None]:
## Remove the effect of precipitation by setting the rain and snow adjustment factors to 0.0
newrainadj = rain_adj_backup * 0.0
gs.prms.parameters.set_values('rain_adj', newrainadj)
gs.prms.parameters.set_values('snow_adj', 0.0 * snow_adj_backup)

# Write input files
# NOTE(review): this workspace ("data/temp/prms_files") differs from the "data/temp" workspace used by the other write_input calls in this notebook — confirm it is intentional.
gs.write_input(workspace=os.path.join(".", "data", "temp", "prms_files"))

# Run the model with the modified parameters
gs.run_model()

# Reload the StatVar output and keep the simulated basin outflow for comparison with the earlier run
gs.prms.get_StatVar() 
basic_cfs_after = gs.prms.stat.stat_df['basin_cfs_1'].values

In [None]:
dates = gs.prms.stat.stat_df['Date'].values
plt.figure(figsize=(10,7))
plt.plot(dates, (basic_cfs_before), 'b', label = 'before' )
plt.plot(dates,(basic_cfs_after), 'r', label = 'after' )
plt.legend()

### (2) Use "array masking" to change parameters


In [None]:
# masking
Params = gs.prms.parameters
hru_elev = np.copy(Params.get_values("hru_elev"))
hru_elev = hru_elev.reshape(nrow, ncol)
plt.imshow(hru_elev)
plt.colorbar();

In [None]:
mask = hru_elev<7000
plt.imshow(mask); # ; plt.colorbar()

In [None]:
rain_adj_jan = np.copy(rainadj[0,:,:])
rain_adj_jan[mask] = rain_adj_jan[mask] * 0.8
plt.imshow(rain_adj_jan)
plt.colorbar();

### Can you change `ssr2gw_rate` values in subbasin 10?

In [None]:
hru_subbasin = gs.mf.uzf.irunbnd.array
plt.imshow(hru_subbasin)
plt.colorbar();

In [None]:
# Work on a copy: reshape() can return a view of the array it is called on, so
# editing this map in place (as the next cell does) could otherwise write
# through to the model's own ssr2gw_rate values. This mirrors the np.copy()
# already used for hru_elev and rain_adj_jan.
# NOTE(review): whether get_values() itself returns a copy is not visible here.
ssr2gw_rate = np.copy(Params.get_values("ssr2gw_rate")).reshape(nrow, ncol)
plt.imshow(ssr2gw_rate)
plt.colorbar();

In [None]:
ssr2gw_rate[hru_subbasin==10] = ssr2gw_rate[hru_subbasin==10] * 0.0
plt.imshow(ssr2gw_rate)
plt.colorbar();

In [None]:
# The mask can also be complex 
mask2 = np.logical_and(hru_subbasin==10, hru_elev <7500)
plt.imshow(mask2);

## (3) Sensitivity Analysis

Before calibration, let us do a simple sensitivity analysis.

In [None]:
%%time
#parameters = ['gwflow_coef', 'gwsink_coef', 'ssr2gw_rate', 'slowcoef_lin', 'slowcoef_sq', 'smidx_coef', 
#              'snarea_curve', 'snarea_thresh']

parameters = ['snarea_curve', 'ssr2gw_rate',  'gwflow_coef']
control_file = os.path.join(".", "data", "sagehen", "gsflow", "saghen_new_cont.control")


# load model files
gs = GsflowModel.load_from_file(control_file=control_file)
gs.gsflow_exe = os.path.abspath(os.path.join("..", "bin", "gsflow.exe"))

# Let us use PRMS
gs.control.set_values("model_mode", ['PRMS'])
gs.control.set_values("print_debug", [-1]) # this will turn off screen printout and reduce output to the notebook
gs.control.set_values(name='end_time', values=[1990,1,1,0,0,0])

Params = gs.prms.parameters

# Save a copy of parameter values before they changes
orig_params = {}
for par in parameters:
    orig_params[par] = np.copy(Params.get_values(par))

orig_params

##### Automation of Sensitivity Analysis
We are going to use two nested loops: an outer loop over parameters and an inner loop over scaling factors. Within the parameter loop, the original values of the parameter are multiplied by each scaling factor in turn.

In [None]:
%%time
# Parameters to be explored....
#parameters = ['gwflow_coef', 'gwsink_coef', 'ssr2gw_rate', 'slowcoef_lin', 'slowcoef_sq', 'smidx_coef',
#              'snarea_curve', 'snarea_thresh']
parameters = ['snarea_curve', 'ssr2gw_rate',  'gwflow_coef']
factors = [0.5, 0.75, 1.0, 1.25, 1.5]
all_sens = {}

## Nested Loop
for par in parameters: # loop over parameters   
    responce = []
    for facc in factors: # loop over scaling factor       
        print("\n****** Par =  {}, Facc = {} ****** ".format(par, facc))
        
        Params.set_values(name=par, values = orig_params[par]*facc) # change the parameter  
        
        gs.write_input(workspace=os.path.join(".", "data", "temp"))                   # write input
        
        gs.run_model()                                              # run the model and wait for results
        
        gs.prms.get_StatVar()                                       # load results from Statvar 
        
        mean_outflow = gs.prms.stat.stat_df['basin_cfs_1'].mean()   # compute the mean stream flow
        
        responce.append(mean_outflow)                               # save results
    
    all_sens[par] = responce
    Params.set_values(name=par, values = orig_params[par])


In [None]:
# One curve per parameter: mean stream flow as a function of the scaling factor
plt.figure(figsize=(15, 8))
for par_name, mean_flows in all_sens.items():
    plt.plot(factors, mean_flows, label=par_name, marker=11)
plt.ylabel("Average Stream Flow (cfs)")
plt.xlabel("Scaling Factor")
plt.legend()
    

## (4) Simple Calibration 


### (4.a) Error Evaluation


In [None]:
# Before we start, reload the model from its control file and configure a PRMS-only run
control_file = os.path.join(".", "data", "sagehen", "gsflow", "saghen_new_cont.control")
gs = GsflowModel.load_from_file(control_file)
gs.gsflow_exe = os.path.abspath(os.path.join("..", "bin", "gsflow.exe"))
gs.control.set_values("model_mode", ['PRMS']) # TODO: remove the list requirement when no list is necessary; provide a check on datatype
gs.control.set_values("print_debug", [-1]) # this will turn off screen printout and reduce output to the notebook
gs.control.set_values(name='end_time', values=[1990,1,1,0,0,0])
Params = gs.prms.parameters

# Baseline run: write the inputs, run the model, then load the StatVar results
gs.write_input(workspace=os.path.join(".", "data", "temp"))              # write input
gs.run_model()                                         # run the model
gs.prms.get_StatVar()  


#### Compare measurements and simulations

In [None]:
# Daily stream flow
# Select the measurements that fall within the simulation period. The end date
# is 1990-01-01, the same date used for 'end_time' on the control file and for
# the window inside calculate_error, so the measured series does not run past
# the simulated one.
meas_data = gs.prms.data.data_df  # Data object in PRMS
mask = np.logical_and(meas_data['Date']>=pd.Timestamp('1982-08-01'),  meas_data['Date']<=pd.Timestamp('1990-01-01'))
measure_data = gs.prms.data.data_df[mask]

# Get simulation results
simData = gs.prms.stat.stat_df  # Results from StatVar file

# Plot simulated vs. measured flow on the same axes
ax = measure_data.plot(x= 'Date' , y= 'runoff_0', color = 'r', label = 'Measured')
simData.plot(ax=ax, x= 'Date' , y= 'basin_cfs_1', color = 'b', label = 'Simulated', figsize = (15,8))
plt.legend()
measure_data

#### Monthly Stream flow


In [None]:
# Monthly averages of the measured and simulated data, grouped by (year, month)
monthly_meas = measure_data.groupby([measure_data.Date.dt.year, measure_data.Month]).mean()
monthly_sim = simData.groupby([simData.Date.dt.year, simData.Month]).mean()

# Plots
plt.figure(figsize=(15,7))

# Left panel: measured and simulated monthly means plotted in sequence
plt.subplot(1,2,1)
plt.plot(monthly_meas['runoff_0'].values, 'r', label = 'Measured')
plt.plot(monthly_sim['basin_cfs_1'].values, 'b', label = 'Simulated')
plt.xlabel("Month Number after starting Year (1982)")
plt.ylabel("Flow (cfs)")
plt.legend()

# Right panel: simulated vs. measured monthly means; the red line from (0, 0) to (140, 140) is the 1:1 line.
# NOTE(review): the 140 cfs upper limit is hard-coded — confirm it covers the data range.
plt.subplot(1,2,2)
plt.scatter(monthly_meas['runoff_0'].values,monthly_sim['basin_cfs_1'].values ); plt.plot([0,140],[0,140],'r')
plt.xlabel("Measured flow (cfs)")
plt.ylabel("Simulated flow (cfs)")


### (4.b) Compute Error Metric

In [None]:
def calculate_error(gs, start_date='1982-08-01', end_date='1990-01-01'):
    """
    Calculate the root mean squared error (RMSE) of simulated stream flow.

    Measured flow is read from the ``runoff_0`` column of
    ``gs.prms.data.data_df`` and simulated flow from the ``basin_cfs_1``
    column of ``gs.prms.stat.stat_df`` (the StatVar results must already be
    loaded). Only measurements whose ``Date`` lies in
    ``[start_date, end_date]`` are compared; the simulated series is used
    as-is and is paired with the measurements by position.

    Parameters
    ----------
    gs : GsflowModel
        Model object exposing ``prms.data.data_df`` and ``prms.stat.stat_df``.
    start_date, end_date : str or datetime-like, optional
        Inclusive bounds of the measurement window. The defaults are the
        dates this function previously had hard-coded.

    Returns
    -------
    float
        Root mean squared error, in the units of the two flow series.

    Raises
    ------
    ValueError
        If the number of measurements inside the window differs from the
        number of simulated values.
    """
    meas_data = gs.prms.data.data_df  # measured data
    dates = meas_data['Date']

    # Keep only the measurements inside the requested (inclusive) window
    in_window = np.logical_and(dates >= pd.Timestamp(start_date),
                               dates <= pd.Timestamp(end_date))
    measured = meas_data[in_window]['runoff_0'].values

    # Simulated values from the StatVar results
    simulated = gs.prms.stat.stat_df['basin_cfs_1'].values

    # The two series are paired by position, so they must have the same length;
    # fail with a clear message instead of an opaque (or silent) broadcast.
    if simulated.shape != measured.shape:
        raise ValueError(
            "simulated and measured series differ in length: "
            "{} vs {}".format(simulated.shape, measured.shape)
        )

    err = simulated - measured
    # Root of the mean squared error
    rmse = (np.mean(np.power(err, 2.0)))**0.5
    return rmse

### (4.c) Simple Calibration
Change one parameter at a time. This method can be useful when input parameters are independent.
<img src = "./figures/calibration_cycle.png">

In [None]:
orig_params.keys()

In [None]:
%%time
parameters = ['gwflow_coef', 'snarea_curve', 'ssr2gw_rate']
factors = [ 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25]
all_errors = {}
for par in parameters:    
    errors = []
    for facc in factors:
        print("\n****** Par =  {}, Facc = {} ****** ".format(par, facc))
        
        # change input parameters
        Params.set_values(name=par, values = orig_params[par]*facc) # change the parameter
        
        # write input files
        gs.write_input(workspace=os.path.join(".", "data", "temp"))              # write input
        
        # run the model
        gs.run_model()                                              # run the model
        
        # load the results
        gs.prms.get_StatVar()                                       # load load results from Statvar 
        
        # calculate the error
        err = calculate_error(gs)                                    # compute the mean square error
        errors.append(err)
        
    all_errors[par] = errors
    Params.set_values(name=par, values=orig_params[par])
   


In [None]:
# One panel per calibrated parameter: error metric vs. scaling factor
plt.figure(figsize=(15,5))
n_panels = len(all_errors)
for i, (par, par_errors) in enumerate(all_errors.items(), start=1):
    plt.subplot(1, n_panels, i)
    plt.plot(factors, par_errors, label = par, marker='s' )
    if i == 1:
        # calculate_error returns the root of the mean squared error of flows
        # given in cfs, so the metric is an RMSE in ft^3/s (not an MSE in ft^3)
        plt.ylabel("Root Mean Squared Error " + r"$(ft^{3}/s)$")
    # Shared y-range so the panels are directly comparable.
    # NOTE(review): the 20-25 range is hard-coded; values outside it are not shown.
    plt.ylim([20, 25])
    plt.title(par)

plt.show()
    

### Remember: This calibration method is useful only if the input parameters are independent. In external calibration software, parameters can be changed and evaluated concurrently.