# Linear regression for predicting streamflow error in PCR model

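A multiple linear regression (MLR) is fitted to the output of the PCR model and meteorological data to predict the PCR model's streamflow error. The predicted error is then added back to the PCR output to obtain a corrected streamflow estimate, which is evaluated against the observations.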

In [None]:
!pip install hydroeval

In [2]:
# --- run configuration ---
# Number of train/test splits to evaluate (splits are numbered 1..RUNS).
RUNS = 5
# Whether to use the data sets that include lagged variables.
LAG = True
# Location name; used as a sub-folder of the input and output paths.
LOCATION = 'lobith'

#import libraries
import os
from datetime import datetime

import hydroeval as he
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

# Folders holding the formatted input data and receiving the predictions
# for the selected location.
in_folder = f'/content/drive/MyDrive/ADS/Final Thesis Project/temp/formatted_data/{LOCATION}/'
out_folder = f'/content/drive/MyDrive/ADS/Final Thesis Project/temp/predictions/{LOCATION}/'

# Both summary logs start with the same header line: the moment this cell
# was run, formatted as day/month/year hour:minute:second.
now = datetime.now()
dt_string = f'{now:%d/%m/%Y %H:%M:%S}'

mlr_summary = pcr_summary = f'{dt_string}\n'

# Evaluate the linear-regression error correction on every test split.
#
# For each split i in 1..RUNS the loop
#   1. loads the train/test arrays for the chosen mode (lagged or not),
#   2. fits a linear regression and predicts the target on the test set,
#   3. scores the corrected prediction (MLR) and the raw PCR output with
#      NSE and KGE against the observations,
#   4. appends the scores to the summary strings and saves both series.

# 'lag' or 'no_lag': selects the input files and the output sub-folder.
lag_tag = 'lag' if LAG else 'no_lag'
pcr_dir = f'{out_folder}{lag_tag}/pcr/'
mlr_dir = f'{out_folder}{lag_tag}/mlr/'

for i in range(1, RUNS + 1):

    # Read the data for this split.
    # NOTE: the arrays are unpacked by position, so this relies on the order
    # in which they were stored in the archive (and on there being exactly
    # five of them; anything else raises ValueError).
    with np.load(f'{in_folder}{lag_tag}_{i}.npz') as f:
        X_train, X_test, y_train, y_test, obs = list(f.values())

    # Fit the model and make a prediction.
    lm = LinearRegression()
    lm.fit(X_train, y_train)
    y_pred = lm.predict(X_test)

    # Recover the PCR series from the observations and the regression target
    # (presumably target = observation - PCR output; verify against the code
    # that writes the .npz files), then add the predicted target back to it
    # to obtain the corrected MLR prediction.
    pcr = obs - y_test
    pred = pcr + y_pred

    # Score the corrected prediction. The KGE result is unpacked into the
    # score and its three components.
    nse = he.evaluator(he.nse, pred, obs)
    kge, r, alpha, beta = he.evaluator(he.kge, pred, obs)

    # Score the raw PCR output. The components get their own names so they
    # do not overwrite the MLR components computed just above.
    nse_pcr = he.evaluator(he.nse, pcr, obs)
    kge_pcr, r_pcr, alpha_pcr, beta_pcr = he.evaluator(he.kge, pcr, obs)

    # Write the results in the log strings (first element of each score).
    mlr_summary += f'***\nModel: MLR\nLag: {LAG}\nTest split: {i}\nNSE: {nse[0]}\nKGE: {kge[0]}\n'
    pcr_summary += f'***\nModel: PCR\nTest split: {i}\nNSE: {nse_pcr[0]}\nKGE: {kge_pcr[0]}\n'

    # Make sure the output folders exist; np.save does not create them.
    # This is done only after the input was loaded successfully, so no
    # folders are created when the input data cannot be read.
    os.makedirs(pcr_dir, exist_ok=True)
    os.makedirs(mlr_dir, exist_ok=True)

    # Save the predictions (np.save appends the '.npy' extension).
    np.save(f'{pcr_dir}pcr_test_{i}', pcr)
    np.save(f'{mlr_dir}mlr_test_{i}', pred)

# Write each summary log next to the predictions of its model:
# <out_folder>/<lag|no_lag>/<model>/<model>_summary.txt
results_dir = out_folder + ('lag' if LAG else 'no_lag')
for model_name, summary_text in (('mlr', mlr_summary), ('pcr', pcr_summary)):
    with open(f'{results_dir}/{model_name}/{model_name}_summary.txt', 'w') as handle:
        handle.write(summary_text)