In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR

**Read data**

In [None]:
# Load the merged IMD table and discard its 'Unnamed: 0' column
# (presumably an index column that was saved with the CSV -- confirm).
df = pd.read_csv(
    "/Users/rahuljauhari/Desktop/research runoff/final destination/merged_imd.csv"
).drop(columns=['Unnamed: 0'])

In [None]:
# Parse the 'DateTime' column into timestamps and promote it to the row index.
df = df.assign(DateTime=pd.to_datetime(df['DateTime'])).set_index('DateTime')

In [None]:
# Average every column within each 'M' resampling bin of the DateTime index.
monthly_mean = df.resample(rule='M').mean()

In [None]:
# PCA on the monthly means: keep the first 100 principal components.
from sklearn.decomposition import PCA

# random_state pins the decomposition whenever scikit-learn selects a randomized
# SVD solver, so the components are identical after every Restart & Run All.
# NOTE(review): the PCA is fitted on every month, including the rows that later form
# the test split -- consider fitting on the training period only to avoid leakage.
pca = PCA(n_components=100, random_state=42)
pca.fit(monthly_mean)
monthly_mean_pca = pca.transform(monthly_mean)

In [None]:
# Preview the first ten rows of the PCA-transformed features.
monthly_mean_pca[:10]

**Actual value**

In [None]:
# Load the calibration/validation workbook; its 'observed' column is the modelling target.
df_actual = pd.read_excel(
    io="/Users/rahuljauhari/Desktop/research runoff/Calibrated and Validated.xlsx"
)

In [None]:
# Target series: the 'observed' column of the workbook (selected by name, not by position).
observed_runnoff = df_actual.loc[:, 'observed']

**Normalization**

In [None]:
from scipy.stats import zscore
def func(name):
    """Normalise the PCA features and the observed runoff with the chosen method.

    Reads the notebook-level variables ``monthly_mean_pca`` (features) and
    ``observed_runnoff`` (target).

    Parameters
    ----------
    name : str
        ``'zscore'``         -- ``scipy.stats.zscore`` on both inputs, then outlier capping.
        ``'StandardScaler'`` -- scikit-learn ``StandardScaler``, then outlier capping.
        ``'MinMaxScaler'``   -- scikit-learn ``MinMaxScaler`` with range (0, 1), no capping.

    Returns
    -------
    tuple
        ``(x, y)``: the normalised features and the normalised target. For the two
        scaler methods the target is a column of shape (n, 1), because it is reshaped
        with ``reshape(-1, 1)`` before scaling; for ``'zscore'`` it keeps the
        dimensionality of ``observed_runnoff``.

    Raises
    ------
    ValueError
        If ``name`` is not one of the three supported methods. (Previously an unknown
        name silently returned ``(0, 0)``, which only failed later in the split.)
    """
    def _cap_outliers(values):
        # Standardised values beyond +/-3 are replaced in place by +/-2.8.
        values[values > 3] = 2.8
        values[values < -3] = -2.8
        return values

    if name == 'zscore':
        x = _cap_outliers(zscore(monthly_mean_pca))
        y = _cap_outliers(zscore(observed_runnoff))
    elif name == 'StandardScaler':
        # Features and target get their own scaler: each must be fitted separately.
        x = _cap_outliers(StandardScaler().fit_transform(monthly_mean_pca))
        y = _cap_outliers(
            StandardScaler().fit_transform(observed_runnoff.values.reshape(-1, 1))
        )
    elif name == 'MinMaxScaler':
        x = MinMaxScaler(feature_range=(0, 1)).fit_transform(monthly_mean_pca)
        y = MinMaxScaler(feature_range=(0, 1)).fit_transform(
            observed_runnoff.values.reshape(-1, 1)
        )
    else:
        raise ValueError(
            f"Unknown normalisation method {name!r}; "
            "expected 'zscore', 'StandardScaler' or 'MinMaxScaler'"
        )
    return x, y

In [None]:
from sklearn.metrics import mean_squared_error
def rmse1(yt, yp):
    """Root-mean-square error between targets ``yt`` and predictions ``yp`` (lower is better)."""
    mse = mean_squared_error(yt, yp)
    return np.sqrt(mse)
# Kling-Gupta efficiency
def kge1(yt, yp):
    """Kling-Gupta efficiency of predictions ``yp`` against targets ``yt``.

    Higher is better; the value is 1 when the correlation, the ratio of standard
    deviations and the ratio of means are all exactly 1.
    """
    corr_matrix = np.corrcoef(yt, yp, rowvar=False)
    r = corr_matrix[0, 1]             # linear correlation between yt and yp
    alpha = np.std(yp) / np.std(yt)   # variability ratio
    beta = np.mean(yp) / np.mean(yt)  # bias ratio
    squared_distance = (r - 1) ** 2 + (alpha - 1) ** 2 + (beta - 1) ** 2
    return 1 - np.sqrt(squared_distance)
# Nash-Sutcliffe efficiency
def nse1(yt, yp):
    """Nash-Sutcliffe efficiency of predictions ``yp`` against targets ``yt``.

    Higher is better; 1 means the predictions equal the targets, 0 means they are
    no better than always predicting the mean of ``yt``.

    Both inputs are flattened first. Without that, a column-vector target of shape
    (n, 1) minus a 1-D prediction of shape (n,) broadcasts to an (n, n) matrix and
    the residual sum is taken over every pair of samples instead of matching ones.
    """
    yt = np.asarray(yt, dtype=float).ravel()
    yp = np.asarray(yp, dtype=float).ravel()
    residual_ss = np.sum((yt - yp) ** 2)
    total_ss = np.sum((yt - np.mean(yt)) ** 2)
    return 1 - residual_ss / total_ss
def r21(yt, yp):
    """Coefficient of determination (R squared) of ``yp`` against ``yt``; higher is better.

    Computed as ``1 - SS_res / SS_tot``.

    Both inputs are flattened first. Without that, a column-vector target of shape
    (n, 1) minus a 1-D prediction of shape (n,) broadcasts to an (n, n) matrix and
    the residual sum is taken over every pair of samples instead of matching ones.
    """
    yt = np.asarray(yt, dtype=float).ravel()
    yp = np.asarray(yp, dtype=float).ravel()
    residual_ss = np.sum((yt - yp) ** 2)
    total_ss = np.sum((yt - np.mean(yt)) ** 2)
    return 1 - residual_ss / total_ss

**Train test split**

In [None]:
from sklearn.model_selection import train_test_split

# Min-max scale the features and the target, then hold out the final 30% of the rows
# for testing. shuffle=False keeps the original row order, so the split is chronological
# as long as the rows are in date order.
x, y = func(name="MinMaxScaler")
X_train, X_test, y_train, y_test = train_test_split(x, y, shuffle=False, test_size=0.3)

**SVR**

In [None]:
# Baseline: SVR with scikit-learn's default hyper-parameters.
# fit() returns the estimator itself, so construction and fitting are chained.
SVR_model = SVR().fit(X_train, y_train.ravel())
y_pred_train = SVR_model.predict(X_train)
y_pred = SVR_model.predict(X_test)


In [None]:
# Training-set scores.
# y_train is a column vector when it comes from a scikit-learn scaler, while the
# predictions are 1-D; flatten it so no metric broadcasts the pair into an (n, n) matrix.
y_train_flat = np.ravel(y_train)
print("RMSE: ", rmse1(y_train_flat, y_pred_train))
print("KGE: ", kge1(y_train_flat, y_pred_train))
print("R2: ", r21(y_train_flat, y_pred_train))

In [None]:
# Test-set scores.
# y_test is a column vector when it comes from a scikit-learn scaler, while the
# predictions are 1-D; flatten it so no metric broadcasts the pair into an (n, n) matrix.
y_test_flat = np.ravel(y_test)
print("RMSE: ", rmse1(y_test_flat, y_pred))
print("KGE: ", kge1(y_test_flat, y_pred))
print("R2: ", r21(y_test_flat, y_pred))

In [None]:
from sklearn.model_selection import KFold
from sklearn.model_selection import RandomizedSearchCV

# Candidate values for each SVR hyper-parameter; the search samples combinations of these.
kernel = ['linear', 'poly', 'rbf', 'sigmoid']
C = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
gamma = ['scale', 'auto']
degree = [0, 1, 2, 3, 4, 5, 6]
epsilon = [0.001, 0.01, 0.1, 1, 10]
coef0 = [0.001, 0.01, 0.1, 1, 10]
tol = [0.0001, 0.001, 0.01, 0.1, 1, 10]
shrinking = [True, False]
random_grid = {'kernel': kernel, 'C': C, 'gamma': gamma, 'degree': degree, 'epsilon': epsilon, 'coef0': coef0, 'tol': tol, 'shrinking': shrinking}
SVR_model = SVR()
# Five contiguous, unshuffled folds over the training rows.
cv = KFold(n_splits=5, shuffle=False)
# random_state fixes which 500 candidates are drawn from the grid, so best_params_
# (and every score reported afterwards) is the same on each Restart & Run All.
SVR_random = RandomizedSearchCV(estimator = SVR_model, param_distributions = random_grid, n_iter = 500, cv = cv, verbose=0, n_jobs = -1, random_state = 42)
SVR_random.fit(X_train, y_train.ravel())


In [None]:
# Keep the best estimator found by the search and report its hyper-parameters.
mod = SVR_random.best_estimator_
print(SVR_random.best_params_)

In [None]:
# Predict with the tuned model on the training and test splits.
y_pred_train, y_pred = mod.predict(X_train), mod.predict(X_test)

In [None]:
# Training-set scores.
# y_train is a column vector when it comes from a scikit-learn scaler, while the
# predictions are 1-D; flatten it so no metric broadcasts the pair into an (n, n) matrix.
y_train_flat = np.ravel(y_train)
print("RMSE: ", rmse1(y_train_flat, y_pred_train))
print("KGE: ", kge1(y_train_flat, y_pred_train))
print("R2: ", r21(y_train_flat, y_pred_train))

In [None]:
# Test-set scores.
# y_test is a column vector when it comes from a scikit-learn scaler, while the
# predictions are 1-D; flatten it so no metric broadcasts the pair into an (n, n) matrix.
y_test_flat = np.ravel(y_test)
print("RMSE: ", rmse1(y_test_flat, y_pred))
print("KGE: ", kge1(y_test_flat, y_pred))
print("R2: ", r21(y_test_flat, y_pred))