In [12]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [13]:
data = pd.read_csv("../Data/Biometric Data Analysis/train.csv").fillna(0)
data.drop("id", axis=1, inplace=True)

In [14]:
data_rho_25 = data[data['rho'] == 25]
data_rho_20 = data[data['rho'] == 20]
data_rho_15 = data[data['rho'] == 15]
data_rho_10 = data[data['rho'] == 10]

In [15]:
from sklearn.model_selection import train_test_split

In [16]:
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import classification_report, confusion_matrix



In [17]:
def XY(df):
    X = df.iloc[:,:-4]
    y_hhb = df.iloc[:,-4]
    y_hhbo2 = df.iloc[:,-3]
    y_ca = df.iloc[:,-2]
    y_na = df.iloc[:,-1]
    
    return X, y_hhb, y_hhbo2, y_ca, y_na

### Linear Regression

In [27]:
# Linear Regression
from sklearn.linear_model import LinearRegression

def Linear_Regression(df, num):
    X, y_hhb, y_hbo2, y_ca, y_na = XY(df)
    
    # Split data
    X1_train, X1_test, y1_train, y1_test = train_test_split(X, y_hhb, test_size=0.2, shuffle=1234)
    X2_train, X2_test, y2_train, y2_test = train_test_split(X, y_hbo2, test_size=0.2, shuffle=1234)
    X3_train, X3_test, y3_train, y3_test = train_test_split(X, y_ca, test_size=0.2, shuffle=1234)
    X4_train, X4_test, y4_train, y4_test = train_test_split(X, y_na, test_size=0.2, shuffle=1234)
    
    # LinearRegression
    model_hhb = LinearRegression()
    model_hhb.fit(X1_train, y1_train)
    
    model_hbo2 = LinearRegression()
    model_hbo2.fit(X2_train, y2_train)
    
    model_ca = LinearRegression()
    model_ca.fit(X3_train, y3_train)
    
    model_na = LinearRegression()
    model_na.fit(X4_train, y4_train)
                 
    preds_hhb = model_hhb.predict(X1_test)
    preds_hbo2 = model_hbo2.predict(X2_test)
    preds_ca = model_ca.predict(X3_test)
    preds_na = model_na.predict(X4_test)
    
    rmse_hhb = np.sqrt(mean_squared_error(y1_test, preds_hhb))
    rmse_hbo2 = np.sqrt(mean_squared_error(y2_test, preds_hbo2))
    rmse_ca = np.sqrt(mean_squared_error(y3_test, preds_ca))
    rmse_na = np.sqrt(mean_squared_error(y4_test, preds_na))

    print(num, " mm")
    print("RMSE - hhb  : %f" % (rmse_hhb))
    print("RMSE - hbo2 : %f" % (rmse_hbo2))
    print("RMSE - ca   : %f" % (rmse_ca))
    print("RMSE - na   : %f" % (rmse_na))
    
    
    return model_hhb, model_hbo2, model_ca, model_na

In [37]:
print("Linear Regression")
hbb_25, hbo2_25, ca_25, na_25 = Linear_Regression(data_rho_25, 25)
hbb_20, hbo2_20, ca_20, na_20 = Linear_Regression(data_rho_20, 20)
hbb_15, hbo2_15, ca_15, na_15 = Linear_Regression(data_rho_15, 15)
hbb_10, hbo2_10, ca_10, na_10 = Linear_Regression(data_rho_10, 10)

Linear Regression
25  mm
RMSE - hhb  : 2.930094
RMSE - hbo2 : 0.976743
RMSE - ca   : 3.179317
RMSE - na   : 2.495150
20  mm
RMSE - hhb  : 13.457468
RMSE - hbo2 : 1.228934
RMSE - ca   : 3.318765
RMSE - na   : 2.165221
15  mm
RMSE - hhb  : 2.864130
RMSE - hbo2 : 1.370279
RMSE - ca   : 3.161002
RMSE - na   : 2.235443
10  mm
RMSE - hhb  : 2.674224
RMSE - hbo2 : 1.029513
RMSE - ca   : 3.119747
RMSE - na   : 1.864971


### TheilSenRegressor

In [42]:
# TheilSenRegressor
from sklearn.linear_model import  TheilSenRegressor

def TheilSen_Regressor(df, num):
    X, y_hhb, y_hbo2, y_ca, y_na = XY(df)
    
    # Split data
    X1_train, X1_test, y1_train, y1_test = train_test_split(X, y_hhb, test_size=0.2, shuffle=1234)
    X2_train, X2_test, y2_train, y2_test = train_test_split(X, y_hbo2, test_size=0.2, shuffle=1234)
    X3_train, X3_test, y3_train, y3_test = train_test_split(X, y_ca, test_size=0.2, shuffle=1234)
    X4_train, X4_test, y4_train, y4_test = train_test_split(X, y_na, test_size=0.2, shuffle=1234)
    
    # LinearRegression
    model_hhb = TheilSenRegressor()
    model_hhb.fit(X1_train, y1_train)
    
    model_hbo2 = TheilSenRegressor()
    model_hbo2.fit(X2_train, y2_train)
    
    model_ca = TheilSenRegressor()
    model_ca.fit(X3_train, y3_train)
    
    model_na = TheilSenRegressor()
    model_na.fit(X4_train, y4_train)
                 
    preds_hhb = model_hhb.predict(X1_test)
    preds_hbo2 = model_hbo2.predict(X2_test)
    preds_ca = model_ca.predict(X3_test)
    preds_na = model_na.predict(X4_test)
    
    rmse_hhb = np.sqrt(mean_squared_error(y1_test, preds_hhb))
    rmse_hbo2 = np.sqrt(mean_squared_error(y2_test, preds_hbo2))
    rmse_ca = np.sqrt(mean_squared_error(y3_test, preds_ca))
    rmse_na = np.sqrt(mean_squared_error(y4_test, preds_na))

    print(num, " mm")
    print("RMSE - hhb  : %f" % (rmse_hhb))
    print("RMSE - hbo2 : %f" % (rmse_hbo2))
    print("RMSE - ca   : %f" % (rmse_ca))
    print("RMSE - na   : %f" % (rmse_na))
    
    
    return model_hhb, model_hbo2, model_ca, model_na

In [43]:
print("TheilSenRegressor")
hbb_25, hbo2_25, ca_25, na_25 = TheilSen_Regressor(data_rho_25, 25)
hbb_20, hbo2_20, ca_20, na_20 = TheilSen_Regressor(data_rho_20, 20)
hbb_15, hbo2_15, ca_15, na_15 = TheilSen_Regressor(data_rho_15, 15)
hbb_10, hbo2_10, ca_10, na_10 = TheilSen_Regressor(data_rho_10, 10)

TheilSenRegressor
25  mm
RMSE - hhb  : 2.994742
RMSE - hbo2 : 0.950928
RMSE - ca   : 3.077671
RMSE - na   : 1.884541
20  mm
RMSE - hhb  : 74.509832
RMSE - hbo2 : 2.776281
RMSE - ca   : 3.162600
RMSE - na   : 2.105739
15  mm
RMSE - hhb  : 25.292223
RMSE - hbo2 : 3.315575
RMSE - ca   : 28.146580
RMSE - na   : 9.710598
10  mm
RMSE - hhb  : 8.209610
RMSE - hbo2 : 1.445874
RMSE - ca   : 14.211019
RMSE - na   : 4.823271


### RANSACRegressor

In [44]:
# RANSACRegressor
from sklearn.linear_model import  RANSACRegressor

def RANSAC_Regressor(df, num):
    X, y_hhb, y_hbo2, y_ca, y_na = XY(df)
    
    # Split data
    X1_train, X1_test, y1_train, y1_test = train_test_split(X, y_hhb, test_size=0.2, shuffle=1234)
    X2_train, X2_test, y2_train, y2_test = train_test_split(X, y_hbo2, test_size=0.2, shuffle=1234)
    X3_train, X3_test, y3_train, y3_test = train_test_split(X, y_ca, test_size=0.2, shuffle=1234)
    X4_train, X4_test, y4_train, y4_test = train_test_split(X, y_na, test_size=0.2, shuffle=1234)
    
    # LinearRegression
    model_hhb = RANSACRegressor()
    model_hhb.fit(X1_train, y1_train)
    
    model_hbo2 = RANSACRegressor()
    model_hbo2.fit(X2_train, y2_train)
    
    model_ca = RANSACRegressor()
    model_ca.fit(X3_train, y3_train)
    
    model_na = RANSACRegressor()
    model_na.fit(X4_train, y4_train)
                 
    preds_hhb = model_hhb.predict(X1_test)
    preds_hbo2 = model_hbo2.predict(X2_test)
    preds_ca = model_ca.predict(X3_test)
    preds_na = model_na.predict(X4_test)
    
    rmse_hhb = np.sqrt(mean_squared_error(y1_test, preds_hhb))
    rmse_hbo2 = np.sqrt(mean_squared_error(y2_test, preds_hbo2))
    rmse_ca = np.sqrt(mean_squared_error(y3_test, preds_ca))
    rmse_na = np.sqrt(mean_squared_error(y4_test, preds_na))

    print(num, " mm")
    print("RMSE - hhb  : %f" % (rmse_hhb))
    print("RMSE - hbo2 : %f" % (rmse_hbo2))
    print("RMSE - ca   : %f" % (rmse_ca))
    print("RMSE - na   : %f" % (rmse_na))
    
    
    return model_hhb, model_hbo2, model_ca, model_na

In [46]:
print("RANSACRegressor")
hbb_25, hbo2_25, ca_25, na_25 = RANSAC_Regressor(data_rho_25, 25)
hbb_20, hbo2_20, ca_20, na_20 = RANSAC_Regressor(data_rho_20, 20)
hbb_15, hbo2_15, ca_15, na_15 = RANSAC_Regressor(data_rho_15, 15)
hbb_10, hbo2_10, ca_10, na_10 = RANSAC_Regressor(data_rho_10, 10)

RANSACRegressor
25  mm
RMSE - hhb  : 24.352318
RMSE - hbo2 : 1.413500
RMSE - ca   : 3.799792
RMSE - na   : 4.264306
20  mm
RMSE - hhb  : 378.559878
RMSE - hbo2 : 24.133447
RMSE - ca   : 1357.263409
RMSE - na   : 115.556450
15  mm
RMSE - hhb  : 301.397490
RMSE - hbo2 : 20.399617
RMSE - ca   : 314.923702
RMSE - na   : 219.325411
10  mm
RMSE - hhb  : 17.113484
RMSE - hbo2 : 6.989808
RMSE - ca   : 68.583273
RMSE - na   : 22.959117


### RANSACRegressor

In [48]:
# RANSACRegressor
from sklearn.linear_model import  HuberRegressor

def Huber_Regressor(df, num):
    X, y_hhb, y_hbo2, y_ca, y_na = XY(df)
    
    # Split data
    X1_train, X1_test, y1_train, y1_test = train_test_split(X, y_hhb, test_size=0.2, shuffle=1234)
    X2_train, X2_test, y2_train, y2_test = train_test_split(X, y_hbo2, test_size=0.2, shuffle=1234)
    X3_train, X3_test, y3_train, y3_test = train_test_split(X, y_ca, test_size=0.2, shuffle=1234)
    X4_train, X4_test, y4_train, y4_test = train_test_split(X, y_na, test_size=0.2, shuffle=1234)
    
    # LinearRegression
    model_hhb = HuberRegressor()
    model_hhb.fit(X1_train, y1_train)
    
    model_hbo2 = HuberRegressor()
    model_hbo2.fit(X2_train, y2_train)
    
    model_ca = HuberRegressor()
    model_ca.fit(X3_train, y3_train)
    
    model_na = HuberRegressor()
    model_na.fit(X4_train, y4_train)
                 
    preds_hhb = model_hhb.predict(X1_test)
    preds_hbo2 = model_hbo2.predict(X2_test)
    preds_ca = model_ca.predict(X3_test)
    preds_na = model_na.predict(X4_test)
    
    rmse_hhb = np.sqrt(mean_squared_error(y1_test, preds_hhb))
    rmse_hbo2 = np.sqrt(mean_squared_error(y2_test, preds_hbo2))
    rmse_ca = np.sqrt(mean_squared_error(y3_test, preds_ca))
    rmse_na = np.sqrt(mean_squared_error(y4_test, preds_na))

    print(num, " mm")
    print("RMSE - hhb  : %f" % (rmse_hhb))
    print("RMSE - hbo2 : %f" % (rmse_hbo2))
    print("RMSE - ca   : %f" % (rmse_ca))
    print("RMSE - na   : %f" % (rmse_na))
    
    
    return model_hhb, model_hbo2, model_ca, model_na

print("HuberRegressor")
hbb_25, hbo2_25, ca_25, na_25 = Huber_Regressor(data_rho_25, 25)
hbb_20, hbo2_20, ca_20, na_20 = Huber_Regressor(data_rho_20, 20)
hbb_15, hbo2_15, ca_15, na_15 = Huber_Regressor(data_rho_15, 15)
hbb_10, hbo2_10, ca_10, na_10 = Huber_Regressor(data_rho_10, 10)

HuberRegressor


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

25  mm
RMSE - hhb  : 2.995170
RMSE - hbo2 : 1.003571
RMSE - ca   : 3.086868
RMSE - na   : 1.866875


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


20  mm
RMSE - hhb  : 2.907043
RMSE - hbo2 : 0.990710
RMSE - ca   : 3.100670
RMSE - na   : 1.920653


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

15  mm
RMSE - hhb  : 3.033924
RMSE - hbo2 : 0.998973
RMSE - ca   : 3.098365
RMSE - na   : 1.890855
10  mm
RMSE - hhb  : 3.036493
RMSE - hbo2 : 1.052329
RMSE - ca   : 3.050888
RMSE - na   : 1.849535


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


### Support Vector Machine

In [67]:
# SVC
from sklearn.svm import SVR

def Support_Vector_Machine(df, num):
    X, y_hhb, y_hbo2, y_ca, y_na = XY(df)
    
    # Split data
    X1_train, X1_test, y1_train, y1_test = train_test_split(X, y_hhb, test_size=0.2, shuffle=1234)
    X2_train, X2_test, y2_train, y2_test = train_test_split(X, y_hbo2, test_size=0.2, shuffle=1234)
    X3_train, X3_test, y3_train, y3_test = train_test_split(X, y_ca, test_size=0.2, shuffle=1234)
    X4_train, X4_test, y4_train, y4_test = train_test_split(X, y_na, test_size=0.2, shuffle=1234)
    
    # LinearRegression
    model_hhb = SVR()
    model_hhb.fit(X1_train, y1_train)
    
    model_hbo2 = SVR()
    model_hbo2.fit(X2_train, y2_train)
    
    model_ca = SVR()
    model_ca.fit(X3_train, y3_train)
    
    model_na = SVR()
    model_na.fit(X4_train, y4_train)
                 
    preds_hhb = model_hhb.predict(X1_test)
    preds_hbo2 = model_hbo2.predict(X2_test)
    preds_ca = model_ca.predict(X3_test)
    preds_na = model_na.predict(X4_test)
    
    rmse_hhb = np.sqrt(mean_squared_error(y1_test, preds_hhb))
    rmse_hbo2 = np.sqrt(mean_squared_error(y2_test, preds_hbo2))
    rmse_ca = np.sqrt(mean_squared_error(y3_test, preds_ca))
    rmse_na = np.sqrt(mean_squared_error(y4_test, preds_na))

    print(num, " mm")
    print("RMSE - hhb  : %f" % (rmse_hhb))
    print("RMSE - hbo2 : %f" % (rmse_hbo2))
    print("RMSE - ca   : %f" % (rmse_ca))
    print("RMSE - na   : %f" % (rmse_na))
    
    
    return model_hhb, model_hbo2, model_ca, model_na

print("Support_Vector_Machine")
hbb_25, hbo2_25, ca_25, na_25 = Support_Vector_Machine(data_rho_25, 25)
hbb_20, hbo2_20, ca_20, na_20 = Support_Vector_Machine(data_rho_20, 20)
hbb_15, hbo2_15, ca_15, na_15 = Support_Vector_Machine(data_rho_15, 15)
hbb_10, hbo2_10, ca_10, na_10 = Support_Vector_Machine(data_rho_10, 10)

Support_Vector_Machine
25  mm
RMSE - hhb  : 2.983824
RMSE - hbo2 : 0.986811
RMSE - ca   : 3.060924
RMSE - na   : 1.862710
20  mm
RMSE - hhb  : 2.935460
RMSE - hbo2 : 0.965731
RMSE - ca   : 3.089126
RMSE - na   : 1.910480
15  mm
RMSE - hhb  : 2.800459
RMSE - hbo2 : 1.019873
RMSE - ca   : 3.093567
RMSE - na   : 1.781468
10  mm
RMSE - hhb  : 3.008667
RMSE - hbo2 : 0.983433
RMSE - ca   : 3.004898
RMSE - na   : 1.869402


### Nu Support Vector Regression

In [73]:
from sklearn.svm import NuSVR

def Nu_Support_Vector_Regression(df, num):
    X, y_hhb, y_hbo2, y_ca, y_na = XY(df)
    
    # Split data
    X1_train, X1_test, y1_train, y1_test = train_test_split(X, y_hhb, test_size=0.2, shuffle=1234)
    X2_train, X2_test, y2_train, y2_test = train_test_split(X, y_hbo2, test_size=0.2, shuffle=1234)
    X3_train, X3_test, y3_train, y3_test = train_test_split(X, y_ca, test_size=0.2, shuffle=1234)
    X4_train, X4_test, y4_train, y4_test = train_test_split(X, y_na, test_size=0.2, shuffle=1234)
    
    # LinearRegression
    model_hhb = NuSVR()
    model_hhb.fit(X1_train, y1_train)
    
    model_hbo2 = NuSVR()
    model_hbo2.fit(X2_train, y2_train)
    
    model_ca = NuSVR()
    model_ca.fit(X3_train, y3_train)
    
    model_na = NuSVR()
    model_na.fit(X4_train, y4_train)
                 
    preds_hhb = model_hhb.predict(X1_test)
    preds_hbo2 = model_hbo2.predict(X2_test)
    preds_ca = model_ca.predict(X3_test)
    preds_na = model_na.predict(X4_test)
    
    rmse_hhb = np.sqrt(mean_squared_error(y1_test, preds_hhb))
    rmse_hbo2 = np.sqrt(mean_squared_error(y2_test, preds_hbo2))
    rmse_ca = np.sqrt(mean_squared_error(y3_test, preds_ca))
    rmse_na = np.sqrt(mean_squared_error(y4_test, preds_na))

    print(num, " mm")
    print("RMSE - hhb  : %f" % (rmse_hhb))
    print("RMSE - hbo2 : %f" % (rmse_hbo2))
    print("RMSE - ca   : %f" % (rmse_ca))
    print("RMSE - na   : %f" % (rmse_na))
    
    
    return model_hhb, model_hbo2, model_ca, model_na

print("Nu_Support_Vector_Regression")
hbb_25, hbo2_25, ca_25, na_25 = Nu_Support_Vector_Regression(data_rho_25, 25)
hbb_20, hbo2_20, ca_20, na_20 = Nu_Support_Vector_Regression(data_rho_20, 20)
hbb_15, hbo2_15, ca_15, na_15 = Nu_Support_Vector_Regression(data_rho_15, 15)
hbb_10, hbo2_10, ca_10, na_10 = Nu_Support_Vector_Regression(data_rho_10, 10)

Nu_Support_Vector_Regression
25  mm
RMSE - hhb  : 2.929986
RMSE - hbo2 : 1.002545
RMSE - ca   : 2.895598
RMSE - na   : 1.841261
20  mm
RMSE - hhb  : 2.965937
RMSE - hbo2 : 1.035463
RMSE - ca   : 3.056275
RMSE - na   : 1.816348
15  mm
RMSE - hhb  : 3.199093
RMSE - hbo2 : 1.053477
RMSE - ca   : 2.904442
RMSE - na   : 1.867375
10  mm
RMSE - hhb  : 3.120643
RMSE - hbo2 : 1.008693
RMSE - ca   : 2.939181
RMSE - na   : 1.868486


### Bayesian Ridge Regression

In [77]:
from sklearn.linear_model import BayesianRidge

def Bayesian_Ridge(df, num):
    X, y_hhb, y_hbo2, y_ca, y_na = XY(df)
    
    # Split data
    X1_train, X1_test, y1_train, y1_test = train_test_split(X, y_hhb, test_size=0.2, shuffle=1234)
    X2_train, X2_test, y2_train, y2_test = train_test_split(X, y_hbo2, test_size=0.2, shuffle=1234)
    X3_train, X3_test, y3_train, y3_test = train_test_split(X, y_ca, test_size=0.2, shuffle=1234)
    X4_train, X4_test, y4_train, y4_test = train_test_split(X, y_na, test_size=0.2, shuffle=1234)
    
    # LinearRegression
    model_hhb = BayesianRidge()
    model_hhb.fit(X1_train, y1_train)
    
    model_hbo2 = BayesianRidge()
    model_hbo2.fit(X2_train, y2_train)
    
    model_ca = BayesianRidge()
    model_ca.fit(X3_train, y3_train)
    
    model_na = BayesianRidge()
    model_na.fit(X4_train, y4_train)
                 
    preds_hhb = model_hhb.predict(X1_test)
    preds_hbo2 = model_hbo2.predict(X2_test)
    preds_ca = model_ca.predict(X3_test)
    preds_na = model_na.predict(X4_test)
    
    rmse_hhb = np.sqrt(mean_squared_error(y1_test, preds_hhb))
    rmse_hbo2 = np.sqrt(mean_squared_error(y2_test, preds_hbo2))
    rmse_ca = np.sqrt(mean_squared_error(y3_test, preds_ca))
    rmse_na = np.sqrt(mean_squared_error(y4_test, preds_na))

    print(num, " mm")
    print("RMSE - hhb  : %f" % (rmse_hhb))
    print("RMSE - hbo2 : %f" % (rmse_hbo2))
    print("RMSE - ca   : %f" % (rmse_ca))
    print("RMSE - na   : %f" % (rmse_na))
    
    
    return model_hhb, model_hbo2, model_ca, model_na

print("Nu_Support_Vector_Regression")
hbb_25, hbo2_25, ca_25, na_25 = Bayesian_Ridge(data_rho_25, 25)
hbb_20, hbo2_20, ca_20, na_20 = Bayesian_Ridge(data_rho_20, 20)
hbb_15, hbo2_15, ca_15, na_15 = Bayesian_Ridge(data_rho_15, 15)
hbb_10, hbo2_10, ca_10, na_10 = Bayesian_Ridge(data_rho_10, 10)

Nu_Support_Vector_Regression
25  mm
RMSE - hhb  : 2.900236
RMSE - hbo2 : 0.967423
RMSE - ca   : 2.811305
RMSE - na   : 1.876900
20  mm
RMSE - hhb  : 2.859963
RMSE - hbo2 : 1.006424
RMSE - ca   : 3.002404
RMSE - na   : 1.921736
15  mm
RMSE - hhb  : 2.959169
RMSE - hbo2 : 1.007914
RMSE - ca   : 2.874593
RMSE - na   : 1.888983
10  mm
RMSE - hhb  : 2.907267
RMSE - hbo2 : 0.976428
RMSE - ca   : 2.954338
RMSE - na   : 1.915450


### GaussianProcessRegressor

In [63]:
# GaussianProcess_Regressor
from sklearn.gaussian_process import GaussianProcessRegressor

def Gaussian_Process_Regressor(df, num):
    X, y_hhb, y_hbo2, y_ca, y_na = XY(df)
    
    # Split data
    X1_train, X1_test, y1_train, y1_test = train_test_split(X, y_hhb, test_size=0.2, shuffle=1234)
    X2_train, X2_test, y2_train, y2_test = train_test_split(X, y_hbo2, test_size=0.2, shuffle=1234)
    X3_train, X3_test, y3_train, y3_test = train_test_split(X, y_ca, test_size=0.2, shuffle=1234)
    X4_train, X4_test, y4_train, y4_test = train_test_split(X, y_na, test_size=0.2, shuffle=1234)
    
    # LinearRegression
    model_hhb = GaussianProcessRegressor()
    model_hhb.fit(X1_train, y1_train)
    
    model_hbo2 = GaussianProcessRegressor()
    model_hbo2.fit(X2_train, y2_train)
    
    model_ca = GaussianProcessRegressor()
    model_ca.fit(X3_train, y3_train)
    
    model_na = GaussianProcessRegressor()
    model_na.fit(X4_train, y4_train)
                 
    preds_hhb = model_hhb.predict(X1_test)
    preds_hbo2 = model_hbo2.predict(X2_test)
    preds_ca = model_ca.predict(X3_test)
    preds_na = model_na.predict(X4_test)
    
    rmse_hhb = np.sqrt(mean_squared_error(y1_test, preds_hhb))
    rmse_hbo2 = np.sqrt(mean_squared_error(y2_test, preds_hbo2))
    rmse_ca = np.sqrt(mean_squared_error(y3_test, preds_ca))
    rmse_na = np.sqrt(mean_squared_error(y4_test, preds_na))

    print(num, " mm")
    print("RMSE - hhb  : %f" % (rmse_hhb))
    print("RMSE - hbo2 : %f" % (rmse_hbo2))
    print("RMSE - ca   : %f" % (rmse_ca))
    print("RMSE - na   : %f" % (rmse_na))
    
    
    return model_hhb, model_hbo2, model_ca, model_na

print("Gaussian_Process_Regressor")
hbb_25, hbo2_25, ca_25, na_25 = Gaussian_Process_Regressor(data_rho_25, 25)
hbb_20, hbo2_20, ca_20, na_20 = Gaussian_Process_Regressor(data_rho_20, 20)
hbb_15, hbo2_15, ca_15, na_15 = Gaussian_Process_Regressor(data_rho_15, 15)
hbb_10, hbo2_10, ca_10, na_10 = Gaussian_Process_Regressor(data_rho_10, 10)

Gaussian_Process_Regressor
25  mm
RMSE - hhb  : 6.074225
RMSE - hbo2 : 2.065552
RMSE - ca   : 6.021994
RMSE - na   : 3.698880
20  mm
RMSE - hhb  : 6.484956
RMSE - hbo2 : 2.104626
RMSE - ca   : 5.563105
RMSE - na   : 3.799312
15  mm
RMSE - hhb  : 6.232206
RMSE - hbo2 : 1.972731
RMSE - ca   : 6.209657
RMSE - na   : 3.621058
10  mm
RMSE - hhb  : 6.013664
RMSE - hbo2 : 2.099929
RMSE - ca   : 6.288731
RMSE - na   : 3.445333


### KNN Regressor

In [95]:
from sklearn.neighbors import KNeighborsRegressor

def KNeighbors_Regressor(df, num):
    X, y_hhb, y_hbo2, y_ca, y_na = XY(df)
    
    # Split data
    X1_train, X1_test, y1_train, y1_test = train_test_split(X, y_hhb, test_size=0.2, shuffle=123)
    X2_train, X2_test, y2_train, y2_test = train_test_split(X, y_hbo2, test_size=0.2, shuffle=124)
    X3_train, X3_test, y3_train, y3_test = train_test_split(X, y_ca, test_size=0.2, shuffle=124)
    X4_train, X4_test, y4_train, y4_test = train_test_split(X, y_na, test_size=0.2, shuffle=124)
    
    # LinearRegression
    model_hhb = KNeighborsRegressor(n_neighbors=15, algorithm='brute')
    model_hhb.fit(X1_train, y1_train)
    
    model_hbo2 = KNeighborsRegressor(n_neighbors=5, algorithm='brute')
    model_hbo2.fit(X2_train, y2_train)
    
    model_ca = KNeighborsRegressor(n_neighbors=15, algorithm='brute')
    model_ca.fit(X3_train, y3_train)
    
    model_na = KNeighborsRegressor(n_neighbors=15, algorithm='brute')
    model_na.fit(X4_train, y4_train)
                 
    preds_hhb = model_hhb.predict(X1_test)
    preds_hbo2 = model_hbo2.predict(X2_test)
    preds_ca = model_ca.predict(X3_test)
    preds_na = model_na.predict(X4_test)
    
    rmse_hhb = np.sqrt(mean_squared_error(y1_test, preds_hhb))
    rmse_hbo2 = np.sqrt(mean_squared_error(y2_test, preds_hbo2))
    rmse_ca = np.sqrt(mean_squared_error(y3_test, preds_ca))
    rmse_na = np.sqrt(mean_squared_error(y4_test, preds_na))

    print(num, " mm")
    print("RMSE - hhb  : %f" % (rmse_hhb))
    print("RMSE - hbo2 : %f" % (rmse_hbo2))
    print("RMSE - ca   : %f" % (rmse_ca))
    print("RMSE - na   : %f" % (rmse_na))
    
    
    return model_hhb, model_hbo2, model_ca, model_na

print("Gaussian_Process_Regressor")
hbb_25, hbo2_25, ca_25, na_25 = KNeighbors_Regressor(data_rho_25, 25)
hbb_20, hbo2_20, ca_20, na_20 = KNeighbors_Regressor(data_rho_20, 20)
hbb_15, hbo2_15, ca_15, na_15 = KNeighbors_Regressor(data_rho_15, 15)
hbb_10, hbo2_10, ca_10, na_10 = KNeighbors_Regressor(data_rho_10, 10)

Gaussian_Process_Regressor
25  mm
RMSE - hhb  : 2.953882
RMSE - hbo2 : 1.092772
RMSE - ca   : 2.940028
RMSE - na   : 1.956537
20  mm
RMSE - hhb  : 3.116854
RMSE - hbo2 : 1.098828
RMSE - ca   : 3.198660
RMSE - na   : 1.928618
15  mm
RMSE - hhb  : 3.141732
RMSE - hbo2 : 1.144443
RMSE - ca   : 3.061605
RMSE - na   : 1.932670
10  mm
RMSE - hhb  : 3.145394
RMSE - hbo2 : 1.099162
RMSE - ca   : 3.007200
RMSE - na   : 1.896639


### Multi-layer Perceptron regressor.

In [103]:
from sklearn.neural_network import MLPRegressor

def MLP_Regressor(df, num):
    X, y_hhb, y_hbo2, y_ca, y_na = XY(df)
    
    # Split data
    X1_train, X1_test, y1_train, y1_test = train_test_split(X, y_hhb, test_size=0.2, shuffle=123)
    X2_train, X2_test, y2_train, y2_test = train_test_split(X, y_hbo2, test_size=0.2, shuffle=124)
    X3_train, X3_test, y3_train, y3_test = train_test_split(X, y_ca, test_size=0.2, shuffle=124)
    X4_train, X4_test, y4_train, y4_test = train_test_split(X, y_na, test_size=0.2, shuffle=124)
    
    # LinearRegression
    model_hhb = MLPRegressor()
    model_hhb.fit(X1_train, y1_train)
    
    model_hbo2 = MLPRegressor()
    model_hbo2.fit(X2_train, y2_train)
    
    model_ca = MLPRegressor()
    model_ca.fit(X3_train, y3_train)
    
    model_na = MLPRegressor()
    model_na.fit(X4_train, y4_train)
                 
    preds_hhb = model_hhb.predict(X1_test)
    preds_hbo2 = model_hbo2.predict(X2_test)
    preds_ca = model_ca.predict(X3_test)
    preds_na = model_na.predict(X4_test)
    
    rmse_hhb = np.sqrt(mean_squared_error(y1_test, preds_hhb))
    rmse_hbo2 = np.sqrt(mean_squared_error(y2_test, preds_hbo2))
    rmse_ca = np.sqrt(mean_squared_error(y3_test, preds_ca))
    rmse_na = np.sqrt(mean_squared_error(y4_test, preds_na))

    print(num, " mm")
    print("RMSE - hhb  : %f" % (rmse_hhb))
    print("RMSE - hbo2 : %f" % (rmse_hbo2))
    print("RMSE - ca   : %f" % (rmse_ca))
    print("RMSE - na   : %f" % (rmse_na))
    
    
    return model_hhb, model_hbo2, model_ca, model_na

print("Gaussian_Process_Regressor")
hbb_25, hbo2_25, ca_25, na_25 = MLP_Regressor(data_rho_25, 25)
hbb_20, hbo2_20, ca_20, na_20 = MLP_Regressor(data_rho_20, 20)
hbb_15, hbo2_15, ca_15, na_15 = MLP_Regressor(data_rho_15, 15)
hbb_10, hbo2_10, ca_10, na_10 = MLP_Regressor(data_rho_10, 10)

Gaussian_Process_Regressor
25  mm
RMSE - hhb  : 3.060260
RMSE - hbo2 : 1.009319
RMSE - ca   : 2.871721
RMSE - na   : 1.973638
20  mm
RMSE - hhb  : 2.961148
RMSE - hbo2 : 1.033313
RMSE - ca   : 3.189525
RMSE - na   : 1.942620
15  mm
RMSE - hhb  : 3.128528
RMSE - hbo2 : 1.044665
RMSE - ca   : 2.961931
RMSE - na   : 1.846431
10  mm
RMSE - hhb  : 3.047813
RMSE - hbo2 : 0.978312
RMSE - ca   : 2.880391
RMSE - na   : 1.865479


### RandomForestRegressor

In [114]:
from sklearn.ensemble import RandomForestRegressor

def Random_Forest_Regressor(df, num):
    X, y_hhb, y_hbo2, y_ca, y_na = XY(df)
    
    # Split data
    X1_train, X1_test, y1_train, y1_test = train_test_split(X, y_hhb, test_size=0.2, shuffle=123)
    X2_train, X2_test, y2_train, y2_test = train_test_split(X, y_hbo2, test_size=0.2, shuffle=124)
    X3_train, X3_test, y3_train, y3_test = train_test_split(X, y_ca, test_size=0.2, shuffle=124)
    X4_train, X4_test, y4_train, y4_test = train_test_split(X, y_na, test_size=0.2, shuffle=124)
    
    # LinearRegression
    model_hhb = RandomForestRegressor()
    model_hhb.fit(X1_train, y1_train)
    
    model_hbo2 = RandomForestRegressor()
    model_hbo2.fit(X2_train, y2_train)
    
    model_ca = RandomForestRegressor()
    model_ca.fit(X3_train, y3_train)
    
    model_na = RandomForestRegressor()
    model_na.fit(X4_train, y4_train)
                 
    preds_hhb = model_hhb.predict(X1_test)
    preds_hbo2 = model_hbo2.predict(X2_test)
    preds_ca = model_ca.predict(X3_test)
    preds_na = model_na.predict(X4_test)
    
    rmse_hhb = np.sqrt(mean_squared_error(y1_test, preds_hhb))
    rmse_hbo2 = np.sqrt(mean_squared_error(y2_test, preds_hbo2))
    rmse_ca = np.sqrt(mean_squared_error(y3_test, preds_ca))
    rmse_na = np.sqrt(mean_squared_error(y4_test, preds_na))

    print(num, " mm")
    print("RMSE - hhb  : %f" % (rmse_hhb))
    print("RMSE - hbo2 : %f" % (rmse_hbo2))
    print("RMSE - ca   : %f" % (rmse_ca))
    print("RMSE - na   : %f" % (rmse_na))
    
    
    return model_hhb, model_hbo2, model_ca, model_na

print("Random_Forest_Regressor")
hbb_25, hbo2_25, ca_25, na_25 = Random_Forest_Regressor(data_rho_25, 25)
hbb_20, hbo2_20, ca_20, na_20 = Random_Forest_Regressor(data_rho_20, 20)
hbb_15, hbo2_15, ca_15, na_15 = Random_Forest_Regressor(data_rho_15, 15)
hbb_10, hbo2_10, ca_10, na_10 = Random_Forest_Regressor(data_rho_10, 10)

Random_Forest_Regressor
25  mm
RMSE - hhb  : 3.002657
RMSE - hbo2 : 1.004665
RMSE - ca   : 3.068782
RMSE - na   : 1.899920
20  mm
RMSE - hhb  : 3.107828
RMSE - hbo2 : 1.032148
RMSE - ca   : 3.102367
RMSE - na   : 2.012158
15  mm
RMSE - hhb  : 3.138550
RMSE - hbo2 : 1.054000
RMSE - ca   : 3.030838
RMSE - na   : 1.978374
10  mm
RMSE - hhb  : 3.001638
RMSE - hbo2 : 0.965221
RMSE - ca   : 3.193956
RMSE - na   : 1.978656


### DecisionTreeRegressor

In [115]:
 from sklearn.tree import DecisionTreeRegressor

def Decision_Tree_Regressor(df, num):
    X, y_hhb, y_hbo2, y_ca, y_na = XY(df)
    
    # Split data
    
    X1_train, X1_test, y1_train, y1_test = train_test_split(X, y_hhb, test_size=0.2, shuffle=123)
    X2_train, X2_test, y2_train, y2_test = train_test_split(X, y_hbo2, test_size=0.2, shuffle=124)
    X3_train, X3_test, y3_train, y3_test = train_test_split(X, y_ca, test_size=0.2, shuffle=124)
    X4_train, X4_test, y4_train, y4_test = train_test_split(X, y_na, test_size=0.2, shuffle=124)
    
    model_hhb = DecisionTreeRegressor()
    model_hhb.fit(X1_train, y1_train)
    
    model_hbo2 = DecisionTreeRegressor()
    model_hbo2.fit(X2_train, y2_train)
    
    model_ca = DecisionTreeRegressor()
    model_ca.fit(X3_train, y3_train)
    
    model_na = DecisionTreeRegressor()
    model_na.fit(X4_train, y4_train)
                 
    preds_hhb = model_hhb.predict(X1_test)
    preds_hbo2 = model_hbo2.predict(X2_test)
    preds_ca = model_ca.predict(X3_test)
    preds_na = model_na.predict(X4_test)
    
    rmse_hhb = np.sqrt(mean_squared_error(y1_test, preds_hhb))
    rmse_hbo2 = np.sqrt(mean_squared_error(y2_test, preds_hbo2))
    rmse_ca = np.sqrt(mean_squared_error(y3_test, preds_ca))
    rmse_na = np.sqrt(mean_squared_error(y4_test, preds_na))

    print(num, " mm")
    print("RMSE - hhb  : %f" % (rmse_hhb))
    print("RMSE - hbo2 : %f" % (rmse_hbo2))
    print("RMSE - ca   : %f" % (rmse_ca))
    print("RMSE - na   : %f" % (rmse_na))
    
    
    return model_hhb, model_hbo2, model_ca, model_na

print("Decision_Tree_Regressor")
hbb_25, hbo2_25, ca_25, na_25 = Decision_Tree_Regressor(data_rho_25, 25)
hbb_20, hbo2_20, ca_20, na_20 = Decision_Tree_Regressor(data_rho_20, 20)
hbb_15, hbo2_15, ca_15, na_15 = Decision_Tree_Regressor(data_rho_15, 15)
hbb_10, hbo2_10, ca_10, na_10 = Decision_Tree_Regressor(data_rho_10, 10)

Decision_Tree_Regressor
25  mm
RMSE - hhb  : 4.147803
RMSE - hbo2 : 1.338928
RMSE - ca   : 4.210985
RMSE - na   : 2.611998
20  mm
RMSE - hhb  : 4.138034
RMSE - hbo2 : 1.432491
RMSE - ca   : 4.475728
RMSE - na   : 2.672221
15  mm
RMSE - hhb  : 4.417350
RMSE - hbo2 : 1.431714
RMSE - ca   : 4.132148
RMSE - na   : 2.658598
10  mm
RMSE - hhb  : 4.140656
RMSE - hbo2 : 1.403357
RMSE - ca   : 4.222129
RMSE - na   : 2.747112


### ExtraTreesRegressor

In [116]:
from sklearn.ensemble import ExtraTreesRegressor

def Extra_Trees_Regressor(df, num):
    X, y_hhb, y_hbo2, y_ca, y_na = XY(df)
    
    # Split data
    
    X1_train, X1_test, y1_train, y1_test = train_test_split(X, y_hhb, test_size=0.2, shuffle=123)
    X2_train, X2_test, y2_train, y2_test = train_test_split(X, y_hbo2, test_size=0.2, shuffle=124)
    X3_train, X3_test, y3_train, y3_test = train_test_split(X, y_ca, test_size=0.2, shuffle=124)
    X4_train, X4_test, y4_train, y4_test = train_test_split(X, y_na, test_size=0.2, shuffle=124)
    
    model_hhb = ExtraTreesRegressor()
    model_hhb.fit(X1_train, y1_train)
    
    model_hbo2 = ExtraTreesRegressor()
    model_hbo2.fit(X2_train, y2_train)
    
    model_ca = ExtraTreesRegressor()
    model_ca.fit(X3_train, y3_train)
    
    model_na = ExtraTreesRegressor()
    model_na.fit(X4_train, y4_train)
                 
    preds_hhb = model_hhb.predict(X1_test)
    preds_hbo2 = model_hbo2.predict(X2_test)
    preds_ca = model_ca.predict(X3_test)
    preds_na = model_na.predict(X4_test)
    
    rmse_hhb = np.sqrt(mean_squared_error(y1_test, preds_hhb))
    rmse_hbo2 = np.sqrt(mean_squared_error(y2_test, preds_hbo2))
    rmse_ca = np.sqrt(mean_squared_error(y3_test, preds_ca))
    rmse_na = np.sqrt(mean_squared_error(y4_test, preds_na))

    print(num, " mm")
    print("RMSE - hhb  : %f" % (rmse_hhb))
    print("RMSE - hbo2 : %f" % (rmse_hbo2))
    print("RMSE - ca   : %f" % (rmse_ca))
    print("RMSE - na   : %f" % (rmse_na))
    
    
    return model_hhb, model_hbo2, model_ca, model_na

print("Gaussian_Process_Regressor")
hbb_25, hbo2_25, ca_25, na_25 = Extra_Trees_Regressor(data_rho_25, 25)
hbb_20, hbo2_20, ca_20, na_20 = Extra_Trees_Regressor(data_rho_20, 20)
hbb_15, hbo2_15, ca_15, na_15 = Extra_Trees_Regressor(data_rho_15, 15)
hbb_10, hbo2_10, ca_10, na_10 = Extra_Trees_Regressor(data_rho_10, 10)

Gaussian_Process_Regressor
25  mm
RMSE - hhb  : 3.074519
RMSE - hbo2 : 1.075853
RMSE - ca   : 3.064700
RMSE - na   : 1.956844
20  mm
RMSE - hhb  : 3.018425
RMSE - hbo2 : 1.099322
RMSE - ca   : 3.119375
RMSE - na   : 1.999367
15  mm
RMSE - hhb  : 3.182450
RMSE - hbo2 : 1.059620
RMSE - ca   : 3.131628
RMSE - na   : 2.004467
10  mm
RMSE - hhb  : 3.130679
RMSE - hbo2 : 1.078126
RMSE - ca   : 3.091208
RMSE - na   : 1.913916


### Passive Aggressive Regression

In [131]:
from sklearn.linear_model import PassiveAggressiveRegressor

from sklearn.ensemble import ExtraTreesRegressor

def Passive_Aggressive_Regressor(df, num):
    X, y_hhb, y_hbo2, y_ca, y_na = XY(df)
    
    # Split data
    
    X1_train, X1_test, y1_train, y1_test = train_test_split(X, y_hhb, test_size=0.2, shuffle=123)
    X2_train, X2_test, y2_train, y2_test = train_test_split(X, y_hbo2, test_size=0.2, shuffle=124)
    X3_train, X3_test, y3_train, y3_test = train_test_split(X, y_ca, test_size=0.2, shuffle=124)
    X4_train, X4_test, y4_train, y4_test = train_test_split(X, y_na, test_size=0.2, shuffle=124)
    
    model_hhb = PassiveAggressiveRegressor()
    model_hhb.fit(X1_train, y1_train)
    
    model_hbo2 = PassiveAggressiveRegressor()
    model_hbo2.fit(X2_train, y2_train)
    
    model_ca = PassiveAggressiveRegressor()
    model_ca.fit(X3_train, y3_train)
    
    model_na = PassiveAggressiveRegressor()
    model_na.fit(X4_train, y4_train)
                 
    preds_hhb = model_hhb.predict(X1_test)
    preds_hbo2 = model_hbo2.predict(X2_test)
    preds_ca = model_ca.predict(X3_test)
    preds_na = model_na.predict(X4_test)
    
    rmse_hhb = np.sqrt(mean_squared_error(y1_test, preds_hhb))
    rmse_hbo2 = np.sqrt(mean_squared_error(y2_test, preds_hbo2))
    rmse_ca = np.sqrt(mean_squared_error(y3_test, preds_ca))
    rmse_na = np.sqrt(mean_squared_error(y4_test, preds_na))

    print(num, " mm")
    print("RMSE - hhb  : %f" % (rmse_hhb))
    print("RMSE - hbo2 : %f" % (rmse_hbo2))
    print("RMSE - ca   : %f" % (rmse_ca))
    print("RMSE - na   : %f" % (rmse_na))
    
    
    return model_hhb, model_hbo2, model_ca, model_na

print("Passive_Aggressive_Regressor")
hbb_25, hbo2_25, ca_25, na_25 = Passive_Aggressive_Regressor(data_rho_25, 25)
hbb_20, hbo2_20, ca_20, na_20 = Passive_Aggressive_Regressor(data_rho_20, 20)
hbb_15, hbo2_15, ca_15, na_15 = Passive_Aggressive_Regressor(data_rho_15, 15)
hbb_10, hbo2_10, ca_10, na_10 = Passive_Aggressive_Regressor(data_rho_10, 10)

Passive_Aggressive_Regressor
25  mm
RMSE - hhb  : 3.034069
RMSE - hbo2 : 1.115109
RMSE - ca   : 3.623148
RMSE - na   : 2.851420
20  mm
RMSE - hhb  : 2.970067
RMSE - hbo2 : 1.126624
RMSE - ca   : 4.245566
RMSE - na   : 2.028723
15  mm
RMSE - hhb  : 3.632398
RMSE - hbo2 : 1.272885
RMSE - ca   : 3.767871
RMSE - na   : 1.983791
10  mm
RMSE - hhb  : 4.155879
RMSE - hbo2 : 1.047722
RMSE - ca   : 3.270661
RMSE - na   : 2.944348


### GradientBoostingRegressor

In [132]:
from sklearn.ensemble import GradientBoostingRegressor

def Gradient_Boosting_Regressor(df, num):
    X, y_hhb, y_hbo2, y_ca, y_na = XY(df)
    
    # Split data
    
    X1_train, X1_test, y1_train, y1_test = train_test_split(X, y_hhb, test_size=0.2, shuffle=123)
    X2_train, X2_test, y2_train, y2_test = train_test_split(X, y_hbo2, test_size=0.2, shuffle=124)
    X3_train, X3_test, y3_train, y3_test = train_test_split(X, y_ca, test_size=0.2, shuffle=124)
    X4_train, X4_test, y4_train, y4_test = train_test_split(X, y_na, test_size=0.2, shuffle=124)
    
    model_hhb = GradientBoostingRegressor()
    model_hhb.fit(X1_train, y1_train)
    
    model_hbo2 = GradientBoostingRegressor()
    model_hbo2.fit(X2_train, y2_train)
    
    model_ca = GradientBoostingRegressor()
    model_ca.fit(X3_train, y3_train)
    
    model_na = GradientBoostingRegressor()
    model_na.fit(X4_train, y4_train)
                 
    preds_hhb = model_hhb.predict(X1_test)
    preds_hbo2 = model_hbo2.predict(X2_test)
    preds_ca = model_ca.predict(X3_test)
    preds_na = model_na.predict(X4_test)
    
    rmse_hhb = np.sqrt(mean_squared_error(y1_test, preds_hhb))
    rmse_hbo2 = np.sqrt(mean_squared_error(y2_test, preds_hbo2))
    rmse_ca = np.sqrt(mean_squared_error(y3_test, preds_ca))
    rmse_na = np.sqrt(mean_squared_error(y4_test, preds_na))

    print(num, " mm")
    print("RMSE - hhb  : %f" % (rmse_hhb))
    print("RMSE - hbo2 : %f" % (rmse_hbo2))
    print("RMSE - ca   : %f" % (rmse_ca))
    print("RMSE - na   : %f" % (rmse_na))
    
    
    return model_hhb, model_hbo2, model_ca, model_na

print("Gradient_Boosting_Regressor")
hbb_25, hbo2_25, ca_25, na_25 = Gradient_Boosting_Regressor(data_rho_25, 25)
hbb_20, hbo2_20, ca_20, na_20 = Gradient_Boosting_Regressor(data_rho_20, 20)
hbb_15, hbo2_15, ca_15, na_15 = Gradient_Boosting_Regressor(data_rho_15, 15)
hbb_10, hbo2_10, ca_10, na_10 = Gradient_Boosting_Regressor(data_rho_10, 10)

Gradient_Boosting_Regressor
25  mm
RMSE - hhb  : 3.003722
RMSE - hbo2 : 1.004566
RMSE - ca   : 2.851129
RMSE - na   : 1.922602
20  mm
RMSE - hhb  : 2.989403
RMSE - hbo2 : 1.005734
RMSE - ca   : 3.049165
RMSE - na   : 1.924703
15  mm
RMSE - hhb  : 2.969965
RMSE - hbo2 : 1.072023
RMSE - ca   : 3.137761
RMSE - na   : 1.958281
10  mm
RMSE - hhb  : 2.965910
RMSE - hbo2 : 1.019958
RMSE - ca   : 3.136479
RMSE - na   : 1.836411
