In [1]:
import time
import datetime
import pandas as pd
import numpy as np
from numpy import mean, std
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.model_selection import cross_val_score, RepeatedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score, make_scorer
from sklearn.utils import resample
from sklearn.feature_selection import RFECV

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential

from time import time

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import ElasticNetCV
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

from sklearn.utils import shuffle

from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.svm import SVR, LinearSVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import StackingRegressor
import catboost as cb

from sklearn.model_selection import KFold

  from pandas import MultiIndex, Int64Index


## Load data

In [2]:
df = pd.read_csv('rawdata_775_CostScale.csv', encoding='latin1')

In [3]:
unscaled_data = df.drop(['no','type', 'duration'], axis=1)
unscaled_inputs = df.drop(['no','type', 'buildingcost', 'duration'], axis=1)
unscaled_targets = df['buildingcost']
unscaled_data.describe()

Unnamed: 0,totalarea,sitearea,buildingarea,typicalfloorheight,totalheight,basement,groundlevel,parkinglot,type1,type2,...,type4,type5,type6,type7,type8,type9,type10,type11,year,buildingcost
count,775.0,775.0,775.0,775.0,775.0,775.0,775.0,775.0,775.0,775.0,...,775.0,775.0,775.0,775.0,775.0,775.0,775.0,775.0,775.0,775.0
mean,8135.758903,49807.33,2628.330041,4.144555,20.486452,0.876129,4.113548,94.811613,0.091613,0.179355,...,0.019355,0.068387,0.037419,0.037419,0.042581,0.242581,0.04,0.010323,2017.767742,10260.298065
std,5342.883743,141916.8,1791.675555,0.684189,7.851161,0.581929,1.866337,147.167558,0.288665,0.383897,...,0.137858,0.252572,0.18991,0.18991,0.20204,0.42892,0.196086,0.10114,1.858076,6113.239972
min,325.0,482.0,122.84,2.4,5.2,0.0,1.0,3.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2015.0,251.0
25%,3587.205,5949.5,1294.36,3.7,15.925,1.0,3.0,38.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2016.0,5164.5
50%,7014.75,13210.0,2236.31,4.2,19.3,1.0,4.0,64.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2018.0,9515.0
75%,11880.94,26140.2,3643.135,4.2,22.775,1.0,5.0,107.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2019.0,14215.0
max,36699.07,1320256.0,11392.0,8.0,70.0,4.0,20.0,1830.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2021.0,28622.0


## Data preprocessing

In [4]:
Feature_Names = ['totalarea', 'buildingarea', 'totalheight', 'basement', 
                 'groundlevel', 'parkinglot']

unscaled_X = unscaled_data[Feature_Names]
unscaled_y = unscaled_data['buildingcost']

X = unscaled_data[Feature_Names].to_numpy()
y = unscaled_data['buildingcost'].to_numpy()

#----------------------------------------------------------------------------------------------------------
#  min, max values of Cost in the training data
#----------------------------------------------------------------------------------------------------------
ymin = min(y)
ymax = max(y)

multiplier = ymax - ymin

# Feature scaling required for neural network
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(X)
scaled_X = scaler.transform(X)
scaler.fit(y.reshape(-1, 1))
scaled_y = scaler.transform(y.reshape(-1, 1))

scaled_X_df = pd.DataFrame(scaled_X)
scaled_y_df = pd.DataFrame(scaled_y)

# prepare cross validation
kfold = KFold(n_splits= 5, shuffle=True, random_state = 0 )

In [5]:
#################################################################################################
np.random.seed(7)
scoring_param = make_scorer(mean_squared_error,greater_is_better=False)
#################################################################################################

## Neural Networks

In [7]:
n_features = scaled_X.shape[1]

ann = Sequential()
ann.add(Dense(50, activation='relu', kernel_initializer='normal', input_shape=(n_features,)))
ann.add(Dense(30, activation='relu', kernel_initializer='normal'))
#ann.add(Dropout(0.1))
#ann.add(Dense(10, activation='relu', kernel_initializer='he_normal'))
ann.add(Dense(1, activation='sigmoid'))

Adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07)
ann.compile(optimizer=Adam_optimizer, loss='mean_squared_error')
max_epochs =100
early_stopping = tf.keras.callbacks.EarlyStopping(patience = 30)

ann_train_rmse = []; ann_train_RSQ = []; ann_train_MBE = []
ann_test_rmse = []; ann_test_RSQ = []; ann_test_MBE = []

for train_index, test_index in kfold.split(scaled_X_df):
    scaled_X_train, scaled_X_test = scaled_X_df.iloc[train_index,:], scaled_X_df.iloc[test_index,:]
    scaled_y_train, scaled_y_test = scaled_y_df.iloc[train_index], scaled_y_df.iloc[test_index]
    unscaled_y_train, unscaled_y_test = unscaled_y.iloc[train_index], unscaled_y.iloc[test_index]
    
    scaled_X_train = scaled_X_train.to_numpy(); scaled_X_test = scaled_X_test.to_numpy()
    scaled_y_train = scaled_y_train.to_numpy(); scaled_y_test = scaled_y_test.to_numpy()
    unscaled_y_train = unscaled_y_train.to_numpy(); unscaled_y_test = unscaled_y_test.to_numpy()
    
    #----------------------------------------------------------------------------------------------------------
    ann.fit(scaled_X_train, scaled_y_train, epochs=max_epochs, batch_size=16, 
                    callbacks=[early_stopping],
                    validation_data = (scaled_X_test, scaled_y_test),
                    verbose=0, use_multiprocessing=True)
    #----------------------------------------------------------------------------------------------------------
    
    Y_hat = ann.predict(scaled_X_train)
    Y_Train_Pred = (ymin + Y_hat * multiplier)
    
    train_rmse = np.sqrt(mean_squared_error(unscaled_y_train.ravel(), Y_Train_Pred))
    ann_train_rmse.append(train_rmse)
    train_RSQ = r2_score(unscaled_y_train.ravel(), Y_Train_Pred)
    ann_train_RSQ.append(train_RSQ)
    train_MBE = np.mean(unscaled_y_train.ravel() - Y_Train_Pred)
    ann_train_MBE.append(train_MBE)

    Y_hat = ann.predict(scaled_X_test)
    Y_Test_Pred = (ymin + Y_hat * multiplier)

    test_rmse = np.sqrt(mean_squared_error(unscaled_y_test.ravel(), Y_Test_Pred))
    ann_test_rmse.append(test_rmse)
    test_RSQ = r2_score(unscaled_y_test.ravel(), Y_Test_Pred)
    ann_test_RSQ.append(test_RSQ)
    test_MBE = np.mean(unscaled_y_test.ravel() - Y_Test_Pred)
    ann_test_MBE.append(test_MBE)

rmse_train_df = pd.DataFrame(ann_train_rmse, columns = {'rmse_train'})
rsq_train_df = pd.DataFrame(ann_train_RSQ, columns = {'RSQ_train'})
mbe_train_df = pd.DataFrame(ann_train_MBE, columns = {'MBE_train'})
rmse_test_df = pd.DataFrame(ann_test_rmse, columns = {'rmse_test'})
rsq_test_df = pd.DataFrame(ann_test_RSQ, columns = {'RSQ_test'})
mbe_test_df = pd.DataFrame(ann_test_MBE, columns = {'MBE_test'})
ann_results_df = pd.concat([rmse_train_df, rsq_train_df, mbe_train_df, rmse_test_df, rsq_test_df, mbe_test_df], axis = 1)

ann_results_df



Unnamed: 0,rmse_train,RSQ_train,MBE_train,rmse_test,RSQ_test,MBE_test
0,1972.871741,0.898392,-40.006881,1949.134654,0.88623,16.583419
1,1970.807203,0.894887,-237.752692,2034.518896,0.892556,-279.951321
2,1911.953825,0.900886,-313.219982,2191.857083,0.87709,-114.427575
3,1982.166185,0.891235,345.322233,2004.269452,0.90412,556.339851
4,2027.436862,0.892755,580.210491,1881.699295,0.89368,229.018152


## Linear Regression

In [17]:
lr_train_rmse = []; lr_train_RSQ = []; lr_train_MBE = []
lr_test_rmse = []; lr_test_RSQ = []; lr_test_MBE = []

for train_index, test_index in kfold.split(scaled_X_df):
    scaled_X_train, scaled_X_test = scaled_X_df.iloc[train_index,:], scaled_X_df.iloc[test_index,:]
    scaled_y_train, scaled_y_test = scaled_y_df.iloc[train_index], scaled_y_df.iloc[test_index]
    unscaled_y_train, unscaled_y_test = unscaled_y.iloc[train_index], unscaled_y.iloc[test_index]
    
    scaled_X_train = scaled_X_train.to_numpy(); scaled_X_test = scaled_X_test.to_numpy()
    scaled_y_train = scaled_y_train.to_numpy(); scaled_y_test = scaled_y_test.to_numpy()
    unscaled_y_train = unscaled_y_train.to_numpy(); unscaled_y_test = unscaled_y_test.to_numpy()
    
    #----------------------------------------------------------------------------------------------------------
    lin_reg = LinearRegression().fit(scaled_X_train, scaled_y_train)
    #----------------------------------------------------------------------------------------------------------
    
    Y_hat = lin_reg.predict(scaled_X_train)
    Y_Train_Pred = (ymin + Y_hat * multiplier)
    
    train_rmse = np.sqrt(mean_squared_error(unscaled_y_train.ravel(), Y_Train_Pred))
    lr_train_rmse.append(train_rmse)
    train_RSQ = r2_score(unscaled_y_train.ravel(), Y_Train_Pred)
    lr_train_RSQ.append(train_RSQ)
    train_MBE = np.mean(unscaled_y_train.ravel() - Y_Train_Pred)
    lr_train_MBE.append(train_MBE)

    Y_hat = lin_reg.predict(scaled_X_test)
    Y_Test_Pred = (ymin + Y_hat * multiplier)

    test_rmse = np.sqrt(mean_squared_error(unscaled_y_test.ravel(), Y_Test_Pred))
    lr_test_rmse.append(test_rmse)
    test_RSQ = r2_score(unscaled_y_test.ravel(), Y_Test_Pred)
    lr_test_RSQ.append(test_RSQ)
    test_MBE = np.mean(unscaled_y_test.ravel() - Y_Test_Pred)
    lr_test_MBE.append(test_MBE)

rmse_train_df = pd.DataFrame(lr_train_rmse, columns = {'rmse_train'})
rsq_train_df = pd.DataFrame(lr_train_RSQ, columns = {'RSQ_train'})
mbe_train_df = pd.DataFrame(lr_train_MBE, columns = {'MBE_train'})
rmse_test_df = pd.DataFrame(lr_test_rmse, columns = {'rmse_test'})
rsq_test_df = pd.DataFrame(lr_test_RSQ, columns = {'RSQ_test'})
mbe_test_df = pd.DataFrame(lr_test_MBE, columns = {'MBE_test'})
lr_results_df = pd.concat([rmse_train_df, rsq_train_df, mbe_train_df, rmse_test_df, rsq_test_df, mbe_test_df], axis = 1)

lr_results_df

Unnamed: 0,rmse_train,RSQ_train,MBE_train,rmse_test,RSQ_test,MBE_test
0,2009.403522,0.894594,-4.990959e-13,1949.330012,0.886207,60.523613
1,1985.525825,0.893311,2.795906e-12,2053.284259,0.890565,-122.5753
2,1947.391936,0.897178,-4.288348e-13,2182.18907,0.878172,196.611684
3,2014.279249,0.887682,1.361611e-12,1964.609038,0.907877,137.922433
4,2001.426834,0.895489,-2.619042e-12,1980.58413,0.882212,-289.317064


## Decision Tree

In [30]:
dt_train_rmse = []; dt_train_RSQ = []; dt_train_MBE = []
dt_test_rmse = []; dt_test_RSQ = []; dt_test_MBE = []
dt = DecisionTreeRegressor(criterion = 'absolute_error', 
                             min_samples_split = 25, 
                             max_depth = 6,
                             min_samples_leaf = 10,
                             max_leaf_nodes = 30)

for train_index, test_index in kfold.split(scaled_X_df):
    scaled_X_train, scaled_X_test = scaled_X_df.iloc[train_index,:], scaled_X_df.iloc[test_index,:]
    scaled_y_train, scaled_y_test = scaled_y_df.iloc[train_index], scaled_y_df.iloc[test_index]
    unscaled_y_train, unscaled_y_test = unscaled_y.iloc[train_index], unscaled_y.iloc[test_index]
    
    scaled_X_train = scaled_X_train.to_numpy(); scaled_X_test = scaled_X_test.to_numpy()
    scaled_y_train = scaled_y_train.to_numpy(); scaled_y_test = scaled_y_test.to_numpy()
    unscaled_y_train = unscaled_y_train.to_numpy(); unscaled_y_test = unscaled_y_test.to_numpy()
    
    #----------------------------------------------------------------------------------------------------------
    dt.fit(scaled_X_train, scaled_y_train.ravel())
    #----------------------------------------------------------------------------------------------------------
    
    Y_hat = dt.predict(scaled_X_train)
    Y_Train_Pred = (ymin + Y_hat * multiplier)
    
    train_rmse = np.sqrt(mean_squared_error(unscaled_y_train.ravel(), Y_Train_Pred))
    dt_train_rmse.append(train_rmse)
    train_RSQ = r2_score(unscaled_y_train.ravel(), Y_Train_Pred)
    dt_train_RSQ.append(train_RSQ)
    train_MBE = np.mean(unscaled_y_train.ravel() - Y_Train_Pred)
    dt_train_MBE.append(train_MBE)

    Y_hat = dt.predict(scaled_X_test)
    Y_Test_Pred = (ymin + Y_hat * multiplier)

    test_rmse = np.sqrt(mean_squared_error(unscaled_y_test.ravel(), Y_Test_Pred))
    dt_test_rmse.append(test_rmse)
    test_RSQ = r2_score(unscaled_y_test.ravel(), Y_Test_Pred)
    dt_test_RSQ.append(test_RSQ)
    test_MBE = np.mean(unscaled_y_test.ravel() - Y_Test_Pred)
    dt_test_MBE.append(test_MBE)

rmse_train_df = pd.DataFrame(dt_train_rmse, columns = {'rmse_train'})
rsq_train_df = pd.DataFrame(dt_train_RSQ, columns = {'RSQ_train'})
mbe_train_df = pd.DataFrame(dt_train_MBE, columns = {'MBE_train'})
rmse_test_df = pd.DataFrame(dt_test_rmse, columns = {'rmse_test'})
rsq_test_df = pd.DataFrame(dt_test_RSQ, columns = {'RSQ_test'})
mbe_test_df = pd.DataFrame(dt_test_MBE, columns = {'MBE_test'})
dt_results_df = pd.concat([rmse_train_df, rsq_train_df, mbe_train_df, rmse_test_df, rsq_test_df, mbe_test_df], axis = 1)
dt_results_df

Unnamed: 0,rmse_train,RSQ_train,MBE_train,rmse_test,RSQ_test,MBE_test
0,1889.686212,0.906779,194.143548,2211.39453,0.853554,244.519355
1,1876.600429,0.904696,184.591935,2136.674929,0.881495,55.83871
2,1879.654466,0.904206,212.322581,2306.998743,0.863838,385.345161
3,1927.271192,0.897176,144.095161,2231.225653,0.881176,308.36129
4,1859.790629,0.909757,139.501613,2127.82675,0.864047,-130.835484


## Random Forest

In [18]:
rf_train_rmse = []; rf_train_RSQ = []; rf_train_MBE = []
rf_test_rmse = []; rf_test_RSQ = []; rf_test_MBE = []
rf = RandomForestRegressor(n_estimators=715, max_depth=5, min_samples_leaf=4)

for train_index, test_index in kfold.split(scaled_X_df):
    scaled_X_train, scaled_X_test = scaled_X_df.iloc[train_index,:], scaled_X_df.iloc[test_index,:]
    scaled_y_train, scaled_y_test = scaled_y_df.iloc[train_index], scaled_y_df.iloc[test_index]
    unscaled_y_train, unscaled_y_test = unscaled_y.iloc[train_index], unscaled_y.iloc[test_index]
    
    scaled_X_train = scaled_X_train.to_numpy(); scaled_X_test = scaled_X_test.to_numpy()
    scaled_y_train = scaled_y_train.to_numpy(); scaled_y_test = scaled_y_test.to_numpy()
    unscaled_y_train = unscaled_y_train.to_numpy(); unscaled_y_test = unscaled_y_test.to_numpy()
    
    #----------------------------------------------------------------------------------------------------------
    rf.fit(scaled_X_train, scaled_y_train.ravel())
    #----------------------------------------------------------------------------------------------------------
    
    Y_hat = rf.predict(scaled_X_train)
    Y_Train_Pred = (ymin + Y_hat * multiplier)
    
    train_rmse = np.sqrt(mean_squared_error(unscaled_y_train.ravel(), Y_Train_Pred))
    rf_train_rmse.append(train_rmse)
    train_RSQ = r2_score(unscaled_y_train.ravel(), Y_Train_Pred)
    rf_train_RSQ.append(train_RSQ)
    train_MBE = np.mean(unscaled_y_train.ravel() - Y_Train_Pred)
    rf_train_MBE.append(train_MBE)

    Y_hat = rf.predict(scaled_X_test)
    Y_Test_Pred = (ymin + Y_hat * multiplier)

    test_rmse = np.sqrt(mean_squared_error(unscaled_y_test.ravel(), Y_Test_Pred))
    rf_test_rmse.append(test_rmse)
    test_RSQ = r2_score(unscaled_y_test.ravel(), Y_Test_Pred)
    rf_test_RSQ.append(test_RSQ)
    test_MBE = np.mean(unscaled_y_test.ravel() - Y_Test_Pred)
    rf_test_MBE.append(test_MBE)

rmse_train_df = pd.DataFrame(rf_train_rmse, columns = {'rmse_train'})
rsq_train_df = pd.DataFrame(rf_train_RSQ, columns = {'RSQ_train'})
mbe_train_df = pd.DataFrame(rf_train_MBE, columns = {'MBE_train'})
rmse_test_df = pd.DataFrame(rf_test_rmse, columns = {'rmse_test'})
rsq_test_df = pd.DataFrame(rf_test_RSQ, columns = {'RSQ_test'})
mbe_test_df = pd.DataFrame(rf_test_MBE, columns = {'MBE_test'})
rf_results_df = pd.concat([rmse_train_df, rsq_train_df, mbe_train_df, rmse_test_df, rsq_test_df, mbe_test_df], axis = 1)
rf_results_df

Unnamed: 0,rmse_train,RSQ_train,MBE_train,rmse_test,RSQ_test,MBE_test
0,1645.832381,0.929286,7.250498,1999.862131,0.880231,59.236341
1,1636.14612,0.927554,15.40496,1949.173235,0.901381,-71.359931
2,1618.355382,0.928989,7.785571,2086.966938,0.888572,212.372186
3,1642.104793,0.925353,11.472302,2072.613544,0.897469,176.729258
4,1622.098592,0.93135,3.90538,1984.960327,0.881691,-263.043566


## XGBoost

In [19]:
xgb_train_rmse = []; xgb_train_RSQ = []; xgb_train_MBE = []
xgb_test_rmse = []; xgb_test_RSQ = []; xgb_test_MBE = []

xgb = XGBRegressor(n_estimators =200, max_depth=3, learning_rate = 0.023)

for train_index, test_index in kfold.split(scaled_X_df):
    scaled_X_train, scaled_X_test = scaled_X_df.iloc[train_index,:], scaled_X_df.iloc[test_index,:]
    scaled_y_train, scaled_y_test = scaled_y_df.iloc[train_index], scaled_y_df.iloc[test_index]
    unscaled_y_train, unscaled_y_test = unscaled_y.iloc[train_index], unscaled_y.iloc[test_index]
    
    scaled_X_train = scaled_X_train.to_numpy(); scaled_X_test = scaled_X_test.to_numpy()
    scaled_y_train = scaled_y_train.to_numpy(); scaled_y_test = scaled_y_test.to_numpy()
    unscaled_y_train = unscaled_y_train.to_numpy(); unscaled_y_test = unscaled_y_test.to_numpy()
    
    #----------------------------------------------------------------------------------------------------------
    xgb.fit(scaled_X_train, scaled_y_train.ravel())
    #----------------------------------------------------------------------------------------------------------
    
    Y_hat = xgb.predict(scaled_X_train)
    Y_Train_Pred = (ymin + Y_hat * multiplier)
    
    train_rmse = np.sqrt(mean_squared_error(unscaled_y_train.ravel(), Y_Train_Pred))
    xgb_train_rmse.append(train_rmse)
    train_RSQ = r2_score(unscaled_y_train.ravel(), Y_Train_Pred)
    xgb_train_RSQ.append(train_RSQ)
    train_MBE = np.mean(unscaled_y_train.ravel() - Y_Train_Pred)
    xgb_train_MBE.append(train_MBE)

    Y_hat = xgb.predict(scaled_X_test)
    Y_Test_Pred = (ymin + Y_hat * multiplier)

    test_rmse = np.sqrt(mean_squared_error(unscaled_y_test.ravel(), Y_Test_Pred))
    xgb_test_rmse.append(test_rmse)
    test_RSQ = r2_score(unscaled_y_test.ravel(), Y_Test_Pred)
    xgb_test_RSQ.append(test_RSQ)
    test_MBE = np.mean(unscaled_y_test.ravel() - Y_Test_Pred)
    xgb_test_MBE.append(test_MBE)

rmse_train_df = pd.DataFrame(xgb_train_rmse, columns = {'rmse_train'})
rsq_train_df = pd.DataFrame(xgb_train_RSQ, columns = {'RSQ_train'})
mbe_train_df = pd.DataFrame(xgb_train_MBE, columns = {'MBE_train'})
rmse_test_df = pd.DataFrame(xgb_test_rmse, columns = {'rmse_test'})
rsq_test_df = pd.DataFrame(xgb_test_RSQ, columns = {'RSQ_test'})
mbe_test_df = pd.DataFrame(xgb_test_MBE, columns = {'MBE_test'})
xgb_results_df = pd.concat([rmse_train_df, rsq_train_df, mbe_train_df, rmse_test_df, rsq_test_df, mbe_test_df], axis = 1)
xgb_results_df

Unnamed: 0,rmse_train,RSQ_train,MBE_train,rmse_test,RSQ_test,MBE_test
0,1573.342054,0.935378,-37.730513,1894.343039,0.892537,-25.810877
1,1565.741266,0.933655,-36.381332,1893.231393,0.906961,-144.168091
2,1545.324067,0.935253,-38.138006,2073.86365,0.889967,200.496032
3,1541.646036,0.934207,-40.019841,2055.20384,0.899184,154.235586
4,1563.117174,0.936252,-37.648933,1956.99334,0.885001,-314.880288


## Light GBM

In [20]:
lgb_train_rmse = []; lgb_train_RSQ = []; lgb_train_MBE = []
lgb_test_rmse = []; lgb_test_RSQ = []; lgb_test_MBE = []

lgb = LGBMRegressor(n_estimators = 100 , max_depth = 4 , learning_rate = 0.056)

for train_index, test_index in kfold.split(scaled_X_df):
    scaled_X_train, scaled_X_test = scaled_X_df.iloc[train_index,:], scaled_X_df.iloc[test_index,:]
    scaled_y_train, scaled_y_test = scaled_y_df.iloc[train_index], scaled_y_df.iloc[test_index]
    unscaled_y_train, unscaled_y_test = unscaled_y.iloc[train_index], unscaled_y.iloc[test_index]
    
    scaled_X_train = scaled_X_train.to_numpy(); scaled_X_test = scaled_X_test.to_numpy()
    scaled_y_train = scaled_y_train.to_numpy(); scaled_y_test = scaled_y_test.to_numpy()
    unscaled_y_train = unscaled_y_train.to_numpy(); unscaled_y_test = unscaled_y_test.to_numpy()
    
    #----------------------------------------------------------------------------------------------------------
    lgb.fit(scaled_X_train, scaled_y_train.ravel())
    #----------------------------------------------------------------------------------------------------------
    
    Y_hat = lgb.predict(scaled_X_train)
    Y_Train_Pred = (ymin + Y_hat * multiplier)
    
    train_rmse = np.sqrt(mean_squared_error(unscaled_y_train.ravel(), Y_Train_Pred))
    lgb_train_rmse.append(train_rmse)
    train_RSQ = r2_score(unscaled_y_train.ravel(), Y_Train_Pred)
    lgb_train_RSQ.append(train_RSQ)
    train_MBE = np.mean(unscaled_y_train.ravel() - Y_Train_Pred)
    lgb_train_MBE.append(train_MBE)

    Y_hat = lgb.predict(scaled_X_test)
    Y_Test_Pred = (ymin + Y_hat * multiplier)

    test_rmse = np.sqrt(mean_squared_error(unscaled_y_test.ravel(), Y_Test_Pred))
    lgb_test_rmse.append(test_rmse)
    test_RSQ = r2_score(unscaled_y_test.ravel(), Y_Test_Pred)
    lgb_test_RSQ.append(test_RSQ)
    test_MBE = np.mean(unscaled_y_test.ravel() - Y_Test_Pred)
    lgb_test_MBE.append(test_MBE)

rmse_train_df = pd.DataFrame(lgb_train_rmse, columns = {'rmse_train'})
rsq_train_df = pd.DataFrame(lgb_train_RSQ, columns = {'RSQ_train'})
mbe_train_df = pd.DataFrame(lgb_train_MBE, columns = {'MBE_train'})
rmse_test_df = pd.DataFrame(lgb_test_rmse, columns = {'rmse_test'})
rsq_test_df = pd.DataFrame(lgb_test_RSQ, columns = {'RSQ_test'})
mbe_test_df = pd.DataFrame(lgb_test_MBE, columns = {'MBE_test'})
lgb_results_df = pd.concat([rmse_train_df, rsq_train_df, mbe_train_df, rmse_test_df, rsq_test_df, mbe_test_df], axis = 1)
lgb_results_df

Unnamed: 0,rmse_train,RSQ_train,MBE_train,rmse_test,RSQ_test,MBE_test
0,1517.52575,0.939882,5.449447e-06,1892.591146,0.892735,11.953072
1,1539.036111,0.935899,1.612157e-05,1931.674374,0.903144,-54.343644
2,1483.115959,0.940361,7.981356e-06,2067.887157,0.8906,225.249791
3,1476.774505,0.939628,5.174729e-06,2093.287652,0.895414,253.526911
4,1531.309726,0.93882,2.606327e-07,1900.339249,0.891563,-285.400993


## SVR

In [21]:
svr_train_rmse = []; svr_train_RSQ = []; svr_train_MBE = []
svr_test_rmse = []; svr_test_RSQ = []; svr_test_MBE = []

#svr = SVR(C = 10 ,  degree = 2 , epsilon = 0.01, gamma = 0.05)
svr = SVR(kernel = 'linear', C = 400,  degree = 2 , epsilon = 0.005, gamma = 0.01)
for train_index, test_index in kfold.split(scaled_X_df):
    scaled_X_train, scaled_X_test = scaled_X_df.iloc[train_index,:], scaled_X_df.iloc[test_index,:]
    scaled_y_train, scaled_y_test = scaled_y_df.iloc[train_index], scaled_y_df.iloc[test_index]
    unscaled_y_train, unscaled_y_test = unscaled_y.iloc[train_index], unscaled_y.iloc[test_index]
    
    scaled_X_train = scaled_X_train.to_numpy(); scaled_X_test = scaled_X_test.to_numpy()
    scaled_y_train = scaled_y_train.to_numpy(); scaled_y_test = scaled_y_test.to_numpy()
    unscaled_y_train = unscaled_y_train.to_numpy(); unscaled_y_test = unscaled_y_test.to_numpy()
    
    #----------------------------------------------------------------------------------------------------------
    svr.fit(scaled_X_train, scaled_y_train.ravel())
    #----------------------------------------------------------------------------------------------------------
    
    Y_hat = svr.predict(scaled_X_train)
    Y_Train_Pred = (ymin + Y_hat * multiplier)
    
    train_rmse = np.sqrt(mean_squared_error(unscaled_y_train.ravel(), Y_Train_Pred))
    svr_train_rmse.append(train_rmse)
    train_RSQ = r2_score(unscaled_y_train.ravel(), Y_Train_Pred)
    svr_train_RSQ.append(train_RSQ)
    train_MBE = np.mean(unscaled_y_train.ravel() - Y_Train_Pred)
    svr_train_MBE.append(train_MBE)

    Y_hat = svr.predict(scaled_X_test)
    Y_Test_Pred = (ymin + Y_hat * multiplier)

    test_rmse = np.sqrt(mean_squared_error(unscaled_y_test.ravel(), Y_Test_Pred))
    svr_test_rmse.append(test_rmse)
    test_RSQ = r2_score(unscaled_y_test.ravel(), Y_Test_Pred)
    svr_test_RSQ.append(test_RSQ)
    test_MBE = np.mean(unscaled_y_test.ravel() - Y_Test_Pred)
    svr_test_MBE.append(test_MBE)

rmse_train_df = pd.DataFrame(svr_train_rmse, columns = {'rmse_train'})
rsq_train_df = pd.DataFrame(svr_train_RSQ, columns = {'RSQ_train'})
mbe_train_df = pd.DataFrame(svr_train_MBE, columns = {'MBE_train'})
rmse_test_df = pd.DataFrame(svr_test_rmse, columns = {'rmse_test'})
rsq_test_df = pd.DataFrame(svr_test_RSQ, columns = {'RSQ_test'})
mbe_test_df = pd.DataFrame(svr_test_MBE, columns = {'MBE_test'})
svr_results_df = pd.concat([rmse_train_df, rsq_train_df, mbe_train_df, rmse_test_df, rsq_test_df, mbe_test_df], axis = 1)
svr_results_df

Unnamed: 0,rmse_train,RSQ_train,MBE_train,rmse_test,RSQ_test,MBE_test
0,2024.582058,0.892995,169.331204,1932.405776,0.888175,225.451642
1,2007.347462,0.890953,185.380582,2116.537431,0.883718,78.067972
2,1963.874523,0.89543,179.440808,2197.400154,0.876468,367.393031
3,2023.37295,0.886666,140.308714,1962.197351,0.908103,279.738714
4,2011.811938,0.894401,91.045817,1998.851714,0.880029,-201.324608


## Bagging Tree Regressor

In [28]:
btr_train_rmse = []; btr_train_RSQ = []; btr_train_MBE = []
btr_test_rmse = []; btr_test_RSQ = []; btr_test_MBE = []
dt = DecisionTreeRegressor(criterion = 'mae', 
                             min_samples_split = 25, 
                             max_depth = 6,
                             min_samples_leaf = 10,
                             max_leaf_nodes = 30)
bagging = BaggingRegressor(base_estimator = dt, n_estimators =100)

for train_index, test_index in kfold.split(scaled_X_df):
    scaled_X_train, scaled_X_test = scaled_X_df.iloc[train_index,:], scaled_X_df.iloc[test_index,:]
    scaled_y_train, scaled_y_test = scaled_y_df.iloc[train_index], scaled_y_df.iloc[test_index]
    unscaled_y_train, unscaled_y_test = unscaled_y.iloc[train_index], unscaled_y.iloc[test_index]
    
    scaled_X_train = scaled_X_train.to_numpy(); scaled_X_test = scaled_X_test.to_numpy()
    scaled_y_train = scaled_y_train.to_numpy(); scaled_y_test = scaled_y_test.to_numpy()
    unscaled_y_train = unscaled_y_train.to_numpy(); unscaled_y_test = unscaled_y_test.to_numpy()
    
    #----------------------------------------------------------------------------------------------------------
    bagging.fit(scaled_X_train, scaled_y_train.ravel())
    #----------------------------------------------------------------------------------------------------------
    
    Y_hat = bagging.predict(scaled_X_train)
    Y_Train_Pred = (ymin + Y_hat * multiplier)
    
    train_rmse = np.sqrt(mean_squared_error(unscaled_y_train.ravel(), Y_Train_Pred))
    btr_train_rmse.append(train_rmse)
    train_RSQ = r2_score(unscaled_y_train.ravel(), Y_Train_Pred)
    btr_train_RSQ.append(train_RSQ)
    train_MBE = np.mean(unscaled_y_train.ravel() - Y_Train_Pred)
    btr_train_MBE.append(train_MBE)

    Y_hat = bagging.predict(scaled_X_test)
    Y_Test_Pred = (ymin + Y_hat * multiplier)

    test_rmse = np.sqrt(mean_squared_error(unscaled_y_test.ravel(), Y_Test_Pred))
    btr_test_rmse.append(test_rmse)
    test_RSQ = r2_score(unscaled_y_test.ravel(), Y_Test_Pred)
    btr_test_RSQ.append(test_RSQ)
    test_MBE = np.mean(unscaled_y_test.ravel() - Y_Test_Pred)
    btr_test_MBE.append(test_MBE)

rmse_train_df = pd.DataFrame(btr_train_rmse, columns = {'rmse_train'})
rsq_train_df = pd.DataFrame(btr_train_RSQ, columns = {'RSQ_train'})
mbe_train_df = pd.DataFrame(btr_train_MBE, columns = {'MBE_train'})
rmse_test_df = pd.DataFrame(btr_test_rmse, columns = {'rmse_test'})
rsq_test_df = pd.DataFrame(btr_test_RSQ, columns = {'RSQ_test'})
mbe_test_df = pd.DataFrame(btr_test_MBE, columns = {'MBE_test'})
btr_results_df = pd.concat([rmse_train_df, rsq_train_df, mbe_train_df, rmse_test_df, rsq_test_df, mbe_test_df], axis = 1)
btr_results_df

Unnamed: 0,rmse_train,RSQ_train,MBE_train,rmse_test,RSQ_test,MBE_test
0,1840.990021,0.911522,161.256145,2005.806917,0.879518,203.339484
1,1844.662509,0.907912,168.56496,1972.576477,0.898999,93.052452
2,1824.068925,0.909788,207.559702,2137.34755,0.883127,416.019742
3,1829.711581,0.907323,130.062637,2142.517869,0.890436,299.493258
4,1819.106824,0.913662,152.961992,2001.912441,0.879661,-117.387935


## Adaboost 

In [26]:
ada_train_rmse = []; ada_train_RSQ = []; ada_train_MBE = []
ada_test_rmse = []; ada_test_RSQ = []; ada_test_MBE = []

ada = AdaBoostRegressor(
    DecisionTreeRegressor(max_depth=4), n_estimators=114, learning_rate = 0.089, random_state=42)

for train_index, test_index in kfold.split(scaled_X_df):
    scaled_X_train, scaled_X_test = scaled_X_df.iloc[train_index,:], scaled_X_df.iloc[test_index,:]
    scaled_y_train, scaled_y_test = scaled_y_df.iloc[train_index], scaled_y_df.iloc[test_index]
    unscaled_y_train, unscaled_y_test = unscaled_y.iloc[train_index], unscaled_y.iloc[test_index]
    
    scaled_X_train = scaled_X_train.to_numpy(); scaled_X_test = scaled_X_test.to_numpy()
    scaled_y_train = scaled_y_train.to_numpy(); scaled_y_test = scaled_y_test.to_numpy()
    unscaled_y_train = unscaled_y_train.to_numpy(); unscaled_y_test = unscaled_y_test.to_numpy()
    
    #----------------------------------------------------------------------------------------------------------
    ada.fit(scaled_X_train, scaled_y_train.ravel())
    #----------------------------------------------------------------------------------------------------------
    
    Y_hat = ada.predict(scaled_X_train)
    Y_Train_Pred = (ymin + Y_hat * multiplier)
    
    train_rmse = np.sqrt(mean_squared_error(unscaled_y_train.ravel(), Y_Train_Pred))
    ada_train_rmse.append(train_rmse)
    train_RSQ = r2_score(unscaled_y_train.ravel(), Y_Train_Pred)
    ada_train_RSQ.append(train_RSQ)
    train_MBE = np.mean(unscaled_y_train.ravel() - Y_Train_Pred)
    ada_train_MBE.append(train_MBE)

    Y_hat = ada.predict(scaled_X_test)
    Y_Test_Pred = (ymin + Y_hat * multiplier)

    test_rmse = np.sqrt(mean_squared_error(unscaled_y_test.ravel(), Y_Test_Pred))
    ada_test_rmse.append(test_rmse)
    test_RSQ = r2_score(unscaled_y_test.ravel(), Y_Test_Pred)
    ada_test_RSQ.append(test_RSQ)
    test_MBE = np.mean(unscaled_y_test.ravel() - Y_Test_Pred)
    ada_test_MBE.append(test_MBE)

rmse_train_df = pd.DataFrame(ada_train_rmse, columns = {'rmse_train'})
rsq_train_df = pd.DataFrame(ada_train_RSQ, columns = {'RSQ_train'})
mbe_train_df = pd.DataFrame(ada_train_MBE, columns = {'MBE_train'})
rmse_test_df = pd.DataFrame(ada_test_rmse, columns = {'rmse_test'})
rsq_test_df = pd.DataFrame(ada_test_RSQ, columns = {'RSQ_test'})
mbe_test_df = pd.DataFrame(ada_test_MBE, columns = {'MBE_test'})
ada_results_df = pd.concat([rmse_train_df, rsq_train_df, mbe_train_df, rmse_test_df, rsq_test_df, mbe_test_df], axis = 1)
ada_results_df

Unnamed: 0,rmse_train,RSQ_train,MBE_train,rmse_test,RSQ_test,MBE_test
0,1741.597829,0.920818,-199.664938,2034.565946,0.876038,-184.548881
1,1693.237361,0.92241,-135.615761,2057.461256,0.890119,-202.569658
2,1682.256188,0.92327,-218.447207,2086.576413,0.888614,48.25424
3,1698.587105,0.92013,-178.403931,2121.168062,0.892609,3.789021
4,1688.593495,0.925607,-185.820088,2130.543533,0.8637,-421.36742


## CatBoost regressor 

In [24]:
cat_train_rmse = []; cat_train_RSQ = []; cat_train_MBE = []
cat_test_rmse = []; cat_test_RSQ = []; cat_test_MBE = []

cat_model = cb.CatBoostRegressor(iterations = 250 , learning_rate = 0.05, 
                               depth = 2, l2_leaf_reg = 0.200, verbose=False )
for train_index, test_index in kfold.split(scaled_X_df):
    scaled_X_train, scaled_X_test = scaled_X_df.iloc[train_index,:], scaled_X_df.iloc[test_index,:]
    scaled_y_train, scaled_y_test = scaled_y_df.iloc[train_index], scaled_y_df.iloc[test_index]
    unscaled_y_train, unscaled_y_test = unscaled_y.iloc[train_index], unscaled_y.iloc[test_index]
    
    scaled_X_train = scaled_X_train.to_numpy(); scaled_X_test = scaled_X_test.to_numpy()
    scaled_y_train = scaled_y_train.to_numpy(); scaled_y_test = scaled_y_test.to_numpy()
    unscaled_y_train = unscaled_y_train.to_numpy(); unscaled_y_test = unscaled_y_test.to_numpy()
    
    #----------------------------------------------------------------------------------------------------------
    cat_reg = cat_model.fit(scaled_X_train, scaled_y_train.ravel())
    #----------------------------------------------------------------------------------------------------------
    
    Y_hat = cat_reg.predict(scaled_X_train)
    Y_Train_Pred = (ymin + Y_hat * multiplier)
    
    train_rmse = np.sqrt(mean_squared_error(unscaled_y_train.ravel(), Y_Train_Pred))
    cat_train_rmse.append(train_rmse)
    train_RSQ = r2_score(unscaled_y_train.ravel(), Y_Train_Pred)
    cat_train_RSQ.append(train_RSQ)
    train_MBE = np.mean(unscaled_y_train.ravel() - Y_Train_Pred)
    cat_train_MBE.append(train_MBE)

    Y_hat = cat_reg.predict(scaled_X_test)
    Y_Test_Pred = (ymin + Y_hat * multiplier)

    test_rmse = np.sqrt(mean_squared_error(unscaled_y_test.ravel(), Y_Test_Pred))
    cat_test_rmse.append(test_rmse)
    test_RSQ = r2_score(unscaled_y_test.ravel(), Y_Test_Pred)
    cat_test_RSQ.append(test_RSQ)
    test_MBE = np.mean(unscaled_y_test.ravel() - Y_Test_Pred)
    cat_test_MBE.append(test_MBE)

rmse_train_df = pd.DataFrame(cat_train_rmse, columns = {'rmse_train'})
rsq_train_df = pd.DataFrame(cat_train_RSQ, columns = {'RSQ_train'})
mbe_train_df = pd.DataFrame(cat_train_MBE, columns = {'MBE_train'})
rmse_test_df = pd.DataFrame(cat_test_rmse, columns = {'rmse_test'})
rsq_test_df = pd.DataFrame(cat_test_RSQ, columns = {'RSQ_test'})
mbe_test_df = pd.DataFrame(cat_test_MBE, columns = {'MBE_test'})
cat_results_df = pd.concat([rmse_train_df, rsq_train_df, mbe_train_df, rmse_test_df, rsq_test_df, mbe_test_df], axis = 1)
cat_results_df

Unnamed: 0,rmse_train,RSQ_train,MBE_train,rmse_test,RSQ_test,MBE_test
0,1723.582996,0.922447,0.027149,1921.395494,0.889445,34.498636
1,1722.79016,0.919678,0.035182,1958.625974,0.900422,-112.945542
2,1688.453971,0.922704,0.016303,2129.178761,0.884019,207.275136
3,1726.043642,0.917527,0.038897,2081.142048,0.896624,311.416039
4,1704.047964,0.924239,-0.004584,1925.583896,0.888663,-286.747358


## Stacking Ensemble Method

In [25]:
# define the base models
#Keras Model
def build_nn():
    model= Sequential()
    model.add(Dense(50, activation='relu', kernel_initializer='he_normal', input_shape=(n_features,)))
    model.add(Dense(30, activation='relu', kernel_initializer='he_normal', input_shape=(n_features,)))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=Adam_optimizer, loss='mean_squared_error')
    return model

keras_reg = tf.keras.wrappers.scikit_learn.KerasRegressor(build_nn, epochs=500, batch_size=32,verbose=False)
keras_reg._estimator_type = "regressor"

bagging_reg = BaggingRegressor(DecisionTreeRegressor())
ada_reg = AdaBoostRegressor(
    DecisionTreeRegressor(max_depth=4), n_estimators=100, random_state=42)
rf_reg = RandomForestRegressor(n_estimators=194, max_depth=10, min_samples_leaf=3)
xgb_reg = XGBRegressor(n_estimators =200, max_depth=1, learning_rate = 0.1)
lgb_reg = LGBMRegressor(n_estimators = 1000 , max_depth = 2 , learning_rate = 0.012)
svr_reg = SVR(C = 10 ,  degree = 2 , epsilon = 0.01, gamma = 0.05)
cat_reg = cb.CatBoostRegressor(iterations = 250 , learning_rate = 0.05, 
                               depth = 2, l2_leaf_reg = 2.00,verbose=False )

level0 = list()
#level0.append(('MLR', lin_reg))
level0.append(('RF', rf_reg))
#level0.append(('BDT', bagging_reg))
#level0.append(('XRB', xgb_reg))
level0.append(('SVR', svr_reg))
#level0.append(('LGB', lgb_reg))
#level0.append(('ADA', ada_reg))
level0.append(('CAT', cat_reg))
level0.append(('ANN', keras_reg))
#level0.append(('knn', KNeighborsRegressor()))
      
# define meta lelevel1 = lin_regarner model
level1 = lin_reg
# define the stacking ensemble
SRmodel = StackingRegressor(estimators=level0, final_estimator=level1, cv=5)

str_train_rmse = []; str_train_RSQ = []; str_train_MBE = []
str_test_rmse = []; str_test_RSQ = []; str_test_MBE = []

for train_index, test_index in kfold.split(scaled_X_df):
    scaled_X_train, scaled_X_test = scaled_X_df.iloc[train_index,:], scaled_X_df.iloc[test_index,:]
    scaled_y_train, scaled_y_test = scaled_y_df.iloc[train_index], scaled_y_df.iloc[test_index]
    unscaled_y_train, unscaled_y_test = unscaled_y.iloc[train_index], unscaled_y.iloc[test_index]
    
    scaled_X_train = scaled_X_train.to_numpy(); scaled_X_test = scaled_X_test.to_numpy()
    scaled_y_train = scaled_y_train.to_numpy(); scaled_y_test = scaled_y_test.to_numpy()
    unscaled_y_train = unscaled_y_train.to_numpy(); unscaled_y_test = unscaled_y_test.to_numpy()
    
    #----------------------------------------------------------------------------------------------------------
    SRmodel.fit(scaled_X_train, scaled_y_train.ravel())
    #----------------------------------------------------------------------------------------------------------
    
    Y_hat = SRmodel.predict(scaled_X_train)
    Y_Train_Pred = (ymin + Y_hat * multiplier)
    
    train_rmse = np.sqrt(mean_squared_error(unscaled_y_train.ravel(), Y_Train_Pred))
    str_train_rmse.append(train_rmse)
    train_RSQ = r2_score(unscaled_y_train.ravel(), Y_Train_Pred)
    str_train_RSQ.append(train_RSQ)
    train_MBE = np.mean(unscaled_y_train.ravel() - Y_Train_Pred)
    str_train_MBE.append(train_MBE)

    Y_hat = SRmodel.predict(scaled_X_test)
    Y_Test_Pred = (ymin + Y_hat * multiplier)

    test_rmse = np.sqrt(mean_squared_error(unscaled_y_test.ravel(), Y_Test_Pred))
    str_test_rmse.append(test_rmse)
    test_RSQ = r2_score(unscaled_y_test.ravel(), Y_Test_Pred)
    str_test_RSQ.append(test_RSQ)
    test_MBE = np.mean(unscaled_y_test.ravel() - Y_Test_Pred)
    str_test_MBE.append(test_MBE)

rmse_train_df = pd.DataFrame(str_train_rmse, columns = {'rmse_train'})
rsq_train_df = pd.DataFrame(str_train_RSQ, columns = {'RSQ_train'})
mbe_train_df = pd.DataFrame(str_train_MBE, columns = {'MBE_train'})
rmse_test_df = pd.DataFrame(str_test_rmse, columns = {'rmse_test'})
rsq_test_df = pd.DataFrame(str_test_RSQ, columns = {'RSQ_test'})
mbe_test_df = pd.DataFrame(str_test_MBE, columns = {'MBE_test'})
str_results_df = pd.concat([rmse_train_df, rsq_train_df, mbe_train_df, rmse_test_df, rsq_test_df, mbe_test_df], axis = 1)
str_results_df

Unnamed: 0,rmse_train,RSQ_train,MBE_train,rmse_test,RSQ_test,MBE_test
0,1614.705419,0.931936,-24.427691,1935.893918,0.887771,-14.240779
1,1691.299892,0.922588,-25.451493,1998.331192,0.896344,-116.000685
2,1829.011362,0.909299,97.977269,2163.401904,0.880261,314.128651
3,1592.965535,0.929754,18.986997,1922.635937,0.911771,216.919662
4,1599.372835,0.933261,1.506037,1897.913594,0.89184,-302.186846


## Results

In [70]:
rmse_train_list = {'ann' : ann_train_rmse,
                  'lr' : lr_train_rmse,
                  'enr' : enr_train_rmse,
                  'rf' : rf_train_rmse,
                  'bdt' : bdt_train_rmse,
                  'adb' : adb_train_rmse,
                  'svr' : svr_train_rmse,
                  'xgb' : xgb_train_rmse,
                  'lgb' : lgb_train_rmse,
                  'cbr' : cbr_train_rmse,
                  'ser' : ser_train_rmse,
                  }
rsq_train_list = {'ann' : ann_train_RSQ,
                  'lr' : lr_train_RSQ,
                  'enr' : enr_train_RSQ,
                  'rf' : rf_train_RSQ,
                  'bdt' : bdt_train_RSQ,
                  'adb' : adb_train_RSQ,
                  'svr' : svr_train_RSQ,
                  'xgb' : xgb_train_RSQ,
                  'lgb' : lgb_train_RSQ,
                  'cbr' : cbr_train_RSQ,
                  'ser' : ser_train_RSQ,
                  }
mbe_train_list = {'ann' : ann_train_MBE,
                  'lr' : lr_train_MBE,
                  'enr' : enr_train_MBE,
                  'rf' : rf_train_MBE,
                  'bdt' : bdt_train_MBE,
                  'adb' : adb_train_MBE,
                  'svr' : svr_train_MBE,
                  'xgb' : xgb_train_MBE,
                  'lgb' : lgb_train_MBE,
                  'cbr' : cbr_train_MBE,
                  'ser' : ser_train_MBE,
                  }
rmse_test_list = {'ann' : ann_test_rmse,
                  'lr' : lr_test_rmse,
                  'enr' : enr_test_rmse,
                  'rf' : rf_test_rmse,
                  'bdt' : bdt_test_rmse,
                  'adb' : adb_test_rmse,
                  'svr' : svr_test_rmse,
                  'xgb' : xgb_test_rmse,
                  'lgb' : lgb_test_rmse,
                  'cbr' : cbr_test_rmse,
                  'ser' : ser_test_rmse,
                  }
rsq_test_list = {'ann' : ann_test_RSQ,
                  'lr' : lr_test_RSQ,
                  'enr' : enr_test_RSQ,
                  'rf' : rf_test_RSQ,
                  'bdt' : bdt_test_RSQ,
                  'adb' : adb_test_RSQ,
                  'svr' : svr_test_RSQ,
                  'xgb' : xgb_test_RSQ,
                  'lgb' : lgb_test_RSQ,
                  'cbr' : cbr_test_RSQ,
                  'ser' : ser_test_RSQ,
                  }
mbe_test_list = {'ann' : ann_test_MBE,
                  'lr' : lr_test_MBE,
                  'enr' : enr_test_MBE,
                  'rf' : rf_test_MBE,
                  'bdt' : bdt_test_MBE,
                  'adb' : adb_test_MBE,
                  'svr' : svr_test_MBE,
                  'xgb' : xgb_test_MBE,
                  'lgb' : lgb_test_MBE,
                  'cbr' : cbr_test_MBE,
                  'ser' : ser_test_MBE,
                  }

In [71]:
rmse_train_df = pd.DataFrame.from_dict(rmse_train_list, orient = 'index', columns = {'rmse_train'})
rsq_train_df = pd.DataFrame.from_dict(rsq_train_list, orient = 'index', columns = {'RSQ_train'})
mbe_train_df = pd.DataFrame.from_dict(mbe_train_list, orient = 'index', columns = {'MBE_train'})
rmse_test_df = pd.DataFrame.from_dict(rmse_test_list, orient = 'index', columns = {'rmse_test'})
rsq_test_df = pd.DataFrame.from_dict(rsq_test_list, orient = 'index', columns = {'RSQ_test'})
mbe_test_df = pd.DataFrame.from_dict(mbe_test_list, orient = 'index', columns = {'MBE_test'})
results_df = pd.concat([rmse_train_df, rsq_train_df, mbe_train_df, rmse_test_df, rsq_test_df, mbe_test_df], axis = 1)

In [72]:
results_df

Unnamed: 0,rmse_train,RSQ_train,MBE_train,rmse_test,RSQ_test,MBE_test
ann,1618.164112,0.930478,-178.39376,2278.369937,0.871984,-30.637298
lr,1979.452772,0.895968,-109.103761,2490.43556,0.847044,-267.634748
enr,1980.040576,0.895907,-109.103761,2490.606214,0.847023,-276.378944
rf,1005.030009,0.973182,-116.352329,2264.910498,0.873492,-192.279992
bdt,846.190711,0.980989,-120.0612,2279.112704,0.871901,1.624975
adb,1540.131399,0.937022,-519.428583,2289.543665,0.870725,-592.005328
svr,1814.420948,0.912592,60.695494,2279.112704,0.871901,1.624975
xgb,1607.802238,0.931366,-114.431036,2184.53459,0.882312,-231.236542
lgb,1558.126187,0.935542,-109.10377,2167.384824,0.884152,-219.807314
cbr,1169.137974,0.963708,-107.950074,2066.51087,0.894685,-185.565412


In [50]:
results_df.to_csv('results.csv')