# Importing Modules

In [None]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import KFold


# Load Data

In [None]:
# Load the spreadsheet that supplies the model features ('Sy', 'Elevation',
# 'Drainage_density', 'Lithology') and the 'GWL18' target.
excel_data = pd.read_excel('data_acc_89.xlsx')

In [None]:
def parse_data(excel_data):
    """Build the feature matrix and target vector from a training frame.

    The numeric columns 'Sy', 'Elevation' and 'Drainage_density' are
    standardised, 'Lithology' is one-hot encoded, and the two parts are
    joined column-wise (numeric columns first).

    Parameters
    ----------
    excel_data : pandas.DataFrame
        Must contain the columns 'Sy', 'Elevation', 'Drainage_density',
        'Lithology' and 'GWL18'.

    Returns
    -------
    tuple
        ``(X, Y, enc, scaler)`` where ``X`` is the feature matrix, ``Y`` the
        flattened 'GWL18' column, and ``enc`` / ``scaler`` are the fitted
        ``OneHotEncoder`` / ``StandardScaler`` so the same transformation
        can be re-applied to other data.
    """
    numeric_cols = ['Sy', 'Elevation', 'Drainage_density']

    # The encoder is created with handle_unknown='ignore', so transforming
    # data with a category not seen here does not raise.
    enc = OneHotEncoder(handle_unknown='ignore')
    lithology_one_hot = enc.fit_transform(excel_data[['Lithology']].values).toarray()

    scaler = StandardScaler()
    numeric_scaled = scaler.fit_transform(excel_data[numeric_cols].values)

    features = np.concatenate((numeric_scaled, lithology_one_hot), axis=1)
    target = excel_data[['GWL18']].to_numpy().ravel()

    return (features, target, enc, scaler)

In [None]:
# Split the rows at GWL18 = 7.5 and build a separate feature matrix, target
# vector, encoder and scaler for each side of the split.
GWL18_SPLIT = 7.5
gwl18 = excel_data['GWL18']

X_less, Y_less, enc_less, scaler_less = parse_data(excel_data[gwl18 <= GWL18_SPLIT])
X_greater, Y_greater, enc_greater, scaler_greater = parse_data(excel_data[gwl18 > GWL18_SPLIT])

# ML Model

In [None]:
class GWLML_Model:
    """Stacked ensemble: five base regressors combined by an ElasticNet.

    The base learners (KNN, SVR, random forest, AdaBoost, MLP) are fitted on
    the feature matrix; their predictions form the five input columns of the
    ``ensemble`` ElasticNet, whose output is reported under the key 'ens'.

    Parameters
    ----------
    mode : {'less', 'greater'}
        Selects one of the two hard-coded hyper-parameter sets below.

    Raises
    ------
    ValueError
        If ``mode`` is neither 'less' nor 'greater'.
    """

    # Attribute names of the base learners, in the column order of the
    # matrix handed to the ElasticNet meta-learner.
    _BASE_NAMES = ('knn', 'svr', 'rf', 'adaboost', 'mlp')

    def __init__(self, mode):

        if mode == 'less':
            self.knn = KNeighborsRegressor(n_neighbors=10, weights='distance', p=1)
            self.svr = SVR(kernel='rbf', gamma=3, C=1)
            self.rf = RandomForestRegressor(n_estimators=70, random_state=2)
            self.adaboost = AdaBoostRegressor(n_estimators=70, random_state=2)
            self.mlp = MLPRegressor(hidden_layer_sizes=[32,64,32], activation='relu', random_state=2)
        elif mode == 'greater':
            self.knn = KNeighborsRegressor(n_neighbors=50, weights='distance', p=1)
            self.svr = SVR(kernel='rbf', gamma=1, C=1)
            self.rf = RandomForestRegressor(n_estimators=100, random_state=2)
            self.adaboost = AdaBoostRegressor(n_estimators=90, random_state=2)
            self.mlp = MLPRegressor(hidden_layer_sizes=[8], activation='tanh', random_state=2)
        else:
            # Fail fast: an unknown mode used to yield an object without any
            # estimators, which only surfaced later as an AttributeError.
            raise ValueError(f"mode must be 'less' or 'greater', got {mode!r}")

        # The meta-learner is configured identically for both modes.
        self.ensemble = ElasticNet(random_state=2)

    def _base_predictions(self, X):
        """Return ``{name: predictions}`` for every base learner, in order."""
        return {name: getattr(self, name).predict(X) for name in self._BASE_NAMES}

    def _stack(self, base_preds):
        """Arrange base predictions as one column per base learner."""
        return np.vstack([base_preds[name] for name in self._BASE_NAMES]).T

    def fit(self, X, Y, cv=None):
        """Fit the base learners, then the ElasticNet on their predictions.

        Parameters
        ----------
        X : array-like
            Feature matrix passed to every base learner.
        Y : array-like
            Regression target.
        cv : int or cross-validation splitter, optional
            ``None`` (default) keeps the original behaviour: the meta-learner
            is trained on the base learners' predictions for the very rows
            they were fitted on.  Any other value is forwarded to
            ``sklearn.model_selection.cross_val_predict`` so the meta-learner
            is trained on out-of-fold predictions instead; the base learners
            are still fitted on all of ``X`` afterwards.
        """
        if cv is None:
            for name in self._BASE_NAMES:
                getattr(self, name).fit(X, Y)
            # NOTE(review): these are in-sample predictions, which are
            # typically optimistic for learners such as distance-weighted
            # KNN; pass ``cv`` to train the meta-learner on held-out
            # predictions.
            base_preds = self._base_predictions(X)
        else:
            from sklearn.model_selection import cross_val_predict

            base_preds = {
                name: cross_val_predict(getattr(self, name), X, Y, cv=cv)
                for name in self._BASE_NAMES
            }
            # cross_val_predict works on clones, so fit the real estimators.
            for name in self._BASE_NAMES:
                getattr(self, name).fit(X, Y)

        self.ensemble.fit(self._stack(base_preds), Y)

    def predict(self, X):
        """Predict with every base learner and with the stacked ensemble.

        Returns
        -------
        dict
            Prediction arrays keyed 'knn', 'svr', 'rf', 'adaboost', 'mlp'
            and 'ens' (in that order).
        """
        preds = self._base_predictions(X)
        preds['ens'] = self.ensemble.predict(self._stack(preds))
        return preds

    def evaluate(self, X, Y):
        """Print and return the mean absolute error of every model on (X, Y).

        Returns
        -------
        dict
            Mean absolute error keyed like the result of :meth:`predict`.
        """
        err = {name: np.mean(np.abs(yp - Y)) for name, yp in self.predict(X).items()}

        # One tab-separated line: "\t<name>\t <error rounded to 3 decimals>".
        report = []
        for name, value in err.items():
            report.extend(['\t' + name + '\t', round(value, 3)])
        print(*report)

        return err

In [None]:
# Build the 'less' model and fit it on every row with GWL18 <= 7.5.
model_less = GWLML_Model('less')
model_less.fit(X_less,Y_less)

In [None]:
# Build the 'greater' model and fit it on every row with GWL18 > 7.5.
model_greater = GWLML_Model('greater')
model_greater.fit(X_greater,Y_greater)



# Cross-Validation (CV)

In [None]:
import warnings
warnings.filterwarnings('ignore')


In [None]:
# 10-fold cross-validation of the 'less' configuration on the GWL18 <= 7.5 rows.
# NOTE(review): folds are contiguous blocks (no shuffling); if the rows are
# ordered in the spreadsheet, consider KFold(shuffle=True, random_state=...).
kf = KFold(n_splits=10)

# Per-model list of mean absolute errors, one entry per fold.
errs = {}

for train_index, test_index in kf.split(X_less):
    X_train, X_test = X_less[train_index], X_less[test_index]
    y_train, y_test = Y_less[train_index], Y_less[test_index]

    # Fold-local name: keeps ``model_less`` (fitted on the full subset in an
    # earlier cell) from being replaced by the last fold's model.
    fold_model = GWLML_Model('less')
    fold_model.fit(X_train,y_train)
    err = fold_model.evaluate(X_test,y_test)

    for name, value in err.items():
        errs.setdefault(name, []).append(value)

# Average of the per-fold errors for every model.
print('------------------------')
print('\tknn\t',round(np.mean(errs['knn']),3),
        '\tsvr\t',round(np.mean(errs['svr']),3),
        '\trf\t',round(np.mean(errs['rf']),3),
        '\tadaboost\t',round(np.mean(errs['adaboost']),3),
        '\tmlp\t',round(np.mean(errs['mlp']),3),
        '\tens\t',round(np.mean(errs['ens']),3)
    )


knn	 0.904 svr	 0.892 rf	 0.939 adaboost	 1.003 mlp	 0.87 ens	 0.923
knn	 1.108 svr	 1.067 rf	 1.122 adaboost	 1.099 mlp	 1.087 ens	 1.042
knn	 0.913 svr	 0.942 rf	 1.019 adaboost	 0.881 mlp	 0.88 ens	 0.867
knn	 0.81 svr	 0.88 rf	 0.78 adaboost	 0.966 mlp	 0.844 ens	 0.819
knn	 1.373 svr	 1.378 rf	 1.285 adaboost	 1.517 mlp	 1.387 ens	 1.38
knn	 1.142 svr	 1.178 rf	 1.189 adaboost	 1.078 mlp	 1.241 ens	 1.075
knn	 1.352 svr	 1.476 rf	 1.324 adaboost	 1.696 mlp	 1.189 ens	 1.529
knn	 1.278 svr	 1.249 rf	 1.298 adaboost	 1.34 mlp	 1.243 ens	 1.271
knn	 1.352 svr	 1.338 rf	 1.348 adaboost	 1.167 mlp	 1.439 ens	 1.279
knn	 1.319 svr	 1.347 rf	 1.332 adaboost	 1.456 mlp	 1.458 ens	 1.347
------------------------
	knn	 1.155 	svr	 1.175 	rf	 1.163 	adaboost	 1.22 	mlp	 1.164 	ens	 1.153


In [None]:
# 10-fold cross-validation of the 'greater' configuration on the GWL18 > 7.5 rows.
# NOTE(review): folds are contiguous blocks (no shuffling); if the rows are
# ordered in the spreadsheet, consider KFold(shuffle=True, random_state=...).
kf = KFold(n_splits=10)

# Per-model list of mean absolute errors, one entry per fold.
errs = {}

for train_index, test_index in kf.split(X_greater):
    X_train, X_test = X_greater[train_index], X_greater[test_index]
    y_train, y_test = Y_greater[train_index], Y_greater[test_index]

    # Fold-local name: keeps ``model_greater`` (fitted on the full subset in
    # an earlier cell) from being replaced by the last fold's model.
    fold_model = GWLML_Model('greater')
    fold_model.fit(X_train,y_train)
    err = fold_model.evaluate(X_test,y_test)

    for name, value in err.items():
        errs.setdefault(name, []).append(value)

# Average of the per-fold errors for every model.
print('------------------------')
print('\tknn\t',round(np.mean(errs['knn']),3),
        '\tsvr\t',round(np.mean(errs['svr']),3),
        '\trf\t',round(np.mean(errs['rf']),3),
        '\tadaboost\t',round(np.mean(errs['adaboost']),3),
        '\tmlp\t',round(np.mean(errs['mlp']),3),
        '\tens\t',round(np.mean(errs['ens']),3)
    )


	knn	 3.087 	svr	 3.666 	rf	 4.008 	adaboost	 4.072 	mlp	 4.492 	ens	 3.108
	knn	 3.648 	svr	 3.708 	rf	 6.487 	adaboost	 9.521 	mlp	 3.637 	ens	 3.71
	knn	 3.658 	svr	 3.158 	rf	 3.842 	adaboost	 9.456 	mlp	 3.095 	ens	 3.657
	knn	 3.502 	svr	 3.292 	rf	 3.59 	adaboost	 8.204 	mlp	 3.431 	ens	 3.492
	knn	 5.659 	svr	 2.81 	rf	 9.238 	adaboost	 16.922 	mlp	 2.196 	ens	 5.768
	knn	 2.877 	svr	 1.884 	rf	 3.852 	adaboost	 12.476 	mlp	 1.378 	ens	 2.888
	knn	 2.346 	svr	 2.032 	rf	 2.806 	adaboost	 9.44 	mlp	 1.98 	ens	 2.328
	knn	 4.632 	svr	 4.781 	rf	 4.835 	adaboost	 4.978 	mlp	 5.767 	ens	 4.633
	knn	 4.448 	svr	 3.985 	rf	 4.031 	adaboost	 6.077 	mlp	 3.777 	ens	 4.418
	knn	 12.651 	svr	 13.078 	rf	 12.564 	adaboost	 12.639 	mlp	 13.009 	ens	 12.614
------------------------
	knn	 4.651 	svr	 4.239 	rf	 5.525 	adaboost	 9.378 	mlp	 4.276 	ens	 4.662


# Predictions for 2kresults.xlsx

In [None]:
# Load the spreadsheet of points for which groundwater levels are predicted below.
excel_data2k = pd.read_excel('2kresults.xlsx')

In [None]:
# Show the loaded frame to inspect its columns.
excel_data2k

Unnamed: 0,FID1,Drainage_density,Sy,Lithology,Elevation,Predicted,Unnamed: 6,Unnamed: 7,Unnamed: 8
0,0,0.0,0.000,15,0,2,,,
1,1,0.0,0.047,15,0,1,,,1 = less than or equal 7.5
2,2,0.0,0.047,15,0,1,,,2 = greater than 7.5
3,3,0.0,0.047,15,0,1,,,
4,4,0.0,0.047,15,0,1,,,
...,...,...,...,...,...,...,...,...,...
37138,37138,0.0,0.148,27,99,1,,,
37139,37139,0.0,0.148,27,99,1,,,
37140,37140,0.0,0.148,27,101,1,,,
37141,37141,0.0,0.148,27,101,1,,,


In [None]:
# Show only the feature columns and the 'Predicted' class label used below.
excel_data2k[['Drainage_density','Lithology','Sy','Elevation','Predicted']]

Unnamed: 0,Drainage_density,Lithology,Sy,Elevation,Predicted
0,0.0,15,0.000,0,2
1,0.0,15,0.047,0,1
2,0.0,15,0.047,0,1
3,0.0,15,0.047,0,1
4,0.0,15,0.047,0,1
...,...,...,...,...,...
37138,0.0,27,0.148,99,1
37139,0.0,27,0.148,99,1
37140,0.0,27,0.148,101,1
37141,0.0,27,0.148,101,1


In [None]:
def read_2kresults(excel_data, enc, scaler):
    """Turn a prediction frame into model features plus the matching row ids.

    Parameters
    ----------
    excel_data : pandas.DataFrame
        Must contain the columns 'Sy', 'Elevation', 'Drainage_density',
        'Lithology' and 'FID1'.
    enc : fitted encoder
        ``enc.transform(...)`` is applied to the 'Lithology' column and must
        return an object offering ``toarray()``.
    scaler : fitted scaler
        ``scaler.transform(...)`` is applied to the three numeric columns.

    Returns
    -------
    tuple
        ``(X, id)``: the feature matrix (scaled numeric columns followed by
        the encoded 'Lithology' columns) and the flattened 'FID1' values.
    """
    numeric_cols = ['Sy', 'Elevation', 'Drainage_density']

    # Only transform here: encoder and scaler were fitted on the training data.
    lithology_encoded = enc.transform(excel_data[['Lithology']].values).toarray()
    numeric_scaled = scaler.transform(excel_data[numeric_cols].values)

    features = np.concatenate((numeric_scaled, lithology_encoded), axis=1)
    row_ids = excel_data[['FID1']].to_numpy().ravel()

    return (features, row_ids)

In [None]:
# Route every row to the encoder/scaler pair of the class stored in
# 'Predicted' (1 -> 'less', 2 -> 'greater').
predicted_class = excel_data2k['Predicted']

X2k_less, id_less = read_2kresults(excel_data2k[predicted_class == 1], enc_less, scaler_less)
X2k_greater, id_greater = read_2kresults(excel_data2k[predicted_class == 2], enc_greater, scaler_greater)

In [None]:
# Fit fresh models on the complete 'less' and 'greater' training subsets;
# these are the models used for the predictions below.
model_less = GWLML_Model('less')
model_less.fit(X_less,Y_less)

model_greater = GWLML_Model('greater')
model_greater.fit(X_greater,Y_greater)

In [None]:
# Predict each subset with its own model; every result is a dict of prediction
# arrays keyed 'knn', 'svr', 'rf', 'adaboost', 'mlp' and 'ens'.
yp_less = model_less.predict(X2k_less)
yp_greater = model_greater.predict(X2k_greater)

In [None]:
# Largest FID1 across both subsets; it determines the length of the output list.
max_id = max(id_greater.max(), id_less.max())

In [None]:
# One output line per FID1 value from 0 to max_id; positions that receive no
# prediction keep the empty string.
out_csv = ['']*(max_id+1)

In [None]:
# Store one comma-separated line of predictions per row at the list position
# given by the row's FID1, in the column order knn, svr, rf, adaboost, mlp, ens.
for ids, preds in ((id_less, yp_less), (id_greater, yp_greater)):
    for i, fid in enumerate(ids):
        out_csv[fid] = ','.join(
            f"{preds[name][i]}" for name in ('knn', 'svr', 'rf', 'adaboost', 'mlp', 'ens')
        )

In [None]:
# Write the prediction lines to disk (no header row; line position == FID1).
# The context manager closes the file even if the write raises.
with open('out.csv','w') as fp:
    fp.write('\n'.join(out_csv))