In [None]:
import pandas as pd
import numpy as np
from numpy import mean, std, absolute
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.neural_network import MLPRegressor
import scipy as sp
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score as r2
from sklearn.preprocessing import normalize
import time

# Apply matplotlib's 'ggplot' style sheet to the figures created in this notebook.
plt.style.use('ggplot')

In [None]:
!pip install openpyxl

In [None]:
def min_max_scale(values):
    """Rescale every column of a 2-D array to the range [0, 1].

    Parameters
    ----------
    values : array-like of shape (n_samples, n_columns)
        Numeric data; it is converted to ``float`` before scaling so integer
        (or object) input cannot truncate the scaled result.

    Returns
    -------
    numpy.ndarray
        Float array of the same shape holding ``(v - min) / (max - min)``
        computed column by column. A constant column is mapped to 0 instead
        of producing NaN from a 0/0 division.
    """
    values = np.asarray(values, dtype=float)
    col_min = values.min(axis=0)
    col_range = values.max(axis=0) - col_min
    # Guard against division by zero for columns whose values are all equal.
    safe_range = np.where(col_range == 0, 1.0, col_range)
    return (values - col_min) / safe_range


# Load the workbook, keep positional rows 1..222 and the first 14 columns,
# then drop every row that still contains a missing value.
data = (
    pd.read_excel('Data/Gasification Data.xlsx', index_col=0, header=0)
    .iloc[1:223, :14]
    .dropna()
)

# Sanity check: both lines should print False now that incomplete rows are gone.
print(data.isnull().values.any())
print(data.isna().values.any())

# The first 9 columns are used as model inputs, the remaining ones as outputs.
X = data.iloc[:, :9]
y = data.iloc[:, 9:]

# Shorten each header to the text before the first ' ['
# (presumably a unit suffix such as "Name [unit]" - confirm against the sheet).
input_columns = [name.split(' [', 1)[0] for name in X.columns]
output_columns = [name.split(' [', 1)[0] for name in y.columns]
print(input_columns, output_columns)

# Force a float dtype so the scaled arrays below are never integer/object typed.
Xvals = X.to_numpy(dtype=float)
yvals = y.to_numpy(dtype=float)
print(Xvals.shape, yvals.shape)

# NOTE(review): the min/max statistics are taken over the full data set, i.e.
# before the train/test split performed later, so the test rows influence the
# scaling. Consider fitting the scaler on the training rows only.
Xnorm = min_max_scale(Xvals)
ynorm = min_max_scale(yvals)

# Expected to print 1.0 / 0.0 pairs when no column is constant.
print(np.max(Xnorm), np.max(ynorm))
print(np.min(Xnorm), np.min(ynorm))

In [None]:
# Histogram of the first column (index 0) of the min-max scaled input array.
sns.histplot(Xnorm[:, 0])

## Regression Analysis

In [None]:
# Hyper-parameter grid for the MLP search.
methods = ['MLP']
random_state = 42
hidden_layers = [2, 4, 6, 8]                 # later passed as hidden_layer_sizes
activation = ['logistic', 'tanh', 'relu']
alpha = [0.1, 0.01, 0.001, 0.0001]           # later passed as alpha
learning_rate = 'adaptive'
learning_rate_init = [0.01, 0.001]
early_stopping = True

# One name per grid point. The training cell recovers the values by splitting
# the name on '_' and reading positions 2, 4, 6 and 8, so the layout of this
# string must not change.
methods_extended = [
    f'{met}_hlayer_{hlayer}_act_{act}_alpha_{alp}_lr_{lr_init}'
    for met in methods
    for hlayer in hidden_layers
    for act in activation
    for alp in alpha
    for lr_init in learning_rate_init
]
print(methods_extended)

In [None]:
## Train/test evaluation of every MLP configuration (30 % of the rows held out)
X_train, X_test, y_train, y_test = train_test_split(Xnorm, ynorm, test_size=.3, random_state=random_state)

# The split is identical for every configuration, so convert to arrays once
# instead of once per configuration and output.
training_x = np.asarray(X_train)
testing_x = np.asarray(X_test)
training_targets = np.asarray(y_train)
n_outputs = training_targets.shape[1]

prediction = {}   # configuration name -> (n_test, n_outputs) predictions
error = {}        # configuration name -> per-output RMSE on the test rows
r2value = {}      # configuration name -> per-output R^2 on the test rows
for met in methods_extended:
    print(met)

    # Names look like 'MLP_hlayer_<n>_act_<name>_alpha_<a>_lr_<lr>', so after
    # splitting on '_' the values sit at positions 2, 4, 6 and 8.
    string_extract = met.split('_')
    n_hidden = int(string_extract[2])
    act_name = string_extract[4]
    alpha_value = float(string_extract[6])
    lr_init_value = float(string_extract[8])

    # Always a float buffer: predictions are real-valued whatever dtype y_test has.
    prediction[met] = np.zeros(np.shape(y_test), dtype=float)

    # One independent single-output network is trained per output column.
    for idx in range(n_outputs):
        regr_multimlp = MLPRegressor(hidden_layer_sizes=(n_hidden,), activation=act_name,
                                     solver='adam', alpha=alpha_value, batch_size='auto',
                                     learning_rate=learning_rate, learning_rate_init=lr_init_value,
                                     power_t=0.5, max_iter=1000, shuffle=True, random_state=random_state,
                                     tol=0.0001, verbose=False, warm_start=False, momentum=0.9,
                                     nesterovs_momentum=True, early_stopping=early_stopping,
                                     validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08,
                                     n_iter_no_change=10)

        # Time the actual work (fit + predict); timing only the constructor
        # call would report a near-zero figure unrelated to training cost.
        start = time.perf_counter()
        regr_multimlp.fit(training_x, training_targets[:, idx])
        prediction[met][:, idx] = regr_multimlp.predict(testing_x)
        end = time.perf_counter()
        print('Computational Time:', end - start)

    # Per-output RMSE and R^2 of this configuration on the held-out rows.
    error[met] = np.sqrt(mse(y_test, prediction[met], multioutput='raw_values'))
    r2value[met] = r2(y_test, prediction[met], multioutput='raw_values')

In [None]:
import openpyxl

# Collapse the per-output test metrics of every configuration into one number
# each (mean RMSE, spread of the RMSE across outputs, mean R^2), in the same
# order as methods_extended.
mean_error = [np.mean(error[name]) for name in methods_extended]
mean_std = [np.std(error[name]) for name in methods_extended]
mean_r2 = [np.mean(r2value[name]) for name in methods_extended]

# Persist the two summary vectors, one value per line.
np.savetxt('mean_error_MLP.csv', mean_error)
np.savetxt('mean_r2_MLP.csv', mean_r2)

# Positions of the configurations with the lowest mean RMSE / highest mean R^2.
mim = np.argmin(mean_error)
mi2 = np.argmax(mean_r2)
print(methods_extended[mim])
print(methods_extended[mi2])
print(mim, mi2)

In [None]:
## Report the per-output scores of the configurations that are best on average
# Configuration with the lowest mean RMSE and its per-output RMSE values.
rmse_best_index = np.argmin(mean_error)
rmse_best = error[methods_extended[rmse_best_index]]

# Configuration with the highest mean R-2 and its per-output R-2 values.
r2_best_index = np.argmax(mean_r2)
r2_best = r2value[methods_extended[r2_best_index]]

# For each criterion print the winning index, then its scores and their mean.
for best_index, best_scores in ((rmse_best_index, rmse_best), (r2_best_index, r2_best)):
    print(best_index)
    print(best_scores, np.mean(best_scores))

## Plotting the Results

In [None]:
### Plot true vs. predicted test values for every configuration and every output
# Set the tick-label sizes once, before any axes exist, so that every figure
# (including the first one) is drawn with the same settings.
plt.rc('xtick', labelsize=12)
plt.rc('ytick', labelsize=12)

# The 'Results/' directory must already exist; savefig does not create it.
for met in methods_extended:
    for odx, out in enumerate(output_columns):
        fig, ax = plt.subplots(figsize=(8, 4))
        ax.plot(np.asarray(y_test)[:, odx], label='True value', color='blue')
        ax.plot(np.asarray(prediction[met])[:, odx], color='red',
                label='Predicted value \n (RMSE = %.4f \n R2=%.3f)' % (error[met][odx], r2value[met][odx]))
        # NOTE(review): the x-axis is the row position within the test set,
        # not a feature; consider renaming this label.
        ax.set_xlabel("Features", fontsize=12)
        ax.set_ylabel("Values", fontsize=12)
        ax.set_title("%s (%s)" % (met, out), fontsize=14)
        ax.legend(fontsize=12)

        fig.savefig('Results/Prediction_%s_%s.pdf' % (met, out))
        # Close the figure: plt.clf() only clears it and leaves it open, so one
        # figure per configuration/output pair would otherwise stay in memory.
        plt.close(fig)