In [None]:
# Import the libraries required in the script

import time
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import shutil, os
import keras
from keras.models import Sequential, load_model
from keras.callbacks import ModelCheckpoint
from keras.callbacks import LearningRateScheduler, EarlyStopping
from keras.layers import Dense
import sys
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import callbacks
from IPython.display import clear_output

In [None]:
# ---  dataset 1 --- #

# Path of the input Excel file (first training dataset)
inputExcelFile = "simulations.xlsx"

# Read the Excel workbook into a DataFrame
excelFile = pd.read_excel(inputExcelFile)

# Write a CSV copy of the dataset (header row kept, no index column)
excelFile.to_csv("dataset1.csv", index=False, header=True)

# Reload the CSV copy. read_csv already returns a DataFrame, so wrapping the
# result in pd.DataFrame(...) again is unnecessary.
dataframeObject_1 = pd.read_csv("dataset1.csv")

In [None]:
# hyperparameters

# Number of units in the first, second and third hidden Dense layers
Dense_1, Dense_2, Dense_3 = 32, 64, 32

# Step size handed to the Adam optimizer
learning_rate = 5e-4

# Dataset sizing. validation_split is the fraction passed as `test_size`
# to the train/test split; N and N_test are not referenced by the visible
# cells of this notebook.
N = 280
validation_split = 0.2
N_test = 30

# Training loop settings
batch_size = 60
epochs = 5000

In [None]:
# dataset 1

# Inputs: every column that remains after dropping the identifier and the
# columns used as outputs.
input_df_1 = dataframeObject_1.drop(
    ['Simulation', 'A1', 'A2', 'B1', 'B2', 'kg', 'g', 'C1', 'Ap'], axis=1)

# Outputs: every column that remains after dropping the identifier and the
# columns used as inputs.
output_df_1 = dataframeObject_1.drop(
    ['Simulation', 'd10', 'd21', 'd32', 'd43', 'Concentration'], axis=1)

# Take explicit float copies:
#  * an integer-typed array would silently truncate the log10 values that
#    are written back in place below;
#  * a copy guarantees the in-place writes never reach (or fail on a
#    read-only view of) the DataFrames above.
input_array_1 = input_df_1.to_numpy(dtype=float, copy=True)
output_array_1 = output_df_1.to_numpy(dtype=float, copy=True)

# Positional indices of the columns that are replaced by their log10
input_log_columns = [1, 2, 3, 4]
output_log_columns = [0, 1, 4, 7]

input_array_1[:, input_log_columns] = np.log10(input_array_1[:, input_log_columns])
output_array_1[:, output_log_columns] = np.log10(output_array_1[:, output_log_columns])

# scaling dataset
# NOTE(review): both scalers are fitted on the complete dataset, i.e. before
# the train/test split, so the min/max used for scaling also come from rows
# that later end up in the test set.

scaler_X_K = MinMaxScaler(feature_range=(0, 1), copy=True)
x_training_scaled_1 = scaler_X_K.fit_transform(input_array_1)

scaler_y_K = MinMaxScaler(feature_range=(0, 1), copy=True)
y_training_scaled_1 = scaler_y_K.fit_transform(output_array_1)

In [None]:
# random splitting
# A fixed seed makes the shuffled train/test partition identical on every
# run, so results can be reproduced after a kernel restart.
split_seed = 42
x_training, x_testing, y_training, y_testing = train_test_split(
    x_training_scaled_1, y_training_scaled_1,
    test_size=validation_split, random_state=split_seed)

print(x_training.shape)
print(y_training.shape)

In [None]:
# Fully connected regression network: three ReLU hidden layers followed by a
# linear output layer with one unit per target column.
model = Sequential()
initializer = 'normal'

model.add(Dense(Dense_1, input_dim=x_training.shape[1], kernel_initializer=initializer, activation='relu'))
model.add(Dense(Dense_2, kernel_initializer=initializer, activation='relu'))
model.add(Dense(Dense_3, kernel_initializer=initializer, activation='relu'))
model.add(Dense(y_training.shape[1], kernel_initializer=initializer, activation="linear"))

# compile the model with loss and optimizer
# The network is trained on mean absolute error. 'accuracy' is a
# classification metric and carries no information for continuous targets,
# so mean squared error is tracked as the additional metric instead.
model.compile(loss=keras.losses.mean_absolute_error,
              optimizer=Adam(learning_rate=learning_rate),
              metrics=['mean_squared_error'])

# Save the model to disk each time the validation loss reaches a new best.
checkpoint = ModelCheckpoint("best_model_K.hdf5", monitor='val_loss', verbose=1,
             save_best_only=True, mode='auto')
# Stop training once the validation loss has not improved for `patience`
# epochs. NOTE(review): with patience=5000 and the notebook's epochs = 5000
# setting this can never trigger before training ends -- lower the patience
# if early stopping is actually wanted.
es = EarlyStopping(monitor='val_loss', verbose=1, patience=5000)

class PlotCurrentEstimate(callbacks.Callback):
    """Callback that records the training and validation loss while training.

    Every ``update_freq`` epochs the notebook cell output is cleared and the
    current ``loss`` / ``val_loss`` values from the epoch logs are appended
    to ``self.h``. Despite its name, the callback draws no figure.

    Args:
        update_freq: number of epochs between two consecutive recordings.

    Attributes:
        epoch: number of epochs seen so far (counted by this callback).
        update_freq: value passed to the constructor.
        h: dict with the keys ``'loss'`` and ``'val_loss'``, each holding the
            list of recorded values.
    """

    def __init__(self, update_freq=2):
        # Let the Keras base class set up its own attributes first.
        super().__init__()
        self.epoch = 0
        self.update_freq = update_freq
        self.h = {'loss': [], "val_loss": []}

    def on_epoch_end(self, epoch, logs=None):
        """Record the losses of the finished epoch every ``update_freq`` epochs.

        Args:
            epoch: epoch index supplied by Keras (unused; the callback keeps
                its own counter).
            logs: metric dict supplied by Keras; may be ``None``.
        """
        # Avoid a shared mutable default argument.
        logs = logs or {}
        self.epoch += 1

        if self.epoch % self.update_freq == 0:
            # Keep the cell output short during long trainings.
            clear_output(wait=True)

            # .get() records None instead of raising KeyError when a value is
            # absent (e.g. training without validation data).
            self.h["loss"].append(logs.get("loss"))
            self.h["val_loss"].append(logs.get("val_loss"))


trainplot = PlotCurrentEstimate()

In [None]:
# Fit the network. The held-out split is supplied as validation data, which
# is what provides the 'val_loss' values monitored by the callbacks.
history = model.fit(
    x_training,
    y_training,
    validation_data=(x_testing, y_testing),
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[checkpoint, es, trainplot],
    verbose=2,
)

In [None]:
# Training vs. validation loss per epoch, on a logarithmic y axis.
loss_fig, loss_ax = plt.subplots()

for loss_key in ('loss', 'val_loss'):
    loss_ax.plot(history.history[loss_key])

loss_ax.set_title('model loss')
loss_ax.set_ylabel('loss')
loss_ax.set_xlabel('epoch')
loss_ax.set_yscale("log")
loss_ax.legend(['train', 'test'], loc='upper left')

loss_fig.savefig("trainingCurve.png")
plt.show()

In [None]:
# NOTE(review): predictions come from `model` as it stands after the last
# training epoch; the best checkpoint written to "best_model_K.hdf5" is not
# reloaded here -- confirm that this is intended.

# Undo the min-max scaling of predictions and targets. Columns that were
# log10-transformed before scaling stay in log10 space.
y_predicted_training = scaler_y_K.inverse_transform(model.predict(x_training))
y_training_rescale = scaler_y_K.inverse_transform(y_training)

y_predicted_test = scaler_y_K.inverse_transform(model.predict(x_testing))
y_testing_rescale = scaler_y_K.inverse_transform(y_testing)

# the error is calculated between the real y_testing values and NN predicted ones
# Absolute percentage error of every test sample (rows) and output (columns),
# computed in one vectorised expression instead of a Python double loop.
errore = np.abs((y_testing_rescale - y_predicted_test) / y_testing_rescale) * 100
# Mean percentage error of each output column, as a plain list of floats
errore_medio = errore.mean(axis=0).tolist()
errore_massimo = np.max(errore)
errore_minimo = np.min(errore)
print('Average percentage error on test set = %r\nMaximum percentage error= %r\nMinimum percentage error = %r' %(errore_medio, errore_massimo, errore_minimo))
# Standard deviation of the percentage error over all samples and outputs
dev = np.std(errore)

def _plot_parity_grid(y_true, y_pred, marker, filename, n_rows=2, n_cols=4):
    """Draw one predicted-vs-reference panel per output column and save it.

    Each panel shows the predicted values against the reference values as
    markers, plus the reference values against themselves as the identity
    line. Panels in the left column get the y label 'NN' and panels in the
    bottom row get the x label 'CFD'.

    Args:
        y_true: 2-D array of reference values, one column per output.
        y_pred: 2-D array of predicted values, same shape as ``y_true``.
        marker: matplotlib format string for the scatter markers.
        filename: path the figure is saved to.
        n_rows: number of panel rows in the grid.
        n_cols: number of panel columns in the grid.

    Returns:
        The ``(fig, axs)`` pair, with ``axs`` flattened to one dimension.
    """
    # squeeze=False always yields a 2-D axes array, so ravel() is safe
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(7, 7), squeeze=False)
    fig.subplots_adjust(hspace=.3, wspace=.5)
    axs = axs.ravel()

    for i in range(y_true.shape[1]):
        axs[i].plot(y_true[:, i], y_pred[:, i], marker)
        axs[i].plot(y_true[:, i], y_true[:, i])
        if i % n_cols == 0:
            axs[i].set_ylabel('NN')
        if i // n_cols == n_rows - 1:
            axs[i].set_xlabel('CFD')

    fig.savefig(filename)
    return fig, axs


# plotting results
# Test set (red markers) and training set (blue markers) use the same grid
# and the same labelling.
fig, axs = _plot_parity_grid(y_testing_rescale, y_predicted_test, 'ro', "testing.png")

fig, axs = _plot_parity_grid(y_training_rescale, y_predicted_training, 'bo', "training.png")
plt.show()