# Metamodeling - DNN
Objective: Develop and Optimize DNN to predict oil spill

Outline of this script:
1. Data Preparation
2. Develop DNN model
3. Fit and measure performance
4. Apply Grid Search, Random Search, HyperOpt and TPOT
5. Compare thier performance


Input for this script: A dataset which include

Output of this script:
A few dataframe that contains performance measures (e.g. R^2 and RMSE) and design of the algorithm

Date: May 01, 2021

Written by: Tanmoy Das

# Data Preparation & Initial Modeling
### Importing ADSAM dataset

In [1]:
# Import libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import time

In [2]:
# Algorithm related libraries
# !pip install plotly
from sklearn.neural_network import MLPRegressor
from sklearn import metrics
import numpy as np
# metrics
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error 

In [3]:
# Import Training dataset  
X_train_dev = pd.read_csv('Dataset for metamodeling/X_train_dev.csv', header=0).copy()
X_val = pd.read_csv('Dataset for metamodeling/X_val.csv', header=0).copy()
y_train_dev = pd.read_csv('Dataset for metamodeling/y_train_dev.csv', header=0).copy()
y_val = pd.read_csv('Dataset for metamodeling/y_val.csv', header=0).copy()

In [4]:
# X_train_dev.info()
display(X_train_dev.describe().T)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
$type_A$,74880.0,0.490008,0.334118,0.0,0.166667,0.5,0.833333,1.0
$type_B$,74880.0,0.49109,0.331719,0.0,0.230769,0.538462,0.769231,1.0
$type_{oil}$,74880.0,0.492446,0.376486,0.0,0.0,0.333333,1.0,1.0
$velocity_A$,74880.0,0.498944,0.28864,0.0,0.24773,0.498735,0.749022,1.0
$velocity_B$,74880.0,0.49948,0.288556,0.0,0.249848,0.49923,0.750018,1.0
$angle_{collision}$,74880.0,0.500327,0.288654,0.0,0.250539,0.500202,0.749741,1.0
$location_{impact.B}$,74880.0,0.398842,0.242892,0.0,0.197668,0.361345,0.57549,1.0
$displacement_A$,74880.0,0.459782,0.325586,0.0,0.122403,0.457617,0.67307,1.0
$displacement_B$,74880.0,0.405861,0.288774,0.0,0.219923,0.362168,0.543024,1.0
$length_B$,74880.0,0.639653,0.318023,0.0,0.442238,0.748682,0.937961,1.0


### Initial Model
Note: this performance is obtained without any hyperparameter tuning

In [5]:
# 
from sklearn.neural_network import MLPRegressor
# from sklearn import metrics
neural_net_wo_scl = MLPRegressor(hidden_layer_sizes=16, activation='logistic', solver='adam', alpha=0.01,
                           batch_size='auto', learning_rate='adaptive',random_state=1, max_iter=20, 
                           learning_rate_init =.8, verbose= False)
neural_net_wo_scl = neural_net_wo_scl.fit(X_train_dev, y_train_dev)
neural_net_wo_scl.score(X_val, y_val) 



0.10808269486506364

# Model Improvement by Optimization

In [10]:
MLPRegressor().get_params()

{'activation': 'relu',
 'alpha': 0.0001,
 'batch_size': 'auto',
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (100,),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_fun': 15000,
 'max_iter': 200,
 'momentum': 0.9,
 'n_iter_no_change': 10,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': None,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

# Model Improvement by Random Search

### Table 4 (DNN part) & Figure 6: PCP 

### Setup of the hyperparameters & Designing the DNN Architecture

In [4]:
## Random Search for PCP
start_random = time.time()
# Set up the sample space for Random Search
parameter_grid = { 'learning_rate_init' : np.linspace(0.020,.25,5),
                    'hidden_layer_sizes' : [128, 64, 32, 16, 8, 2], # list(range(16, 32, 5)),
                    'alpha' : np.linspace(0.0001,.0002,5) }      # range(start, stop, step) #(start=1,stop=1000, num=100))
# Define how many samples
number_models = 100

from sklearn.model_selection import RandomizedSearchCV
rand_search_dnn_model = RandomizedSearchCV(estimator=MLPRegressor(), # check with empty model e.g. SVR()
                            param_distributions=parameter_grid,
                            n_iter = number_models,
                            scoring='r2', n_jobs=30,
                            cv = 2,            refit=True,
                            return_train_score = True)
# Add alpha as hyperparatemeter later??
rand_search_dnn_model.fit(X_train_dev, y_train_dev)

# Calculate scores on validation training set 
learning_rate_init = [item['learning_rate_init'] for item in rand_search_dnn_model.cv_results_['params']]
hidden_layer_sizes = [item['hidden_layer_sizes'] for item in rand_search_dnn_model.cv_results_['params']]
alpha = [item['alpha'] for item in rand_search_dnn_model.cv_results_['params']]
scores = list(rand_search_dnn_model.cv_results_['mean_test_score'])
df_rands_dnn_perfm = pd.DataFrame([hidden_layer_sizes, learning_rate_init, alpha, scores]).T
df_rands_dnn_perfm.columns = ['hidden layer sizes','Learning Rate', 'alpha', 'Score']
# df_rands_dnn_perfm.groupby(['Learning Rate']).mean()
display(df_rands_dnn_perfm)

# Determine the best performance and associate hyperparameters
df_rands_best = pd.DataFrame([rand_search_dnn_model.best_score_, rand_search_dnn_model.best_params_['hidden_layer_sizes'],
                              rand_search_dnn_model.best_params_['learning_rate_init']]).T
print('Best Performance from Random Search')
df_rands_best.columns = ['Best R^2', 'Number of hidden layers', 'Learning Rate']
display(df_rands_best)

#runtime_random_search = time.time() - start_random
#display(runtime_random_search)

Unnamed: 0,hidden layer sizes,Learning Rate,alpha,Score
0,32.0,0.1350,0.000100,0.164258
1,16.0,0.0200,0.000100,0.659051
2,64.0,0.0775,0.000200,0.363508
3,128.0,0.1925,0.000100,0.271581
4,32.0,0.0200,0.000100,0.700960
...,...,...,...,...
95,16.0,0.1350,0.000200,0.293273
96,8.0,0.0200,0.000100,0.421779
97,32.0,0.0775,0.000100,0.255240
98,32.0,0.2500,0.000200,0.294387


Best Performance from Random Search


Unnamed: 0,Best R^2,Number of hidden layers,Learning Rate
0,0.814949,128.0,0.02


In [3]:
# Save the Random Search model
import pickle
pickle.dump(rand_search_dnn_model, open('Models 05.2021/rand_search_dnn_model.sav', 'wb')) #  Random Search
df_rands_dnn_perfm.to_csv('Models 05.2021/data_rands_dnn_perfm_PCP.csv')



NameError: name 'rand_search_dnn_model' is not defined

In [4]:
df_rands_dnn_perfm = pd.read_csv('Models 05.2021/data_rands_dnn_perfm_PCP.csv')

### Parallel Coordinat Plot during Random Search

In [6]:
!pip install plotly-orca

ERROR: Could not find a version that satisfies the requirement plotly-orca (from versions: none)
ERROR: No matching distribution found for plotly-orca


In [12]:
# !pip install plotly
# 33 
# Parallel Coordinate Plot
# https://plotly.com/python/parallel-coordinates-plot/
#plt.clf()
import plotly.graph_objects as go
import os

fig = plt.figure()
fig = go.Figure(data=
    go.Parcoords(#abelfont(size=[12]),
        line = dict(color = df_rands_dnn_perfm['Score'], showscale=True), # showscale is for colorbar
        dimensions = list([
            dict(    #    constraintrange = [4,8],
                label = 'No. of Neurons', values = df_rands_dnn_perfm['hidden layer sizes']),
            dict(#range = [0.01,2],
                label = 'Learning Rate', values = df_rands_dnn_perfm['Learning Rate']),
            dict(# range = [0,8],
                label = 'Alpha', values = df_rands_dnn_perfm['alpha'])
        ]),
        #parallelaxes(fontsize=14)
    ) #colorbar
)

fig.update_layout(
    autosize=True,
    width=500,
    height=500)
 #   yaxis=dict(tickformat=".02%"))

#go.Layout()
#fig.write_image('Fig6.jpeg')

#img_bytes = fig.to_image(format="jpeg", width=600, height=350, scale=2)
#Image(img_bytes)
# fig.to_image(format="png")

# fig.show()

<Figure size 432x288 with 0 Axes>

In [10]:
%matplotlib inline
fig.update_traces(tickfont_size=20)
fig.update_layout(
    font=dict(
        family="Times New Roman",
        size=25,
        color="Black" 
    )
)
plt.savefig('Figures 06.2021/Figure6_PCP.png', dpi=600)
fig.show()

# Documentation: https://plotly.github.io/plotly.py-docs/generated/plotly.graph_objects.Parcoords.html
# https://plotly.com/python/reference/parcoords/

<Figure size 432x288 with 0 Axes>

In [4]:
start_random = time.time()
# Set up the sample space for Random Search
parameter_grid = { 'learning_rate_init' : np.linspace(0.020,.25,5),
                    'hidden_layer_sizes' : [(32,32, 32),(32,32), (32, 16), (32), (16,8)], # list(range(16, 32, 5)),
                    'alpha' : np.linspace(0.0001,.0002,5) }      # range(start, stop, step) #(start=1,stop=1000, num=100))
# Define how many samples
number_models = 100

from sklearn.model_selection import RandomizedSearchCV
rand_search_dnn_model = RandomizedSearchCV(estimator=MLPRegressor(), # check with empty model e.g. SVR()
                            param_distributions=parameter_grid,
                            n_iter = number_models,
                            scoring='r2', n_jobs=30,
                            cv = 2,            refit=True,
                            return_train_score = True)
# Add alpha as hyperparatemeter later??
rand_search_dnn_model.fit(X_train_dev, y_train_dev)

# Calculate scores on validation training set 
learning_rate_init = [item['learning_rate_init'] for item in rand_search_dnn_model.cv_results_['params']]
hidden_layer_sizes = [item['hidden_layer_sizes'] for item in rand_search_dnn_model.cv_results_['params']]
alpha = [item['alpha'] for item in rand_search_dnn_model.cv_results_['params']]
scores = list(rand_search_dnn_model.cv_results_['mean_test_score'])

df_rands_dnn_perfm = pd.DataFrame([hidden_layer_sizes, learning_rate_init, alpha, scores]).T
df_rands_dnn_perfm.columns = ['hidden layer sizes','Learning Rate', 'alpha', 'Score']
# df_rands_dnn_perfm.groupby(['Learning Rate']).mean()
display(df_rands_dnn_perfm)

# Determine the best performance and associate hyperparameters
df_rands_best = pd.DataFrame([rand_search_dnn_model.best_score_, rand_search_dnn_model.best_params_['hidden_layer_sizes'],
                              rand_search_dnn_model.best_params_['learning_rate_init']]).T
print('Best Performance from Random Search')
df_rands_best.columns = ['Best R^2', 'Number of hidden layers', 'Learning Rate']
display(df_rands_best)


# r2_test = .80 for hidden_layer 56,32  ++ good one
df_rands_dnn_perfm.to_csv('df_rands_dnn_perfm.csv')
df_rands_dnn_perfm.sort_values(by=['r2'],  ascending=False)

Unnamed: 0,hidden layer sizes,Learning Rate,alpha,Score
0,"(32, 32)",0.1925,0.00015,-0.0813055
1,"(32, 16)",0.135,0.0002,-0.0149412
2,"(32, 32, 32)",0.0775,0.0001,0.3767
3,"(32, 32)",0.02,0.000175,0.917569
4,32,0.02,0.0001,0.799876
...,...,...,...,...
95,"(32, 32, 32)",0.0775,0.00015,0.740451
96,"(32, 32)",0.25,0.0001,0.347403
97,"(32, 32, 32)",0.135,0.000125,0.35823
98,"(32, 16)",0.02,0.000175,0.846108


Best Performance from Random Search


Unnamed: 0,Best R^2,Number of hidden layers,Learning Rate
0,0.927299,"(32, 32, 32)",0.02


44.97604560852051

## TPOT

In [6]:
!pip install tpot

Collecting tpot
  Downloading TPOT-0.11.7-py3-none-any.whl (87 kB)
Collecting xgboost>=1.1.0
  Downloading xgboost-1.4.0-py3-none-win_amd64.whl (97.8 MB)
Collecting stopit>=1.1.1
  Downloading stopit-1.1.2.tar.gz (18 kB)
Collecting update-checker>=0.16
  Downloading update_checker-0.18.0-py3-none-any.whl (7.0 kB)
Collecting deap>=1.2
  Downloading deap-1.3.1-cp38-cp38-win_amd64.whl (108 kB)
Building wheels for collected packages: stopit
  Building wheel for stopit (setup.py): started
  Building wheel for stopit (setup.py): finished with status 'done'
  Created wheel for stopit: filename=stopit-1.1.2-py3-none-any.whl size=11959 sha256=5b187a829a8564644e10138ca13ac67ac7e69a71825637eb1847543fc8f1b08c
  Stored in directory: c:\users\tn753037\appdata\local\pip\cache\wheels\a8\bb\8f\6b9328d23c2dcedbfeb8498b9f650d55d463089e3b8fc0bfb2
Successfully built stopit
Installing collected packages: xgboost, stopit, update-checker, deap, tpot
Successfully installed deap-1.3.1 stopit-1.1.2 tpot-0.11.7 u

In [None]:
# TPOT
from tpot import TPOTRegressor

config_dict = { 'sklearn.neural_network.MLPRegressor':  { 'learning_rate_init' : np.linspace(0.015,1,100),
                    'hidden_layer_sizes' : list(range(10, 1000, 10)) }}
tpot_dnn_model = TPOTRegressor(generations=3, population_size=5,
                    verbosity=2, offspring_size=10,
                    scoring='r2', cv=2, config_dict = config_dict)

In [12]:
# TPOT
from tpot import TPOTRegressor

config_dict = { 'sklearn.neural_network.MLPRegressor':  { 'learning_rate_init' : np.linspace(0.015,.03,10),
                    'hidden_layer_sizes' : [(64,32),(32,32), (32, 16)]}}  #[(32,32, 32),(32,32), (32, 16), (32), (16,8)]
tpot_dnn_model = TPOTRegressor(generations=1, population_size=2,#
                    verbosity=2, offspring_size=2,
                    scoring='r2', cv=2, config_dict = config_dict)

In [13]:
from sklearn.multioutput import MultiOutputRegressor
TPOT_multi = MultiOutputRegressor(tpot_dnn_model)
TPOT_multi.fit(X_train_dev, y_train_dev)

print('TPOT EA on DNN: ')
print(TPOT_multi.score(X_val, y_val))

HBox(children=(HTML(value='Optimization Progress'), FloatProgress(value=0.0, max=4.0), HTML(value='')))


Generation 1 - Current best internal CV score: 0.7938345478237014

Best pipeline: MLPRegressor(CombineDFs(CombineDFs(input_matrix, input_matrix), input_matrix), hidden_layer_sizes=(32, 32), learning_rate_init=0.019999999999999997)


HBox(children=(HTML(value='Optimization Progress'), FloatProgress(value=0.0, max=4.0), HTML(value='')))


Generation 1 - Current best internal CV score: 0.8661029343068397

Best pipeline: MLPRegressor(input_matrix, hidden_layer_sizes=(64, 32), learning_rate_init=0.019999999999999997)


HBox(children=(HTML(value='Optimization Progress'), FloatProgress(value=0.0, max=4.0), HTML(value='')))


Generation 1 - Current best internal CV score: 0.8514794272705797

Best pipeline: MLPRegressor(input_matrix, hidden_layer_sizes=(64, 32), learning_rate_init=0.021666666666666667)


HBox(children=(HTML(value='Optimization Progress'), FloatProgress(value=0.0, max=4.0), HTML(value='')))


Generation 1 - Current best internal CV score: 0.937040454877772

Best pipeline: MLPRegressor(input_matrix, hidden_layer_sizes=(32, 32), learning_rate_init=0.015)


HBox(children=(HTML(value='Optimization Progress'), FloatProgress(value=0.0, max=4.0), HTML(value='')))


Generation 1 - Current best internal CV score: 0.9577056577519354

Best pipeline: MLPRegressor(MLPRegressor(input_matrix, hidden_layer_sizes=(32, 32), learning_rate_init=0.015), hidden_layer_sizes=(32, 32), learning_rate_init=0.019999999999999997)


HBox(children=(HTML(value='Optimization Progress'), FloatProgress(value=0.0, max=4.0), HTML(value='')))


Generation 1 - Current best internal CV score: 0.9562192247899501

Best pipeline: MLPRegressor(MLPRegressor(input_matrix, hidden_layer_sizes=(32, 32), learning_rate_init=0.018333333333333333), hidden_layer_sizes=(32, 16), learning_rate_init=0.03)


HBox(children=(HTML(value='Optimization Progress'), FloatProgress(value=0.0, max=4.0), HTML(value='')))


Generation 1 - Current best internal CV score: 0.9466699950240665

Best pipeline: MLPRegressor(input_matrix, hidden_layer_sizes=(64, 32), learning_rate_init=0.019999999999999997)
TPOT EA on DNN: 
0.8917692712446847


## Grid Search

In [6]:
# Develop GridSearchCV for the DNN

params_grid = {
    'hidden_layer_sizes':  [(64,32, 32),(64,64), (64, 32)], #[(64,32, 32),(64,32), (32, 16), (32), (16,8)]
    'activation': ['relu'],
    'solver': ['adam'], # max_iter=200
    'learning_rate_init': [0.02,0.01]
}
from sklearn.model_selection import GridSearchCV
grid_search_dnn_model = GridSearchCV(MLPRegressor(), param_grid= params_grid,
                        scoring='r2')
# Add alpha as hyperparatemeter later??
grid_search_dnn_model.fit(X_train_dev, y_train_dev)

# Suppress ignore messages??

# -------------------------- Performanec metrics
# https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html#sklearn.metrics.mean_squared_error
# RMSE_grid
y_pred_grid = grid_search_dnn_model.predict(X_val)
from sklearn.metrics import mean_squared_error, r2_score
rmse_grid = mean_squared_error(y_val, y_pred_grid, squared=False) # If False, returns RMSE value
print('rmse_grid: ', rmse_grid)
# R2_grid
r2_grid = r2_score(y_val, y_pred_grid) # R^2
print('r2_grid: ', r2_grid)


# ----------------------------
# Performance metric & accociated hyperparameters
hidden_layer_sizes = [item['hidden_layer_sizes'] for item in grid_search_dnn_model.cv_results_['params']]
hidden_layer_sizes2 =[]
for i in range (len(hidden_layer_sizes)):
    results = hidden_layer_sizes[i]
    hidden_layer_sizes2.append(results[0])
learning_rate_init = [item['learning_rate_init'] for item in grid_search_dnn_model.cv_results_['params']]
scores = list(grid_search_dnn_model.cv_results_['mean_test_score'])
df_grids_dnn_perfm = pd.DataFrame([hidden_layer_sizes2, learning_rate_init, scores]).T
df_grids_dnn_perfm.columns = ['hidden layer sizes', 'learning rate', 'Score']
df_grids_dnn_perfm

# Determine the best performance and associate hyperparameters
df_grids_best = pd.DataFrame([grid_search_dnn_model.best_score_, grid_search_dnn_model.best_params_['hidden_layer_sizes'],
                              grid_search_dnn_model.best_params_['learning_rate_init']]).T
print('Best Performance from Grid Search')
df_grids_best.columns = ['Best R^2', 'Number of hidden layers', 'Learning Rate']
df_grids_best

rmse_grid:  0.04773447079973153
r2_grid:  0.9334571783201253
Best Performance from Grid Search


In [7]:
df_grids_best

Unnamed: 0,Best R^2,Number of hidden layers,Learning Rate
0,0.927181,"(64, 32, 32)",0.01


# Bayesian Optimization

In [8]:
from sklearn.model_selection import cross_val_score
from hyperopt import hp
# Setting the space
space = {
    'hidden_layer_sizes': hp.uniform('hidden_layer_sizes', 128, 256),
    'learning_rate_init': hp.uniform('learning_rate_init', 0.01, .1),
}# r2_score
def objective_r2(params_bo):
    params_bo = {'hidden_layer_sizes': int(params_bo['hidden_layer_sizes']),
        'learning_rate_init': params_bo['learning_rate_init']}
    neural_net_bo = MLPRegressor(activation='relu', solver='adam', alpha=0.001,
                              batch_size='auto',  random_state=1, max_iter=100, verbose= False,
                              **params_bo)
    #
    neural_net_bo.fit(X_train_dev, y_train_dev)
    #r2_bo = (-1)* cross_val_score(neural_net_bo, X_train, y_train, scoring='r2', cv=5).mean()
    y_pred_bo = neural_net_bo.predict(X_val)
    r2_bo = r2_score(y_val, y_pred_bo) # If False, returns RMSE value
    print('r2_bo: ', r2_bo)
    print('hidden_layer_sizes, learning_rate_init', params_bo['hidden_layer_sizes'], params_bo['learning_rate_init'])
    return r2_bo

In [9]:
from hyperopt import fmin, tpe
best_result = fmin(fn=objective_r2,
            space=space,
            max_evals=100,
            rstate=np.random.RandomState(22),
            algo=tpe.suggest)

# print the best parameter
print("Best R^2 {:.3f} params {}", objective_r2(best_result), best_result)

r2_bo:                                                                                                                 
0.39840887907173556                                                                                                    
hidden_layer_sizes, learning_rate_init                                                                                 
134                                                                                                                    
0.09542400141507949                                                                                                    
r2_bo:                                                                                                                 
0.655816927844316                                                                                                      
hidden_layer_sizes, learning_rate_init                                                                                 
206                                     

0.4945677121182753                                                                                                     
hidden_layer_sizes, learning_rate_init                                                                                 
140                                                                                                                    
0.09206481676037932                                                                                                    
r2_bo:                                                                                                                 
0.3770795349054194                                                                                                     
hidden_layer_sizes, learning_rate_init                                                                                 
166                                                                                                                    
0.08756967748827167                     

hidden_layer_sizes, learning_rate_init                                                                                 
160                                                                                                                    
0.04870037371617788                                                                                                    
r2_bo:                                                                                                                 
0.5403084725025368                                                                                                     
hidden_layer_sizes, learning_rate_init                                                                                 
253                                                                                                                    
0.08926967872744813                                                                                                    
r2_bo:                                  

218                                                                                                                    
0.09945462353039781                                                                                                    
r2_bo:                                                                                                                 
0.4298423724129469                                                                                                     
hidden_layer_sizes, learning_rate_init                                                                                 
195                                                                                                                    
0.09605496066567845                                                                                                    
r2_bo:                                                                                                                 
0.4785519683376765                      

# Save the best model

In [None]:
# pickle.dump(grid_s, open('grid_s.sav', 'wb')) #  Grid Search

# load the model from disk
loaded_model = pickle.load(open('grid_search_dnn.sav', 'rb'))
result = loaded_model.score(X_test, y_test)
print(result)