In [1]:
import random
import math
import csv
import time
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.metrics import accuracy_score,top_k_accuracy_score, mean_squared_error, mean_absolute_error
from sklearn.calibration import CalibratedClassifierCV
from sklearn.multioutput import MultiOutputRegressor
from joblib import dump, load

In [2]:
# Run configuration: both values are interpolated into the training CSV file name
numexpr, noise = 100000, 0.01
csv_path = f"./data/train_data{numexpr}_noise{noise}.csv"

# The first CSV column is used as the DataFrame index
df = pd.read_csv(csv_path, index_col=0)

In [3]:
# Separate the two regression targets ('mean' and 'sd') from the feature columns
y = df[['mean', 'sd']]
X = df.drop(columns=['mean', 'sd'])

In [4]:
# Peek at the last rows of the target frame (columns 'mean' and 'sd')
y.tail()

Unnamed: 0,mean,sd
99994,7.95,0.097816
99995,6.53,0.066476
99996,4.84,0.075955
99997,8.31,0.029097
99998,6.44,0.160553


In [5]:
# Hold out 10% of the rows as a test set; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.1,
    random_state=101,
)

In [6]:
# Min-max scale the features. The scaler is fitted on the training split only and
# then applied to both splits, so no test-set statistics leak into training.
# (StandardScaler is the alternative that was considered here.)
scaler = MinMaxScaler().fit(X_train)
scaled_X_train = scaler.transform(X_train)
scaled_X_test = scaler.transform(X_test)

In [7]:
# Sanity check: compare the largest feature value of the first training row
# after and before scaling
scaled_max = np.max(scaled_X_train[0])
unscaled_max = np.max(X_train.iloc[0].values)
print(f"scaled: {scaled_max} not scaled: {unscaled_max}")

scaled: 0.17826481708395678 not scaled: 19.120040090653447


### NN layer and node configurations tested (hidden layer sizes):
- (64),(128),(256),(64,128),(128,256),(128,256,512),(64,32),(128,64),(128,64,32),(256,128,64),(256,64,32)
- (512,256,64),(512,128,64),(512,64,32),(256,128,64,32),(512,256,128,64),(512,128,64,32)
- (512,256,256,128,64),(512,1024,512,128,64),(512,256,128,64,32),(1024,512,256,128,64,32)
- (4096,2048,1024, 512, 256, 128, 64, 32),(8192,4096,2048,1024, 512, 256, 128, 64, 32)



In [11]:
#Define the base estimator and the grid search parameters
# `mor` was never defined in this notebook (it only existed as leftover kernel
# state), so GridSearchCV raised a NameError after "Restart & Run All".
# The settings below are the ones shown in the estimator repr printed by the
# grid.fit cell further down.
mlp_base = MLPRegressor(random_state=1, max_iter=500, tol=0.001, early_stopping=True,
                        hidden_layer_sizes=(256, 128, 64, 32), beta_2=0.05)
mor = MultiOutputRegressor(mlp_base)

# One sub-grid per solver. learning_rate and momentum are only used by 'sgd',
# beta_1/beta_2 only by 'adam', and 'lbfgs' uses none of them. Crossing all of
# them with every solver (the previous single dict) produced 243 candidates of
# which only 19 are distinct models.
shared_params = {'estimator__activation':['relu'],
                 'estimator__hidden_layer_sizes':[(256, 128, 64, 32)]}
param_grid = [
    {**shared_params,
     'estimator__solver':['lbfgs']},
    {**shared_params,
     'estimator__solver':['sgd'],
     'estimator__learning_rate':['constant', 'invscaling', 'adaptive'],
     'estimator__momentum':[0.25,0.5,0.75]},
    {**shared_params,
     'estimator__solver':['adam'],
     'estimator__beta_1':[0.1,0.25,0.5],
     'estimator__beta_2':[0.1,0.25,0.5]},
]
grid = GridSearchCV(mor,param_grid,n_jobs=6,cv=5)

In [12]:
#Define grid search parameters (wider search on the deeper network)
# activation, hidden_layer_sizes and alpha apply to every solver.
shared_params = {'estimator__activation':['logistic', 'tanh', 'relu'],
                 'estimator__hidden_layer_sizes':[(2048,1024, 512, 256, 128, 64, 32)],
                 'estimator__alpha':[0.000025,0.00005,0.000075,0.0001,0.00025,0.0005]}

# One sub-grid per solver. learning_rate and momentum are only used by 'sgd',
# beta_1/beta_2 only by 'adam', and 'lbfgs' uses none of them. The previous
# single dict expanded to 12150 candidates; only 630 of them are distinct models.
param_grid = [
    {**shared_params,
     'estimator__solver':['lbfgs']},
    {**shared_params,
     'estimator__solver':['sgd'],
     'estimator__learning_rate':['constant', 'invscaling', 'adaptive'],
     'estimator__momentum':[0.25,0.5,0.75]},
    {**shared_params,
     'estimator__solver':['adam'],
     'estimator__beta_1':[0.1,0.25,0.5,0.7,0.9],
     'estimator__beta_2':[0.025,0.05,0.1,0.25,0.5]},
]
grid = GridSearchCV(mor,param_grid,n_jobs=6,cv=5)

In [11]:
# Structure, solver and activation are settled; only the L2 penalty (alpha)
# is still searched, using every available core.
param_grid = {'estimator__alpha': [2.5e-05, 5e-05, 7.5e-05]}
grid = GridSearchCV(estimator=mor, param_grid=param_grid, cv=5, n_jobs=-1)

In [12]:
#Run Grid search
# Fits whichever `grid` object was defined last on the scaled training features;
# both target columns of y_train are passed as the regression targets.
grid.fit(scaled_X_train,y_train)

GridSearchCV(cv=5,
             estimator=MultiOutputRegressor(estimator=MLPRegressor(beta_2=0.05,
                                                                   early_stopping=True,
                                                                   hidden_layer_sizes=(256,
                                                                                       128,
                                                                                       64,
                                                                                       32),
                                                                   max_iter=500,
                                                                   random_state=1,
                                                                   tol=0.001)),
             n_jobs=-1,
             param_grid={'estimator__alpha': [2.5e-05, 5e-05, 7.5e-05],
                         'estimator__beta_1': [0.1, 0.3, 0.5],
                         'estimator__learni

In [13]:
#Display the parameter combination selected by the fitted grid search
grid.best_params_

{'estimator__alpha': 7.5e-05,
 'estimator__beta_1': 0.5,
 'estimator__learning_rate': 'constant'}

In [14]:
#Calculate predictions for the scaled test data; the metric cells read
#column 0 as the 'mean' prediction and column 1 as the 'sd' prediction
grid_pred = grid.predict(scaled_X_test)

In [15]:
# Report test-set regression errors of the grid-search model, one value per target
# (prediction column 0 is compared with 'mean', column 1 with 'sd')
mse_one, mse_two = [mean_squared_error(y_test[target], grid_pred[:, col])
                    for col, target in enumerate(('mean', 'sd'))]
print(f'MSE for first regressor: {mse_one} - second regressor: {mse_two}')

mae_one, mae_two = [mean_absolute_error(y_test[target], grid_pred[:, col])
                    for col, target in enumerate(('mean', 'sd'))]
print(f'MAE for first regressor: {mae_one} - second regressor: {mae_two}')

MSE for first regressor: 0.3167534225324732 - second regressor: 0.0020502793632905126
MAE for first regressor: 0.4073872071727252 - second regressor: 0.035520058200964905


In [95]:
# Same error report as the previous cell, kept from a separate run:
# MSE and MAE per target, prediction column 0 vs 'mean' and column 1 vs 'sd'
mse_one, mse_two = [mean_squared_error(y_test[target], grid_pred[:, col])
                    for col, target in enumerate(('mean', 'sd'))]
print(f'MSE for first regressor: {mse_one} - second regressor: {mse_two}')

mae_one, mae_two = [mean_absolute_error(y_test[target], grid_pred[:, col])
                    for col, target in enumerate(('mean', 'sd'))]
print(f'MAE for first regressor: {mae_one} - second regressor: {mae_two}')

MSE for first regressor: 0.28060971486827246 - second regressor: 0.0018864366452100646
MAE for first regressor: 0.3794850509413987 - second regressor: 0.034212836254247904


In [69]:
# Ground-truth targets of the test sample at position 10 (spot check)
y_test_single_diam = y_test['mean'].iat[10]
y_test_single_sd = y_test['sd'].iat[10]

In [70]:
# Predict the test sample at position 10 once and unpack both targets.
# Previously the identical predict call was executed twice, once per target.
# reshape(1, -1) turns the single row into the 2-D input that predict expects.
y_pred_single_diam, y_pred_single_sd = grid.predict(scaled_X_test[10].reshape(1, -1))[0]

In [71]:
# Compare ground truth and grid-search prediction for the spot-checked test sample
print(f"The actual diameter was {y_test_single_diam} and the model predicted {y_pred_single_diam}")
print(f"The actual sd was {y_test_single_sd} and the model predicted {y_pred_single_sd}")

The actual diameter was 0.72 and the model predicted 0.7514195039564033
The actual sd was 0.0916959415779256 and the model predicted 0.10909870671583663


## Optimal MLP Regression parameters
- random_state=1, max_iter=500,tol=0.001,early_stopping=True,activation='relu',alpha=0.000075,
                   hidden_layer_sizes=(2048, 1024, 512, 256, 128, 64, 32),solver='adam',beta_1=0.5,beta_2=0.05,learning_rate='constant'

In [8]:
# Final model: one MLP configuration, wrapped so that a separate copy is
# fitted for each target column
mlp_fin = MLPRegressor(
    hidden_layer_sizes=(2048, 1024, 512, 256, 128, 64, 32),
    activation='relu',
    solver='adam',
    alpha=7.5e-05,
    beta_1=0.5,
    beta_2=0.05,
    learning_rate='constant',
    max_iter=500,
    tol=0.001,
    early_stopping=True,
    random_state=1,
)
mor_fin = MultiOutputRegressor(estimator=mlp_fin)


In [None]:
#Fit the final MLP regressor and report how long training took
# (comment and message previously said "svc"/"SVM", left over from another model)
# perf_counter is a monotonic clock, so the interval is not affected by
# system clock adjustments during a long fit.
start = time.perf_counter()
mor_fin.fit(scaled_X_train,y_train)
end = time.perf_counter()
print(f'MLP fitting took {end-start} seconds')

In [13]:
# Ground-truth targets of the test sample at position 30 (spot check of the final model)
y_test_single_diam=y_test['mean'].iloc[30]
y_test_single_sd=y_test['sd'].iloc[30]

# Predict that sample once and unpack both targets. Previously the identical
# predict call was executed twice, once per target.
# reshape(1, -1) turns the single row into the 2-D input that predict expects.
y_pred_single_diam, y_pred_single_sd = mor_fin.predict(scaled_X_test[30].reshape(1, -1))[0]

In [14]:
# Compare ground truth and final-model prediction for the spot-checked test sample
print(f"The actual diameter was {y_test_single_diam} and the model predicted {y_pred_single_diam}")
print(f"The actual sd was {y_test_single_sd} and the model predicted {y_pred_single_sd}")

The actual diameter was 8.31 and the model predicted 8.583286900016857
The actual sd was 0.103156086904102 and the model predicted 0.1410328365411205


In [15]:
# Predict the whole scaled test set with the final model; the metric cell reads
# column 0 as the 'mean' prediction and column 1 as the 'sd' prediction
y_pred_fin=mor_fin.predict(scaled_X_test)

In [16]:
# Report test-set regression errors of the final model, one value per target
# (prediction column 0 is compared with 'mean', column 1 with 'sd')
mse_one, mse_two = [mean_squared_error(y_test[target], y_pred_fin[:, col])
                    for col, target in enumerate(('mean', 'sd'))]
print(f'MSE for first regressor: {mse_one} - second regressor: {mse_two}')

mae_one, mae_two = [mean_absolute_error(y_test[target], y_pred_fin[:, col])
                    for col, target in enumerate(('mean', 'sd'))]
print(f'MAE for first regressor: {mae_one} - second regressor: {mae_two}')

MSE for first regressor: 0.8142250315092586 - second regressor: 0.004178018869916192
MAE for first regressor: 0.6889790564782604 - second regressor: 0.05379979407605083


## Save model with joblib for persistence

In [17]:
# The model was trained on MinMax-scaled features, so whoever loads it needs the
# same fitted scaler to prepare new inputs. Persist the scaler next to the model;
# previously only the model was saved and the fitted scaling was lost.
dump(scaler, 'MLP_REGR_scaler.joblib')
# Keep the model dump as the last expression so the cell output is unchanged
dump(mor_fin, 'MLP_REGR.joblib')

['MLP_REGR.joblib']