# Neural Network - MLP Classification

In [20]:
# Imports
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import category_encoders as ce
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.inspection import permutation_importance
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import RobustScaler
from skopt import BayesSearchCV

## Read Datasets

In [2]:
# Directory that holds the balanced CSV exports. Change this single line to
# point the notebook at another location instead of editing every read below.
DATA_DIR = 'C:/com748/code/com748/data/processed/balanced_dataset'

# One frame per snapshot file; the numeric suffix mirrors the file name
# (presumably the number of days before the labelled date -- TODO confirm).
df_day_minus_0 = pd.read_csv(f'{DATA_DIR}/day_minus_0.csv')
df_day_minus_1 = pd.read_csv(f'{DATA_DIR}/day_minus_1.csv')
df_day_minus_2 = pd.read_csv(f'{DATA_DIR}/day_minus_2.csv')
df_day_minus_7 = pd.read_csv(f'{DATA_DIR}/day_minus_7.csv')

  df_day_minus_0 = pd.read_csv('C:/com748/code/com748/data/processed/balanced_dataset/day_minus_0.csv')


Handle Missing values

In [3]:
def _drop_rows_with_nulls(frame, unused_columns):
    """Remove `unused_columns`, then keep only the rows without missing values.

    Parameters
    ----------
    frame : pandas.DataFrame
        Raw dataset as loaded from CSV.
    unused_columns : list of str
        Columns to discard before the null check.

    Returns
    -------
    pandas.DataFrame
        The remaining columns plus a `null_columns` helper column holding the
        per-row count of missing values (0 for every row that is kept).
    """
    trimmed = frame.drop(columns=unused_columns)
    # Count the nulls per row so that incomplete rows can be filtered out.
    trimmed['null_columns'] = trimmed.isnull().sum(axis=1)
    return trimmed[trimmed.null_columns == 0]


# The day-minus-0 frame is the only one from which 'file_date' is dropped too.
df_day_minus_0 = _drop_rows_with_nulls(df_day_minus_0, ['date', 'file_date'])
df_day_minus_1 = _drop_rows_with_nulls(df_day_minus_1, ['date'])
df_day_minus_2 = _drop_rows_with_nulls(df_day_minus_2, ['date'])
df_day_minus_7 = _drop_rows_with_nulls(df_day_minus_7, ['date'])

Split into features (X) and target (Y)

In [4]:
# Columns excluded from the model inputs: the target itself, the serial number
# and the helper null counter added while cleaning.
non_feature_columns = ['failure', 'serial_number', 'null_columns']

# Features (X_*) and target (Y_*) for each dataset, built side by side.
X_0, Y_0 = df_day_minus_0.drop(columns=non_feature_columns), df_day_minus_0['failure']
X_1, Y_1 = df_day_minus_1.drop(columns=non_feature_columns), df_day_minus_1['failure']
X_2, Y_2 = df_day_minus_2.drop(columns=non_feature_columns), df_day_minus_2['failure']
X_7, Y_7 = df_day_minus_7.drop(columns=non_feature_columns), df_day_minus_7['failure']

Split into training and testing sets, then ordinal-encode the categorical columns

In [5]:
# One ordinal encoder per dataset, so each learned category -> integer mapping
# stays tied to the data it was fitted on.
encoder_0 = ce.OrdinalEncoder(cols=['model', 'capacity_bytes'])
encoder_1 = ce.OrdinalEncoder(cols=['model', 'capacity_bytes'])
encoder_2 = ce.OrdinalEncoder(cols=['model', 'capacity_bytes'])
encoder_7 = ce.OrdinalEncoder(cols=['model', 'capacity_bytes'])

# 80/20 hold-out split with a fixed seed so the partition is repeatable.
X_train_0, X_test_0, y_train_0, y_test_0 = train_test_split(X_0, Y_0, test_size=0.2, random_state=42)
X_train_1, X_test_1, y_train_1, y_test_1 = train_test_split(X_1, Y_1, test_size=0.2, random_state=42)
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(X_2, Y_2, test_size=0.2, random_state=42)
X_train_7, X_test_7, y_train_7, y_test_7 = train_test_split(X_7, Y_7, test_size=0.2, random_state=42)

# Fit each encoder on the training rows only and re-apply that mapping to the
# matching test rows, so nothing is learned from the held-out data.
X_train_0 = encoder_0.fit_transform(X_train_0)
X_test_0 = encoder_0.transform(X_test_0)

X_train_1 = encoder_1.fit_transform(X_train_1)
X_test_1 = encoder_1.transform(X_test_1)

X_train_2 = encoder_2.fit_transform(X_train_2)
X_test_2 = encoder_2.transform(X_test_2)

# Use encoder_7 here: refitting encoder_0 on the day-7 data would overwrite
# the mapping it learned from the day-0 data and leave encoder_7 unfitted.
X_train_7 = encoder_7.fit_transform(X_train_7)
X_test_7 = encoder_7.transform(X_test_7)

Data Scaling (Using robust scaler to handle outliers)

In [6]:
# RobustScaler is imported in the notebook's imports cell.

# Summary statistics before scaling, for comparison with the scaled frame below.
print(X_train_0.describe())

# Fit the scaler on the training features only; the fitted `r_scaler` is kept
# so the very same transformation can be applied to the test features later.
r_scaler = RobustScaler()

# Work on a copy so the unscaled training frame stays available.
scaled_X_train_0 = X_train_0.copy()
scaled_X_train_0[scaled_X_train_0.columns] = r_scaler.fit_transform(X_train_0)

print(scaled_X_train_0.describe())


              model  capacity_bytes  smart_1_normalized   smart_1_raw  \
count  21612.000000    21612.000000        21612.000000  2.161200e+04   
mean       2.760087        2.085045           96.274986  1.223515e+08   
std        1.841142        1.131281           19.810505  7.039539e+07   
min        1.000000        1.000000           37.000000  0.000000e+00   
25%        1.000000        1.000000           80.000000  6.115209e+07   
50%        2.000000        2.000000          100.000000  1.227242e+08   
75%        4.000000        3.000000          116.000000  1.831709e+08   
max        7.000000        5.000000          120.000000  2.441335e+08   

       smart_3_normalized  smart_3_raw  smart_4_normalized   smart_4_raw  \
count        21612.000000      21612.0        21612.000000  21612.000000   
mean            92.509856          0.0           99.998843     15.467472   
std              3.375152          0.0            0.055668     72.197321   
min             83.000000          0.0

Hyperparameter Tuning

In [8]:
# Initialize the MLP model
mlp_model_0_cv = MLPClassifier(random_state=90)

# Candidate values for the randomized search (4 * 2 * 2 * 3 * 2 * 3 = 288
# combinations in total, of which `n_iter` are sampled).
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 100)],
    'activation': ['logistic', 'tanh'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate': ['constant', 'adaptive'],
    'max_iter': [100, 500, 1000]
}

# 5-fold cross-validated search scored by ROC AUC. `random_state` pins which
# 40 candidates are drawn; without it every run samples a different subset and
# the reported best parameters cannot be reproduced.
rand_search_0 = RandomizedSearchCV(
    mlp_model_0_cv,
    param_grid,
    n_iter=40,
    cv=5,
    scoring='roc_auc',
    random_state=90,
)
rand_search_0.fit(scaled_X_train_0, y_train_0)

print(rand_search_0.best_params_)

# Keep the full cross-validation table for later inspection.
df_randsearch_results_0 = pd.DataFrame(rand_search_0.cv_results_)



{'solver': 'adam', 'max_iter': 1000, 'learning_rate': 'constant', 'hidden_layer_sizes': (100,), 'alpha': 0.0001, 'activation': 'tanh'}


# Train Models

Day minus 0

In [10]:
# Hyperparameters reported by the randomized search:
# {'solver': 'adam', 'max_iter': 1000, 'learning_rate': 'constant', 'hidden_layer_sizes': (100,), 'alpha': 0.0001, 'activation': 'tanh'}
best_params_0 = {
    'solver': 'adam',
    'max_iter': 1000,
    'learning_rate': 'constant',
    'hidden_layer_sizes': (100,),
    'alpha': 0.0001,
    'activation': 'tanh',
}

# Train the final classifier on the scaled training features.
mlp_model_0 = MLPClassifier(random_state=90, **best_params_0)
mlp_model_0.fit(scaled_X_train_0, y_train_0)

# Scale the held-out features with the scaler that was fitted on the training set.
scaled_X_test_0 = X_test_0.copy()
scaled_X_test_0[scaled_X_test_0.columns] = r_scaler.transform(scaled_X_test_0[scaled_X_test_0.columns])

# Confusion matrix of the hard predictions on the test set.
test_predictions_0 = mlp_model_0.predict(scaled_X_test_0)
cm = confusion_matrix(y_test_0, test_predictions_0)
print(cm)

# Accuracy of the model on the test set.
test_accuracy_0 = mlp_model_0.score(scaled_X_test_0, y_test_0)
print("Test Accuracy:", test_accuracy_0)

[[2307  373]
 [ 651 2072]]
Test Accuracy: 0.8104756616694428


In [15]:
# permutation_importance is imported in the notebook's imports cell.

# Permutation importance on the held-out data, scored by ROC AUC: each feature
# is shuffled 30 times with a fixed seed so the result is repeatable.
r = permutation_importance(mlp_model_0, scaled_X_test_0, y_test_0,
                           n_repeats=30,
                           random_state=0,
                           scoring='roc_auc')

# Walk the features from highest to lowest mean importance and print only
# those whose mean stays above zero after subtracting two standard deviations.
for i in r.importances_mean.argsort()[::-1]:
    if r.importances_mean[i] - 2 * r.importances_std[i] > 0:
        print(f"{scaled_X_test_0.columns[i]:<8}\t\t"
              f"{r.importances_mean[i]:.3f}"
              f" +/- {r.importances_std[i]:.3f}")

smart_187_raw		0.114 +/- 0.005
smart_198_raw		0.100 +/- 0.004
smart_5_raw		0.096 +/- 0.005
smart_187_normalized		0.059 +/- 0.003
smart_197_raw		0.054 +/- 0.003
smart_5_normalized		0.048 +/- 0.003
smart_188_raw		0.029 +/- 0.002
smart_242_raw		0.023 +/- 0.002
smart_240_raw		0.023 +/- 0.002
capacity_bytes		0.018 +/- 0.002
smart_241_raw		0.017 +/- 0.002
smart_4_raw		0.011 +/- 0.001
smart_192_raw		0.010 +/- 0.001
smart_198_normalized		0.008 +/- 0.001
smart_197_normalized		0.008 +/- 0.001
smart_7_raw		0.008 +/- 0.001
smart_199_raw		0.006 +/- 0.001
smart_7_normalized		0.005 +/- 0.001
smart_193_normalized		0.004 +/- 0.001
smart_193_raw		0.004 +/- 0.001
smart_9_normalized		0.003 +/- 0.001
smart_9_raw		0.003 +/- 0.001
smart_12_raw		0.003 +/- 0.001
smart_1_normalized		0.003 +/- 0.001
smart_3_normalized		0.003 +/- 0.001
model   		0.003 +/- 0.001


In [27]:
# Collect the per-feature permutation importances into one table, indexed by
# feature name and ordered from most to least important.
mean, std = r.importances_mean, r.importances_std
permutation_importances_0 = (
    pd.DataFrame(
        {'mean_importance': mean, 'std_importance': std},
        index=scaled_X_test_0.columns,
    )
    # Expose the feature name as a regular column as well as the index.
    .assign(feature=lambda frame: frame.index)
    .sort_values(by=['mean_importance'], ascending=False)
)