# Artificial Neural Networks

## Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.neural_network import MLPClassifier
from collections import Counter
from imblearn.over_sampling import SMOTE, ADASYN, SMOTENC
from imblearn.combine import SMOTETomek, SMOTEENN 
import matplotlib.pyplot as plt
from numpy import where
import time

## Read the data from CSV

In [2]:
# Load the pre-split training and test tables.
df_train = pd.read_csv('../data/df_train.csv')
df_test = pd.read_csv('../data/df_test.csv')

# Split each table into features (every column except 'kill') and the
# 'kill' target, keeping only the underlying NumPy arrays.
X_train = df_train.drop(columns='kill').values
y_train = df_train['kill'].values
X_test = df_test.drop(columns='kill').values
y_test = df_test['kill'].values

In [3]:
# Standardise the features. The scaler is fitted on the training split only
# and the same fitted transform is then applied to both splits.
# Alternative scaler: MinMaxScaler()
# NOTE(review): X_train / X_test are overwritten here, so re-running this cell
# without re-running the loading cell fits the scaler on already-scaled data.
scaler = StandardScaler().fit(X_train)

X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [4]:
# Baseline multilayer perceptron with scikit-learn's default hyper-parameters;
# random_state is fixed so repeated runs give the same result.
model = MLPClassifier(random_state=42)

In [5]:
# Train the baseline network on the scaled training data.
model.fit(X_train,y_train)

MLPClassifier(random_state=42)

In [6]:
# Predict test-set labels with the baseline model.
# NOTE(review): `predictions` is not read again in the visible notebook;
# fit_and_print recomputes its own predictions.
predictions = model.predict(X_test)

In [7]:
def fit_and_print(model, X_train, y_train, X_eval=None, y_eval=None):
    """Fit ``model`` on the training data and print hold-out metrics.

    Parameters
    ----------
    model : object with ``fit(X, y)`` and ``predict(X)``
        Fitted in place, so the caller's estimator is retrained.
    X_train, y_train
        Training features and labels passed to ``model.fit``.
    X_eval, y_eval : optional
        Evaluation features and labels. When both are omitted, the
        notebook-level ``X_test`` / ``y_test`` are used, which is what this
        function did before these parameters existed.

    Raises
    ------
    ValueError
        If only one of ``X_eval`` / ``y_eval`` is supplied.

    Returns
    -------
    None
        Prints the confusion matrix, the classification report, and accuracy,
        precision, recall and F1 (each rounded to 3 decimals).
    """
    if (X_eval is None) != (y_eval is None):
        raise ValueError("X_eval and y_eval must be provided together")
    if X_eval is None:
        # Backward-compatible default: evaluate on the notebook's test split.
        X_eval, y_eval = X_test, y_test

    model.fit(X_train, y_train)
    y_pred = model.predict(X_eval)

    print("Confusion Matrix: \n", confusion_matrix(y_eval, y_pred))
    print("Classification Report: \n", classification_report(y_eval, y_pred))
    print("Accuracy: ", round(accuracy_score(y_eval, y_pred),3))
    print("Precision:", round(precision_score(y_eval, y_pred),3))
    print("Recall:", round(recall_score(y_eval, y_pred),3))
    print("f1: ", round(f1_score(y_eval, y_pred),3))

In [8]:
# Refit the baseline (fit_and_print calls model.fit) and print its test-set metrics.
fit_and_print(model,X_train,y_train)

Confusion Matrix: 
 [[19781   440]
 [ 2111   709]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.90      0.98      0.94     20221
           1       0.62      0.25      0.36      2820

    accuracy                           0.89     23041
   macro avg       0.76      0.61      0.65     23041
weighted avg       0.87      0.89      0.87     23041

Accuracy:  0.889
Precision: 0.617
Recall: 0.251
f1:  0.357


## GridSearch

In [9]:
# Hyper-parameter search space for the MLP:
# 3 learning rates x 2 solvers x 2 alphas x 3 activations = 36 candidates.
# early_stopping and max_iter are single-valued, i.e. fixed for every candidate.
param_grid = dict(
    learning_rate_init=[0.001, 0.01, 0.1],
    solver=['sgd', 'adam'],
    alpha=[0.0001, 0.001],
    early_stopping=[True],
    activation=['logistic', 'tanh', 'relu'],
    max_iter=[1000],
)

In [10]:
# Exhaustive 5-fold cross-validated search over param_grid, using all CPU cores.
# NOTE(review): no `scoring` is passed, so candidates are presumably ranked by
# the classifier's default score (accuracy). The training labels are roughly
# 88% class 0, so confirm accuracy is the metric you want to select on.
grid = GridSearchCV(
    estimator=MLPClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=2,
)

In [11]:
# May take a while: 36 candidates x 5 folds = 180 MLP fits.
grid.fit(X_train,y_train)

Fitting 5 folds for each of 36 candidates, totalling 180 fits


GridSearchCV(cv=5, estimator=MLPClassifier(random_state=42), n_jobs=-1,
             param_grid={'activation': ['logistic', 'tanh', 'relu'],
                         'alpha': [0.0001, 0.001], 'early_stopping': [True],
                         'learning_rate_init': [0.001, 0.01, 0.1],
                         'max_iter': [1000], 'solver': ['sgd', 'adam']},
             verbose=2)

In [12]:
# Parameter combination with the best mean cross-validated score.
grid.best_params_

{'activation': 'relu',
 'alpha': 0.001,
 'early_stopping': True,
 'learning_rate_init': 0.01,
 'max_iter': 1000,
 'solver': 'adam'}

In [13]:
# Keep a handle on the estimator GridSearchCV refit with the best parameters.
# This is the same object the search holds, not a copy.
best_grid = grid.best_estimator_
best_grid

MLPClassifier(alpha=0.001, early_stopping=True, learning_rate_init=0.01,
              max_iter=1000, random_state=42)

In [14]:
# Mean cross-validated score of the best candidate.
# NOTE(review): presumably accuracy, since no `scoring` was given to the search;
# compare it with the ~0.88 share of class 0 in the training labels.
grid.best_score_

0.8937693868954162

In [15]:
# Test-set predictions from the search's refit best estimator.
# NOTE(review): this module-level `grid_predictions` is not read again in the
# visible notebook.
grid_predictions = grid.predict(X_test)

In [16]:
# Refit the tuned estimator on the original (imbalanced) training set and
# print its test-set metrics.
fit_and_print(best_grid,X_train,y_train)

Confusion Matrix: 
 [[19857   364]
 [ 2179   641]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.90      0.98      0.94     20221
           1       0.64      0.23      0.34      2820

    accuracy                           0.89     23041
   macro avg       0.77      0.60      0.64     23041
weighted avg       0.87      0.89      0.87     23041

Accuracy:  0.89
Precision: 0.638
Recall: 0.227
f1:  0.335


In [17]:
# NOTE(review): same call as the preceding cell; the saved output is identical,
# so this refit adds no new information.
fit_and_print(best_grid,X_train,y_train)

Confusion Matrix: 
 [[19857   364]
 [ 2179   641]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.90      0.98      0.94     20221
           1       0.64      0.23      0.34      2820

    accuracy                           0.89     23041
   macro avg       0.77      0.60      0.64     23041
weighted avg       0.87      0.89      0.87     23041

Accuracy:  0.89
Precision: 0.638
Recall: 0.227
f1:  0.335


In [18]:
# NOTE(review): third identical refit and evaluation of the tuned estimator;
# the saved output matches the two preceding cells.
fit_and_print(best_grid,X_train,y_train)

Confusion Matrix: 
 [[19857   364]
 [ 2179   641]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.90      0.98      0.94     20221
           1       0.64      0.23      0.34      2820

    accuracy                           0.89     23041
   macro avg       0.77      0.60      0.64     23041
weighted avg       0.87      0.89      0.87     23041

Accuracy:  0.89
Precision: 0.638
Recall: 0.227
f1:  0.335


In [19]:
def calculate_pred_and_inf_time(best_grid, X_test):
    """Time one ``best_grid.predict(X_test)`` call and print it in milliseconds.

    Parameters
    ----------
    best_grid : object with a ``predict(X)`` method
        The fitted model whose inference time is measured.
    X_test
        Samples passed to ``predict`` in a single call.

    Returns
    -------
    None
        The predictions are discarded; only the elapsed time is printed.
    """
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # short intervals; time.time() follows the system clock and can jump.
    started = time.perf_counter()

    # Run inference once; only the duration matters here.
    best_grid.predict(X_test)

    elapsed = time.perf_counter() - started
    print(f'Inference Time: {1000*elapsed:.3f} miliseconds')

# Time a single predict() pass of the tuned model over the whole test set.
calculate_pred_and_inf_time(best_grid, X_test)

Inference Time: 116.641 miliseconds


## Resampling

### SMOTE

In [20]:
# Rebalance the training set with SMOTE, then refit and evaluate the tuned MLP.
# Nothing is plotted here; only class counts and metrics are printed.

# class distribution before resampling
counter = Counter(y_train)
print(counter)
# resample the training data only; X_test / y_test are left as they are
oversample = SMOTE(random_state=42)
X_train_rel, y_train_rel = oversample.fit_resample(X_train, y_train)
# class distribution after resampling
counter = Counter(y_train_rel)
print(counter)

# fit_and_print calls model.fit, so best_grid is retrained in place on the
# resampled data before being scored on the test split
fit_and_print(best_grid, X_train_rel, y_train_rel)

# times the model as refit just above
calculate_pred_and_inf_time(best_grid, X_test)

Counter({0: 114988, 1: 15577})
Counter({0: 114988, 1: 114988})
[CV] END activation=logistic, alpha=0.0001, early_stopping=True, learning_rate_init=0.01, max_iter=1000, solver=adam; total time= 1.2min
[CV] END activation=tanh, alpha=0.0001, early_stopping=True, learning_rate_init=0.01, max_iter=1000, solver=sgd; total time= 1.1min
[CV] END activation=tanh, alpha=0.001, early_stopping=True, learning_rate_init=0.01, max_iter=1000, solver=sgd; total time=  49.9s
[CV] END activation=relu, alpha=0.0001, early_stopping=True, learning_rate_init=0.001, max_iter=1000, solver=adam; total time= 1.8min
[CV] END activation=logistic, alpha=0.001, early_stopping=True, learning_rate_init=0.001, max_iter=1000, solver=sgd; total time=  29.2s
[CV] END activation=logistic, alpha=0.001, early_stopping=True, learning_rate_init=0.01, max_iter=1000, solver=sgd; total time=  34.1s
[CV] END activation=logistic, alpha=0.001, early_stopping=True, learning_rate_init=0.1, max_iter=1000, solver=adam; total time=  35.

### ADASYN

In [21]:
# Rebalance the training set with ADASYN, then refit and evaluate the tuned MLP.
# Nothing is plotted here; only class counts and metrics are printed.

# class distribution before resampling
counter = Counter(y_train)
print(counter)
# resample the training data only; X_test / y_test are left as they are
oversample = ADASYN(random_state=42)
X_train_rel, y_train_rel = oversample.fit_resample(X_train, y_train)
# class distribution after resampling
counter = Counter(y_train_rel)
print(counter)

# fit_and_print calls model.fit, so best_grid is retrained in place on the
# resampled data before being scored on the test split
fit_and_print(best_grid, X_train_rel, y_train_rel)

# times the model as refit just above
calculate_pred_and_inf_time(best_grid, X_test)

Counter({0: 114988, 1: 15577})
[CV] END activation=logistic, alpha=0.0001, early_stopping=True, learning_rate_init=0.01, max_iter=1000, solver=adam; total time= 1.1min
[CV] END activation=tanh, alpha=0.0001, early_stopping=True, learning_rate_init=0.001, max_iter=1000, solver=adam; total time= 1.9min
[CV] END activation=relu, alpha=0.0001, early_stopping=True, learning_rate_init=0.001, max_iter=1000, solver=sgd; total time= 1.9min
[CV] END activation=logistic, alpha=0.0001, early_stopping=True, learning_rate_init=0.001, max_iter=1000, solver=adam; total time= 1.1min
[CV] END activation=tanh, alpha=0.0001, early_stopping=True, learning_rate_init=0.001, max_iter=1000, solver=adam; total time= 2.0min
[CV] END activation=relu, alpha=0.0001, early_stopping=True, learning_rate_init=0.001, max_iter=1000, solver=adam; total time= 1.8min
[CV] END activation=logistic, alpha=0.0001, early_stopping=True, learning_rate_init=0.01, max_iter=1000, solver=sgd; total time= 1.0min
[CV] END activation=log

### SMOTE and Tomek Links (TL)

In [22]:
# Rebalance the training set with SMOTETomek (SMOTE combined with Tomek
# links), then refit and evaluate the tuned MLP.
# Nothing is plotted here; only class counts and metrics are printed.

# class distribution before resampling
counter = Counter(y_train)
print(counter)
# resample the training data only; X_test / y_test are left as they are
# (despite its name, `oversample` holds a combined resampler here)
oversample = SMOTETomek(random_state=42)
X_train_rel, y_train_rel = oversample.fit_resample(X_train, y_train)
# class distribution after resampling
counter = Counter(y_train_rel)
print(counter)

# fit_and_print calls model.fit, so best_grid is retrained in place on the
# resampled data before being scored on the test split
fit_and_print(best_grid, X_train_rel, y_train_rel)

# times the model as refit just above
calculate_pred_and_inf_time(best_grid, X_test)

Counter({0: 114988, 1: 15577})
Counter({0: 111748, 1: 111748})
Confusion Matrix: 
 [[15338  4883]
 [  723  2097]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.95      0.76      0.85     20221
           1       0.30      0.74      0.43      2820

    accuracy                           0.76     23041
   macro avg       0.63      0.75      0.64     23041
weighted avg       0.87      0.76      0.79     23041

Accuracy:  0.757
Precision: 0.3
Recall: 0.744
f1:  0.428
Inference Time: 157.001 miliseconds


### SMOTE and Edited Nearest Neighbours (ENN)

In [24]:
# Rebalance the training set with SMOTEENN (SMOTE combined with Edited
# Nearest Neighbours), then refit and evaluate the tuned MLP.
# Nothing is plotted here; only class counts and metrics are printed.

# class distribution before resampling
counter = Counter(y_train)
print(counter)
# resample the training data only; X_test / y_test are left as they are
# (despite its name, `oversample` holds a combined resampler here)
oversample = SMOTEENN(random_state=42)
X_train_rel, y_train_rel = oversample.fit_resample(X_train, y_train)
# class distribution after resampling
counter = Counter(y_train_rel)
print(counter)

# fit_and_print calls model.fit, so best_grid is retrained in place on the
# resampled data before being scored on the test split
fit_and_print(best_grid, X_train_rel, y_train_rel)

# times the model as refit just above
calculate_pred_and_inf_time(best_grid, X_test)

Counter({0: 114988, 1: 15577})
Counter({1: 96382, 0: 81992})
Confusion Matrix: 
 [[14616  5605]
 [  608  2212]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.96      0.72      0.82     20221
           1       0.28      0.78      0.42      2820

    accuracy                           0.73     23041
   macro avg       0.62      0.75      0.62     23041
weighted avg       0.88      0.73      0.77     23041

Accuracy:  0.73
Precision: 0.283
Recall: 0.784
f1:  0.416
Inference Time: 35.741 miliseconds
