In [8]:
# importing stuff
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.model_selection import cross_val_score
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics 
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score, roc_auc_score

#importing data
data = pd.read_csv('final_data.csv', index_col=0, header=0)
data.columns

Index(['year', 'agent', 'pay_method', 'state', 'zip_code', 'status', 'months',
       'pay_method_ACH', 'pay_method_credit card', 'pay_method_paper check',
       'region_central', 'region_north_east', 'region_rocky', 'region_south',
       'region_south_east', 'region_west', 'status_Active', 'status_Decline',
       'status_Returned', 'status_Returned_90', 'income', 'Y'],
      dtype='object')

In [9]:
# Dropping rows that will not be used in the logistic regression
data = data.drop(['agent', 'pay_method', 'state', 'months', 'zip_code', 'status'], axis = 1)

In [10]:
# Splitting the dataset into the features and target variables
X = data.iloc[:,:-1]
y = data['Y']

# Splitting the data using the train_test_split sklearn package
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=2019)

In [4]:
# Using the SMOTE package to deal with the imbalanced dataset
random = RandomOverSampler(random_state=2019)
X_train_ros, y_train_ros = random.fit_resample(X_train, y_train)

In [11]:
# Creating the Neural Network
scaler = StandardScaler()
scaler.fit(X_train)

# Transformation of the data
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Using the 'MLPClassifier'
mlp = MLPClassifier(hidden_layer_sizes=(24,24,24), max_iter=500)
mlp = mlp.fit(X_train, y_train)

# 'y_pred' variable creation
y_pred = mlp.predict(X_test)

# Printing a confusion matrix
print(confusion_matrix(y_test,y_pred))

[[7635 1268]
 [2093 2646]]


In [12]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.78      0.86      0.82      8903
           1       0.68      0.56      0.61      4739

    accuracy                           0.75     13642
   macro avg       0.73      0.71      0.72     13642
weighted avg       0.75      0.75      0.75     13642



In [13]:
# Printing the accuracy, precision, recall, and f1 score 
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print("Precision:",metrics.precision_score(y_test, y_pred))
print("Recall:",metrics.recall_score(y_test, y_pred))
print("F1 Score:", metrics.f1_score(y_test, y_pred, average='weighted'))
print("AUC:", roc_auc_score(y_test,y_pred))
print("MCC:",metrics.matthews_corrcoef(y_test, y_pred))

Accuracy: 0.7536285002199091
Precision: 0.6760347470618293
Recall: 0.5583456425406204
F1 Score: 0.7473384364242114
AUC: 0.7079608702425667
MCC: 0.437825418237285
