In [19]:
# Imports
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, cohen_kappa_score

In [20]:
# Import and clean the data
# NOTE(review): "Something" is a placeholder name and the list has no entry for
# a bare-nuclei feature. If wbco.csv is the UCI Wisconsin breast-cancer file
# (id + 9 features + class), these labels may be shifted relative to the real
# columns -- confirm against the file before trusting per-feature names.
column_names = ["Clump_Thickness", "Uniformity_of_Cell_Size", "Uniformity_of_Cell_Shape", "Marginal_Adhesion", "Single_Epithelial_Cell_Size", "Bland_Chromatin", "Normal_Nucleoli", "Mitoses", "Something", "Cancer"]
data_raw = pd.read_csv(os.path.abspath('wbco.csv'), names=column_names)

# Rows holding a "?" in any column are discarded. A boolean mask is used rather
# than a label-based drop so that, should the index contain repeated labels,
# only the offending rows are removed and not every row sharing their label.
has_question_mark = data_raw.isin(["?"]).any(axis=1)
rows_with_question_mark = data_raw.index[has_question_mark]
# One whole-frame cast covers every column, including "Cancer".
data = data_raw.loc[~has_question_mark].astype('int')

# From here on column_names lists the predictors only; the last entry
# ("Cancer") is the target and is selected by name below.
column_names = column_names[:-1]

# Set seed
# NOTE(review): `seed` is not referenced anywhere in the visible cells -- the
# split and every model pass random_state=42 directly.
seed = 7

# Normalize the dataset
# MinMaxScaler rescales each column to [0, 1] (its default feature_range).
# NOTE(review): both scalers are fit on the full dataset before the train/test
# split, so test rows influence the scaling parameters.
scaler_input = MinMaxScaler()
scaler_output = MinMaxScaler()

# Choose predictors and output
X = scaler_input.fit_transform(data[column_names].values)
# The target is reshaped to a single column because the scaler expects 2-D
# input; y therefore comes out as a 2-D float array with one column.
y = scaler_output.fit_transform(data["Cancer"].values.reshape(-1, 1))

In [21]:
# Train test split
# 20% of the rows are held out for evaluation; random_state pins the shuffle.
split_parts = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = split_parts

# Collapse the single-column training target to 1-D before fitting.
# y_test is left exactly as returned by the split.
y_train = np.squeeze(y_train)

In [22]:
## Train models

def _fit_mlp(hidden_layer_sizes, max_iter):
    """Create an MLPClassifier and fit it on the training split.

    Parameters
    ----------
    hidden_layer_sizes : tuple of int
        Width of each hidden layer, in order.
    max_iter : int
        Iteration cap handed to the classifier.

    Returns
    -------
    MLPClassifier
        The estimator after fitting on X_train / y_train.
    """
    # random_state is the same for every model, so the architectures are
    # compared under the same seed.
    model = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, random_state=42, max_iter=max_iter)
    model.fit(X_train, y_train)
    return model


# Each successive model widens one more layer and gets 50 extra iterations.

# Model 1
regr_1 = _fit_mlp((100, 100, 100, 50), max_iter=500)

# Model 2
regr_2 = _fit_mlp((200, 100, 100, 50), max_iter=550)

# Model 3
regr_3 = _fit_mlp((200, 200, 100, 50), max_iter=600)

# Model 4
regr_4 = _fit_mlp((200, 200, 200, 50), max_iter=650)

# Model 5
regr_5 = _fit_mlp((300, 200, 200, 50), max_iter=700)

In [23]:
# Get predictions for each model
# The five fitted classifiers are evaluated on the same held-out rows, in
# model order, and the results are unpacked into one name per model.
y_pred_1, y_pred_2, y_pred_3, y_pred_4, y_pred_5 = (
    fitted.predict(X_test)
    for fitted in (regr_1, regr_2, regr_3, regr_4, regr_5)
)

In [24]:
## Compute classification metrics

# (printed label, scoring function) pairs, in the order they are reported.
_METRIC_TABLE = (
    ("Accuracy", accuracy_score),
    ("Recall", recall_score),
    ("Precision Score", precision_score),
    ("F1-Score", f1_score),
    ("Kappa Score", cohen_kappa_score),
)


def _report_metrics(header, y_true, y_pred):
    """Print and return the classification metrics for one model.

    Parameters
    ----------
    header : str
        Line printed before the metrics (e.g. "MODEL 1").
    y_true : array-like
        Reference labels.
    y_pred : array-like
        Labels predicted by the model.

    Returns
    -------
    tuple of float
        (accuracy, recall, precision, f1, kappa), in that order.
    """
    print(header)
    scores = []
    for label, scorer in _METRIC_TABLE:
        # Each score is printed as soon as it is computed, to three decimals.
        value = scorer(y_true, y_pred)
        print("{}: {:.3f}".format(label, value))
        scores.append(value)
    return tuple(scores)


# Headers after the first start with a newline so the reports are separated
# by a blank line.
acc_score_1, rec_score_1, prec_score_1, F1_score_1, kappa_1 = _report_metrics("MODEL 1", y_test, y_pred_1)
acc_score_2, rec_score_2, prec_score_2, F1_score_2, kappa_2 = _report_metrics("\nMODEL 2", y_test, y_pred_2)
acc_score_3, rec_score_3, prec_score_3, F1_score_3, kappa_3 = _report_metrics("\nMODEL 3", y_test, y_pred_3)
acc_score_4, rec_score_4, prec_score_4, F1_score_4, kappa_4 = _report_metrics("\nMODEL 4", y_test, y_pred_4)
acc_score_5, rec_score_5, prec_score_5, F1_score_5, kappa_5 = _report_metrics("\nMODEL 5", y_test, y_pred_5)

MODEL 1
Accuracy: 0.964
Recall: 0.948
Precision Score: 0.965
F1-Score: 0.957
Kappa Score: 0.925

MODEL 2
Accuracy: 0.971
Recall: 0.966
Precision Score: 0.966
F1-Score: 0.966
Kappa Score: 0.940

MODEL 3
Accuracy: 0.964
Recall: 0.948
Precision Score: 0.965
F1-Score: 0.957
Kappa Score: 0.925

MODEL 4
Accuracy: 0.971
Recall: 0.966
Precision Score: 0.966
F1-Score: 0.966
Kappa Score: 0.940

MODEL 5
Accuracy: 0.978
Recall: 0.983
Precision Score: 0.966
F1-Score: 0.974
Kappa Score: 0.955
