# 4.3.6 Challenge Make Your Own Network

In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore') # Don't show warnings

# Import the model.
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier

# Import Metrics
from sklearn.metrics import adjusted_rand_score
from sklearn.model_selection import cross_val_score

## This dataset has been cleaned in Capstone project #2

Reading the dataset:

In [4]:
df = pd.read_csv('ThoracicSurgery_Cleaned.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Diagnosis,Forced_Capacity,Forced_Expiration,Zubrod_Scale,Pain,Haemoptysis,Dyspnoea,Cough,Weakness,Tumor_Size,Diabetes,MI_6mo,PAD,Smoker,Asthmatic,Age,Death_1yr
0,0,2,2.88,2.16,1,0,0,0,1,1,4,0,0,0,1,0,60,0
1,1,3,3.4,1.88,0,0,0,0,0,0,2,0,0,0,1,0,51,0
2,2,3,2.76,2.08,1,0,0,0,1,0,1,0,0,0,1,0,59,0
3,3,3,3.68,3.04,0,0,0,0,0,0,1,0,0,0,0,0,54,0
4,4,3,2.44,0.96,2,0,1,0,1,1,1,0,0,0,1,0,73,1


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 454 entries, 0 to 453
Data columns (total 18 columns):
Unnamed: 0           454 non-null int64
Diagnosis            454 non-null int64
Forced_Capacity      454 non-null float64
Forced_Expiration    454 non-null float64
Zubrod_Scale         454 non-null int64
Pain                 454 non-null int64
Haemoptysis          454 non-null int64
Dyspnoea             454 non-null int64
Cough                454 non-null int64
Weakness             454 non-null int64
Tumor_Size           454 non-null int64
Diabetes             454 non-null int64
MI_6mo               454 non-null int64
PAD                  454 non-null int64
Smoker               454 non-null int64
Asthmatic            454 non-null int64
Age                  454 non-null int64
Death_1yr            454 non-null int64
dtypes: float64(2), int64(16)
memory usage: 63.9 KB


All the columns are numerical. Let's check the tail of the data set to see if they are at the end of the set.

In [7]:
df.tail()

Unnamed: 0.1,Unnamed: 0,Diagnosis,Forced_Capacity,Forced_Expiration,Zubrod_Scale,Pain,Haemoptysis,Dyspnoea,Cough,Weakness,Tumor_Size,Diabetes,MI_6mo,PAD,Smoker,Asthmatic,Age,Death_1yr
449,465,2,3.88,2.12,1,0,0,0,1,0,3,0,0,0,1,0,63,0
450,466,3,3.76,3.12,0,0,0,0,0,0,1,0,0,0,1,0,61,0
451,467,3,3.04,2.08,1,0,0,0,1,0,3,0,0,0,0,0,52,0
452,468,3,1.96,1.68,1,0,0,0,1,1,2,0,0,0,1,0,79,0
453,469,3,4.72,3.56,0,0,0,0,0,0,2,0,0,0,1,0,51,0


## Building a Model - Default Settings

We will use multi-layer perceptron modeling (MLP) to classify if a patient survived after 1 year of surgery

In [8]:
df.shape

(454, 18)

In [9]:
# Identifying variables
X = df.drop('Death_1yr', axis=1)
Y = df['Death_1yr']

In [10]:
# Establishing and fitting the model, with a single, 100 perceptron layer.
mlp = MLPClassifier()
mlp.fit(X, Y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

The following are the ground truth percentages for reference:

In [11]:
Y.value_counts()/len(Y)

0    0.848018
1    0.151982
Name: Death_1yr, dtype: float64

From the above, 15.20% of patients died after 1 year of surgery.

We will calculate the adjusted rand score.  This score will tell us how the prediction relates to the ground truth of the data.
- http://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_rand_score.html

In [14]:
# 10-fold cross validation
ars = cross_val_score(mlp, X, Y, scoring='adjusted_rand_score', cv=5)
print('Cross Validation Scores: {:.5f}(+/- {:.2f})'.format(ars.mean(), ars.std()*2))

Cross Validation Scores: -0.01048(+/- 0.13)


The adjusted rand score is approximately -0.01

In [35]:
# Get predicted clusters.
full_pred = mlp.predict(X)
pd.crosstab(Y, full_pred) 

col_0,0
Death_1yr,Unnamed: 1_level_1
0,385
1,69


## Model 2 - Logistic Activation
We will keep the default MLP settings, although we will change the activation to logistic.

In [16]:
# Establish and fit the model, with default settings.
mlp2 = MLPClassifier(activation='logistic')
mlp2.fit(X, Y)

MLPClassifier(activation='logistic', alpha=0.0001, batch_size='auto',
              beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [17]:
# 10-fold cross validation
ars2 = cross_val_score(mlp2, X, Y, scoring='adjusted_rand_score', cv=5)
print('Cross Validation Adjusted Rand Scores: {:.5f}(+/- {:.2f})'.format(ars2.mean(), ars2.std()*2))

Cross Validation Adjusted Rand Scores: -0.02180(+/- 0.09)


In [18]:
# Get predicted clusters.
full_pred2 = mlp2.predict(X)
pd.crosstab(Y, full_pred2) 

col_0,0
Death_1yr,Unnamed: 1_level_1
0,385
1,69


## Model 3 - Playing with Size of Layers
Let's keep the logistic activation and then increase the size of the layer to our model. 

In [21]:
# Establish and fit the model, with default settings.
mlp3 = MLPClassifier(activation='logistic', hidden_layer_sizes=(200))
mlp3.fit(X, Y)

MLPClassifier(activation='logistic', alpha=0.0001, batch_size='auto',
              beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=200, learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [22]:
# 10-fold cross validation
ars3 = cross_val_score(mlp3, X, Y, scoring='adjusted_rand_score', cv=5)
print('Cross Validation Adjusted Rand Scores: {:.5f}(+/- {:.2f})'.format(ars3.mean(), ars3.std()*2))

Cross Validation Adjusted Rand Scores: -0.02556(+/- 0.09)


In [23]:
# Get predicted clusters.
full_pred3 = mlp3.predict(X)
pd.crosstab(Y, full_pred3) 

col_0,0,1
Death_1yr,Unnamed: 1_level_1,Unnamed: 2_level_1
0,385,0
1,67,2


## Model 4 - Multiple Large Layers
We will add two layers with a size of 200 each.

In [24]:
# Establish and fit the model, with default settings.
mlp4 = MLPClassifier(activation='logistic', hidden_layer_sizes=(200, 200))
mlp4.fit(X, Y)

MLPClassifier(activation='logistic', alpha=0.0001, batch_size='auto',
              beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(200, 200), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [25]:
# 10-fold cross validation
ars4 = cross_val_score(mlp4, X, Y, scoring='adjusted_rand_score', cv=5)
print('Cross Validation Adjusted Rand Scores: {:.5f}(+/- {:.2f})'.format(ars4.mean(), ars4.std()*2))

Cross Validation Adjusted Rand Scores: -0.01797(+/- 0.08)


In [26]:
# Get predicted clusters.
full_pred4 = mlp4.predict(X)
pd.crosstab(Y, full_pred4) 

col_0,0,1
Death_1yr,Unnamed: 1_level_1,Unnamed: 2_level_1
0,384,1
1,65,4


## Model 5 - Alpha
We will reduce alpha:

In [27]:
# Establish and fit the model, with default settings.
mlp5 = MLPClassifier(activation='logistic', hidden_layer_sizes=(200, 200), alpha=1e-6)
mlp5.fit(X, Y)

MLPClassifier(activation='logistic', alpha=1e-06, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(200, 200), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [28]:
# 10-fold cross validation
ars5 = cross_val_score(mlp5, X, Y, scoring='adjusted_rand_score', cv=5)
print('Cross Validation Adjusted Rand Scores: {:.5f}(+/- {:.2f})'.format(ars5.mean(), ars5.std()*2))

Cross Validation Adjusted Rand Scores: -0.00162(+/- 0.12)


In [29]:
# Get predicted clusters.
full_pred5 = mlp5.predict(X)
pd.crosstab(Y, full_pred5) 

col_0,0,1
Death_1yr,Unnamed: 1_level_1,Unnamed: 2_level_1
0,385,0
1,67,2


## Model 6 - Smaller Layers, Higher Alpha
We will use now a smaller layer, and a higher alpha:

In [30]:
# Establish and fit the model, with default settings.
mlp6 = MLPClassifier(activation='logistic', alpha=1e-7)
mlp6.fit(X, Y)

MLPClassifier(activation='logistic', alpha=1e-07, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [31]:
# 10-fold cross validation
ars6 = cross_val_score(mlp6, X, Y, scoring='adjusted_rand_score', cv=5)
print('Cross Validation Adjusted Rand Scores: {:.5f}(+/- {:.2f})'.format(ars6.mean(), ars6.std()*2))

Cross Validation Adjusted Rand Scores: -0.02180(+/- 0.09)


In [32]:
# Get predicted clusters.
full_pred6 = mlp6.predict(X)
pd.crosstab(Y, full_pred6) 

col_0,0,1
Death_1yr,Unnamed: 1_level_1,Unnamed: 2_level_1
0,385,0
1,68,1


We will now compare these last results using the gradient bossted classifier on our dataset

# Gradient Boosted Classifier Model


In [33]:
#instantiating and fitting the model
gbc = GradientBoostingClassifier()
gbc.fit(X, Y)

GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='deviance', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=100,
                           n_iter_no_change=None, presort='deprecated',
                           random_state=None, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [34]:
# 10-fold cross validation
ars7 = cross_val_score(gbc, X, Y, scoring='adjusted_rand_score', cv=5)
print('Cross Validation Adjusted Rand Scores: {:.5f}(+/- {:.2f})'.format(ars7.mean(), ars7.std()*2))

Cross Validation Adjusted Rand Scores: 0.01499(+/- 0.11)


In [36]:
# Get predicted clusters.
full_pred7 = gbc.predict(X)
pd.crosstab(Y, full_pred7) 

col_0,0,1
Death_1yr,Unnamed: 1_level_1,Unnamed: 2_level_1
0,385,0
1,29,40


Gradient Boosting method performed better than all others.