# M/L Commando Course, Cambridge 2018
## Neural Networks

We revisit our well-known Titanic data again, this time using Keras with Tensorflow to build Neural Network classifiers, for survival prediction (binary classification) and to estimate the class of a passenger (multiclass classification).

In [2]:
from math import exp
from random import random, shuffle, choice, randint
import pandas as pd
import numpy as np

from keras import Input
from numpy import array, mean
from pandas import concat
from pandas import DataFrame
from keras.models import Sequential
from keras.layers import Dense,Reshape

import matplotlib.pyplot as plt

In [3]:
titanic_raw = pd.read_csv('data/titanic.csv')
print (titanic_raw[12:14])

    row.names pclass  survived                          name  age  \
12         13    1st         1  Aubert, Mrs Leontine Pauline  NaN   
13         14    1st         1     Barkworth, Mr Algernon H.  NaN   

       embarked      home.dest  room        ticket boat     sex  
12    Cherbourg  Paris, France  B-35  17477 L69 6s    9  female  
13  Southampton  Hessle, Yorks  A-23           NaN    B    male  


In [4]:
from sklearn import feature_extraction
from sklearn.model_selection import train_test_split

def one_hot_dataframe(data, cols, replace=False):
    dic_vecr = feature_extraction.DictVectorizer()
    stuff_to_transform = data[cols]
    dty_list = stuff_to_transform.to_dict(orient='records') #rjm49 - this call makes a dict for each record in the table, returns them all as a list
    txd = dic_vecr.fit_transform(dty_list) #converts string types to 1-hot-encoded classes as a NumPy "sparse array"
    vecData = pd.DataFrame( txd.toarray())
    vecData.columns = dic_vecr.get_feature_names()
    vecData.index = data.index
    if replace is True: #replace the columns in data with those from our VecData object
        data = data.drop(cols, axis=1)
        data = data.join(vecData)
    return (data, vecData)

titanic, _ = one_hot_dataframe(titanic_raw, ['pclass', 'embarked', 'sex'], replace=True)
titanic_target = titanic['survived']

mean_age = titanic['age'].mean()
titanic['age'].fillna(mean_age, inplace=True)

titanic.fillna(0, inplace=True)

titanic_data = titanic.drop(['name', 'row.names', 'survived', 'embarked', 'home.dest','ticket','boat','room'], axis=1) #can use inplace=True to alter original

print(titanic_data.head())
X_train, X_test, y_train, y_test = train_test_split(titanic_data, titanic_target, test_size=0.25, random_state=42)


       age  embarked=Cherbourg  embarked=Queenstown  embarked=Southampton  \
0  29.0000                 0.0                  0.0                   1.0   
1   2.0000                 0.0                  0.0                   1.0   
2  30.0000                 0.0                  0.0                   1.0   
3  25.0000                 0.0                  0.0                   1.0   
4   0.9167                 0.0                  0.0                   1.0   

   pclass=1st  pclass=2nd  pclass=3rd  sex=female  sex=male  
0         1.0         0.0         0.0         1.0       0.0  
1         1.0         0.0         0.0         1.0       0.0  
2         1.0         0.0         0.0         0.0       1.0  
3         1.0         0.0         0.0         1.0       0.0  
4         1.0         0.0         0.0         0.0       1.0  


In [11]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import Adam

model = Sequential([
    Dense(10, input_shape=(9,)),
    Activation('relu'),
#     Dense(20),
#     Activation('relu'),
    Dense(10),
    Activation('relu'),
    Dense(1),
    Activation('sigmoid'),
])

model.summary()

# opt = Adam(lr=0.01)
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["acc", "mse"])
model.fit(X_train, y_train, epochs=100) #, epochs=100, batch_size=100)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_17 (Dense)             (None, 10)                100       
_________________________________________________________________
activation_17 (Activation)   (None, 10)                0         
_________________________________________________________________
dense_18 (Dense)             (None, 1)                 11        
_________________________________________________________________
activation_18 (Activation)   (None, 1)                 0         
Total params: 111
Trainable params: 111
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100

Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x7fa045121c88>

In [12]:
score = model.evaluate(X_train, y_train)
print("train score: {}".format(score))
score = model.evaluate(X_test, y_test)
print("test score: {}".format(score))

train score: [0.41641413535529037, 0.8252032525171109, 0.12870446162495186]
test score: [0.4415990382342353, 0.8389057752571569, 0.1334189323306446]


## Multiclass classification
Next we take a look at a classification problem where the result is always exactly one of N classes.  This is called a multi-class problem.
(Do not confuse with a _multi-label_ problem, where we can have any number of labels (or none!) positive for a given datum.)

In our case we'll try to use the passenger's age, gender and embarkation point to determine the likely class of their ticket.

In [None]:
model = Sequential([
    Dense(5, input_shape=(6,)),
    Activation('relu'),
    Dense(3),
    Activation('softmax'),
])

titanic_class_target = titanic[["pclass=1st","pclass=2nd","pclass=3rd"]]
titanic_classless_data = titanic_data.drop(["pclass=1st","pclass=2nd","pclass=3rd"], axis=1)
X_train, X_test, y_train, y_test = train_test_split(titanic_classless_data, titanic_class_target, test_size=0.25, random_state=42)

print(X_train.head())

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["acc", "mean_squared_error"])
history = model.fit(X_train, y_train, epochs=200)

plt.plot(history.history['loss'])
plt.plot(history.history['acc'])
plt.plot(history.history['mean_squared_error'])
plt.show()

In [132]:
score = model.evaluate(X_train, y_train)
print("train score: {}".format(score))
score = model.evaluate(X_test, y_test)
print("test score: {}".format(score))

train score: [0.64367103649348745, 0.70020325154792973, 0.12299748552523977]
test score: [0.6473649773735406, 0.71732522814469502, 0.12401590176991054]


In [133]:
y_pred=model.predict(X_test)
print("Raw softmax (rows should sum to 1):\n",y_pred[0:5])
y_pred = (y_pred > 0.4)
print("\nDecision boundary at 0.4:\n",y_pred[0:5])
y_args = np.argmax(y_pred, axis=1)
print("\nIndices of maxima:\n", y_args[0:5])

Raw softmax (rows should sum to 1):
 [[ 0.48002222  0.38742185  0.13255596]
 [ 0.23621719  0.45844388  0.30533892]
 [ 0.12852886  0.43304601  0.43842512]
 [ 0.4653298   0.39453498  0.14013521]
 [ 0.0514151   0.05296337  0.89562154]]

Decision boundary at 0.4:
 [[ True False False]
 [False  True False]
 [False  True  True]
 [ True False False]
 [False False  True]]

Indices of maxima:
 [0 1 1 0 2]


In [134]:
from sklearn import metrics
def measure_performance(X,y,clf, show_accuracy=True, show_classification_report=True, show_confusion_matrix=True):
    y_pred=clf.predict(X)
    
    y = np.argmax(np.array(y), axis=1)
    y_pred = np.argmax(y_pred, axis=1)

    if show_accuracy:
        print( "Accuracy:{0:.3f}".format(metrics.accuracy_score(y,y_pred)),"\n")

    if show_classification_report:
        print( "Classification report")
        print( metrics.classification_report(y,y_pred),"\n")
        
    if show_confusion_matrix:
        print( "Confusion matrix")
        print( metrics.confusion_matrix(y,y_pred),"\n")

In [135]:
measure_performance(X_test, y_test, model, show_accuracy=True)

Accuracy:0.717 

Classification report
             precision    recall  f1-score   support

          0       0.66      0.55      0.60        80
          1       0.50      0.71      0.59        79
          2       0.90      0.80      0.85       170

avg / total       0.75      0.72      0.72       329
 

Confusion matrix
[[ 44  31   5]
 [ 13  56  10]
 [ 10  24 136]] 

