# Cambridge M/L Commando Course
## Titanic: Neural Networks (with Keras)

We revisit the familiar Titanic data set, this time using Keras with TensorFlow to build neural-network classifiers: one to predict survival (binary classification) and one to estimate a passenger's ticket class (multiclass classification).

In [None]:
from math import exp
from random import random, shuffle, choice, randint
import pandas as pd
import numpy as np

from keras import Input
from numpy import array, mean
from pandas import concat
from pandas import DataFrame
from keras.models import Sequential
from keras.layers import Dense,Reshape

import matplotlib.pyplot as plt

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
# Load the raw passenger table and preview the first rows.
df = pd.read_csv("./data/titanic.csv")
print(df[0:10])

# Keep only the columns used in this notebook.  The explicit copy makes `df`
# an independent frame, so the column assignments below do not write into a
# selection of the raw table (which can trigger pandas' chained-assignment
# warning).
df = df[["pclass","age","sex","survived","embarked"]].copy()

# Replace missing ages with the mean of the known ages.
mean_age = np.mean(df["age"])
df["age"] = df["age"].fillna(mean_age)

# Turn the categorical columns into integer codes.  Each fitted encoder is
# kept in its own variable so codes can be mapped back to labels later.
mf_enc = LabelEncoder()
df["sex"] = mf_enc.fit_transform(df["sex"])

class_enc = LabelEncoder()
df["pclass"] = class_enc.fit_transform(df["pclass"])

# Give missing embarkation points an explicit label of their own rather than
# relying on how LabelEncoder treats NaN mixed with strings.
embark_enc = LabelEncoder()
df["embarked"] = embark_enc.fit_transform(df["embarked"].fillna("missing"))

# Printed once, after every column has been encoded, so the preview really
# shows the cleaned-up frame.
print("\nCLEANED UP DATA----\n",df[0:10])


# Survival task: predict `survived` from the four remaining columns.
titanic_X = df[["pclass","age","sex","embarked"]]
titanic_y = df["survived"]

print(titanic_X.head())
# Fixed random_state keeps the 75/25 split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(titanic_X, titanic_y, test_size=0.25, random_state=42)


In [None]:
from sklearn import metrics
def measure_performance(X,y,clf, show_accuracy=True, show_classification_report=True, show_confusion_matrix=True):
    """Print evaluation output for a fitted classifier on one data set.

    Parameters
    ----------
    X : features handed unchanged to ``clf.predict``.
    y : true labels, compared against the predicted labels.
    clf : any object with a ``predict(X)`` method whose result has a
        ``shape`` and supports slicing (e.g. a NumPy array).
    show_accuracy, show_classification_report, show_confusion_matrix : bool
        Switch the corresponding printed section on or off.

    The first ten raw outputs are always printed.  Nothing is returned.
    """
    raw_output = clf.predict(X)

    print("First ten outputs")
    print(raw_output[0:10])

    # More than one output column means one score per class: take the index of
    # the largest score.  Otherwise round the single output to a hard label.
    one_score_per_class = len(raw_output.shape) > 1 and raw_output.shape[1] > 1
    labels = np.argmax(raw_output, axis=1) if one_score_per_class else np.round(raw_output)

    if show_accuracy:
        accuracy = metrics.accuracy_score(y, labels)
        print("Accuracy:{0:.3f}".format(accuracy), "\n")

    if show_classification_report:
        report = metrics.classification_report(y, labels)
        print("Classification report")
        print(report, "\n")

    if show_confusion_matrix:
        matrix = metrics.confusion_matrix(y, labels)
        print("Confusion matrix")
        print(matrix, "\n")

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import Adam

# Binary survival classifier: 4 input features -> two hidden ReLU layers of
# 5 units each -> one sigmoid unit whose output lies between 0 and 1.
survival_model = Sequential([
    Dense(5, input_shape=(4,)),
    Activation('relu'),
    Dense(5),
    Activation('relu'),
    Dense(1),
    Activation('sigmoid'),
])

survival_model.summary()
# NOTE: the former `survival_model = model` line was removed.  No `model` is
# defined in this notebook, so it raised NameError on a fresh kernel (or
# silently replaced the network above with stale kernel state).

# Binary cross-entropy is the training loss; accuracy and MSE are tracked as
# additional metrics and are reported by evaluate() in that order.
survival_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["acc", "mse"])
survival_model.fit(X_train, y_train, epochs=100)

In [None]:
# Report loss and metrics of the survival model, first on the training split
# and then on the held-out test split.
print("Scores are binary_crossentropy (i.e. loss), accuracy and MSE")
for template, split_X, split_y in (
    ("train score: {}", X_train, y_train),
    ("test score: {}", X_test, y_test),
):
    score = survival_model.evaluate(split_X, split_y)
    print(template.format(score))

In [None]:
# Full printed report for the survival model: training split first, then the
# held-out test split.
for split_X, split_y in ((X_train, y_train), (X_test, y_test)):
    measure_performance(split_X, split_y, survival_model, show_accuracy=True)

In [None]:
from sklearn.neural_network import MLPClassifier
# Same layout as the Keras survival model (two hidden layers of 5 units),
# built with scikit-learn for comparison.
mlp_clf = MLPClassifier(hidden_layer_sizes=(5,5,), max_iter=1000, solver="adam")
mlp_clf.fit(X_train, y_train)


def mlp_scores(clf, X, y):
    """Return ``[log-loss, accuracy, MSE]`` for a fitted binary classifier.

    The list has the same order as the scores the Keras survival model is
    compiled to report, so the two models can be compared side by side.
    MSE is measured between the true label and the predicted probability in
    the second ``predict_proba`` column (assumes a binary 0/1 target).
    """
    proba = clf.predict_proba(X)
    return [
        metrics.log_loss(y, proba, labels=clf.classes_),
        clf.score(X, y),
        metrics.mean_squared_error(y, proba[:, 1]),
    ]


print("Scikit Learn's MLPClassifier:")
print("Scores are binary_crossentropy (i.e. loss), accuracy and MSE")
# Score the scikit-learn classifier itself; this cell previously re-evaluated
# the Keras `survival_model` under the MLPClassifier heading.
score = mlp_scores(mlp_clf, X_train, y_train)
print("train score: {}".format(score))
score = mlp_scores(mlp_clf, X_test, y_test)
print("test score: {}".format(score))

measure_performance(X_train, y_train, mlp_clf, show_accuracy=True)
measure_performance(X_test, y_test, mlp_clf, show_accuracy=True)

## Multiclass classification
Next we look at a classification problem where each datum belongs to exactly one of N classes.  This is called a multi-class problem.
(Do not confuse this with a _multi-label_ problem, where any number of labels (or none!) can be positive for a given datum.)

In our case we'll use the passenger's age, gender, embarkation point and survival outcome to predict the likely class of their ticket.

In [None]:
# Ticket-class classifier: 4 input features -> one hidden ReLU layer of
# 5 units -> 3 softmax outputs, one per encoded class.
class_model = Sequential()
class_model.add(Dense(5, input_shape=(4,)))
class_model.add(Activation('relu'))
class_model.add(Dense(3))
class_model.add(Activation('softmax'))

# The encoded ticket class is the target; every other column is a feature.
titanic_class_target = df[["pclass"]]
titanic_classless_data = df.drop(columns=["pclass"])

# Note: this rebinds X_train / X_test / y_train / y_test to the class task.
X_train, X_test, y_train, y_test = train_test_split(
    titanic_classless_data,
    titanic_class_target,
    test_size=0.25,
    random_state=42,
)

print(X_train.head())

# The sparse variant of the loss takes integer class labels directly.
class_model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["acc"])
history = class_model.fit(X_train, y_train, epochs=100)

# Plot the per-epoch training loss and accuracy on one set of axes.
for curve in ("loss", "acc"):
    plt.plot(history.history[curve], label=curve)
plt.legend()
plt.show()

In [None]:
# Loss and accuracy of the ticket-class model on the training split, then on
# the held-out test split.
for template, split_X, split_y in (
    ("train score: {}", X_train, y_train),
    ("test score: {}", X_test, y_test),
):
    score = class_model.evaluate(split_X, split_y)
    print(template.format(score))

# Full printed report on the test split only.
measure_performance(X_test, y_test, class_model, show_accuracy=True)

In [None]:
# Inspect the raw network output: one row per passenger, one score per class.
y_pred = class_model.predict(X_test)
first_rows = y_pred[0:10]
print("Raw softmax (rows should sum to 1):\n", first_rows)
print("Row sums:", first_rows.sum(axis=1))

# The predicted class is the column index holding the largest score.
y_args = y_pred.argmax(axis=1)
print("\nIndices of maxima:\n", y_args[0:10])