# Titanic : Machine Learning from Disaster

### 1. Importing in Dataframe

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
data = pd.read_csv("train.csv")
data.sample(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
805,806,0,3,"Johansson, Mr. Karl Johan",male,31.0,0,0,347063,7.775,,S
82,83,1,3,"McDermott, Miss. Brigdet Delia",female,,0,0,330932,7.7875,,Q
736,737,0,3,"Ford, Mrs. Edward (Margaret Ann Watson)",female,48.0,1,3,W./C. 6608,34.375,,S


In [3]:
data.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


### 2. Transforming Features

In [4]:
def simplify_ages(df):
    df.Age = df.Age.fillna(-0.5)
    bins = (-1, 0, 5, 12, 18, 25, 35, 60, 100)
    group_names = ['Unknown', 'Baby', 'Child', 'Teenage', 'Student', 'Young Adult', 'Adult', 'Senior']
    categories = pd.cut(df.Age, bins, labels=group_names)
    df.Age = categories
    return df

In [5]:
def simplify_cabins(df):
    df.Cabin = df.Cabin.fillna('E')
    df.Cabin = df.Cabin.apply(lambda x: x[0])
    return df

In [6]:
def simplify_fares(df):
    df.Fare = df.Fare.fillna(-0.5)
    bins = (-1, 0, 8, 15, 31, 1000)
    group_names = ['Unknown', '1_quartile', '2_quartile', '3_quartile', '4_quartile']
    categories = pd.cut(df.Fare, bins, labels=group_names)
    df.Fare = categories
    return df

In [7]:
def simplify_embarked(df):
    df.Embarked = df.Embarked.fillna('S')
    return df

In [8]:
def format_name(df):
    df['Lname'] = df.Name.apply(lambda x: x.split(' ')[0])
    df['NamePrefix'] = df.Name.apply(lambda x: x.split(' ')[1])
    return df  

In [9]:
def drop_features(df):
    return df.drop(['Ticket', 'Name'], axis=1)

In [10]:
def transform_features(df):
    df = simplify_ages(df)
    df = simplify_cabins(df)
    df = simplify_fares(df)
    df = simplify_embarked(df)
    df = format_name(df)
    df = drop_features(df)
    return df

In [11]:
data_train = transform_features(data)
data_train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked,Lname,NamePrefix
0,1,0,3,male,Student,1,0,1_quartile,E,S,"Braund,",Mr.
1,2,1,1,female,Adult,1,0,4_quartile,C,C,"Cumings,",Mrs.
2,3,1,3,female,Young Adult,0,0,1_quartile,E,S,"Heikkinen,",Miss.
3,4,1,1,female,Young Adult,1,0,4_quartile,C,S,"Futrelle,",Mrs.
4,5,0,3,male,Young Adult,0,0,2_quartile,E,S,"Allen,",Mr.


In [46]:
X = data_train.iloc[:, 2:12].values
y = data_train.iloc[:, 1].values

### 3. Encoding

In [47]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [48]:
column_no = [0,1,2,5,6,7,8,9]
labelencoder_X = LabelEncoder()

for no in column_no:
    X[:,no] = labelencoder_X.fit_transform(X[:,no])

In [15]:
onehotencoder = OneHotEncoder(categorical_features = [0,2,5,6,7,8,9])
X = onehotencoder.fit_transform(X).toarray()
X = X[:,1:]

### 4. Machine Learning

In [49]:
#splitting the dataset into training and testing dataset
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

### 5. Feature scaling

In [45]:
#feature scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)



In [50]:
X_train.shape

(712, 10)

### 6. Creating ANN

In [51]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

In [58]:
#Initaializing the ANN
classifier = Sequential()

#Adding the input layer and the first hidden layer with dropout
classifier.add(Dense(activation="relu", kernel_initializer="uniform", input_dim=10, units=6))
#classifier.add(Dropout(rate = 0.1))

#Adding the second hidden layer
classifier.add(Dense(activation="relu", kernel_initializer="uniform", units=6))
#classifier.add(Dropout(rate = 0.1))

#Adding the output layer
classifier.add(Dense(activation="sigmoid", kernel_initializer="uniform", units=1))

#Compiling the ANN
classifier.compile(optimizer = "adam", loss = "binary_crossentropy", metrics=["accuracy"])

In [59]:
#Fitting the ANN to training set
classifier.fit(X_train, y_train, batch_size=10, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x7facd0ee3390>

In [60]:
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)

In [61]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

In [62]:
def build_classifier():
    classifier = Sequential()
    classifier.add(Dense(activation="relu", kernel_initializer="uniform", input_dim=10, units=5))
    #classifier.add(Dropout(0.1))
    classifier.add(Dense(activation="relu", kernel_initializer="uniform", units=5))
    #classifier.add(Dropout(0.1))
    classifier.add(Dense(activation="sigmoid", kernel_initializer="uniform", units=1))
    classifier.compile(optimizer = "adam", loss = "binary_crossentropy", metrics=["accuracy"])
    return classifier

In [63]:
classifier = KerasClassifier(build_fn = build_classifier, batch_size = 10, epochs = 100)

In [65]:
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10, n_jobs = -1, verbose=1)


Epoch 1/100
Epoch 1/100
Epoch 1/100
Epoch 1/100


KeyboardInterrupt: 