# Building an ANN to tackle the Titanic Kaggle Dataset

#### Importing necessary libraries

In [1]:
import numpy as np
import tensorflow as tf
import pandas as pd

Reading data into a pandas dataframe

In [2]:
dataset = pd.read_csv('train.csv')
X = dataset.iloc[:, [2, 4, 5, 6, 7, 9, 11]].values
y = dataset.iloc[:, 1].values

In [3]:
dataset = pd.read_csv('test.csv')
X_test = dataset.iloc[:, [1, 3, 4, 5, 6, 8, 10]].values
sub_ids = dataset.iloc[:, 0].values

### Taking Care of Missing Data

Age Column

In [4]:
from sklearn.impute import SimpleImputer
imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')
imp_mean.fit(X[:, 2].reshape(-1, 1))
X[:, 2] = imp_mean.transform(X[:, 2].reshape(-1, 1)).flatten()
X_test[:, 2] = imp_mean.transform(X_test[:, 2].reshape(-1, 1)).flatten()

## Encoding Categorical Data

Gender Column

In [5]:
from sklearn import preprocessing
leGender = preprocessing.LabelEncoder()
X[:, 1] = leGender.fit_transform(X[:, 1])
X_test[:, 1] = leGender.transform(X_test[:, 1])

Embarked Column

In [6]:
from sklearn import preprocessing
leEmbarked = preprocessing.LabelEncoder()
X[:, -1] = leEmbarked.fit_transform(X[:, -1])
X_test[:, -1] = leEmbarked.transform(X_test[:, -1])

In [7]:
print(X.shape)

(891, 7)


In [8]:
print(X_test)

[[3 1 34.5 ... 0 7.8292 1]
 [3 0 47.0 ... 0 7.0 2]
 [2 1 62.0 ... 0 9.6875 1]
 ...
 [3 1 38.5 ... 0 7.25 2]
 [3 1 29.69911764705882 ... 0 8.05 2]
 [3 1 29.69911764705882 ... 1 22.3583 0]]


## Feature Scaling

In [9]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)
X_test = sc.transform(X_test)

In [10]:
print(X)

[[ 0.82737724  0.73769513 -0.5924806  ... -0.47367361 -0.50244517
   0.58111394]
 [-1.56610693 -1.35557354  0.63878901 ... -0.47367361  0.78684529
  -1.93846038]
 [ 0.82737724 -1.35557354 -0.2846632  ... -0.47367361 -0.48885426
   0.58111394]
 ...
 [ 0.82737724 -1.35557354  0.         ...  2.00893337 -0.17626324
   0.58111394]
 [-1.56610693  0.73769513 -0.2846632  ... -0.47367361 -0.04438104
  -1.93846038]
 [ 0.82737724  0.73769513  0.17706291 ... -0.47367361 -0.49237783
  -0.67867322]]


In [11]:
print(X_test)

[[ 0.82737724  0.73769513  0.36944878 ... -0.47367361 -0.49078316
  -0.67867322]
 [ 0.82737724 -1.35557354  1.33137817 ... -0.47367361 -0.50747884
   0.58111394]
 [-0.36936484  0.73769513  2.48569343 ... -0.47367361 -0.45336687
  -0.67867322]
 ...
 [ 0.82737724  0.73769513  0.67726619 ... -0.47367361 -0.50244517
   0.58111394]
 [ 0.82737724  0.73769513  0.         ... -0.47367361 -0.48633742
   0.58111394]
 [ 0.82737724  0.73769513  0.         ...  0.76762988 -0.19824428
  -1.93846038]]


## Building and Training the ANN

Importing necessary attributes

In [12]:
from tensorflow.keras.layers import Dense, Input, Dropout, BatchNormalization
from tensorflow.keras.models import Model

Building the model

In [13]:
i = Input(shape=(7,))
x = Dense(128, activation = 'relu', kernel_initializer='uniform', kernel_regularizer=tf.keras.regularizers.L2(0.01),
    bias_regularizer=tf.keras.regularizers.L2(0.01))(i)
x = Dense(64, activation = 'relu', kernel_initializer='uniform', kernel_regularizer=tf.keras.regularizers.L2(0.01),
    bias_regularizer=tf.keras.regularizers.L2(0.01)) (x)
x = Dense(1, activation = 'sigmoid', kernel_initializer='uniform')(x)

model = Model(i, x)

In [14]:
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

Training the ANN

In [15]:
model.fit(X, y, batch_size = 42, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f15c20b3280>

## Compiling Predictions

In [16]:
from sklearn.metrics import confusion_matrix, accuracy_score
X_test = np.nan_to_num(X_test)
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.75)
submission = np.concatenate([sub_ids.reshape(-1, 1), y_pred.reshape(-1, 1)], axis=1)
print(submission)

[[ 892    0]
 [ 893    0]
 [ 894    0]
 [ 895    0]
 [ 896    0]
 [ 897    0]
 [ 898    0]
 [ 899    0]
 [ 900    1]
 [ 901    0]
 [ 902    0]
 [ 903    0]
 [ 904    1]
 [ 905    0]
 [ 906    1]
 [ 907    1]
 [ 908    0]
 [ 909    0]
 [ 910    0]
 [ 911    0]
 [ 912    0]
 [ 913    0]
 [ 914    1]
 [ 915    0]
 [ 916    1]
 [ 917    0]
 [ 918    1]
 [ 919    0]
 [ 920    0]
 [ 921    0]
 [ 922    0]
 [ 923    0]
 [ 924    0]
 [ 925    0]
 [ 926    0]
 [ 927    0]
 [ 928    0]
 [ 929    0]
 [ 930    0]
 [ 931    0]
 [ 932    0]
 [ 933    0]
 [ 934    0]
 [ 935    1]
 [ 936    1]
 [ 937    0]
 [ 938    0]
 [ 939    0]
 [ 940    1]
 [ 941    0]
 [ 942    0]
 [ 943    0]
 [ 944    1]
 [ 945    1]
 [ 946    0]
 [ 947    0]
 [ 948    0]
 [ 949    0]
 [ 950    0]
 [ 951    1]
 [ 952    0]
 [ 953    0]
 [ 954    0]
 [ 955    0]
 [ 956    0]
 [ 957    1]
 [ 958    0]
 [ 959    0]
 [ 960    0]
 [ 961    1]
 [ 962    0]
 [ 963    0]
 [ 964    0]
 [ 965    0]
 [ 966    1]
 [ 967    0]
 [ 968    0]

In [17]:
df = pd.DataFrame(submission, columns=['PassengerId', 'Survived'])
df.to_csv('submission.csv', index=False)

To Download the submissions file

In [18]:
# from google.colab import files

# files.download('submission.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>