In [14]:
import pandas as pd

In [15]:
titanic_data = pd.read_csv('datasets/train.csv')
titanic_data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [16]:
unwanted_features = ['PassengerId','Name', 'Ticket', 'Cabin', 'SibSp', 'Parch', 'Embarked']

In [18]:
titanic_data = titanic_data.drop(unwanted_features, axis=1)
titanic_data.head()

Unnamed: 0,Survived,Pclass,Sex,Age,Fare
0,0,3,male,22.0,7.25
1,1,1,female,38.0,71.2833
2,1,3,female,26.0,7.925
3,1,1,female,35.0,53.1
4,0,3,male,35.0,8.05


In [19]:
titanic_data = titanic_data.dropna()

In [20]:
from sklearn import preprocessing

In [21]:
le = preprocessing.LabelEncoder()

In [22]:
titanic_data['Sex'] = le.fit_transform(titanic_data['Sex'])
titanic_data.head()

Unnamed: 0,Survived,Pclass,Sex,Age,Fare
0,0,3,1,22.0,7.25
1,1,1,0,38.0,71.2833
2,1,3,0,26.0,7.925
3,1,1,0,35.0,53.1
4,0,3,1,35.0,8.05


In [23]:
features = ['Pclass','Sex', 'Age', 'Fare']

In [24]:
titanic_features = titanic_data[features]

In [25]:
titanic_features.head()

Unnamed: 0,Pclass,Sex,Age,Fare
0,3,1,22.0,7.25
1,1,0,38.0,71.2833
2,3,0,26.0,7.925
3,1,0,35.0,53.1
4,3,1,35.0,8.05


In [26]:
titanic_features = pd.get_dummies(titanic_features, columns = ['Pclass'])
titanic_features.head()

Unnamed: 0,Sex,Age,Fare,Pclass_1,Pclass_2,Pclass_3
0,1,22.0,7.25,0,0,1
1,0,38.0,71.2833,1,0,0
2,0,26.0,7.925,0,0,1
3,0,35.0,53.1,1,0,0
4,1,35.0,8.05,0,0,1


In [27]:
titanic_target = titanic_data[['Survived']]

In [28]:
titanic_target.head()

Unnamed: 0,Survived
0,0
1,1
2,1
3,1
4,0


In [29]:
from sklearn.model_selection import train_test_split

In [30]:
X_train, x_test, Y_train, y_test = train_test_split(titanic_features,
                                                    titanic_target,
                                                    test_size = 0.2,
                                                    random_state = 0
                                                   )

In [31]:
 X_train.shape, Y_train.shape

((571, 6), (571, 1))

In [33]:
import torch
import numpy as np

In [65]:
Xtrain_ = torch.from_numpy(X_train.values).float()
Xtest_ = torch.from_numpy(x_test.values).float()

In [75]:
Xtrain_.shape

torch.Size([571, 6])

In [76]:
Ytrain_ = torch.from_numpy(Y_train.values).view(1,-1)[0]
Ytest_ = torch.from_numpy(y_test.values).view(1,-1)[0]

In [77]:
Ytrain_.shape

torch.Size([571])

In [78]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [79]:
input_size = 6
output_size = 2
hidden_size = 10

In [80]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        x = F.sigmoid(self.fc1(x))
        x = F.sigmoid(self.fc2(x))
        x = self.fc3(x)
        
        return F.log_softmax(x, dim = -1)

In [81]:
model = Net()

In [82]:
##Manually update the model weight during back propagation using Adam Optimizer

import torch.optim as optim
optimizer = optim.Adam(model.parameters())
loss_fn = nn.NLLLoss()

In [83]:
epoch_data = []
epochs = 1001

In [85]:
for epoch in range(1, epochs):

    optimizer.zero_grad()
    Ypred = model(Xtrain_)

    loss = loss_fn(Ypred , Ytrain_)
    loss.backward()

    optimizer.step()
        
    Ypred_test = model(Xtest_)
    loss_test = loss_fn(Ypred_test, Ytest_)
    
    _,pred = Ypred_test.data.max(1)
    
    accuracy = pred.eq(Ytest_.data).sum().item() / y_test.values.size
    epoch_data.append([epoch, loss.data.item(), loss_test.data.item(), accuracy])
    
    if epoch % 100 == 0:
        print ('epoch - %d (%d%%) train loss - %.2f test loss - %.2f accuracy - %.4f'\
               % (epoch, epoch/150 * 10 , loss.data.item(), loss_test.data.item(), accuracy))



epoch - 100 (6%) train loss - 0.61 test loss - 0.61 accuracy - 0.6783
epoch - 200 (13%) train loss - 0.59 test loss - 0.59 accuracy - 0.6853
epoch - 300 (20%) train loss - 0.56 test loss - 0.57 accuracy - 0.6853
epoch - 400 (26%) train loss - 0.51 test loss - 0.49 accuracy - 0.7832
epoch - 500 (33%) train loss - 0.46 test loss - 0.43 accuracy - 0.8392
epoch - 600 (40%) train loss - 0.44 test loss - 0.41 accuracy - 0.8462
epoch - 700 (46%) train loss - 0.43 test loss - 0.41 accuracy - 0.8182
epoch - 800 (53%) train loss - 0.42 test loss - 0.41 accuracy - 0.8322
epoch - 900 (60%) train loss - 0.40 test loss - 0.40 accuracy - 0.8322
epoch - 1000 (66%) train loss - 0.39 test loss - 0.40 accuracy - 0.8462
