# DataSet & Library Loading

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

df = pd.read_csv('titanic/train.csv')

# Making the dataset ready for the model

- let's drop the unnecessary columns
- encode the categorical (no details)
- impute the necessary columns (again no details)
- scale both the train and test data for linear models
- split the data for the model

In [2]:
y = df.loc[:, 'Survived'].values
df.drop(['Name', 'Ticket', 'Cabin', 'Survived'],axis=1,inplace=True)

sex      = pd.get_dummies(df['Sex'],drop_first=True)
embark   = pd.get_dummies(df['Embarked'],drop_first=True)
df = pd.concat([df,sex,embark],axis=1)

df.drop(['Sex','Embarked'],axis=1,inplace=True)

df.fillna(df.mean(),inplace=True)

Scaler = StandardScaler()
df = pd.DataFrame(Scaler.fit_transform(df))

X = df.iloc[:,1:].values

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

# Pytorch

In [4]:
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.autograd import Variable

# Pytorch Logistic Regression Model

In [5]:
#thank you very much https://www.kaggle.com/mburakergenc/ttianic-minimal-pytorch-mlp
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(8, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 2)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = Net()
print(model)

Net(
  (fc1): Linear(in_features=8, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=2, bias=True)
)


# Pytorch Loss Function (Cross Entropy CE)

In [6]:
criterion = nn.CrossEntropyLoss()

# Pytorch Optimizer (Stochastic Gradient Descent SGD)

In [7]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Pytorch Training

In [8]:
#thank you very much https://www.kaggle.com/mburakergenc/ttianic-minimal-pytorch-mlp

batch_size = 128
n_epochs = 1000
batch_no = len(X_train) // batch_size

train_loss = 0
train_loss_min = np.Inf
for epoch in range(n_epochs):
    for i in range(batch_no):
        start = i * batch_size
        end   = start + batch_size
        x_var = Variable(torch.FloatTensor(X_train[start:end]))
        y_var = Variable(torch.LongTensor(y_train[start:end])) 
        
        optimizer.zero_grad()
        output = model(x_var)
        loss   = criterion(output,y_var)
        loss.backward()
        optimizer.step()
        
        values, labels = torch.max(output, 1)
        num_right   = np.sum(labels.data.numpy() == y_train[start:end])
        train_loss += loss.item()*batch_size
    
    train_loss = train_loss / len(X_train)
    if train_loss <= train_loss_min:
        print("Validation loss decreased ({:6f} ===> {:6f}). Saving the model...".format(train_loss_min,train_loss))
        torch.save(model.state_dict(), "model.pt")
        train_loss_min = train_loss
    
    if epoch % 200 == 0:
        print('')
        print("Epoch: {} \tTrain Loss: {} \tTrain Accuracy: {}".format(epoch+1, train_loss,num_right / len(y_train[start:end]) ))
print('Training Ended! ')

Validation loss decreased (   inf ===> 0.577225). Saving the model...

Epoch: 1 	Train Loss: 0.577224552231347 	Train Accuracy: 0.5703125
Validation loss decreased (0.577225 ===> 0.576002). Saving the model...
Validation loss decreased (0.576002 ===> 0.573898). Saving the model...
Validation loss decreased (0.573898 ===> 0.571868). Saving the model...
Validation loss decreased (0.571868 ===> 0.569902). Saving the model...
Validation loss decreased (0.569902 ===> 0.567989). Saving the model...
Validation loss decreased (0.567989 ===> 0.566127). Saving the model...
Validation loss decreased (0.566127 ===> 0.564313). Saving the model...
Validation loss decreased (0.564313 ===> 0.562535). Saving the model...
Validation loss decreased (0.562535 ===> 0.560792). Saving the model...
Validation loss decreased (0.560792 ===> 0.559075). Saving the model...
Validation loss decreased (0.559075 ===> 0.557385). Saving the model...
Validation loss decreased (0.557385 ===> 0.555721). Saving the model..

Validation loss decreased (0.399681 ===> 0.398995). Saving the model...
Validation loss decreased (0.398995 ===> 0.398322). Saving the model...
Validation loss decreased (0.398322 ===> 0.397665). Saving the model...
Validation loss decreased (0.397665 ===> 0.397022). Saving the model...
Validation loss decreased (0.397022 ===> 0.396391). Saving the model...
Validation loss decreased (0.396391 ===> 0.395773). Saving the model...
Validation loss decreased (0.395773 ===> 0.395167). Saving the model...
Validation loss decreased (0.395167 ===> 0.394574). Saving the model...
Validation loss decreased (0.394574 ===> 0.393995). Saving the model...
Validation loss decreased (0.393995 ===> 0.393428). Saving the model...
Validation loss decreased (0.393428 ===> 0.392874). Saving the model...
Validation loss decreased (0.392874 ===> 0.392332). Saving the model...
Validation loss decreased (0.392332 ===> 0.391800). Saving the model...
Validation loss decreased (0.391800 ===> 0.391277). Saving the m

Validation loss decreased (0.364209 ===> 0.364062). Saving the model...
Validation loss decreased (0.364062 ===> 0.363917). Saving the model...
Validation loss decreased (0.363917 ===> 0.363772). Saving the model...
Validation loss decreased (0.363772 ===> 0.363629). Saving the model...
Validation loss decreased (0.363629 ===> 0.363486). Saving the model...
Validation loss decreased (0.363486 ===> 0.363343). Saving the model...
Validation loss decreased (0.363343 ===> 0.363201). Saving the model...
Validation loss decreased (0.363201 ===> 0.363061). Saving the model...
Validation loss decreased (0.363061 ===> 0.362920). Saving the model...
Validation loss decreased (0.362920 ===> 0.362780). Saving the model...
Validation loss decreased (0.362780 ===> 0.362641). Saving the model...
Validation loss decreased (0.362641 ===> 0.362503). Saving the model...
Validation loss decreased (0.362503 ===> 0.362366). Saving the model...
Validation loss decreased (0.362366 ===> 0.362229). Saving the m

Validation loss decreased (0.349705 ===> 0.349606). Saving the model...
Validation loss decreased (0.349606 ===> 0.349507). Saving the model...
Validation loss decreased (0.349507 ===> 0.349408). Saving the model...
Validation loss decreased (0.349408 ===> 0.349309). Saving the model...
Validation loss decreased (0.349309 ===> 0.349210). Saving the model...
Validation loss decreased (0.349210 ===> 0.349112). Saving the model...
Validation loss decreased (0.349112 ===> 0.349014). Saving the model...
Validation loss decreased (0.349014 ===> 0.348916). Saving the model...
Validation loss decreased (0.348916 ===> 0.348820). Saving the model...
Validation loss decreased (0.348820 ===> 0.348722). Saving the model...
Validation loss decreased (0.348722 ===> 0.348624). Saving the model...
Validation loss decreased (0.348624 ===> 0.348526). Saving the model...
Validation loss decreased (0.348526 ===> 0.348428). Saving the model...
Validation loss decreased (0.348428 ===> 0.348330). Saving the m

Validation loss decreased (0.338562 ===> 0.338481). Saving the model...
Validation loss decreased (0.338481 ===> 0.338401). Saving the model...
Validation loss decreased (0.338401 ===> 0.338321). Saving the model...
Validation loss decreased (0.338321 ===> 0.338242). Saving the model...
Validation loss decreased (0.338242 ===> 0.338161). Saving the model...
Validation loss decreased (0.338161 ===> 0.338081). Saving the model...
Validation loss decreased (0.338081 ===> 0.338001). Saving the model...
Validation loss decreased (0.338001 ===> 0.337920). Saving the model...
Validation loss decreased (0.337920 ===> 0.337839). Saving the model...
Validation loss decreased (0.337839 ===> 0.337759). Saving the model...
Validation loss decreased (0.337759 ===> 0.337679). Saving the model...
Validation loss decreased (0.337679 ===> 0.337599). Saving the model...
Validation loss decreased (0.337599 ===> 0.337520). Saving the model...
Validation loss decreased (0.337520 ===> 0.337442). Saving the m

Validation loss decreased (0.329471 ===> 0.329406). Saving the model...
Validation loss decreased (0.329406 ===> 0.329341). Saving the model...
Validation loss decreased (0.329341 ===> 0.329277). Saving the model...
Validation loss decreased (0.329277 ===> 0.329212). Saving the model...
Validation loss decreased (0.329212 ===> 0.329148). Saving the model...
Validation loss decreased (0.329148 ===> 0.329084). Saving the model...
Validation loss decreased (0.329084 ===> 0.329020). Saving the model...
Validation loss decreased (0.329020 ===> 0.328956). Saving the model...
Validation loss decreased (0.328956 ===> 0.328895). Saving the model...
Validation loss decreased (0.328895 ===> 0.328830). Saving the model...
Validation loss decreased (0.328830 ===> 0.328769). Saving the model...
Validation loss decreased (0.328769 ===> 0.328704). Saving the model...
Validation loss decreased (0.328704 ===> 0.328643). Saving the model...
Validation loss decreased (0.328643 ===> 0.328581). Saving the m

Validation loss decreased (0.322141 ===> 0.322090). Saving the model...
Validation loss decreased (0.322090 ===> 0.322035). Saving the model...
Validation loss decreased (0.322035 ===> 0.321983). Saving the model...
Validation loss decreased (0.321983 ===> 0.321930). Saving the model...
Validation loss decreased (0.321930 ===> 0.321876). Saving the model...
Validation loss decreased (0.321876 ===> 0.321825). Saving the model...
Validation loss decreased (0.321825 ===> 0.321771). Saving the model...
Validation loss decreased (0.321771 ===> 0.321717). Saving the model...
Validation loss decreased (0.321717 ===> 0.321665). Saving the model...
Validation loss decreased (0.321665 ===> 0.321614). Saving the model...
Validation loss decreased (0.321614 ===> 0.321560). Saving the model...
Validation loss decreased (0.321560 ===> 0.321508). Saving the model...
Validation loss decreased (0.321508 ===> 0.321456). Saving the model...
Validation loss decreased (0.321456 ===> 0.321405). Saving the m

Validation loss decreased (0.316143 ===> 0.316094). Saving the model...
Validation loss decreased (0.316094 ===> 0.316046). Saving the model...
Validation loss decreased (0.316046 ===> 0.315999). Saving the model...
Validation loss decreased (0.315999 ===> 0.315950). Saving the model...
Validation loss decreased (0.315950 ===> 0.315901). Saving the model...
Validation loss decreased (0.315901 ===> 0.315852). Saving the model...
Validation loss decreased (0.315852 ===> 0.315804). Saving the model...
Validation loss decreased (0.315804 ===> 0.315756). Saving the model...
Validation loss decreased (0.315756 ===> 0.315707). Saving the model...
Validation loss decreased (0.315707 ===> 0.315660). Saving the model...
Validation loss decreased (0.315660 ===> 0.315615). Saving the model...
Validation loss decreased (0.315615 ===> 0.315563). Saving the model...
Validation loss decreased (0.315563 ===> 0.315518). Saving the model...
Validation loss decreased (0.315518 ===> 0.315471). Saving the m

Validation loss decreased (0.311027 ===> 0.310987). Saving the model...
Validation loss decreased (0.310987 ===> 0.310946). Saving the model...
Validation loss decreased (0.310946 ===> 0.310909). Saving the model...
Validation loss decreased (0.310909 ===> 0.310870). Saving the model...
Validation loss decreased (0.310870 ===> 0.310830). Saving the model...
Validation loss decreased (0.310830 ===> 0.310794). Saving the model...
Validation loss decreased (0.310794 ===> 0.310752). Saving the model...
Validation loss decreased (0.310752 ===> 0.310712). Saving the model...
Validation loss decreased (0.310712 ===> 0.310677). Saving the model...
Validation loss decreased (0.310677 ===> 0.310637). Saving the model...
Validation loss decreased (0.310637 ===> 0.310598). Saving the model...
Validation loss decreased (0.310598 ===> 0.310561). Saving the model...
Validation loss decreased (0.310561 ===> 0.310522). Saving the model...
Validation loss decreased (0.310522 ===> 0.310482). Saving the m

# predictions

In [9]:
X_test_var = Variable(torch.FloatTensor(X_test), requires_grad=False) 
with torch.no_grad():
    test_result = model(X_test_var)
values, labels = torch.max(test_result, 1)
survived = labels.data.numpy()

In [10]:
accuracy_score(y_test, survived)

0.823728813559322