# DataSet & Library Loading

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

df = pd.read_csv('titanic/train.csv')

# Making the dataset ready for the model

- let's drop the unnecessary columns
- encode the categorical (no details)
- impute the necessary columns (again no details)
- scale both the train and test data for linear models
- split the data for the model

In [2]:
y = df.loc[:, 'Survived'].values
df.drop(['Name', 'Ticket', 'Cabin', 'Survived'],axis=1,inplace=True)

sex      = pd.get_dummies(df['Sex'],drop_first=True)
embark   = pd.get_dummies(df['Embarked'],drop_first=True)
df = pd.concat([df,sex,embark],axis=1)

df.drop(['Sex','Embarked'],axis=1,inplace=True)

df.fillna(df.mean(),inplace=True)

Scaler = StandardScaler()
df = pd.DataFrame(Scaler.fit_transform(df))

X = df.iloc[:,1:].values

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

# Pytorch

In [4]:
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.autograd import Variable

# Pytorch Logistic Regression Model

In [5]:
#thank you very much https://www.kaggle.com/mburakergenc/ttianic-minimal-pytorch-mlp
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Linear(8, 2)
        
    def forward(self, x):
        x = self.fc(x)
        return x

model = Net()
print(model)

Net(
  (fc): Linear(in_features=8, out_features=2, bias=True)
)


# Pytorch Loss Function (Cross Entropy CE)

In [6]:
criterion = nn.CrossEntropyLoss()

# Pytorch Optimizer (Stochastic Gradient Descent SGD)

In [7]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Pytorch Training

In [8]:
#thank you very much https://www.kaggle.com/mburakergenc/ttianic-minimal-pytorch-mlp

batch_size = 128
n_epochs = 1000
batch_no = len(X_train) // batch_size

train_loss = 0
train_loss_min = np.Inf
for epoch in range(n_epochs):
    for i in range(batch_no):
        start = i * batch_size
        end   = start + batch_size
        x_var = Variable(torch.FloatTensor(X_train[start:end]))
        y_var = Variable(torch.LongTensor(y_train[start:end])) 
        
        optimizer.zero_grad()
        output = model(x_var)
        loss   = criterion(output,y_var)
        loss.backward()
        optimizer.step()
        
        values, labels = torch.max(output, 1)
        num_right   = np.sum(labels.data.numpy() == y_train[start:end])
        train_loss += loss.item()*batch_size
    
    train_loss = train_loss / len(X_train)
    if train_loss <= train_loss_min:
        print("Validation loss decreased ({:6f} ===> {:6f}). Saving the model...".format(train_loss_min,train_loss))
        torch.save(model.state_dict(), "model.pt")
        train_loss_min = train_loss
    
    if epoch % 200 == 0:
        print('')
        print("Epoch: {} \tTrain Loss: {} \tTrain Accuracy: {}".format(epoch+1, train_loss,num_right / len(y_train[start:end]) ))
print('Training Ended! ')

Validation loss decreased (   inf ===> 0.592334). Saving the model...

Epoch: 1 	Train Loss: 0.5923335696226798 	Train Accuracy: 0.5625
Validation loss decreased (0.592334 ===> 0.583612). Saving the model...
Validation loss decreased (0.583612 ===> 0.574380). Saving the model...
Validation loss decreased (0.574380 ===> 0.565619). Saving the model...
Validation loss decreased (0.565619 ===> 0.557308). Saving the model...
Validation loss decreased (0.557308 ===> 0.549424). Saving the model...
Validation loss decreased (0.549424 ===> 0.541944). Saving the model...
Validation loss decreased (0.541944 ===> 0.534848). Saving the model...
Validation loss decreased (0.534848 ===> 0.528114). Saving the model...
Validation loss decreased (0.528114 ===> 0.521724). Saving the model...
Validation loss decreased (0.521724 ===> 0.515658). Saving the model...
Validation loss decreased (0.515658 ===> 0.509900). Saving the model...
Validation loss decreased (0.509900 ===> 0.504431). Saving the model...


Validation loss decreased (0.382895 ===> 0.382754). Saving the model...
Validation loss decreased (0.382754 ===> 0.382615). Saving the model...
Validation loss decreased (0.382615 ===> 0.382480). Saving the model...
Validation loss decreased (0.382480 ===> 0.382347). Saving the model...
Validation loss decreased (0.382347 ===> 0.382217). Saving the model...
Validation loss decreased (0.382217 ===> 0.382090). Saving the model...
Validation loss decreased (0.382090 ===> 0.381966). Saving the model...
Validation loss decreased (0.381966 ===> 0.381844). Saving the model...
Validation loss decreased (0.381844 ===> 0.381725). Saving the model...
Validation loss decreased (0.381725 ===> 0.381608). Saving the model...
Validation loss decreased (0.381608 ===> 0.381493). Saving the model...
Validation loss decreased (0.381493 ===> 0.381381). Saving the model...
Validation loss decreased (0.381381 ===> 0.381271). Saving the model...
Validation loss decreased (0.381271 ===> 0.381164). Saving the m

Validation loss decreased (0.376099 ===> 0.376081). Saving the model...
Validation loss decreased (0.376081 ===> 0.376064). Saving the model...
Validation loss decreased (0.376064 ===> 0.376047). Saving the model...
Validation loss decreased (0.376047 ===> 0.376030). Saving the model...
Validation loss decreased (0.376030 ===> 0.376014). Saving the model...
Validation loss decreased (0.376014 ===> 0.375997). Saving the model...
Validation loss decreased (0.375997 ===> 0.375981). Saving the model...
Validation loss decreased (0.375981 ===> 0.375966). Saving the model...
Validation loss decreased (0.375966 ===> 0.375950). Saving the model...
Validation loss decreased (0.375950 ===> 0.375934). Saving the model...
Validation loss decreased (0.375934 ===> 0.375919). Saving the model...
Validation loss decreased (0.375919 ===> 0.375904). Saving the model...
Validation loss decreased (0.375904 ===> 0.375890). Saving the model...
Validation loss decreased (0.375890 ===> 0.375875). Saving the m

Validation loss decreased (0.375045 ===> 0.375041). Saving the model...
Validation loss decreased (0.375041 ===> 0.375038). Saving the model...
Validation loss decreased (0.375038 ===> 0.375034). Saving the model...
Validation loss decreased (0.375034 ===> 0.375030). Saving the model...
Validation loss decreased (0.375030 ===> 0.375027). Saving the model...
Validation loss decreased (0.375027 ===> 0.375023). Saving the model...
Validation loss decreased (0.375023 ===> 0.375020). Saving the model...
Validation loss decreased (0.375020 ===> 0.375016). Saving the model...
Validation loss decreased (0.375016 ===> 0.375013). Saving the model...
Validation loss decreased (0.375013 ===> 0.375009). Saving the model...
Validation loss decreased (0.375009 ===> 0.375006). Saving the model...
Validation loss decreased (0.375006 ===> 0.375003). Saving the model...
Validation loss decreased (0.375003 ===> 0.374999). Saving the model...
Validation loss decreased (0.374999 ===> 0.374996). Saving the m

Validation loss decreased (0.374795 ===> 0.374794). Saving the model...
Validation loss decreased (0.374794 ===> 0.374793). Saving the model...
Validation loss decreased (0.374793 ===> 0.374792). Saving the model...
Validation loss decreased (0.374792 ===> 0.374791). Saving the model...
Validation loss decreased (0.374791 ===> 0.374790). Saving the model...
Validation loss decreased (0.374790 ===> 0.374789). Saving the model...
Validation loss decreased (0.374789 ===> 0.374788). Saving the model...
Validation loss decreased (0.374788 ===> 0.374787). Saving the model...
Validation loss decreased (0.374787 ===> 0.374786). Saving the model...
Validation loss decreased (0.374786 ===> 0.374785). Saving the model...
Validation loss decreased (0.374785 ===> 0.374784). Saving the model...
Validation loss decreased (0.374784 ===> 0.374784). Saving the model...
Validation loss decreased (0.374784 ===> 0.374783). Saving the model...
Validation loss decreased (0.374783 ===> 0.374782). Saving the m

Validation loss decreased (0.374726 ===> 0.374726). Saving the model...
Validation loss decreased (0.374726 ===> 0.374726). Saving the model...
Validation loss decreased (0.374726 ===> 0.374725). Saving the model...
Validation loss decreased (0.374725 ===> 0.374725). Saving the model...
Validation loss decreased (0.374725 ===> 0.374725). Saving the model...
Validation loss decreased (0.374725 ===> 0.374724). Saving the model...
Validation loss decreased (0.374724 ===> 0.374724). Saving the model...
Validation loss decreased (0.374724 ===> 0.374724). Saving the model...
Validation loss decreased (0.374724 ===> 0.374724). Saving the model...
Validation loss decreased (0.374724 ===> 0.374723). Saving the model...
Validation loss decreased (0.374723 ===> 0.374723). Saving the model...
Validation loss decreased (0.374723 ===> 0.374723). Saving the model...
Validation loss decreased (0.374723 ===> 0.374723). Saving the model...
Validation loss decreased (0.374723 ===> 0.374722). Saving the m

Validation loss decreased (0.374707 ===> 0.374707). Saving the model...
Validation loss decreased (0.374707 ===> 0.374707). Saving the model...
Validation loss decreased (0.374707 ===> 0.374706). Saving the model...
Validation loss decreased (0.374706 ===> 0.374706). Saving the model...
Validation loss decreased (0.374706 ===> 0.374706). Saving the model...
Validation loss decreased (0.374706 ===> 0.374706). Saving the model...
Validation loss decreased (0.374706 ===> 0.374706). Saving the model...
Validation loss decreased (0.374706 ===> 0.374706). Saving the model...
Validation loss decreased (0.374706 ===> 0.374706). Saving the model...
Validation loss decreased (0.374706 ===> 0.374706). Saving the model...
Validation loss decreased (0.374706 ===> 0.374706). Saving the model...
Validation loss decreased (0.374706 ===> 0.374706). Saving the model...
Validation loss decreased (0.374706 ===> 0.374706). Saving the model...
Validation loss decreased (0.374706 ===> 0.374706). Saving the m

Validation loss decreased (0.374700 ===> 0.374700). Saving the model...
Validation loss decreased (0.374700 ===> 0.374700). Saving the model...
Validation loss decreased (0.374700 ===> 0.374700). Saving the model...
Validation loss decreased (0.374700 ===> 0.374700). Saving the model...
Validation loss decreased (0.374700 ===> 0.374700). Saving the model...
Validation loss decreased (0.374700 ===> 0.374700). Saving the model...
Validation loss decreased (0.374700 ===> 0.374700). Saving the model...
Validation loss decreased (0.374700 ===> 0.374700). Saving the model...
Validation loss decreased (0.374700 ===> 0.374700). Saving the model...
Validation loss decreased (0.374700 ===> 0.374700). Saving the model...
Validation loss decreased (0.374700 ===> 0.374700). Saving the model...
Validation loss decreased (0.374700 ===> 0.374700). Saving the model...
Validation loss decreased (0.374700 ===> 0.374700). Saving the model...
Validation loss decreased (0.374700 ===> 0.374700). Saving the m

# predictions

In [9]:
X_test_var = Variable(torch.FloatTensor(X_test), requires_grad=False) 
with torch.no_grad():
    test_result = model(X_test_var)
values, labels = torch.max(test_result, 1)
survived = labels.data.numpy()

In [10]:
accuracy_score(y_test, survived)

0.7932203389830509