# Intro 

Titanic Classifier made with PyTorch.

Testing different NN architectures, learning rates, optimizers ...

# Imports

In [1]:
import pandas as pd
import numpy as np
import re

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Load and process training data

In [1]:
train_data = pd.read_csv('/kaggle/input/titanic/train.csv')

In [1]:
train_data.head()

In [1]:
# Filling age nulls with means based on their Pclass
def impute_age(cols):
    """Return a passenger's age, substituting a per-class default when missing.

    Args:
        cols: Row holding the age first and the passenger class second;
            either a pandas Series (as passed by ``DataFrame.apply`` with
            ``axis=1``) or a plain sequence.

    Returns:
        The original age when it is not null; otherwise 37 for class 1,
        29 for class 2 and 24 for any other class.
    """
    # Integer keys on a label-indexed Series are deprecated as positional
    # lookups, so go through ``.iloc`` whenever the row provides it.
    row = cols.iloc if hasattr(cols, "iloc") else cols
    age, pclass = row[0], row[1]

    if not pd.isnull(age):
        return age

    if pclass == 1:
        return 37
    if pclass == 2:
        return 29
    return 24
train_data['Age'] = train_data[['Age','Pclass']].apply(impute_age,axis=1)

In [1]:
train_data.drop('Cabin',axis=1,inplace=True)

In [1]:
train_data['Name']

In [1]:
# Extracting title from name col
def title(name):
    """Extract the honorific (e.g. ``"Mr."``) from a passenger name.

    Args:
        name: Full name string, e.g. ``"Braund, Mr. Owen Harris"``.

    Returns:
        The first run of ASCII letters that is immediately followed by a
        period, with the period included; an empty string when the name
        contains no such token.
    """
    pattern = r"([A-Za-z]+)\."
    found = re.search(pattern, name)
    # re.search returns None when nothing matches; subscripting None would
    # raise TypeError, so fall back to an empty title instead.
    return found[0] if found else ""

train_data['Title'] = train_data['Name'].apply(title)
train_data['Title'].value_counts()

In [1]:
# Marking unique titles
def unique_title(title):
    """Flag a title that is not one of the three common ones.

    Returns 0 for ``'Mr.'``, ``'Miss.'`` and ``'Mrs.'`` and 1 for any other
    value.
    """
    common_titles = ('Mr.', 'Miss.', 'Mrs.')
    if title in common_titles:
        return 0
    return 1
train_data['unique_title'] = train_data['Title'].apply(unique_title)
train_data['unique_title']

In [1]:
# Indicator-encode the two categorical columns; drop_first=True leaves out the
# first category of each, so k categories become k-1 indicator columns.
sex = pd.get_dummies(train_data['Sex'],drop_first=True)
embark = pd.get_dummies(train_data['Embarked'],drop_first=True)
# Drop the raw categorical columns together with Name, Title, Ticket and
# PassengerId, none of which are kept as model features.
train_data.drop(['Sex','Embarked','Name','Title','Ticket','PassengerId'],axis=1,inplace=True)
# Append the indicator columns to the right of the remaining columns.
train_data = pd.concat([train_data,sex,embark],axis=1)

In [1]:
train_data.head()

In [1]:
y = train_data['Survived'].values
X = train_data.drop('Survived',axis=1)

In [1]:
X.head()

In [1]:
# Fit the scaler on the feature frame and replace X with its scaled values in
# one call; the fitted scaler object is kept for transforming the test set.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# NNet and learning process

In [1]:
LEARNING_RATE = 0.0001
EPOCHS = 200

In [1]:
class Net(nn.Module):
    """Fully connected binary classifier: 9 inputs -> 10 -> 15 -> 15 -> 10 -> 1.

    A sigmoid is applied after ``fc2`` and again after the output layer
    ``fc4``, so the returned value lies between 0 and 1.

    NOTE(review): there is no activation between fc0/fc1/fc2 or between
    fc3/fc4, so each of those runs composes into a single affine map.
    """

    def __init__(self):
        super().__init__()
        # Consecutive pairs of widths define the layers fc0 .. fc4; they are
        # registered in that order.
        widths = (9, 10, 15, 15, 10, 1)
        for index, (n_in, n_out) in enumerate(zip(widths, widths[1:])):
            setattr(self, 'fc%d' % index, nn.Linear(n_in, n_out))

    def forward(self, x):
        """Run the forward pass and return the sigmoid-activated output."""
        hidden = torch.sigmoid(self.fc2(self.fc1(self.fc0(x))))
        return torch.sigmoid(self.fc4(self.fc3(hidden)))
    
net = Net().double()

In [1]:
criterion = nn.BCELoss()
optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)#, momentum=0.9)

In [1]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.1)

In [1]:
# Train with one sample per optimizer step and record the mean loss of every
# epoch in epoch_loss.
epoch_loss = []
n_samples = x_train.shape[0]

for epoch in range(EPOCHS):

    running_loss = 0.0

    for features, target in zip(x_train, y_train):
        # The network was cast to double, so inputs and labels must be float64.
        labels = torch.tensor([target], dtype=torch.float64)
        inputs = torch.from_numpy(features).double()

        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Average over the known sample count instead of the leaked loop index,
    # which would be undefined if the training set were empty.
    mean_loss = running_loss / n_samples if n_samples else float('nan')
    epoch_loss.append(mean_loss)
    print('Epochs: [%d/%d]: loss: %.5f' % (epoch+1,EPOCHS,mean_loss))


print('Finished Training')

# Check net performance

In [1]:
# Inference only: run the forward pass without autograd so no graph is built.
with torch.no_grad():
    predictions = net(torch.from_numpy(x_test).double())
# Threshold the sigmoid outputs at 0.5 to obtain hard class labels.
predictions = (predictions>=0.5).numpy().astype('float')

In [1]:
print(confusion_matrix(y_test,predictions))
print(classification_report(y_test,predictions))

# Load and preprocess test data

In [1]:
def preprocess(data):
    """Apply the same feature engineering used for the training frame.

    Imputes missing ages with ``impute_age``, derives the ``unique_title``
    flag from ``Name``, indicator-encodes ``Sex`` and ``Embarked`` (first
    category dropped) and removes the columns that are not model features.

    Args:
        data: DataFrame containing at least the Age, Pclass, Cabin, Name,
            Sex, Embarked, Ticket and PassengerId columns.

    Returns:
        A new DataFrame with the engineered features. The frame passed in
        is left unmodified.

    NOTE(review): only Age is imputed here; nulls in any other column are
    passed through unchanged -- confirm the input has none before scaling.
    """
    # Work on a copy so the caller's frame is not altered by the column
    # assignments below.
    data = data.copy()

    data['Age'] = data[['Age', 'Pclass']].apply(impute_age, axis=1)
    data['Title'] = data['Name'].apply(title)
    data['unique_title'] = data['Title'].apply(unique_title)

    sex = pd.get_dummies(data['Sex'], drop_first=True)
    embark = pd.get_dummies(data['Embarked'], drop_first=True)

    dropped = ['Cabin', 'Sex', 'Embarked', 'Name', 'Title', 'Ticket', 'PassengerId']
    data = data.drop(dropped, axis=1)
    return pd.concat([data, sex, embark], axis=1)

In [1]:
test_data = pd.read_csv('/kaggle/input/titanic/test.csv')
test_data = preprocess(test_data)
test_data = scaler.transform(test_data)

# Make predictions

In [1]:
# Inference only: run the forward pass without autograd so no graph is built.
with torch.no_grad():
    predictions = net(torch.from_numpy(test_data).double())
# Threshold the sigmoid outputs at 0.5 and store the labels as integers.
predictions = (predictions>=0.5).numpy().astype('int')

# Save output

In [1]:
sub = pd.DataFrame(predictions)
# Start from the sample submission file and overwrite its Survived column.
# The absolute path matches the other reads from /kaggle/input/titanic.
subs_d = pd.read_csv('/kaggle/input/titanic/gender_submission.csv')
# Assign the single prediction column as a plain array: this is positional
# and raises on a length mismatch instead of silently aligning on the index.
subs_d['Survived'] = sub[0].values
subs_d.to_csv('Submissions.csv', index = False)

# Check if all predictions aren't '0'

In [1]:
predictions.sum()/predictions.shape[0]