## Neural Network (Score: 0.70574)

In [1]:
import os
import torch
import numpy as np
import pandas as pd
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import accuracy_score

DATA_PATH = '../data/'
MODEL_PARAMS_PATH = 'params/'
SUBMISSIONS_PATH = '../submissions/'

In [2]:
# define custom dataset class for Titanic CSV dataset
class TitanicDataset(Dataset):
    def __init__(self, path):
        data = pd.read_csv(path)
        self.X = data.drop('Survived', axis=1)
        self.y = data['Survived']

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return [
            self.X.loc[idx].values.astype(np.float32),
            self.y[idx].astype(np.float32)
        ]
    
    def get_splits(self, n_train=0.8):
        train_size = int(0.8 * len(train_data))
        valid_size = len(train_data) - train_size
        return random_split(train_data, [train_size, valid_size])

In [3]:
# load training and validation datasets
train_data = TitanicDataset(os.path.join(DATA_PATH + 'preprocessed_train.csv'))
train_data, valid_data = train_data.get_splits()

In [4]:
# prepare training and validation data loaders
train_dl = DataLoader(train_data, batch_size=32, shuffle=True)
valid_dl = DataLoader(valid_data, batch_size=1024, shuffle=False)

In [5]:
# define neural network model
class DenseLayer(nn.Module):
    def __init__(self, n_inputs, n_outputs, activation=nn.ReLU):
        super(DenseLayer, self).__init__()
        self.layer = nn.Linear(n_inputs, n_outputs)
        self.activation = activation()
        
    def forward(self, X):
        X = self.layer(X)
        X = self.activation(X)
        return X

class MLP(nn.Module):
    def __init__(self, n_inputs):
        super(MLP, self).__init__()
        self.layer = nn.Sequential(
            DenseLayer(n_inputs, 128),
            DenseLayer(128, 64),
            DenseLayer(64, 32),
            DenseLayer(32, 16),
            DenseLayer(16, 1, activation=nn.Sigmoid)
        )
        
    def forward(self, X):
        X = self.layer(X)
        return X
    
    def validate(self, dl):
        all_preds, all_targets = [], []
        for i, (inputs, targets) in enumerate(dl):
            preds = model(inputs).reshape(-1, 1).detach().numpy().round()
            targets = targets.reshape(-1, 1)
            all_preds.append(preds)
            all_targets.append(targets)
        all_preds, all_targets = np.vstack(all_preds), np.vstack(all_targets)
        acc = accuracy_score(all_targets, all_preds)
        return acc

In [6]:
# create model
model = MLP(10)

# hyperparameters
lr = 1e-3
epochs = 100

# loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

# train/validate model
best_model = model
lowest_loss = 10000
highest_valid_acc = 0

for epoch in range(1, epochs+1):
    # train model
    for i, (inputs, targets) in enumerate(train_dl):
        optimizer.zero_grad()
        preds = model(inputs).squeeze()
        loss = criterion(preds, targets)
        loss.backward()
        optimizer.step()
        
    # validate model
    if epoch%10 == 0:
        print(str(epoch) + "/" + str(epochs)  + " Epochs: ")
        print("Loss: " + str(loss.detach().numpy()))
        train_acc = model.validate(train_dl)
        valid_acc = model.validate(valid_dl)
        print("Training Accuracy Score: " + str(train_acc*100) + "%")
        print("Validation Accuracy Score: " + str(valid_acc*100) + "%\n")
        
        if loss < lowest_loss and valid_acc > highest_valid_acc:
            torch.save(model.state_dict(), os.path.join(MODEL_PARAMS_PATH, 'neural_network.pt'))

10/100 Epochs: 
Loss: 0.4058285
Training Accuracy Score: 66.29213483146067%
Validation Accuracy Score: 61.452513966480446%

20/100 Epochs: 
Loss: 0.69341767
Training Accuracy Score: 60.39325842696629%
Validation Accuracy Score: 60.33519553072626%

30/100 Epochs: 
Loss: 0.29570058
Training Accuracy Score: 71.20786516853933%
Validation Accuracy Score: 64.80446927374301%

40/100 Epochs: 
Loss: 0.61159146
Training Accuracy Score: 74.15730337078652%
Validation Accuracy Score: 67.0391061452514%

50/100 Epochs: 
Loss: 0.5422526
Training Accuracy Score: 74.15730337078652%
Validation Accuracy Score: 64.80446927374301%

60/100 Epochs: 
Loss: 0.34001803
Training Accuracy Score: 75.28089887640449%
Validation Accuracy Score: 68.71508379888269%

70/100 Epochs: 
Loss: 0.7440057
Training Accuracy Score: 78.79213483146067%
Validation Accuracy Score: 70.94972067039106%

80/100 Epochs: 
Loss: 0.22028174
Training Accuracy Score: 76.96629213483146%
Validation Accuracy Score: 67.59776536312849%

90/100 Epoc

In [7]:
# load test dataset
test_df = pd.read_csv(os.path.join(DATA_PATH, 'preprocessed_test.csv'))
test_data = torch.tensor(test_df.values).float()

In [8]:
# load best model and 
model.load_state_dict(torch.load(os.path.join(MODEL_PARAMS_PATH, 'neural_network.pt')))
model.eval()

# predict using test dataset
submission = pd.DataFrame()
submission['PassengerId'] = test_df['PassengerId']
submission['Survived'] = model(test_data).round().int().numpy()
submission

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,0
...,...,...
413,1305,0
414,1306,1
415,1307,0
416,1308,0


In [9]:
# create submission file
submission.to_csv(os.path.join(SUBMISSIONS_PATH, 'neural_network.csv'), index=False)