## Neural Network (Score: 0.70574)

In [1]:
import torch
import numpy as np
import pandas as pd
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import accuracy_score

In [2]:
# define custom dataset class for Titanic CSV dataset
class TitanicDataset(Dataset):
    def __init__(self, path):
        data = pd.read_csv(path)
        self.X = data.drop('Survived', axis=1)
        self.y = data['Survived']

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return [
            self.X.loc[idx].values.astype(np.float32),
            self.y[idx].astype(np.float32)
        ]
    
    def get_splits(self, n_train=0.8):
        train_size = int(0.8 * len(train_data))
        valid_size = len(train_data) - train_size
        return random_split(train_data, [train_size, valid_size])

In [3]:
# load training and validation datasets
train_data = TitanicDataset('../data/preprocessed_train.csv')
train_data, valid_data = train_data.get_splits()

In [4]:
# prepare training and validation data loaders
train_dl = DataLoader(train_data, batch_size=32, shuffle=True)
valid_dl = DataLoader(valid_data, batch_size=1024, shuffle=False)

In [86]:
# define neural network model
class DenseLayer(nn.Module):
    def __init__(self, n_inputs, n_outputs, activation=nn.ReLU):
        super(DenseLayer, self).__init__()
        self.layer = nn.Linear(n_inputs, n_outputs)
        self.activation = activation()
        
    def forward(self, X):
        X = self.layer(X)
        X = self.activation(X)
        return X

class MLP(nn.Module):
    def __init__(self, n_inputs):
        super(MLP, self).__init__()
        self.layer = nn.Sequential(
            DenseLayer(n_inputs, 128),
            DenseLayer(128, 64),
            DenseLayer(64, 32),
            DenseLayer(32, 16),
            DenseLayer(16, 1, activation=nn.Sigmoid)
        )
        
    def forward(self, X):
        X = self.layer(X)
        return X
    
    def validate(self, dl):
        all_preds, all_targets = [], []
        for i, (inputs, targets) in enumerate(dl):
            preds = model(inputs).reshape(-1, 1).detach().numpy().round()
            targets = targets.reshape(-1, 1)
            all_preds.append(preds)
            all_targets.append(targets)
        all_preds, all_targets = np.vstack(all_preds), np.vstack(all_targets)
        acc = accuracy_score(all_targets, all_preds)
        return acc

In [87]:
# create neural network model
model = MLP(10)
model

MLP(
  (layer): Sequential(
    (0): DenseLayer(
      (layer): Linear(in_features=10, out_features=128, bias=True)
      (activation): ReLU()
    )
    (1): DenseLayer(
      (layer): Linear(in_features=128, out_features=64, bias=True)
      (activation): ReLU()
    )
    (2): DenseLayer(
      (layer): Linear(in_features=64, out_features=32, bias=True)
      (activation): ReLU()
    )
    (3): DenseLayer(
      (layer): Linear(in_features=32, out_features=16, bias=True)
      (activation): ReLU()
    )
    (4): DenseLayer(
      (layer): Linear(in_features=16, out_features=1, bias=True)
      (activation): Sigmoid()
    )
  )
)

In [88]:
# hyperparameters
lr = 1e-3
epochs = 100

# loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

# train/validate model
for epoch in range(1, epochs+1):
    # train model
    for i, (inputs, targets) in enumerate(train_dl):
        optimizer.zero_grad()
        preds = model(inputs).squeeze()
        loss = criterion(preds, targets)
        loss.backward()
        optimizer.step()
        
    # validate model
    if epoch%10 == 0:
        print(str(epoch) + "/" + str(epochs)  + " Epochs: ")
        print("Loss: " + str(loss.detach().numpy()))
        acc = model.validate(valid_dl)
        print("Validation Accuracy Score: " + str(acc*100) + "%\n")

10/100 Epochs: 
Loss: 0.42981446
Validation Accuracy Score: 50.27932960893855%

20/100 Epochs: 
Loss: 0.6129082
Validation Accuracy Score: 65.36312849162012%

30/100 Epochs: 
Loss: 0.43251526
Validation Accuracy Score: 67.59776536312849%

40/100 Epochs: 
Loss: 0.6966103
Validation Accuracy Score: 68.15642458100558%

50/100 Epochs: 
Loss: 0.39492115
Validation Accuracy Score: 68.71508379888269%

60/100 Epochs: 
Loss: 0.73012197
Validation Accuracy Score: 69.27374301675978%

70/100 Epochs: 
Loss: 0.32848075
Validation Accuracy Score: 70.39106145251397%

80/100 Epochs: 
Loss: 0.8577157
Validation Accuracy Score: 72.06703910614524%

90/100 Epochs: 
Loss: 0.46775386
Validation Accuracy Score: 72.62569832402235%

100/100 Epochs: 
Loss: 0.3566735
Validation Accuracy Score: 75.41899441340783%



In [62]:
# load test dataset
test_df = pd.read_csv('../data/preprocessed_test.csv')
test_data = torch.tensor(test_df.values).float()

In [63]:
submission = pd.DataFrame()
submission['PassengerId'] = test_df['PassengerId']
submission['Survived'] = model(test_data).round().int().numpy()
submission

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,0
...,...,...
413,1305,0
414,1306,1
415,1307,0
416,1308,0


In [64]:
# Create submission file
submission.to_csv('../submissions/neural_network.csv', index=False)