In [1]:
import torch
import numpy as np
import pandas as pd
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import accuracy_score

In [2]:
# define custom dataset class for Titanic CSV dataset
class TitanicDataset(Dataset):
    """Map-style dataset backed by a preprocessed Titanic CSV file.

    Every column except ``'Survived'`` is treated as a feature; the
    ``'Survived'`` column is the target.
    """

    def __init__(self, path):
        """Read the CSV at ``path`` and separate features from the target.

        Args:
            path: location of a CSV file containing a ``'Survived'`` column.
        """
        data = pd.read_csv(path)
        self.X = data.drop('Survived', axis=1)
        self.y = data['Survived']

    def __len__(self):
        """Return the number of rows in the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``[features, target]`` for row ``idx`` as float32 values.

        ``idx`` is a position (0 .. len-1), which is what samplers and
        ``random_split`` hand out, so positional ``iloc`` is used rather than
        label-based lookup.
        """
        return [
            self.X.iloc[idx].values.astype(np.float32),
            self.y.iloc[idx].astype(np.float32)
        ]

    def get_splits(self, n_train=0.8):
        """Randomly split this dataset into training and validation subsets.

        Args:
            n_train: fraction of the rows (between 0 and 1) assigned to the
                training subset; the remainder becomes the validation subset.

        Returns:
            The ``[train_subset, valid_subset]`` list produced by
            ``random_split``.

        Raises:
            ValueError: if ``n_train`` is outside ``[0, 1]``.
        """
        if not 0.0 <= n_train <= 1.0:
            raise ValueError("n_train must be between 0 and 1, got " + str(n_train))
        # Split *this* dataset; do not rely on any module-level variable.
        train_size = int(n_train * len(self))
        valid_size = len(self) - train_size
        return random_split(self, [train_size, valid_size])

In [3]:
# load training and validation datasets
# Seed torch's global generator so random_split yields the same partition on
# every Restart & Run All; otherwise the validation rows change run to run.
torch.manual_seed(42)
train_data = TitanicDataset('../data/preprocessed_train.csv')
# NOTE: train_data is rebound here from the full dataset to its training subset.
train_data, valid_data = train_data.get_splits()

In [4]:
# wrap both subsets in loaders: small shuffled batches for training,
# one large fixed-order batch size for validation
train_dl = DataLoader(dataset=train_data, shuffle=True, batch_size=32)
valid_dl = DataLoader(dataset=valid_data, shuffle=False, batch_size=1024)

In [57]:
# define neural network model class
class MLP(nn.Module):
    """Fully connected binary classifier.

    Five linear layers (``n_inputs`` -> 1024 -> 256 -> 128 -> 16 -> 1) with a
    ReLU after each hidden layer, followed by a sigmoid on the single output.
    """

    def __init__(self, n_inputs):
        """Build the layer stack for inputs with ``n_inputs`` features."""
        super().__init__()
        widths = [n_inputs, 1024, 256, 128, 16]
        stack = []
        # hidden layers: Linear followed by ReLU, created in input-to-output order
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        # output layer: a single unit with no ReLU after it
        stack.append(nn.Linear(widths[-1], 1))
        self.layer = nn.Sequential(*stack)
        self.activation = nn.Sigmoid()

    def forward(self, X):
        """Return the sigmoid of the layer stack's output for the batch ``X``."""
        return self.activation(self.layer(X))

In [58]:
# instantiate the network for 10 input features and display its structure
model = MLP(n_inputs=10)
model

MLP(
  (layer): Sequential(
    (0): Linear(in_features=10, out_features=1024, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1024, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=16, bias=True)
    (7): ReLU()
    (8): Linear(in_features=16, out_features=1, bias=True)
  )
  (activation): Sigmoid()
)

In [61]:
# train model
# NOTE(review): 1e-7 is several orders of magnitude below Adam's default
# learning rate of 1e-3 — confirm this is intentional.
lr = 1e-7
epochs = 100

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

# Re-running this cell must undo the eval() mode set by the validation step below.
model.train()
for epoch in range(epochs):
    for inputs, targets in train_dl:
        optimizer.zero_grad()
        # Flatten (batch, 1) -> (batch,). A bare squeeze() would also drop the
        # batch dimension when a batch holds a single row, and BCELoss would
        # then reject the shape mismatch with the targets.
        preds = model(inputs).reshape(-1)
        loss = criterion(preds, targets)
        loss.backward()
        optimizer.step()
    if (epoch+1)%10 == 0:
        # This is the loss of the last mini-batch of the epoch only.
        print(loss)

# validate model
model.eval()
all_preds, all_targets = [], []
# No gradients are needed for evaluation, so skip building the autograd graph.
with torch.no_grad():
    for inputs, targets in valid_dl:
        # round() turns the sigmoid output into hard 0/1 predictions
        preds = model(inputs).reshape(-1, 1).numpy().round()
        all_preds.append(preds)
        all_targets.append(targets.reshape(-1, 1).numpy())
all_preds, all_targets = np.vstack(all_preds), np.vstack(all_targets)
acc = accuracy_score(all_targets, all_preds)
print("\nAccuracy Score: " + str(acc*100) + "%")

tensor(0.2401, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.5917, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.2161, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.4697, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6100, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.2165, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.4069, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.5940, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.3140, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.5757, grad_fn=<BinaryCrossEntropyBackward>)

Accuracy Score: 75.97765363128491%


In [62]:
# read the preprocessed test set and convert all of its columns to a float32 tensor
# NOTE(review): every column is passed to the model, including PassengerId
# (which is read again when the submission is built) — confirm the training
# CSV has the same feature columns.
test_df = pd.read_csv(filepath_or_buffer='../data/preprocessed_test.csv')
test_data = torch.tensor(test_df.to_numpy(), dtype=torch.float32)

In [63]:
# Predict on the test set. Inference needs no gradients, so run it under
# no_grad and in eval mode instead of building an autograd graph.
model.eval()
with torch.no_grad():
    # round() thresholds the sigmoid output to 0/1; reshape(-1) flattens the
    # (n, 1) model output so the column is assigned a plain 1-D array.
    survived = model(test_data).round().int().reshape(-1).numpy()

# Build the frame in one step instead of growing an empty DataFrame.
submission = pd.DataFrame({
    'PassengerId': test_df['PassengerId'],
    'Survived': survived,
})
submission

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,0
...,...,...
413,1305,0
414,1306,1
415,1307,0
416,1308,0


In [64]:
# write the predictions to disk, leaving out the DataFrame index column
submission.to_csv(path_or_buf='../submissions/neural_network.csv', index=False)