In [1]:
# All Required imports
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
device = "cuda" if torch.cuda.is_available() else "cpu"

In [25]:
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

In [26]:
def preProcessData(data,is_test=False):
    test_df = data.drop(['Ticket','Name','Cabin'], axis=1)
    test_df['Female'] = test_df['Sex'] == 'female'
    test_df = test_df.drop('Sex', axis=1)
    test_df['Embarked'] = test_df['Embarked'] == 'S'
    if not is_test:
        test_df = test_df.dropna()
    return test_df

In [27]:
class TitanicDataset(Dataset):
    def __init__(self, data_set):
        self.data_set = data_set
    def __len__(self):
        return len(self.data_set)

    def __getitem__(self, idx):
#         print(f"Index: {idx}")
        data = torch.tensor(self.data_set.iloc[idx, ~self.data_set.columns.isin(['Survived','PassengerId'])], dtype=torch.float32)
        label = nn.functional.one_hot(torch.tensor(self.data_set.iloc[idx, self.data_set.columns == 'Survived'], dtype=torch.int64),num_classes=2)
        return data, label

In [44]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork,self).__init__()
        self.simple_stack = nn.Sequential(
            nn.Linear(7, 32),
            nn.ReLU(),
            nn.Linear(32, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 2),
        )
        
    def forward(self, x):
        x = self.simple_stack(x)
        return x

In [66]:
num_epochs = 200
batch_size = 2
lr = 0.0007

In [67]:
ptrain_data = TitanicDataset(preProcessData(train_data))
ptest_data = TitanicDataset(preProcessData(test_data, is_test=True))
print(ptest_data.data_set.iloc[0,~ptest_data.data_set.columns.isin(['Survived','PassengerId'])])
print(test_data)
train_dataloader = DataLoader(ptrain_data, batch_size=1, shuffle=True)
test_dataloader = DataLoader(ptest_data, batch_size=4, shuffle=True)

Pclass           3
Age           34.5
SibSp            0
Parch            0
Fare        7.8292
Embarked     False
Female       False
Name: 0, dtype: object
     PassengerId  Pclass                                          Name  \
0            892       3                              Kelly, Mr. James   
1            893       3              Wilkes, Mrs. James (Ellen Needs)   
2            894       2                     Myles, Mr. Thomas Francis   
3            895       3                              Wirz, Mr. Albert   
4            896       3  Hirvonen, Mrs. Alexander (Helga E Lindqvist)   
..           ...     ...                                           ...   
413         1305       3                            Spector, Mr. Woolf   
414         1306       1                  Oliva y Ocana, Dona. Fermina   
415         1307       3                  Saether, Mr. Simon Sivertsen   
416         1308       3                           Ware, Mr. Frederick   
417         1309       3      

In [70]:
model = NeuralNetwork().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.7)
loss_fn = nn.CrossEntropyLoss()

In [None]:
for epoch in range(num_epochs):
    for batch, (X, y) in enumerate(ptrain_data):
        # Compute prediction and loss
            pred = model(X.to(device))
            loss = loss_fn(pred, y[0].to(device).type(torch.float32))

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

#             if batch % 10 == 0:
#                 loss, current = loss.item(), batch * len(X)
#                 print(f"loss: {loss:>7f} ")
    print(f"Epoch {epoch}, Loss: {loss}")

Epoch 0, Loss: 0.2616930305957794
Epoch 1, Loss: 0.28006258606910706
Epoch 2, Loss: 0.25322332978248596
Epoch 3, Loss: 0.25371211767196655
Epoch 4, Loss: 0.2442663162946701
Epoch 5, Loss: 0.24345530569553375
Epoch 6, Loss: 0.24142445623874664
Epoch 7, Loss: 0.23993222415447235
Epoch 8, Loss: 0.24295513331890106
Epoch 9, Loss: 0.2520650625228882
Epoch 10, Loss: 0.2514893710613251
Epoch 11, Loss: 0.24763405323028564
Epoch 12, Loss: 0.24609887599945068
Epoch 13, Loss: 0.24031983315944672
Epoch 14, Loss: 0.21751776337623596
Epoch 15, Loss: 0.2171425074338913
Epoch 16, Loss: 0.21179543435573578
Epoch 17, Loss: 0.1995636373758316
Epoch 18, Loss: 0.1872711181640625
Epoch 19, Loss: 0.18471603095531464
Epoch 20, Loss: 0.1818184107542038
Epoch 21, Loss: 0.1781008392572403
Epoch 22, Loss: 0.16804197430610657
Epoch 23, Loss: 0.16827210783958435
Epoch 24, Loss: 0.16571682691574097
Epoch 25, Loss: 0.1509459763765335
Epoch 26, Loss: 0.14339575171470642
Epoch 27, Loss: 0.14329174160957336
Epoch 28, Lo

In [58]:
pass_id = []
survived = []

for batch, (X, y) in enumerate(ptest_data):
    # Compute prediction and loss
    pred = model(X.to(device))

#     print(f"{ptest_data.data_set.iloc[batch, ptest_data.data_set.columns == 'PassengerId' ]['PassengerId']} softmax: {torch.argmax(nn.Softmax()(pred))}")
    pass_id.append(ptest_data.data_set.iloc[batch, ptest_data.data_set.columns == 'PassengerId' ]['PassengerId'])
    survived.append(int(torch.argmax(nn.Softmax()(pred))))

solution = pd.DataFrame()
solution['PassengerId'] = pass_id
solution['Survived'] = survived

print(solution)
solution.to_csv("solution.csv", index=False)

  # Remove the CWD from sys.path while we load stuff.


     PassengerId  Survived
0            892         0
1            893         0
2            894         0
3            895         0
4            896         0
..           ...       ...
413         1305         0
414         1306         1
415         1307         0
416         1308         0
417         1309         0

[418 rows x 2 columns]
