In [1]:
import warnings
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
from torch.optim import Adam

# --- Run configuration -------------------------------------------------
SEED = 57      # seed for torch's global random number generator
BATCH = 128    # mini-batch size handed to the training DataLoader
LR = 0.006     # learning rate handed to the Adam optimizer

# Suppress every Python warning for the remainder of the session.
warnings.filterwarnings('ignore')
# Seed torch's global RNG so runs start from the same random state.
torch.manual_seed(SEED)

In [2]:
# Read the train file into df_tv and the test file into df_tt (in that order),
# both indexed by the PassengerId column.
df_tv, df_tt = (
    pd.read_csv(csv_path, index_col='PassengerId')
    for csv_path in (
        'data/train_preprocessed_meta.csv',
        'data/test_preprocessed_meta.csv',
    )
)

In [3]:
# Input (independent) columns selected from the data frames as model features.
# NOTE(review): names look like per-base-classifier prediction columns
# (logistic regression, SVC, ...) — confirm against the preprocessing step.
ind_cols = [
    'lr', 'svc', 'lsvc',
    'gnb', 'mnb', 'knn',
    'dt', 'rf', 'ada',
    'gb', 'sgd', 'xgb',
]

# Target (dependent) column.
dep_col = 'Survived'

In [4]:
class Net(nn.Module):
    """Fully connected binary classifier that outputs a probability.

    Architecture: Linear(in_features, 200) -> BatchNorm -> ReLU -> Dropout
    -> Linear(200, 100) -> BatchNorm -> ReLU -> Dropout -> Linear(100, 1)
    -> Sigmoid.

    Args:
        in_features: Number of input features per sample (default 12).
        dropout: Probability of zeroing each hidden activation during
            training (default 0.2).

    Input to ``forward`` is a ``(batch, in_features)`` float tensor; the
    output is a ``(batch, 1)`` tensor of values in [0, 1].
    """

    def __init__(self, in_features: int = 12, dropout: float = 0.2) -> None:
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_features, 200),
            nn.BatchNorm1d(200),
            nn.ReLU(),
            # Element-wise dropout. nn.Dropout1d is meant for (N, C, L) inputs;
            # given a 2-D (batch, features) tensor it treats the tensor as an
            # unbatched (C, L) input and zeroes whole rows, i.e. it wipes out
            # every activation of randomly chosen samples.
            nn.Dropout(dropout),

            nn.Linear(200, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(),
            nn.Dropout(dropout),

            nn.Linear(100, 1),
            # Sigmoid keeps the output a probability, as expected by BCELoss.
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return the predicted probability for each row of ``x``."""
        return self.net(x)

In [5]:
def _to_float_tensor(frame):
    """Copy the values of a DataFrame/Series into a float32 tensor."""
    return torch.tensor(frame.values, dtype=torch.float32)


# Training features / targets and test features as float32 tensors.
t_xtr = _to_float_tensor(df_tv[ind_cols])
t_ytr = _to_float_tensor(df_tv[dep_col])
t_xtt = _to_float_tensor(df_tt[ind_cols])

# Shuffled mini-batches of (features, target) pairs for training.
dl_tr = DataLoader(
    TensorDataset(t_xtr, t_ytr),
    batch_size=BATCH,
    shuffle=True,
)

In [6]:
# Model, optimizer and loss. BCELoss expects probabilities, which Net's
# final Sigmoid provides.
model = Net()
optimizer = Adam(model.parameters(), lr=LR)
criterion = nn.BCELoss()

N_EPOCHS = 20  # number of full passes over the training DataLoader

# Training mode: BatchNorm uses batch statistics and Dropout is active.
model.train()
for epoch in range(N_EPOCHS):
    for batch_x, batch_y in dl_tr:
        # Targets arrive as (batch,); the model outputs (batch, 1).
        target = batch_y.unsqueeze(1)
        loss = criterion(model(batch_x), target)
        loss.backward()
        optimizer.step()
        # Clear gradients after the update so the next batch starts fresh.
        optimizer.zero_grad()


In [7]:
# Evaluation mode: BatchNorm uses its running statistics and Dropout is
# disabled, so predictions are deterministic.
model.eval()
# Inference only: disable autograd so no computation graph is built for the
# test-set forward pass.
with torch.no_grad():
    ypd = model(t_xtt).squeeze(1).tolist()
# Turn each predicted probability into a 0/1 label with Python's round().
df_tt['Survived'] = [round(prob) for prob in ypd]
df_tt.head(10)

Unnamed: 0_level_0,Pclass_2nd,Pclass_3rd,Sex_male,Fare_low,Fare_medium,Fare_v_high,Cabin_B,Cabin_C,Cabin_D,Cabin_E,...,gnb,mnb,knn,dt,rf,ada,gb,sgd,xgb,Survived
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
892,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
893,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1,1,0,0,0,1,0,1,0,1
894,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1,0,0,0,0,0,0,0,0,0
895,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
896,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1,1,1,0,1,1,1,1,1,1
897,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
898,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1,1,1,0,1,1,1,1,1,1
899,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,1,0,0,0,0,0,0,0,0,0
900,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1,1,1,0,1,1,1,1,1,1
901,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1,0,0,0,0,0,0,0,0,0


In [8]:
# Write only the predicted 'Survived' column (plus the frame's index) to disk.
df_tt[['Survived']].to_csv('prediction.csv')