In [1]:
import warnings
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
from torch.optim import Adam

BATCH = 128
SEED = 57
LR = 6e-3

torch.manual_seed(SEED)
warnings.filterwarnings('ignore')

In [2]:
df_tv = pd.read_csv('data/train_preprocessed_meta.csv', index_col='PassengerId')
df_tt = pd.read_csv('data/test_preprocessed_meta.csv', index_col='PassengerId')

In [3]:
ind_cols = ['lr', 'svc', 'lsvc', 'gnb', 'mnb', 'knn', 'dt', 'rf', 'ada', 'gb', 'sgd', 'xgb']
dep_col = 'Survived'

In [4]:
class Net(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(12, 200),
            nn.BatchNorm1d(200),
            nn.ReLU(),
            nn.Dropout1d(0.2),

            nn.Linear(200, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(),
            nn.Dropout1d(0.2),

            nn.Linear(100, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.net(x)

In [5]:
def calculate_accuracy(fold):
    df_tr = df_tv[df_tv.kfold != fold]
    df_vl = df_tv[df_tv.kfold == fold]

    t_xtr, t_ytr = torch.tensor(df_tr[ind_cols].values, dtype=torch.float32), torch.tensor(df_tr[dep_col].values, dtype=torch.float32)
    t_xvl, t_yvl = torch.tensor(df_vl[ind_cols].values, dtype=torch.float32), torch.tensor(df_vl[dep_col].values, dtype=torch.float32)

    dl_tr = DataLoader(TensorDataset(t_xtr, t_ytr), batch_size=BATCH, shuffle=True)
    dl_vl = DataLoader(TensorDataset(t_xvl, t_yvl), batch_size=BATCH)

    model = Net()
    optimizer = Adam(model.parameters(), lr=LR)
    criterion = nn.BCELoss()

    vl_losses = []
    vl_accues = []

    for e in range(20):
        model.train()
        vl_loss = 0
        vl_acc = 0

        # Traning
        for xtr, ytr in dl_tr:
            ytr = ytr.unsqueeze(1)
            ypd = model(xtr)
            loss = criterion(ypd, ytr)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()



        # Model evaluation
        model.eval()
        with torch.no_grad():
            for xvl, yvl in dl_vl:
                yvl = yvl.unsqueeze(1)
                ypd = model(xvl)
                loss = criterion(ypd, yvl)
                acc = torch.tensor((torch.round(ypd) == yvl).sum().item() / ypd.shape[0])
                vl_loss += loss.item()
                vl_acc += acc.item()

        vl_loss /= len(dl_vl)
        vl_acc /= len(dl_vl)


        vl_losses.append(vl_loss)
        vl_accues.append(vl_acc)

    print(f'FOLD[{fold}] ==> valid_loss: {format(np.mean(vl_losses), ".3f")} | valid_acc: {format(np.mean(vl_accues), ".3f")}')
    return np.mean(vl_accues)

In [6]:
print('####===> average accuracy', np.mean([calculate_accuracy(fold) for fold in range(5)]))

FOLD[0] ==> valid_loss: 0.402 | valid_acc: 0.877
FOLD[1] ==> valid_loss: 0.463 | valid_acc: 0.833
FOLD[2] ==> valid_loss: 0.399 | valid_acc: 0.865
FOLD[3] ==> valid_loss: 0.406 | valid_acc: 0.869
FOLD[4] ==> valid_loss: 0.410 | valid_acc: 0.865
####===> average accuracy 0.8617746311426163
