In [18]:
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import torchvision
from torchvision import datasets, models, transforms
from torchvision.transforms import functional as F
import random
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
import time
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold

batch_size = 32
train_epochs = 6

train = pd.read_csv('/content/gdrive/My Drive/Colab Notebooks/data/titanic.csv')
test = pd.read_csv('/content/gdrive/My Drive/Colab Notebooks/data/titanic_test.csv')
target = train["Survived"]

train = pd.concat([train, test], sort=True)

def get_text_features(train):
    train['Length_Name'] = train['Name'].astype(str).map(len)
    return train

train = get_text_features(train)

cols = ['Cabin','Embarked','Name','Sex','Ticket',]

num_cols = list(set(train.columns) - set(cols) - set(["Survived"]))

def encode(encoder, x):
    len_encoder = len(encoder)
    try:
        id = encoder[x]
    except KeyError:
        id = len_encoder
    return id

encoders = [{} for col in cols]


for i, col in enumerate(cols):
    print('encoding %s ...' % col, end=' ')
    encoders[i] = {l: id for id, l in enumerate(train.loc[:, col].astype(str).unique())}
    train[col] = train[col].astype(str).apply(lambda x: encode(encoders[i], x))
    print('Done')

embed_sizes = [len(encoder) for encoder in encoders]

train[num_cols] = train[num_cols].fillna(0)

scaler = StandardScaler()
train[num_cols] = scaler.fit_transform(train[num_cols])

class Model(nn.Module):
    def __init__(self, in_features, out_features, bias=True, p=0.5):
        super().__init__()
        self.linear = nn.Linear(in_features, out_features, bias)
        self.relu = nn.ReLU()
        self.drop = nn.Dropout(p)
        
    def forward(self, x):
        x = self.linear(x)
        x = self.relu(x)
        x = self.drop(x)
        return x

net = nn.Sequential(Model(12, 32), nn.Linear(32, 1)) 

X_train = train.loc[np.isfinite(train.Survived), :]
X_train = X_train.drop(["Survived"], axis=1).values
Y_train = target.values

X_test = train.loc[~np.isfinite(train.Survived), :]

X_test = X_test.drop(["Survived"], axis=1).values      
splits = list(StratifiedKFold(n_splits=5, shuffle=True, random_state=None).split(X_train, Y_train))

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

train_preds = np.zeros((len(X_train)))
test_preds = np.zeros((len(X_test)))


x_test_cuda = torch.tensor(X_test, dtype=torch.float32)
test = torch.utils.data.TensorDataset(x_test_cuda)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)

for i, (train_idx, valid_idx) in enumerate(splits):
    x_train_fold = torch.tensor(X_train[train_idx], dtype=torch.float32)
    y_train_fold = torch.tensor(Y_train[train_idx, np.newaxis], dtype=torch.float32)
    x_val_fold = torch.tensor(X_train[valid_idx], dtype=torch.float32)
    y_val_fold = torch.tensor(Y_train[valid_idx, np.newaxis], dtype=torch.float32)
    
    model = net    
    loss_fn = torch.nn.BCEWithLogitsLoss(reduction="sum")
    optimizer = torch.optim.Adam(model.parameters())
    
    train = torch.utils.data.TensorDataset(x_train_fold, y_train_fold)
    valid = torch.utils.data.TensorDataset(x_val_fold, y_val_fold)
    
    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=False)
    
    print(f'Fold {i + 1}')
    
    for epoch in range(train_epochs):
        
        model.train()
        avg_loss = 0.
        for x_batch, y_batch in tqdm(train_loader, disable=True):
            y_pred = model(x_batch)
            loss = loss_fn(y_pred, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            avg_loss += loss.item() / len(train_loader)
        
        model.eval()
        valid_preds_fold = np.zeros((x_val_fold.size(0)))
        test_preds_fold = np.zeros(len(X_test))
        avg_val_loss = 0.
        for i, (x_batch, y_batch) in enumerate(valid_loader):
            y_pred = model(x_batch).detach()
            avg_val_loss += loss_fn(y_pred, y_batch).item() / len(valid_loader)
            valid_preds_fold[i * batch_size:(i+1) * batch_size] = sigmoid(y_pred.cpu().numpy())[:, 0]
        
        print('Epoch {}/{} \t loss={:.4f} \t val_loss={:.4f}'.format(epoch + 1, train_epochs, avg_loss, avg_val_loss))
        
    for i, (x_batch,) in enumerate(test_loader):
        y_pred = model(x_batch).detach()

        test_preds_fold[i * batch_size:(i+1) * batch_size] = sigmoid(y_pred.cpu().numpy())[:, 0]

    train_preds[valid_idx] = valid_preds_fold
    test_preds += test_preds_fold / len(splits)

encoding Cabin ... Done
encoding Embarked ... Done
encoding Name ... Done
encoding Sex ... Done
encoding Ticket ... Done
Fold 1
Epoch 1/6 	 loss=1678.7789 	 val_loss=1025.9364
Epoch 2/6 	 loss=792.5968 	 val_loss=135.7170
Epoch 3/6 	 loss=550.8131 	 val_loss=168.0948
Epoch 4/6 	 loss=424.7154 	 val_loss=135.4763
Epoch 5/6 	 loss=381.3714 	 val_loss=81.1264
Epoch 6/6 	 loss=325.0955 	 val_loss=65.8386
Fold 2
Epoch 1/6 	 loss=266.9927 	 val_loss=48.1998
Epoch 2/6 	 loss=211.0362 	 val_loss=54.1199
Epoch 3/6 	 loss=154.1299 	 val_loss=25.9360
Epoch 4/6 	 loss=112.5927 	 val_loss=28.7123
Epoch 5/6 	 loss=73.1350 	 val_loss=25.0544
Epoch 6/6 	 loss=57.5428 	 val_loss=22.4604
Fold 3
Epoch 1/6 	 loss=41.1001 	 val_loss=21.8561
Epoch 2/6 	 loss=28.4941 	 val_loss=22.5243
Epoch 3/6 	 loss=24.1932 	 val_loss=20.8069
Epoch 4/6 	 loss=23.3517 	 val_loss=18.7704
Epoch 5/6 	 loss=22.3143 	 val_loss=17.8407
Epoch 6/6 	 loss=21.6347 	 val_loss=19.4776
Fold 4
Epoch 1/6 	 loss=21.8632 	 val_loss=17.9547