In [342]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import data as torch_data
from sklearn.model_selection import KFold

In [396]:
# Read the training table; missing entries are replaced with 0.
df = pd.read_csv("train.csv").fillna(0)
train_data = df.values

# Columns 1-26 form the feature matrix and column 27 holds the integer label.
# Column 0 is skipped (presumably a row identifier — verify against the CSV).
X, y = train_data[:, 1:27], train_data[:, 27].astype(int)
N = len(y)

In [397]:
# One minus the mean of the labels. Assuming y is a 0/1 indicator (TODO confirm),
# this is the share of rows labelled 0, i.e. the accuracy of always predicting 0.
1 - y.sum() / N

0.6430416286842905

In [398]:
# Two-layer classifier: 26 input features -> 100 hidden units -> 2 class scores.
# The network deliberately ends with raw logits: nn.CrossEntropyLoss applies
# log-softmax itself, so a trailing nn.Softmax would normalise twice and
# flatten the gradients. argmax over logits gives the same predicted class as
# argmax over probabilities; apply F.softmax(model(x), dim=1) explicitly when
# probabilities are needed.
model = nn.Sequential(
    nn.Linear(26, 100),
    nn.ReLU(),
    nn.Linear(100, 2),
)

In [399]:
def train_model(model, optimizer, n_epochs, stop_thresh):
    """Train ``model`` on 5-fold splits of the module-level ``X`` / ``y`` arrays.

    For every epoch the rows ``X[0:N]`` are split into 5 folds with ``KFold``
    (no shuffling). For each fold the model is optimised on the training part
    in mini-batches of 100 and then evaluated with ``test_model`` on both the
    training part and the held-out part.

    Note: the same ``model`` and ``optimizer`` are carried from fold to fold,
    so from the second fold on the held-out rows have already been seen during
    training in an earlier fold.

    Args:
        model: network mapping a float feature tensor to per-class scores.
        optimizer: optimizer constructed over ``model.parameters()``.
        n_epochs: number of passes over the complete set of folds.
        stop_thresh: accepted for interface compatibility; currently unused.

    Returns:
        None. Metrics are printed by ``test_model``.
    """
    loss_fn = nn.CrossEntropyLoss()
    batch_size = 100
    # Without shuffling the splitter is stateless, so one instance serves
    # every epoch.
    kf = KFold(n_splits=5)

    for epoch in range(n_epochs):
        for train_index, val_index in kf.split(X[0:N]):
            # Wrap the fold's training rows as float features / int64 labels.
            train_dataset = torch_data.TensorDataset(
                torch.Tensor(X[train_index]),
                torch.tensor(y[train_index]).long())
            train_loader = torch_data.DataLoader(
                train_dataset, batch_size=batch_size, shuffle=False)

            # Same wrapping for the held-out rows of this fold.
            val_dataset = torch_data.TensorDataset(
                torch.Tensor(X[val_index]),
                torch.tensor(y[val_index]).long())
            val_loader = torch_data.DataLoader(
                val_dataset, batch_size=batch_size, shuffle=True)

            # Evaluation may leave the network in eval mode, so training mode
            # has to be re-enabled for every fold, not just once up front.
            model.train()
            for data, target in train_loader:
                optimizer.zero_grad()            # erase accumulated gradients
                output = model(data)             # forward pass
                loss = loss_fn(output, target)   # calculate loss
                loss.backward()                  # backward pass
                optimizer.step()                 # weight update

            test_model(model, train_loader, "Training set")
            test_model(model, val_loader, "Validation set")

In [None]:
# Adam over all model parameters, then one training epoch.
# The final argument is train_model's stop_thresh.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
train_model(model, optimizer, n_epochs=1, stop_thresh=97.5)

In [395]:
def test_model(model, test_loader, set_name):
    
    loss_fn = nn.CrossEntropyLoss()
    model.eval()
    test_loss = 0
    correct = 0

    # Turning off automatic differentiation
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            test_loss += loss_fn(output, target).item()  # Sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # Get the index of the max class score
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('%s: Average loss: %.4f, Accuracy: %d/%d (%.4f)' %
          (set_name, test_loss, correct, len(test_loader.dataset),
           100. * correct / len(test_loader.dataset)))
    return 100. * correct / len(test_loader.dataset)

In [None]:
# Evaluate the model on the whole training table. test_model requires a
# DataLoader and a label for its printed report; it has no print_output option.
full_loader = torch_data.DataLoader(
    torch_data.TensorDataset(torch.Tensor(X), torch.tensor(y).long()),
    batch_size=100, shuffle=False)
test_model(model, full_loader, "Full training set")