# Simple End-to-End Training

Here we experiment with a simple neural network to learn embeddings.

In [1]:
# General imports
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split

In [2]:
# Custom imports
import sys
sys.path.append("..")
from utils.metrics import Metrics
from utils.misc import set_random_seeds

In [3]:
# Retrieve cuda device
if torch.cuda.is_available():  
  dev = "cuda:0" 
else:  
  dev = "cpu"

device = torch.device(dev)  

In [4]:
# Global Variables
DATASET = "assistments09"
SPLIT = 0.8
BATCH_SIZE = 1000
EPOCHS = 100

SEED = 1
set_random_seeds(SEED)

## Dataset Definition

In [5]:
# Now create a dataset

class EducationDataset(Dataset):
    
    def __init__(self, name):
        self.name = name
        data_path = "../data/" + name + "/preprocessed_data.csv" 
        self.df = pd.read_csv(data_path, sep="\t")
       
        # renumber columns and drop unnecessary information
        self.df = self.df.drop(['timestamp', 'skill_id'], axis=1) # 'Unnamed: 0', 
        self.num_students = self._renumber_attribute(self.df, "user_id")
        self.num_items = self._renumber_attribute(self.df, "item_id")
        
    def __len__(self):
         return self.df.shape[0]
        
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        return row.user_id, row.item_id, row.correct    

    def _renumber_attribute(self, df, attr_name):
        counter = 0
        attr_dict = {}
        overwrite = []
        for e in df[attr_name]:
            if e not in attr_dict:
                attr_dict[e] = counter
                counter += 1
            overwrite.append(attr_dict[e])
        df[attr_name] = np.array(overwrite)   
        return len(attr_dict)
    

In [6]:
# Partition the dataset and prepare loaders

dataset = EducationDataset(DATASET)
n_train = int(SPLIT * dataset.__len__())
n_test = dataset.__len__() - n_train
train_set, test_set = random_split(dataset, [n_train, n_test])

train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

# Not sure if I require this
classes = (0, 1)

In [7]:
# Compute some meta information at this point
# Proportion of 1 replies
# Number of 



In [8]:
# Helper function to compute AUC score

from sklearn.metrics import roc_auc_score, accuracy_score

def compute_auc(preds, labels):     
    #print("Preds Before ", preds)
    preds = preds[labels >= 0].flatten()
    labels = labels[labels >= 0].float()
    #print("Preds After ", preds)
    #print("Labels ", labels)
    if len(torch.unique(labels)) == 1:  # Only one class
        #print("COMPUTING ACCURACY")
        auc = accuracy_score(labels, preds.round())
    else:
        #print("COMPUTING ROC_AUC")
        auc = roc_auc_score(labels, preds)
    return auc

def compute_accuracy(preds, labels):
    assert labels.ndim == 1 and labels.size() == preds.size()
    classification = preds >= 0.5
    return (labels == classification).sum().item() / labels.size(0)


In [9]:
# Define a simple neural network

class Net(nn.Module):
    
    def __init__(self, num_users, num_items):
        super().__init__()
        
        self.num_users = num_users
        self.fu1 = nn.Linear(num_users, 200)
        #self.fu2 = nn.Linear(50, 20)
        
        self.num_items = num_items
        self.fi1 = nn.Linear(num_items, 200)
        # self.fi2 = nn.Linear(50, 20)
        
    # Print here and see what we get as input from the dataloader
    def forward(self, x):
        # student forward
        xs = F.one_hot(x[0], num_classes=self.num_users).float()         
        xs = F.relu(self.fu1(xs))
        # xs = F.relu(self.fu2(xs))
        
        # item forward
        xi = F.one_hot(x[1], num_classes=self.num_items).float()
        xi = F.relu(self.fi1(xi))
        # xi = F.relu(self.fi2(xi))
        
        # combine network output
        x = torch.mul(xs, xi)
        x = torch.sum(x, dim=1)
        x = torch.sigmoid(x)
        return x.squeeze()
    


In [None]:
# Train model

model = Net(dataset.num_students, dataset.num_items)
model.to(device)
criterion = nn.BCELoss()
# TODO: Maybe use adam instead???
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
metrics = Metrics()

for epoch in range(EPOCHS):  # loop over the dataset multiple times
    print("\nEpoch:", epoch)
    print("---------------------------")

    # Train Loop
    t0 = time.time()
    model.train()
    for i, data in enumerate(train_loader):
        user_data, item_data, labels = data
        user_data = user_data.to(device)
        item_data = item_data.to(device)

        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = model((user_data, item_data))
        loss = criterion(outputs, labels.float().to(device))
        loss.backward()
        optimizer.step()

        # store statistics
        train_auc = compute_auc(outputs.detach().cpu(), labels)
        train_acc = compute_accuracy(outputs.detach().cpu(), labels)
        metrics.store({'bce/train': loss.item()})
        metrics.store({'auc/train': train_auc})
        metrics.store({'acc/train': train_acc})
    print('Training time: {} seconds'.format(time.time() - t0))
    
    # Test Loop
    t0 = time.time()
    model.eval()
    for i, data in enumerate(train_loader):
        with torch.no_grad():
            user_data, item_data, labels = data
            user_data = user_data.to(device)
            item_data = item_data.to(device)
            outputs = model((user_data, item_data))
        loss = criterion(outputs, labels.float().to(device))
        
        test_auc = compute_auc(outputs.detach().cpu(), labels)
        test_acc = compute_accuracy(outputs.detach().cpu(), labels)
        metrics.store({'bce/test': loss.item()})
        metrics.store({'auc/test': test_auc})
        metrics.store({'acc/test': train_acc})
    print('Test time: {} seconds'.format(time.time() - t0))
    model.train()      
        
    # Compute average metrics and print result
    average_metrics = metrics.average()
    print("")
    for k in average_metrics:
        print(k, average_metrics[k])
        
print('Finished Training')


Epoch: 0
---------------------------
Training time: 7.3756372928619385 seconds
Test time: 6.201389312744141 seconds

bce/train 0.692452901681977
auc/train 0.5022691002644984
acc/train 0.6586443973040473
bce/test 0.692197551909049
auc/test 0.5032759261258177
acc/test 0.649700598802395

Epoch: 1
---------------------------
Training time: 6.676344871520996 seconds
Test time: 6.832866907119751 seconds

bce/train 0.6917898772543322
auc/train 0.5044561841568331
acc/train 0.658713487822562
bce/test 0.6912757845202904
auc/test 0.505795563816051
acc/test 0.696107784431138

Epoch: 2
---------------------------
Training time: 6.425450086593628 seconds
Test time: 6.895883798599243 seconds

bce/train 0.6904934981478704
auc/train 0.507156785477268
acc/train 0.6586087376815879
bce/test 0.689511468592246
auc/test 0.5085583047133613
acc/test 0.6257485029940135

Epoch: 3
---------------------------
Training time: 7.35345196723938 seconds
Test time: 6.8409812450408936 seconds

bce/train 0.68805844821202

Training time: 6.836486101150513 seconds
Test time: 6.8967320919036865 seconds

bce/train 0.6409829657174012
auc/train 0.6096167136588055
acc/train 0.6586377111248358
bce/test 0.6409708649053701
auc/test 0.6111010730186422
acc/test 0.6452095808383255

Epoch: 30
---------------------------
Training time: 6.7543113231658936 seconds
Test time: 6.792243719100952 seconds

bce/train 0.6409487376833176
auc/train 0.6121321982672006
acc/train 0.6586310249456249
bce/test 0.6408943063475091
auc/test 0.6136333223092301
acc/test 0.6407185628742514

Epoch: 31
---------------------------
Training time: 6.774718999862671 seconds
Test time: 6.467438459396362 seconds

bce/train 0.6409055649432366
auc/train 0.6145808734745771
acc/train 0.6586377111248358
bce/test 0.640869856949879
auc/test 0.6161017427304999
acc/test 0.6452095808383255

Epoch: 32
---------------------------
Training time: 6.808610439300537 seconds
Test time: 6.689460754394531 seconds

bce/train 0.6408773738172556
auc/train 0.617074785200

In [None]:
# Question what is average correct percentage?
# How do we evaluate AUC?