In [94]:
import numpy as np
import pandas as pd
import torch
import transformers
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification, AutoModel, AutoTokenizer
from transformers import BertConfig, BertModel
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import sys
import os
# Append the parent of the current working directory to sys.path so that the
# `utils` import that follows can be resolved
# (presumably utils.py lives one directory above the notebook — confirm).
current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)
sys.path.append(parent_directory)
from utils import load_train_csv, load_valid_csv, load_public_test_csv, load_private_test_csv

In [95]:
import torch
import torch.nn as nn
from torch.nn import MultiheadAttention

class CustomTransformer(nn.Module):
    """Attention model that scores a question against student/question context rows.

    Student and question feature rows are projected into a shared ``d_model``
    space. The target question rows act as the attention query; the projected
    student and context-question rows, stacked on top of each other, act as
    keys and values. The attended context is concatenated with the question
    projection and mapped to two output logits per question row.

    Args:
        num_layers: Number of layers in the ``TransformerEncoder`` stack.
        d_model: Width of the shared embedding space.
        nhead: Number of attention heads.
        dim_feedforward: Hidden width of the encoder layers' feed-forward part.
        dropout: Dropout used by the attention module and the encoder layers.
        student_dim: Size of the last axis of the student inputs (default 1774).
        question_dim: Size of the last axis of the question inputs (default 768).
    """

    def __init__(self, num_layers, d_model=128, nhead=8, dim_feedforward=2048, dropout=0.1,
                 student_dim=1774, question_dim=768):
        super().__init__()

        # Linear projections of the raw feature rows into the shared d_model space.
        self.student_embed = nn.Linear(student_dim, d_model)
        self.question_embed = nn.Linear(question_dim, d_model)

        self.multihead_attn = MultiheadAttention(d_model, nhead, dropout=dropout)

        # NOTE(review): this encoder stack is constructed (and therefore part of
        # parameters()/state_dict()) but forward() never calls it. It is kept so
        # existing checkpoints and parameter initialisation order stay unchanged.
        encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout)
        self.transformer = TransformerEncoder(encoder_layer, num_layers)

        # Classification head over [attended context ; question projection].
        self.ff = nn.Sequential(nn.Linear(2 * d_model, 64), nn.ReLU(), nn.Linear(64, 2))

    def forward(self, student_pos, student_neg, question_pos, question_neg, question):
        """Return two logits for every row of ``question``.

        Args:
            student_pos, student_neg: Tensors whose last axis has ``student_dim``
                entries (assumed 2-D ``(rows, student_dim)`` — TODO confirm).
            question_pos, question_neg: Tensors whose last axis has
                ``question_dim`` entries (assumed 2-D — TODO confirm).
            question: Query rows, last axis ``question_dim``
                (assumed 2-D ``(rows, question_dim)`` — TODO confirm).

        Returns:
            Tensor produced by the feed-forward head; for 2-D inputs its shape is
            ``(question rows, 2)``.
        """
        student_pos_embed = self.student_embed(student_pos)
        student_neg_embed = self.student_embed(student_neg)
        question_pos_embed = self.question_embed(question_pos)
        question_neg_embed = self.question_embed(question_neg)
        question_embed = self.question_embed(question)

        # Stack all context rows along axis 0 to form one key/value set.
        key_value = torch.cat(
            (student_pos_embed, student_neg_embed, question_pos_embed, question_neg_embed), dim=0
        )

        # The attention module is built with its default (sequence-first) layout,
        # so inserting a singleton axis at position 1 makes the rows the sequence
        # axis of a single-element batch: every query row attends over all
        # stacked context rows.
        context_vector, _ = self.multihead_attn(
            query=question_embed.unsqueeze(1),
            key=key_value.unsqueeze(1),
            value=key_value.unsqueeze(1),
        )
        context_vector = context_vector.squeeze(1)

        combined_vector = torch.cat((context_vector, question_embed), dim=1)
        output = self.ff(combined_vector)

        return output


In [96]:
def train_epoch(model, data_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Module called as ``model(student_ids, question_embedding)``.
        data_loader: Iterable of dict batches holding tensors under the keys
            ``'user_id'``, ``'question_embedding'`` and ``'is_correct'``.
        criterion: Callable ``criterion(logits, labels)`` returning a scalar loss tensor.
        optimizer: Optimizer that updates the model's parameters.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        float: Sum of the per-batch losses divided by the number of batches,
        or ``nan`` when the loader yields no batches.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0

    for batch in data_loader:
        student_ids = batch['user_id'].to(device)
        question_embedding = batch['question_embedding'].to(device)
        labels = batch['is_correct'].float().to(device)

        optimizer.zero_grad()
        raw_logits = model(student_ids, question_embedding)
        logits = raw_logits.squeeze()
        # A bare squeeze() also removes the batch axis when the batch holds a
        # single sample (e.g. the last, partial batch), which makes the logits
        # and labels shapes disagree. Put that axis back in exactly that case.
        single_sample_batch = (
            raw_logits.dim() > 0 and raw_logits.shape[0] == 1
            and labels.dim() > 0 and labels.shape[0] == 1
        )
        if single_sample_batch:
            logits = logits.unsqueeze(0)

        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # Counting batches (instead of len(data_loader)) also supports iterables
    # without __len__ and avoids a ZeroDivisionError on an empty loader.
    if num_batches == 0:
        return float('nan')
    return running_loss / num_batches

In [97]:
def evaluate(model, data_loader, criterion, device):
    """Compute the mean batch loss over ``data_loader`` without updating the model.

    Args:
        model: Module called as ``model(student_ids, question_embedding)``.
        data_loader: Iterable of dict batches holding tensors under the keys
            ``'user_id'``, ``'question_embedding'`` and ``'is_correct'``.
        criterion: Callable ``criterion(logits, labels)`` returning a scalar loss tensor.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        float: Sum of the per-batch losses divided by the number of batches,
        or ``nan`` when the loader yields no batches.
    """
    model.eval()
    running_loss = 0.0
    num_batches = 0

    with torch.no_grad():
        for batch in data_loader:
            student_ids = batch['user_id'].to(device)
            question_embedding = batch['question_embedding'].to(device)
            labels = batch['is_correct'].float().to(device)

            raw_logits = model(student_ids, question_embedding)
            logits = raw_logits.squeeze()
            # A bare squeeze() also removes the batch axis when the batch holds
            # a single sample, which makes the logits and labels shapes
            # disagree. Put that axis back in exactly that case.
            single_sample_batch = (
                raw_logits.dim() > 0 and raw_logits.shape[0] == 1
                and labels.dim() > 0 and labels.shape[0] == 1
            )
            if single_sample_batch:
                logits = logits.unsqueeze(0)

            loss = criterion(logits, labels)
            running_loss += loss.item()
            num_batches += 1

    # Counting batches (instead of len(data_loader)) also supports iterables
    # without __len__ and avoids a ZeroDivisionError on an empty loader.
    if num_batches == 0:
        return float('nan')
    return running_loss / num_batches

In [98]:
class ModelDataset(Dataset):
    """Dataset that pairs each interaction record with its question's embedding.

    Args:
        data: Object indexable by the keys ``'user_id'``, ``'question_id'`` and
            ``'is_correct'`` (presumably parallel, position-indexable sequences
            — verify against the loader in ``utils``).
        question_embeddings: Container indexed by question id that yields the
            embedding of that question.
    """

    def __init__(self, data, question_embeddings):
        # Keep references to the three columns; rows are assembled lazily per item.
        self.user_ids, self.question_ids, self.is_correct = (
            data['user_id'],
            data['question_id'],
            data['is_correct'],
        )
        self.question_embeddings = question_embeddings

    def __len__(self):
        """Number of interaction records (length of the user-id column)."""
        return len(self.user_ids)

    def __getitem__(self, idx):
        """Return the record at ``idx`` as a dict with the question embedding looked up."""
        question_id = self.question_ids[idx]
        sample = dict(
            user_id=self.user_ids[idx],
            question_embedding=self.question_embeddings[question_id],
            is_correct=self.is_correct[idx],
        )
        return sample

In [99]:
# NOTE(review): `Transformer` is not defined or imported in any cell visible in this
# notebook; the class defined earlier is `CustomTransformer`, whose constructor takes
# different arguments. Presumably the name comes from leftover kernel state — confirm
# where it is defined so this cell works after Restart Kernel -> Run All.
transformer = Transformer(hidden_size=768, num_students=542, nhead=6, num_layers=6)

In [100]:
# Load the train/validation interaction records and the precomputed question embeddings.
train_data = load_train_csv('../data/')
val_data = load_valid_csv('../data/')
question_embeddings = np.load('../data/extracted_feats_question_comp.npy')

# Training batches are reshuffled every epoch.
train_ds = ModelDataset(train_data, question_embeddings)
train_dataloader = DataLoader(train_ds, batch_size=32, shuffle=True)

# Validation only measures the loss, so keep a fixed order: shuffling adds no
# value there and makes the per-batch averages vary from run to run.
val_ds = ModelDataset(val_data, question_embeddings)
val_dataloader = DataLoader(val_ds, batch_size=32, shuffle=False)

In [101]:
# Use the GPU when PyTorch reports one as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [102]:
num_epochs = 10

# NOTE(review): train_epoch/evaluate cast the labels to float before calling the
# criterion. nn.CrossEntropyLoss interprets floating-point targets as class
# probabilities rather than class indices — confirm this matches the model's
# output shape; a single-logit binary output would normally pair with
# nn.BCEWithLogitsLoss instead.
criterion = nn.CrossEntropyLoss()

# train_epoch/evaluate move every batch to `device`, so the model's parameters
# must live on the same device or the forward pass fails with a device mismatch
# whenever CUDA is selected. Move the model before building the optimizer.
transformer.to(device)
optimizer = torch.optim.Adam(transformer.parameters(), lr=1e-3)

# Train and evaluate the model
for epoch in range(num_epochs):
    train_loss = train_epoch(transformer, train_dataloader, criterion, optimizer, device)
    val_loss = evaluate(transformer, val_dataloader, criterion, device)

    print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

KeyboardInterrupt: 