### Phase 3: Transition to Production-Ready Architecture
After modularizing the code into `src/` components, the model achieved:
- **Recall@1:** 0.8543
- **Recall@5:** 0.9790

**Observation:** While the model shows exceptional pattern recognition, current metrics reflect performance on the training distribution. Future work involves rigorous cross-validation to ensure generalizability across different learner demographics.

In [4]:
import sys
import os

PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.insert(0, PROJECT_ROOT)

print("PROJECT ROOT:", PROJECT_ROOT)

PROJECT ROOT: c:\Users\MASTER\OneDrive\Desktop\projects for master\large projects that prove that you a researcher\Intelligent Recommendation System for Coding Courses Based on Learner Behavior


In [5]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn, optim

from src.feature_engineering import build_sequences
from src.model.gru_kt import GRUKT
from src.recommender import recommend_next

In [6]:
processed_csv = r"C:\Users\MASTER\OneDrive\Desktop\projects for master\large projects that prove that you a researcher\Intelligent Recommendation System for Coding Courses Based on Learner Behavior\Data\data\processed\ednet_sequences.csv"

df = pd.read_csv(processed_csv)
print(f"Loaded {len(df)} rows")
print(df.head())

Loaded 7729 rows
       timestamp subject_id item_id  is_correct
0  1567413540117     u12531   q3605       False
1  1567413573276     u12531   q4895       False
2  1567413619332     u12531   q5365       False
3  1567413640139     u12531   q5577       False
4  1567413670061     u12531    q869       False


In [7]:
X_items, X_corrs, y, item2idx, idx2item = build_sequences(df, max_len=50)

print(f"Samples: {len(X_items)}")
print(f"Unique items: {len(item2idx)}")

Samples: 7582
Unique items: 3478


In [8]:
class EDNetDataset(Dataset):
    def __init__(self, X_items, X_corrs, y):
        self.X_items = torch.LongTensor(X_items)
        self.X_corrs = torch.FloatTensor(X_corrs).clamp(0, 1)
        self.y = torch.LongTensor(y)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X_items[idx], self.X_corrs[idx], self.y[idx]

dataset = EDNetDataset(X_items, X_corrs, y)
loader = DataLoader(dataset, batch_size=64, shuffle=True)

In [9]:
device = "cpu"

n_items = len(item2idx) + 1
model = GRUKT(
    n_items=n_items,
    embed_dim=64,
    hidden_dim=128
).to(device)

criterion = nn.CrossEntropyLoss(ignore_index=0)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [10]:
n_epochs = 12
model.train()

for epoch in range(n_epochs):
    total_loss = 0
    for items, corrs, labels in loader:
        items = items.to(device)
        corrs = corrs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(items, corrs)
        loss = criterion(outputs, labels)
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{n_epochs} - Loss: {total_loss/len(loader):.4f}")

Epoch 1/12 - Loss: 8.1283
Epoch 2/12 - Loss: 7.6662
Epoch 3/12 - Loss: 7.1519
Epoch 4/12 - Loss: 6.5091
Epoch 5/12 - Loss: 5.8002
Epoch 6/12 - Loss: 5.1109
Epoch 7/12 - Loss: 4.4474
Epoch 8/12 - Loss: 3.8477
Epoch 9/12 - Loss: 3.3015
Epoch 10/12 - Loss: 2.8090
Epoch 11/12 - Loss: 2.3947
Epoch 12/12 - Loss: 2.0241


In [11]:
def recall_at_k(model, dataset, k=5, device="cpu"):
    model.eval()
    hits, total = 0, 0

    with torch.no_grad():
        for items, corrs, y_true in DataLoader(dataset, batch_size=64):
            items, corrs, y_true = items.to(device), corrs.to(device), y_true.to(device)
            logits = model(items, corrs)
            topk = torch.topk(logits, k, dim=1).indices

            for i in range(len(y_true)):
                hits += int(y_true[i] in topk[i])
                total += 1

    return hits / total

recall1 = recall_at_k(model, dataset, k=1)
recall5 = recall_at_k(model, dataset, k=5)

print(f"GRU + Correct Recall@1: {recall1:.4f}")
print(f"GRU + Correct Recall@5: {recall5:.4f}")

GRU + Correct Recall@1: 0.8543
GRU + Correct Recall@5: 0.9790


In [12]:
example_student = df['subject_id'].iloc[0]
student_seq = df[df['subject_id'] == example_student]

hist_items = [item2idx[i] for i in student_seq['item_id'].tolist()[:-1]]
hist_corrs = student_seq['is_correct'].astype(int).tolist()[:-1]

rec_item, confidence = recommend_next(
    model,
    idx2item,
    hist_items,
    hist_corrs,
    max_len=50,
    device=device
)

print(f"Recommended Item: {rec_item}")
print(f"Confidence Score: {confidence:.2f}%")

Recommended Item: q743
Confidence Score: 15.41%
