# Deep Knowledge Tracing (DKT) - Training Notebook

This notebook demonstrates:
1. Creating synthetic student interaction data
2. Training a DKT LSTM model
3. Evaluating model performance (AUC, Brier score)
4. Comparing with Beta-Bernoulli baseline
5. Saving the trained model for deployment


In [None]:
import sys
sys.path.append('..')

import numpy as np
import torch
import matplotlib.pyplot as plt
from pathlib import Path

from models.dkt import DKT, DKTPredictor, create_synthetic_data, train_dkt
from models.beta_kt import BetaKT
from evaluation import KTEvaluator

# Report the installed PyTorch build and whether a CUDA device is visible.
# Informational only: the training and evaluation cells pass device='cpu' explicitly.
print(f"PyTorch version: {torch.__version__}")
has_cuda = torch.cuda.is_available()
device_label = 'cuda' if has_cuda else 'cpu'
print(f"Device: {device_label}")


## 1. Generate Synthetic Data

We'll create synthetic student interaction sequences with varying abilities and learning effects.


In [None]:
# Dataset configuration. These names are reused by later cells
# (N_QUESTIONS is passed to train_dkt), so they stay module-level.
N_STUDENTS, N_QUESTIONS, SEED = 200, 25, 42

# Build one record per synthetic student.
# seq_len_range presumably bounds the number of attempts per student --
# TODO confirm against create_synthetic_data.
data = create_synthetic_data(
    n_students=N_STUDENTS, n_questions=N_QUESTIONS, seq_len_range=(15, 60), seed=SEED
)

# Quick sanity check: dataset size plus a peek at the first student's record.
first_attempts = data[0]['attempts']
print(f"Generated {len(data)} student sequences")
print(f"Example sequence length: {len(first_attempts)}")
print(f"Example first 5 attempts: {first_attempts[:5]}")


## 2. Train DKT Model


In [None]:
# Split data
# Shuffle in place with a local generator seeded from SEED so the train/test
# partition is identical on every run (the previous unseeded global shuffle
# produced a different split each time the cell was executed).
split_rng = np.random.default_rng(SEED)
split_rng.shuffle(data)

# Derive the split point from the dataset size instead of hard-coding 140,
# so changing N_STUDENTS keeps the 70/30 ratio. Integer arithmetic avoids
# float rounding; with 200 students this is still exactly 140.
TRAIN_PERCENT = 70
n_train = len(data) * TRAIN_PERCENT // 100
train_data = data[:n_train]
test_data = data[n_train:]

# Train
# The checkpoint written to save_path is reloaded by the evaluation cell.
model = train_dkt(
    data=train_data,
    n_questions=N_QUESTIONS,
    epochs=20,
    batch_size=32,
    lr=0.001,
    device='cpu',
    save_path='../models/dkt_model.pt'
)

print("Training complete! Model saved.")


## 3. Evaluate and Compare

Now evaluate the DKT model and compare with the Beta-Bernoulli baseline.


## 4. STEM Learning Path Generation

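After the evaluation cell below (which belongs to Section 3), the remaining cells use `STEMPathGenerator` to generate personalized STEM learning pathways from each learner's attempt history.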


In [None]:
# Reload the checkpoint written by the training cell and score it on the
# held-out students.
predictor = DKTPredictor(model_path='../models/dkt_model.pt', device='cpu')

y_true, y_pred = [], []

# Replay each held-out sequence: the first 5 attempts serve only as history;
# every later attempt is predicted from all attempts that precede it.
for student in test_data:
    attempts = student['attempts']
    for i, next_attempt in enumerate(attempts[5:], start=5):
        prob = predictor.predict_next_question(attempts[:i], next_attempt['question_id'])
        y_true.append(int(next_attempt['correct']))
        y_pred.append(prob)

# Metrics
dkt_metrics = KTEvaluator.evaluate_model(y_true, y_pred)
print(f"DKT AUC: {dkt_metrics['auc']:.4f}")
print(f"DKT Brier: {dkt_metrics['brier_score']:.4f}")
print(f"DKT Accuracy: {dkt_metrics['accuracy']:.4f}")

# Compare with Beta-KT baseline
# NOTE(review): the baseline is only constructed here; it is never fitted or
# scored, so no comparison is actually printed by this cell.
beta_kt = BetaKT(alpha=1.0, beta=1.0)
print(f"\n✅ DKT model trained and evaluated successfully!")


In [None]:
sys.path.append('..')
from stem_path_generator import STEMPathGenerator, LearningStyle
import json

# Create the path generator (constructed with no arguments).
generator = STEMPathGenerator()

# Summarise the knowledge graph: concept count and the distinct subjects it covers.
concepts = generator.knowledge_graph
print(f"📚 Knowledge Graph loaded with {len(concepts)} concepts")
subjects = {concept.subject for concept in concepts.values()}
print(f"Subjects available: {subjects}")


### Example 1: Beginner Programmer - Visual Learner


In [None]:
# User A: Beginner with some exposure to variables
user_a_attempts = [
    {"concept": "variables", "correct": True},
    {"concept": "variables", "correct": True},
    {"concept": "variables", "correct": False},  # Still learning
    {"concept": "control_structures", "correct": False},  # Struggling
]

path_a = generator.generate_path(
    user_id="user_a_beginner",
    subject="programming",
    user_attempts=user_a_attempts,
    learning_style=LearningStyle.VISUAL,
    learning_goal="Master Python basics"
)

# Status -> icon lookup, built once instead of on every loop iteration.
status_emoji = {"completed": "✅", "in_progress": "🔄", "not_started": "⭕", "locked": "🔒"}
# Shown for any status missing from the table, so an unexpected status does
# not abort the whole report with a KeyError.
unknown_status_emoji = "❓"

# Header: who the path is for, overall mastery, and the estimated effort.
print("=" * 80)
print(f"🎯 Learning Path for {path_a.user_id}")
print(f"📊 Overall Mastery: {path_a.overall_mastery:.1%}")
print(f"⏱️  Estimated Time: {path_a.metadata['estimated_total_hours']:.1f} hours")
print("=" * 80)

for i, node in enumerate(path_a.nodes[:5], 1):  # Show first 5 concepts
    emoji = status_emoji.get(node.status, unknown_status_emoji)
    print(f"\n{i}. {emoji} {node.concept.name}")
    print(f"   Current Mastery: {node.current_mastery:.1%} → Target: {node.target_mastery:.1%}")
    print(f"   Status: {node.status.upper()}")
    # An empty prerequisite list joins to '', which falls back to 'None'.
    print(f"   Prerequisites: {', '.join(node.concept.prerequisites) or 'None'}")
    print(f"   Recommended Resources:")
    for res in node.recommended_resources[:2]:  # Show top 2
        print(f"      • {res.title} ({res.type}, {res.duration_minutes}min, difficulty: {res.difficulty:.1%})")


### Example 2: Intermediate Programmer - Kinesthetic Learner


In [None]:
# User B: Intermediate learner, good at fundamentals but needs work on advanced topics
strong_fundamentals = [
    {"concept": "variables", "correct": True},
    {"concept": "variables", "correct": True},
    {"concept": "control_structures", "correct": True},
    {"concept": "control_structures", "correct": True},
    {"concept": "functions", "correct": True},
    {"concept": "functions", "correct": False},  # Some challenges
]
advanced_struggles = [
    {"concept": "arrays", "correct": False},
    {"concept": "arrays", "correct": False},
]
user_b_attempts = strong_fundamentals + advanced_struggles

path_b = generator.generate_path(
    user_id="user_b_intermediate",
    subject="programming",
    user_attempts=user_b_attempts,
    learning_style=LearningStyle.KINESTHETIC,  # Prefers hands-on labs and projects
    learning_goal="Prepare for data structures course",
)

# Header: identity, overall mastery, estimated effort and the learning style
# recorded in the path metadata.
banner = "=" * 80
print(banner)
print(f"🎯 Learning Path for {path_b.user_id}")
print(f"📊 Overall Mastery: {path_b.overall_mastery:.1%}")
print(f"⏱️  Estimated Time: {path_b.metadata['estimated_total_hours']:.1f} hours")
print(f"🎨 Learning Style: {path_b.metadata['learning_style']}")
print(banner)

# Only concepts the learner has started or finished are listed.
status_emoji = {"completed": "✅", "in_progress": "🔄"}
hands_on_types = ['lab', 'project', 'interactive']

for node in path_b.nodes:
    if node.status not in ("in_progress", "completed"):
        continue

    print(f"\n{status_emoji[node.status]} {node.concept.name}")
    print(f"   Mastery: {node.current_mastery:.1%} (Target: {node.target_mastery:.1%})")

    # Keep only the practical resource types; nothing is printed when the
    # concept has none.
    hands_on = [r for r in node.recommended_resources if r.type in hands_on_types]
    if not hands_on:
        continue
    print(f"   🛠️ Hands-on Resources:")
    for resource in hands_on[:2]:
        print(f"      • {resource.title} ({resource.type}, {resource.duration_minutes}min)")


### Example 3: Mathematics Path - Calculus Preparation


In [None]:
# User C: Math student preparing for calculus
user_c_attempts = [
    {"concept": "algebra_basics", "correct": True},
    {"concept": "algebra_basics", "correct": True},
    {"concept": "linear_equations", "correct": True},
    {"concept": "linear_equations", "correct": False},
]

path_c = generator.generate_path(
    user_id="user_c_math",
    subject="math",
    user_attempts=user_c_attempts,
    learning_style=LearningStyle.VISUAL,
    learning_goal="Prepare for AP Calculus"
)

# Status -> icon lookup, built once instead of on every loop iteration.
status_emoji = {"completed": "✅", "in_progress": "🔄", "not_started": "⭕", "locked": "🔒"}
# Shown for any status missing from the table, so an unexpected status does
# not abort the whole report with a KeyError.
unknown_status_emoji = "❓"

# Index the path's nodes by concept id once, replacing a linear scan of
# path_c.nodes for every prerequisite. setdefault keeps the FIRST node seen
# for an id, matching what the original next(...) scan returned.
nodes_by_concept_id = {}
for path_node in path_c.nodes:
    nodes_by_concept_id.setdefault(path_node.concept.id, path_node)

print("=" * 80)
print(f"📐 Mathematics Learning Path for {path_c.user_id}")
print(f"📊 Overall Mastery: {path_c.overall_mastery:.1%}")
print(f"⏱️  Estimated Time: {path_c.metadata['estimated_total_hours']:.1f} hours")
print("=" * 80)

for i, node in enumerate(path_c.nodes, 1):
    emoji = status_emoji.get(node.status, unknown_status_emoji)
    print(f"\n{i}. {emoji} {node.concept.name}")
    print(f"   Mastery: {node.current_mastery:.1%} → {node.target_mastery:.1%}")

    # Show prerequisite chain
    # Prerequisites that are not part of this path are skipped, as before.
    prereq_status = []
    for prereq_id in node.concept.prerequisites:
        prereq_node = nodes_by_concept_id.get(prereq_id)
        if prereq_node is not None:
            prereq_status.append(f"{prereq_node.concept.name} ({prereq_node.current_mastery:.0%})")
    # Print the line only when something resolved, so a concept whose
    # prerequisites all lie outside the path no longer shows a dangling
    # "Prerequisites: " label.
    if prereq_status:
        print(f"   Prerequisites: {' → '.join(prereq_status)}")


### Example 4: Adaptive Path Updates

This example simulates how the learning path adapts as the user makes progress.
