In [6]:
import numpy as np
from scipy.optimize import minimize
from sklearn.metrics import log_loss
import pandas as pd

class BayesianKnowledgeTracer:
    """Per-category Bayesian Knowledge Tracing with a forgetting term.

    Every category owns four probabilities (``learn``, ``forget``, ``guess``
    and ``slip``) stored in ``self.parameters``.  Mastery estimates are
    updated one observation at a time.  The parameters are fixed defaults;
    no method of this class fits them to data.
    """

    def __init__(self, learning_rate=0.1):
        # NOTE: learning_rate is stored but not read by any method below.
        self.learning_rate = learning_rate
        # Maps category -> {'learn', 'forget', 'guess', 'slip'} probabilities.
        self.parameters = {}

    def initialize_parameters(self, categories):
        """Reset the parameters of each category in *categories* to defaults.

        Existing entries for the given categories are overwritten.
        """
        for category in categories:
            self.parameters[category] = {
                'learn': 0.3,  # Probability of learning from a question
                'forget': 0.1,  # Probability of forgetting a concept
                'guess': 0.2,  # Probability of guessing correctly
                'slip': 0.1   # Probability of making a mistake despite knowing
            }

    def update_mastery(self, category, prev_mastery, correctness):
        """Return the updated mastery probability after one observation.

        The prior ``prev_mastery`` is first conditioned on the observed
        answer (Bayes' rule using ``slip`` and ``guess``), then the learn and
        forget transitions are applied.

        Args:
            category: Key into ``self.parameters``.
            prev_mastery: Mastery probability before the observation.
            correctness: ``1`` for a correct answer; any other value is
                treated as incorrect.

        Returns:
            The mastery probability after the observation.

        Raises:
            KeyError: If *category* has no parameters.
        """
        params = self.parameters[category]
        slip, guess = params['slip'], params['guess']

        if correctness == 1:
            known_term = prev_mastery * (1 - slip)
            unknown_term = (1 - prev_mastery) * guess
        else:
            known_term = prev_mastery * slip
            unknown_term = (1 - prev_mastery) * (1 - guess)

        evidence = known_term + unknown_term
        if evidence == 0:
            # The observation has zero probability under the model (e.g.
            # mastery 0 with guess 0 and a correct answer).  Bayes' rule is
            # undefined there, so keep the prior instead of dividing by zero.
            mastery = prev_mastery
        else:
            mastery = known_term / evidence

        mastery = mastery + params['learn'] * (1 - mastery) - params['forget'] * mastery
        return mastery

    def train(self, data, initial_mastery=0.5):
        """Replay every interaction in *data* and return final mastery states.

        Parameters for all categories found in *data* are reset to their
        defaults first.  Rows are processed in the order they appear.

        Args:
            data: DataFrame with ``UserID``, ``Category`` and ``Correctness``
                columns.
            initial_mastery: Mastery probability assigned to every
                (user, category) pair before any observation.

        Returns:
            Nested dict ``{user: {category: mastery}}`` covering every user
            and every category present in *data*.

        Raises:
            ValueError: If *initial_mastery* is outside ``[0, 1]``.
        """
        if not 0 <= initial_mastery <= 1:
            raise ValueError("initial_mastery must be a probability in [0, 1]")

        users = data['UserID'].unique()
        categories = data['Category'].unique()

        self.initialize_parameters(categories)

        mastery_tracker = {
            user: {category: initial_mastery for category in categories}
            for user in users
        }

        # Iterate the three needed columns directly: iterrows() builds a
        # Series per row and does not preserve column dtypes.
        observations = zip(data['UserID'], data['Category'], data['Correctness'])
        for user, category, correctness in observations:
            prev_mastery = mastery_tracker[user][category]
            mastery_tracker[user][category] = self.update_mastery(
                category, prev_mastery, correctness
            )

        return mastery_tracker

# Load the interaction log; Correctness is cast to int before training.
data = pd.read_csv("synthetic_math_data.csv")
data['Correctness'] = data['Correctness'].astype(int)

# Replay all interactions through a tracer with default settings.
bkt_model = BayesianKnowledgeTracer()
mastery_states = bkt_model.train(data)

# Shallow copy of the per-user mastery mapping (all users, same order),
# left as the final expression so the notebook displays it.
mastery_states_sample = dict(mastery_states)
mastery_states_sample


{1: {'Logic': 0.5518239877948005,
  'Geometry': 0.8719411540982412,
  'Arithmetic': 0.8087487796112881,
  'Algebra': 0.7356681212402115},
 2: {'Logic': 0.3446523686556199,
  'Geometry': 0.44684094717944045,
  'Arithmetic': 0.5506630921267125,
  'Algebra': 0.7838391715432478},
 3: {'Logic': 0.5760784753793684,
  'Geometry': 0.34276656923913995,
  'Arithmetic': 0.7438362933494014,
  'Algebra': 0.5579536669053763},
 4: {'Logic': 0.5579527636410193,
  'Geometry': 0.7456388046379233,
  'Arithmetic': 0.5541389722738217,
  'Algebra': 0.8200164793147998},
 5: {'Logic': 0.5899868146295025,
  'Geometry': 0.8780402638244603,
  'Arithmetic': 0.8717501957406619,
  'Algebra': 0.8826831168888205},
 6: {'Logic': 0.5068391422798775,
  'Geometry': 0.8078732790218195,
  'Arithmetic': 0.33626662675901636,
  'Algebra': 0.8819673382273503},
 7: {'Logic': 0.8810570453250772,
  'Geometry': 0.8078710637193032,
  'Arithmetic': 0.867169692887945,
  'Algebra': 0.588467184861641},
 8: {'Logic': 0.33875364412714404