A model based on Elo ratings

Key Ideas
- Give each student a score per tag and use that to see how well they perform for questions in that tag group
- Give each question a rating to judge how hard a question it is
- Some charts to show results

Some code and ideas are borrowed from this notebook: https://www.kaggle.com/stevemju/riiid-simple-elo-rating/log#ELO-functions — give `stevemju` some love.

# Import Modules

In [None]:
# Imports
import importlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Loading Data

In [None]:
# Loading Data
# Directory holding the competition CSV files; get_data() reads lectures.csv,
# questions.csv and train.csv from here.
root = '/kaggle/input/riiid-test-answer-prediction'

def get_data(nrows=1000000):
    """Read the three competition tables from ``root``.

    Args:
        nrows: Maximum number of rows to read from ``train.csv``. The other
            two files are always read in full.

    Returns:
        dict with the keys ``"lectures"``, ``"questions"`` and ``"train"``,
        each mapped to the corresponding DataFrame.
    """
    lectures = pd.read_csv(f'{root}/lectures.csv')
    questions = pd.read_csv(f'{root}/questions.csv')
    # Only the training log is capped; it is the one file sized by nrows
    train = pd.read_csv(f'{root}/train.csv', nrows=nrows)

    return {"lectures": lectures, "questions": questions, "train": train}


# Load the three tables, using get_data's default row cap for train.csv
data = get_data()
# Last expression of the cell: displays the table names held in the dict
data.keys()

# Processing Data

In [None]:
# Expand every question into one row per tag and give each (question, tag) pair
# its own id of the form "<question_id>_<tag>". A bit sloppy, but it works.
question_data = (
    data['questions']
    .assign(tags=lambda frame: frame['tags'].str.split(" "))
    .explode('tags')
)

# The id is stored as text so it can be concatenated with the tag below
question_data['question_id'] = question_data['question_id'].map(str)
question_data['question'] = question_data['question_id'] + '_' + question_data['tags']

# Keep only the columns used later and index the table by the combined id
kept_columns = ['question_id', 'tags', 'question']
question_data = question_data[kept_columns].set_index('question')
question_data

In [None]:
# Lookup table: question_id (as a string) -> list of [combined question id, tag]
# pairs, one pair per tag of that question. Rows with missing values are dropped
# first, so a question without tags has no entry.
question_data_lookup = (
    question_data
    .reset_index()
    .dropna()
    .groupby('question_id')[['question', 'tags']]
    .apply(lambda group: group.values.tolist())
    .to_dict()
)

# Tags come out of a string split; convert them in place to ints because the
# training loop passes them on as the category index of a student's rating list
for pairs in question_data_lookup.values():
    for pair in pairs:
        pair[1] = int(pair[1])

question_data_lookup['0']

# Elo

In [None]:
# Class I wrote that tries to use ELO for the classification of each question segmented by tags

class Elo:
    """Elo-style rating model that keeps one student rating per tag category.

    Every student holds a list of ratings (one entry per category) and a
    single answer counter shared by all categories; every question holds one
    rating and one answer counter. The predicted probability of a correct
    answer is a sigmoid of ``student_rating - question_rating`` lifted so that
    it never drops below ``left_asymptote``.

    Args:
        left_asymptote: Lower bound of the predicted probability. Defaults to
            0.25 (presumably the guess rate of a four-option question —
            confirm against the data set).
    """

    def __init__ (self, left_asymptote=0.25):
        self.left_asymptote = left_asymptote
        # Filled by initialize() / reset_students(); created here so the
        # attributes always exist on an instance.
        self.students = {}
        self.questions = {}

    ## These Functions pulled from https://www.kaggle.com/stevemju/riiid-simple-elo-rating/log#ELO-functions
    ## Credit to `stevemju`
    ## -------------------

    def get_delta_student_rating(self, correct, projected_score, samples):
        """Return the rating change for a student.

        The change is positive when the student did better than predicted
        (``correct > projected_score``) and is scaled by the student learning
        rate for ``samples`` previously processed examples.
        """
        return + self.learning_rate_student(samples) * ( correct - projected_score )

    def get_delta_question_rating(self, correct, projected_score, samples):
        """Return the rating change for a question.

        Opposite sign to the student update: a correct answer that was not
        fully expected lowers the question rating.
        """
        return - self.learning_rate_question(samples) * ( correct - projected_score )

    def learning_rate_student(self, samples):
        """Return the student step size: starts at 0.3, decays with ``samples``, floor 0.04."""
        return max(0.3 / (1 + 0.01 * samples), 0.04)

    def learning_rate_question(self, samples):
        """Return the question step size: starts at 1 and decays with ``samples`` (no floor)."""
        return 1 / (1 + 0.05 * samples)

    def probability_of_good_answer(self, student_rating, question_rating):
        """Return the predicted probability of a correct answer.

        The value lies between ``left_asymptote`` and 1 and grows with
        ``student_rating - question_rating``.
        """
        return self.left_asymptote + (1 - self.left_asymptote) * self.sigmoid(student_rating - question_rating)

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

    ## -------------------

    @staticmethod
    def _new_student_record(categories):
        """Return a fresh student record with ``categories`` zero ratings."""
        return {"rating": [0] * categories, "samples": 0}

    # Give Each person a rating per category of question
    def initialize ( self, student_ids, question_ids, categories ):
        """Create zeroed records for all students and all questions.

        Args:
            student_ids: Iterable of hashable student ids.
            question_ids: Iterable of hashable question ids.
            categories: Number of rating slots to allocate per student.
        """

        # Students (same construction as reset_students, so reuse it)
        self.reset_students(student_ids, categories)

        # Questions
        self.questions = {
            question: {"rating": 0, "samples": 0}
            for question in question_ids
        }

    def reset_students ( self, student_ids, categories ):
        """Replace all student records with zeroed ones; questions are left untouched.

        Args:
            student_ids: Iterable of hashable student ids.
            categories: Number of rating slots to allocate per student.
        """

        # Each student gets an independent rating list
        self.students = {
            student: self._new_student_record(categories)
            for student in student_ids
        }

    # Takes in a single example and trains on it
    def process_training_example ( self, student_id, question_id, category_id, correct ) :
        """Predict one answer, then update the student and question ratings.

        Args:
            student_id: Key of the student in ``self.students``.
            question_id: Key of the question in ``self.questions``.
            category_id: Index into the student's rating list.
            correct: Observed outcome; the update uses ``correct - prediction``.

        Returns:
            dict describing the example: the ids, the outcome, the prediction,
            and the ratings, sample counts and deltas as they were *before*
            this update was applied.

        Raises:
            KeyError: If the student or question id is unknown.
            IndexError: If ``category_id`` is outside the student's rating list.
        """

        # Select targets
        target_student = self.students[student_id]
        target_question = self.questions[question_id]

        student_rating = target_student['rating'][category_id]
        question_rating = target_question['rating']

        projected_score = self.probability_of_good_answer(student_rating, question_rating)

        # Update sizes; the student counter is shared by all categories
        delta_student = self.get_delta_student_rating (
            correct,
            projected_score,
            target_student['samples']
        )

        delta_question = self.get_delta_question_rating (
            correct,
            projected_score,
            target_question['samples']
        )

        # Log outputs for analysis (snapshot taken before the update)
        logged_output = {
            'question' : question_id,
            'student' : student_id,
            'category' : category_id,
            'correct' : correct,
            'prediction' : projected_score,
            'student_rating' : student_rating,
            'student_samples' : target_student['samples'],
            'delta_student_rating' : delta_student,
            'question_rating' : question_rating,
            'question_samples': target_question['samples'],
            'delta_question_rating' : delta_question,
        }

        # Update Counts & Ratings
        target_student['rating'][category_id] += delta_student
        target_question['rating'] += delta_question

        target_student['samples'] += 1
        target_question['samples'] += 1

        # Return output
        return logged_output

In [None]:
# One rating slot per tag category for every student in the loaded training
# rows, and one rating per combined (question, tag) id.
NUM_CATEGORIES = 188  # There are 188 unique categories

user_ids = data['train']['user_id'].unique()
question_ids = question_data.index.values

Elo_Model = Elo()
Elo_Model.initialize(user_ids, question_ids, NUM_CATEGORIES)

# Train

In [None]:
# Replay the training rows in order and update the Elo model once per
# (question, tag) pair of every answered question.
# Will take a bit to run

batch = []
full = []

# Only these three columns are needed. itertuples keeps each column's own
# dtype, so content_id stays an integer and str() yields the lookup key format.
train_rows = data['train'][['user_id', 'content_id', 'answered_correctly']]

for student_id, content_id, correct in train_rows.itertuples(index=False, name=None):

    # Rows with a negative answered_correctly are not scored
    # (presumably lecture rows — confirm against the data set description)
    if correct < 0:
        continue

    # Questions without tags were dropped from the lookup (dropna), so they
    # have no entry; skip them instead of failing with a KeyError.
    tagged_questions = question_data_lookup.get(str(content_id), ())

    for question, category in tagged_questions:
        _log = Elo_Model.process_training_example(student_id, question, category, correct)
        batch.append(_log)

    # Move finished batches into the full log and report progress
    if len(batch) > 50000:
        full.extend(batch)
        batch = []
        print(len(full), end='\r')

    # Stop once a bit more than a million examples have been logged
    if len(full) > 1000000:
        break

# Keep the tail that never reached the flush threshold (empty after a break)
full.extend(batch)
batch = []

# Analysis

In [None]:
# The logged results of roughly the first million training examples: one row
# per processed (question, tag) example, as collected in `full` by the training loop
train_data = pd.DataFrame(full)
train_data

In [None]:
# Histogram of the question rating logged for each example (value before that example's update).
# Observation: most questions got slightly negative ratings
plt.hist( train_data['question_rating'] )

In [None]:
# Histogram of the student rating (in the example's category) logged for each example.
# Observation: most students saw very little rating change
plt.hist( train_data['student_rating'] )

In [None]:
# The range of predicted probabilities produced by the model
plt.hist(train_data['prediction'])

In [None]:
# Question rating versus the number of examples the question had already seen.
# Observation: questions seem to migrate along paths, which is kind of interesting;
# could explore further why that happens
plt.scatter(train_data['question_samples'],train_data['question_rating'] )

In [None]:
# Calibration check: bucket the predictions to two decimals and compute the
# observed fraction of correct answers inside each bucket
train_data['prediction_bucket'] = train_data['prediction'].round(2)
train_sample = train_data.groupby('prediction_bucket')[['correct']].mean()
train_sample

In [None]:
# Calibration plot: the red diagonal is where prediction equals observed rate,
# the dots are the observed correct-rate per prediction bucket.
# Observation: we seem to predict actual student scores shockingly well
diagonal = np.arange(0.2, 1, 0.01)
plt.plot(diagonal, diagonal, color='r')
plt.scatter(train_sample.index, train_sample['correct'])

In [None]:
# Turn each probability into a hard 0/1 guess, mark whether the guess matched
# the real answer, then smooth both series to see what we are really predicting
train_data['score'] = train_data['prediction'].round(0)
train_data['accuracy'] = (train_data['score'] == train_data['correct']) * 1

# Rolling means over 5000 consecutive logged examples
for source_column, rolling_column in (
    ('accuracy', 'accuracy_rolling'),
    ('correct', 'correct_rolling'),
):
    train_data[rolling_column] = train_data[source_column].rolling(window=5000).mean()

train_data

In [None]:
# Rolling model accuracy against the rolling share of correct answers.
# Observation: the model ultimately just predicts the average student score. :( sad
plt.figure(figsize=(20, 10))

for column_name, legend_label in (
    ('accuracy_rolling', 'model_prediction'),
    ('correct_rolling', 'student_correctness'),
):
    plt.plot(train_data[column_name], label=legend_label)

plt.legend()

In [None]:
# How good are our predictions: the fraction of logged examples where the
# prediction rounded to 0/1 matches the actual answer
train_data['accuracy'].mean()

In [None]:
# How well the students did: the fraction of logged examples answered correctly
train_data['correct'].mean()

In [None]:
# Overall, kinda bad, would love feedback tho