In [645]:
# ELO implementation with explanatory comments

In [646]:
from datetime import datetime
import json
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix, f1_score
import math
import csv
import os

In [647]:
# Read the raw event log (semicolon-separated CSV) into `dataEvents`.
# NOTE(review): this is an absolute path on one specific machine; the cell
# cannot run elsewhere until the path is adjusted.
#
# Alternative input kept from the original notebook:
#dataEvents = pd.read_csv('C:\\Users\\struk\\Downloads\\Knowledge Inference & Adaptive Learning\\paper\\anonymized_dataset.csv', sep=";")

dataEvents = pd.read_csv(
    r'C:\Users\struk\Downloads\Knowledge Inference & Adaptive Learning\paper\anonamyze_all_data_collection.csv',
    sep=";",
)

In [648]:
# Column names of the prepared attempts table.
# student_id, timestamp, completed and puzzle_name are read by multiTopic_ELO
# to pick fields out of each row.
student_id = 'user'
timestamp = 'initial timestamp'
completed = 'n_completed'
puzzle_name = 'task_id'
kc_column = 'kc'

# Positional column indices.
# NOTE(review): these (and kc_column) are not referenced by the cells visible
# in this notebook; kept for any external consumer.
student_column_number = 1
group_column_number = 0
puzzle_column_number = 2
kc_column_number = 4

# Different Knowledge components
kcs = ['MIX']

# Puzzles belonging to the 'MIX' knowledge component.
# Each puzzle appears exactly once (the original list repeated
# 'Square Cross-Sections', 'Pyramids are Strange' and 'Object Limits'),
# so the list has the same 31 names as the keys of typeMappingKC.
mixPuzzles = [
    'Tall and Small', 'Ramp Up and Can It', '6. Stretch a Ramp', '7. Max 2 Boxes',
    '45-Degree Rotations', 'Boxes Obscure Spheres', 'More Than Meets Your Eye',
    'Angled Silhouette', 'Not Bird', 'Stranger Shapes', 'Few Clues', 'Bird Fez',
    'Pi Henge', 'Bull Market', '1. One Box', '2. Separated Boxes',
    '3. Rotate a Pyramid', '4. Match Silhouettes', '5. Removing Objects',
    '8. Combine 2 Ramps', '9. Scaling Round Objects', 'Square Cross-Sections',
    'Pyramids are Strange', 'Object Limits', 'Tetromino', 'Warm Up', 'Sugar Cones',
    'Unnecessary', 'Zzz', 'Orange Dance', 'Bear Market',
]

In [649]:
# Puzzle name -> knowledge component. Every puzzle currently maps to 'MIX';
# adaptedData looks puzzles up here to fill the 'kc' column.
typeMappingKC = dict.fromkeys(
    (
        '1. One Box', '2. Separated Boxes', '3. Rotate a Pyramid',
        '4. Match Silhouettes', '5. Removing Objects', '6. Stretch a Ramp',
        '7. Max 2 Boxes', '8. Combine 2 Ramps', '9. Scaling Round Objects',
        'Square Cross-Sections', 'Bird Fez', 'Pi Henge', '45-Degree Rotations',
        'Pyramids are Strange', 'Boxes Obscure Spheres', 'Object Limits',
        'Tetromino', 'Warm Up', 'Angled Silhouette', 'Sugar Cones',
        'Stranger Shapes', 'Tall and Small', 'Ramp Up and Can It',
        'More Than Meets Your Eye', 'Not Bird', 'Unnecessary', 'Zzz',
        'Bull Market', 'Few Clues', 'Orange Dance', 'Bear Market',
    ),
    'MIX',
)

# Preparation data function
def adaptedData(dataEvents, group = 'all'):
    """Build a one-row-per-attempt table from the raw event log and split its users into train/test.

    Parameters
    ----------
    dataEvents : pandas.DataFrame
        Raw events. The code reads the columns 'time', 'data', 'type' and 'id'.
        'data' is parsed with json.loads; its optional keys 'group', 'user' and
        'task_id' are extracted ('' when absent).
    group : str
        Not used anywhere in the body.

    Returns
    -------
    (activity_by_user, train, test) : tuple of pandas.DataFrame
        All three share the columns 'group_user_task_att', 'group', 'user',
        'task_id', 'n_completed', 'kc' and 'initial timestamp'. `train` and
        `test` are the rows of `activity_by_user` whose 'user' is in the train
        or test user set respectively.

    Notes
    -----
    * The 'time' column of the frame that was passed in is overwritten with
      parsed datetimes (the assignment happens before the local name is rebound).
    * An attempt is kept only if it saw a 'ws-check_solution' event, at least one
      end event, and is not flagged as a repeat after a previous completion.
    * Keys are built by joining fields with '~' and later split on '~'.
      NOTE(review): a group, user or task name containing '~' would break the
      split - confirm this cannot happen in the data.
    * The train/test split depends on the iteration order of the attempts:
      users are added to train until it holds round(0.7 * number of users).
    * NOTE(review): if no attempt passes the filters, the 'user' column is never
      created and the dropna call below would raise KeyError.
    """

    # Sort events by time
    dataEvents['time'] = pd.to_datetime(dataEvents['time'])
    dataEvents = dataEvents.sort_values('time')

    # Extract group, user and task_id from the JSON payload of every event
    # NOTE(review): each payload is parsed twice per extracted field.
    dataEvents['group'] = [json.loads(x)['group'] if 'group' in json.loads(x).keys() else '' for x in dataEvents['data']]
    dataEvents['user'] = [json.loads(x)['user'] if 'user' in json.loads(x).keys() else '' for x in dataEvents['data']]
    dataEvents['task_id'] = [json.loads(x)['task_id'] if 'task_id' in json.loads(x).keys() else '' for x in dataEvents['data']]

    # Keep only rows that have a group and a user, and whose user is not 'guest'
    dataEvents = dataEvents[((dataEvents['group'] != '') & (dataEvents['user'] != '') & (dataEvents['user'] != 'guest'))]
    dataEvents['group_user_id'] = dataEvents['group'] + '~' + dataEvents['user']
    dataEvents['group_user_task_id'] = dataEvents['group'] + '~' + dataEvents['user']+'~'+dataEvents['task_id']


    # Per-user event counts. No filtering by `group` happens here.
    # These rows never receive a 'user' value and are removed by the dropna below;
    # the frame is mainly reused as the container for the per-attempt rows.
    activity_by_user = dataEvents.groupby(['group_user_id']).agg({'id':'count',
                                             'type':'nunique'}).reset_index().rename(columns={'id':'events',
                                                                                              'type':'different_events'}) 



    #initialize the output metrics          
    activity_by_user['active_time'] = np.nan
    activity_by_user['n_completed'] = 0
    activity_by_user['kc'] = ''

    # Number of user events per puzzle attempt (keyed by group~user~task~attempt)
    puzzleEvents = dict()

    # If the user has completed the puzzle, puzzCom = 1, if not puzzCom = 0 (keyed by group~user~task)
    puzzCom= dict()

    # Knowledge component of each attempt (keyed by group~user~task~attempt)
    puzzDestr = dict()

    # Number of started attempts per group~user~task
    n_attempts = dict()
    # Data structure with characteristics per attempt
    attData = dict()

    # If the user has started the puzzle, userPuzzleInit = 1, if not userPuzzleInit = 0
    userPuzzleInit = dict()
    # Number of valid (checked and ended) attempts per group~user~task
    n_attemptsAux = dict()

    # Separation of users for train and test
    userTrain = set()
    userTest = set()
    userTotal = set()

    # Loop by users
    for user in dataEvents['group_user_id'].unique():

        # Computing active time
        # NOTE(review): previousEvent, theresHoldActivity and activeTime are set but never used below.
        previousEvent = None
        # Activity threshold in seconds 
        theresHoldActivity = 60 
        activeTime = []

        user_events = dataEvents[dataEvents['group_user_id'] == user]
        # Reset for each user so events before the first start event are ignored
        user_puzzle_key = None

        # Loop by events
        for enum, event in user_events.iterrows():

            # Started events
            if(event['type'] in ['ws-start_level', 'ws-puzzle_started']):

                # Skip Sandbox; user_puzzle_key keeps pointing at the previous attempt
                if(json.loads(event['data'])['task_id'] == 'Sandbox'): continue

                # key with user and puzzle
                partialKey = event['group'] + '~' + event['user'] + '~' + json.loads(event['data'])['task_id']

                # Register the user (bare user id, without the group prefix)
                if(event['user'] not in userTotal):
                    userTotal.add(event['user'])

                # Initialize data structures with partial key
                if(partialKey not in n_attemptsAux.keys()): 
                    n_attemptsAux[partialKey] = 0
                    puzzCom[partialKey] = 0

                # Initialize data structures with partial key. Register the first event   
                if(partialKey not in userPuzzleInit.keys()): 

                    # First attempt
                    n_attempts[partialKey] = 1

                    # Complete key: group+user+puzzle+attempt
                    user_puzzle_key = event['group'] + '~' + event['user'] + '~' + json.loads(event['data'])['task_id'] + '~' + str(n_attempts[partialKey])

                    # The user starts the puzzle
                    userPuzzleInit[partialKey] = 1

                # Register the event and update the key
                else: 

                    # New attempt: every further start event increases the attempt number
                    n_attempts[partialKey] += 1

                    # Complete key: group+user+puzzle+attempt
                    user_puzzle_key = event['group'] + '~' + event['user'] + '~' + json.loads(event['data'])['task_id'] + '~' + str(n_attempts[partialKey])


                # initialize if the id is new                                                                              
                if(user_puzzle_key not in puzzleEvents.keys()):

                    # Initialize:
                    # att: Validate attempts
                    # Completed: If the users complete the puzzle
                    # dataCompleted: if the user data is complete
                    # accept: The user has checked the puzzle
                    # timestamp: Time 
                    # repeat: The user returns to the puzzle after completion
                    attData[user_puzzle_key] = {'att': 0, 'completed': 0,'dataCompleted': 0, 'accept': 0, 'timestamp': event['time'], 'repeat':0}
                    puzzleEvents[user_puzzle_key]= 1
                    puzzDestr[user_puzzle_key] = ''
                    #initialTime[user_puzzle_key] = 0

                # Time of first event    
                if(event['type'] in ['ws-puzzle_started']): 
                    attData[user_puzzle_key]['timestamp'] = event['time']

            # the event is not final event
            # ('ws-disconnect' is not excluded here, so it is counted here and also treated as an end event below)
            if(event['type'] not in ['ws-exit_to_menu', 'ws-puzzle_complete', 'ws-create_user', 'ws-login_user']): 
                # an attempt is currently open for this user
                if(user_puzzle_key in puzzleEvents.keys()):
                    # Increase the events counter
                    puzzleEvents[user_puzzle_key] += 1
                    splitDes = user_puzzle_key.split("~")
                    # Records the puzzle knowledge components
                    # (raises KeyError for a task that is missing from typeMappingKC)
                    puzzDestr[user_puzzle_key] = typeMappingKC[splitDes[2]] 
                    # Accept flag = 1 if the user checks the solution at least once
                    if(event['type'] == 'ws-check_solution'):
                        attData[user_puzzle_key]['accept'] = 1




            # the puzzle ends        
            if(event['type'] in ['ws-exit_to_menu', 'ws-puzzle_complete', 'ws-disconnect']):
                # an attempt is currently open for this user
                if(user_puzzle_key in puzzleEvents.keys()):
                    # count end events seen for this attempt
                    attData[user_puzzle_key]['dataCompleted'] += 1
                    # only the first end event of a checked attempt counts as a valid attempt
                    if(attData[user_puzzle_key]['accept'] == 1 and attData[user_puzzle_key]['dataCompleted']==1):
                        # Increase the attempts count
                        n_attemptsAux[partialKey]+=1
                        # Record the attempt number
                        attData[user_puzzle_key]['att'] = n_attemptsAux[partialKey]
                        #attempt after solving
                        if(event['type'] in ['ws-puzzle_complete']):
                            # Flag repeat = 1 if the user accesses the puzzle after solving it
                            if(puzzCom[partialKey] !=0 and n_attemptsAux[partialKey] > 1):
                                attData[user_puzzle_key]['repeat'] = 1
                    # If the user solves the puzzle the first time
                    if(event['type'] in ['ws-puzzle_complete']):
                        if(puzzCom[partialKey] ==0):
                            attData[user_puzzle_key]['completed'] = 1
                            if(attData[user_puzzle_key]['accept'] == 1):
                                puzzCom[partialKey] +=1

    # add the data by group_user_task_id            
    for i in attData.keys(): 
        key_split = i.split('~')
        # Train and test users: fill train first (70% of users), everyone seen afterwards goes to test
        if(len(userTrain) < round(len(userTotal)*0.7)):
            userTrain.add(key_split[1])
        else: 
            if(key_split[1] not in userTrain): userTest.add(key_split[1])    

        # Data Output preparation
        if(key_split[2] != '' and key_split[2] != 'Sandbox' and key_split[3] != '' and i != '' and key_split[1] != ''):
            # Register the attempt only if it was checked, ended, and is not a repeat
            if(attData[i]['accept'] != 0 and attData[i]['dataCompleted'] != 0 and attData[i]['repeat'] == 0):
                # .at with a new label enlarges the frame: one new row per attempt key
                activity_by_user.at[i, 'group_user_task_att'] = key_split[0] + '~' + key_split[1] + '~' + key_split[2] + '~' + str(attData[i]['att'])
                activity_by_user.at[i, 'group'] = key_split[0]
                activity_by_user.at[i, 'user'] = key_split[1]
                activity_by_user.at[i, 'task_id'] = key_split[2]
                activity_by_user.at[i, 'attempt'] = attData[i]['att']
                activity_by_user.at[i, 'repeat'] = attData[i]['repeat']
                activity_by_user.at[i, 'kc'] = puzzDestr[i]
                activity_by_user.at[i, 'n_completed'] = attData[i]['completed']
                activity_by_user.at[i, 'initial timestamp'] = attData[i]['timestamp']

    # delete rows without a user (the per-user aggregate rows created at the top)
    activity_by_user.dropna(subset = ['user'], inplace=True)

    #data output preparation             
    activity_by_user = pd.DataFrame(activity_by_user, columns = ['group_user_task_att', 'group','user','task_id','n_completed', 'kc', 'initial timestamp'])

    # Train and Test preparation per users
    train = activity_by_user[activity_by_user['user'].isin(userTrain)]
    test = activity_by_user[activity_by_user['user'].isin(userTest)]

    return activity_by_user, train, test

In [650]:
# Dict users: uDict
def usersDict(datafile):
    """Index the users of the attempts table.

    Walks the rows of `datafile` (columns 'user', 'group', 'task_id') in order,
    prints how many rows each puzzle has, and returns two dicts:

    * user -> running integer index, in order of first appearance
    * user -> group recorded on the user's first row
    """
    user_index = {}
    user_group = {}
    rows_per_puzzle = dict()

    for _, record in datafile.iterrows():
        who = record['user']
        team = record['group']
        task = record['task_id']

        # Tally rows per puzzle
        rows_per_puzzle[task] = rows_per_puzzle.get(task, 0) + 1

        # First time this user shows up: give it the next index and remember its group
        if who not in user_index:
            user_index[who] = len(user_index)
            user_group[who] = team

    print(rows_per_puzzle)
    return user_index, user_group


# Dict puzzles: qDict
def puzzlesDict(datafile):
    """Map every distinct 'task_id' of `datafile` to a running integer index.

    Indices follow the order in which puzzles first appear in the rows.
    """
    puzzle_index = {}
    for _, record in datafile.iterrows():
        # setdefault only inserts when the puzzle is new; the current size is the next free index
        puzzle_index.setdefault(record['task_id'], len(puzzle_index))
    return puzzle_index



# Dict kcs: kcDict 
def kcsDict(datafile):
    """Map every knowledge component found in the 'kc' column to a running integer index.

    A 'kc' cell may hold several components joined by '~'; falsy cells (such as
    the empty string) are skipped. Indices follow order of first appearance.
    """
    kc_index = {}
    for _, record in datafile.iterrows():
        kc_field = record['kc']
        if not kc_field:
            continue
        for name in kc_field.split("~"):
            if name not in kc_index:
                kc_index[name] = len(kc_index)
    return kc_index

# Weight of the knowledge component in each puzzle
def createKcDict(datafile):
    """Build the puzzle -> {knowledge component: weight} table.

    First pass: every puzzle in `datafile` gets an entry, and each component
    listed in its 'kc' cell ('~'-separated, falsy cells skipped) is registered
    with weight 0.

    Second pass, for puzzles that have at least one component:
    * puzzles listed in the module-level `mixPuzzles` get 'MIX' = 1 and every
      other component 0;
    * all other puzzles get weight 1 for each of their components.
    """
    weights = dict()

    for _, record in datafile.iterrows():
        task = record['task_id']
        kc_field = record['kc']
        per_task = weights.setdefault(task, dict())
        if kc_field:
            per_task.update(dict.fromkeys(kc_field.split("~"), 0))

    for task, per_task in weights.items():
        if len(per_task) == 0:
            continue
        if task in mixPuzzles:
            per_task['MIX'] = 1
            for name in per_task:
                if name != 'MIX':
                    per_task[name] = 0
        else:
            for name in per_task:
                per_task[name] = 1
    return weights

# Call the functions
def loadDataset(datafile):
    """Run the four dictionary builders on `datafile` and return their results.

    Returns the tuple (user index, user -> group, puzzle index, kc index,
    puzzle -> kc weights). The builders run in that order; usersDict also
    prints the per-puzzle row counts.
    """
    user_index, user_groups = usersDict(datafile)
    puzzle_index = puzzlesDict(datafile)
    kc_index = kcsDict(datafile)
    kc_weights = createKcDict(datafile)
    return user_index, user_groups, puzzle_index, kc_index, kc_weights

In [651]:
def rmseFunction(prob, ans, lenProb):
    """Root-mean-square error between predicted probabilities and true answers.

    Parameters
    ----------
    prob : array-like of predicted probabilities.
    ans : array-like of true outcomes, same length as `prob`.
    lenProb : number used as the divisor of the summed squared error
        (callers pass the number of predictions).

    Returns
    -------
    float : sqrt(sum((prob - ans)^2) / lenProb).

    Raises ZeroDivisionError when `lenProb` is 0.
    """
    predicted = np.array(prob)
    ground = np.array(ans)
    # Subtract the converted array (the original built `ground` but then
    # subtracted the raw `ans` argument, leaving `ground` unused).
    error = predicted - ground
    err_sqr = error * error
    return math.sqrt(err_sqr.sum() / lenProb)

# Compute an accuracy value from the model predictions and the real answers
def accuracyFunction(ans, prob): 
    """Accuracy of thresholded predictions.

    Probabilities >= 0.5 are turned into 1 and probabilities < 0.5 into 0 (on a
    copy of `prob`), then compared with `ans` via sklearn's accuracy_score.
    """
    truth = np.array(ans)
    labels = np.array(prob)
    # Both masks are taken from the untouched probabilities; writing 1 into the
    # first group cannot move an entry into the second.
    at_or_above = labels >= 0.5
    below = labels < 0.5
    labels[at_or_above] = 1
    labels[below] = 0
    return metrics.accuracy_score(truth, labels)

def auc_roc(y, pred): 
    """Area under the ROC curve of scores `pred` against labels `y`.

    Uses sklearn's roc_curve with 1 as the positive label and integrates the
    resulting curve with metrics.auc.
    """
    truth = np.array(y)
    scores = np.array(pred)
    false_pos_rate, true_pos_rate, _ = metrics.roc_curve(truth, scores, pos_label=1)
    return metrics.auc(false_pos_rate, true_pos_rate)

In [652]:
def multiTopic_ELO(inputData, Competency, Diff, A_count, Q_count, kcsPuzzleDict ,gDict,gamma, beta): 
    """Run one pass of the multi-component ELO update over the rows of `inputData`.

    For every row (processed in frame order) the function predicts the
    probability that the user solves the puzzle, records prediction and actual
    outcome, and then updates the user's competency and the puzzle's difficulty
    for each knowledge component of the puzzle.

    Parameters
    ----------
    inputData : pandas.DataFrame
        Attempts table. Rows are read through the module-level column names
        `student_id`, `puzzle_name`, `timestamp` and `completed`.
    Competency : dict user -> {kc: value}. Updated in place.
    Diff : dict puzzle -> {kc: value}. Updated in place.
    A_count : dict user -> number of answers so far. Incremented in place.
    Q_count : dict puzzle -> number of times answered so far. Incremented in place.
    kcsPuzzleDict : dict puzzle -> {kc: weight}.
    gDict : not used in the body.
    gamma, beta : scalars of the update step gamma / (1 + beta * count).

    Returns
    -------
    (Competency, Diff, A_count, Q_count, prob_test, ans_test, userPuzzles, completedPartialData)
        The first four are the same objects that were passed in.
        prob_test / ans_test : dict user -> {puzzle: value}; a later attempt of
        the same user on the same puzzle overwrites the earlier entry.
        userPuzzles : dict user -> list of puzzles in the order attempted.
        completedPartialData : dict keyed by
        'user~puzzle~kc~rounded competency~rounded difficulty' with the details
        of each update; identical keys overwrite each other.

    Notes
    -----
    * The puzzle counter used in the difficulty step is read before it is
      incremented, while the user counter used in the competency step is read
      after it is incremented.
    * NOTE(review): `alpha` divides by the summed (correct - per-kc probability);
      nothing guards against that sum being 0.
    * NOTE(review): math.exp can raise OverflowError for a very large
      difficulty-minus-competency gap - confirm values stay bounded.
    """

    alpha = 1
    alpha_denominator = 0
    correct = 0
    prob_test = dict()
    ans_test = dict()  
    userPuzzles = dict()

    completedPartialData = dict()

    # Per-row outcome vector; filled below but not returned
    response = np.zeros((len(inputData), 1))

    # Main loop for input data
    for count, (index, item) in enumerate(inputData.iterrows()):

        alpha_denominator = 0
        # student id
        uid = item[student_id] 
        # Puzzle name
        qid = item[puzzle_name] 
        # initial time stamp
        time = item[timestamp]

        # Initialize users structure    
        if(uid not in userPuzzles.keys()): userPuzzles[uid] = []
        userPuzzles[uid].append(qid)

        diff = dict()
        diff[qid]=[]
        comp= dict()
        comp[uid]=[]

        # The student's current competence by component is multiplied by each component of the question he or she is facing. 
        # Same method for difficulty
        for k in kcsPuzzleDict[qid]:
            comp[uid].append(Competency[uid][k] * kcsPuzzleDict[qid][k])
            diff[qid].append(Diff[qid][k] * kcsPuzzleDict[qid][k])

        # Adding up the competencies per component to obtain the global competence    
        # Same method for difficulty
        compTotal = np.sum(comp[uid])
        diffTotal = np.sum(diff[qid])

        # With the global competence and the difficulty of the question, the probability of solving it is calculated
        probability = (1)/(1 + math.exp( -1 * (compTotal - diffTotal)))

        # Initialize probability test structure
        if(uid not in prob_test.keys()):
            prob_test[uid] = dict()

        # Save the probabilities
        prob_test[uid][qid]=probability

        # Answered puzzles count (value before this row is counted)
        q_answered_count = Q_count[qid] 

        # The puzzle is completed or no
        if item[completed] == 1:

            response[count] = 1
            correct = 1
        else:
            response[count] = 0
            correct = 0

        # Initialize answers test structure
        if(uid not in ans_test.keys()):
            ans_test[uid] = dict()

        # Save the real result    
        ans_test[uid][qid] = correct 


        #Alpha component is calculated (normalization factor)
        alpha_numerator = probability - correct
        for k in kcsPuzzleDict[qid]:
            # Competency
            c_lambda = Competency[uid][k]
            # Probability using this component alone (unweighted)
            probability_lambda = (1)/(1 + math.exp( -1 * (c_lambda - Diff[qid][k])))
            alpha_denominator = alpha_denominator + (correct - probability_lambda)
        alpha = abs(alpha_numerator / alpha_denominator)


        # Increase question and answer count
        Q_count[qid] += 1
        A_count[uid] += 1
        for k in kcsPuzzleDict[qid]:

            # User counter already includes the current row
            u_answered_count = A_count[uid]
            c = Competency[uid][k] 
            prevDiff = Diff[qid][k]

            # New key (built from the values before this update)
            key = uid+'~'+qid+'~'+k+'~'+str(round(Competency[uid][k],3)) + '~'+str(round(prevDiff,3))

            # Competency probability is calculated
            probability = (1)/(1 + math.exp( -1 * (Competency[uid][k] - prevDiff)))

            # Update the difficulty
            changeDiff = ((gamma)/(1 + beta * q_answered_count)) * alpha * (probability - correct)
            Diff[qid][k] = Diff[qid][k] + kcsPuzzleDict[qid][k] * changeDiff

            # Update the competency
            Competency[uid][k] = Competency[uid][k]+kcsPuzzleDict[qid][k] * (gamma)/(1 + beta * u_answered_count) * alpha * (correct - probability)

            # Save the new data ('Difficulty' holds the value after the update)
            completedPartialData[key] = {'prob': 0, 'kcs importance': 0, 'correct': -1, 'Difficulty': 0, 'Group Difficulty': 0, 'update competency': 0}
            completedPartialData[key]['prob'] = probability
            completedPartialData[key]['kcs importance'] = kcsPuzzleDict[qid][k]
            completedPartialData[key]['correct'] = correct
            completedPartialData[key]['Difficulty'] = round(Diff[qid][k],3)
            completedPartialData[key]['timestamp'] = time
            completedPartialData[key]['changeComp'] = kcsPuzzleDict[qid][k] * (gamma)/(1 + beta * u_answered_count) * alpha * (correct - probability)
            completedPartialData[key]['changeDiff'] = kcsPuzzleDict[qid][k] * changeDiff


    return Competency, Diff, A_count , Q_count, prob_test, ans_test, userPuzzles, completedPartialData

In [653]:
def run(output, gamma, beta):
    """Prepare the data and run the multi-component ELO model in one of two modes.

    Parameters
    ----------
    output : str
        'metrics'    - train on the train users, then evaluate each test user
                       separately; prints and returns a dict user -> accuracy.
        'multiTopic' - run the model on all attempts and return a DataFrame with
                       one row per recorded update, including normalized
                       competencies.
    gamma, beta : passed through to multiTopic_ELO.

    Notes
    -----
    * Reads the module-level `dataEvents` frame and the module-level `kcs` list.
    * NOTE(review): for any other value of `output`, the code after the two
      branches references `learner_competency`, which is only bound in the
      'multiTopic' branch, so a NameError would be raised.
    * In 'metrics' mode the trained dictionaries are passed to every per-user
      evaluation call and multiTopic_ELO updates them in place, so the model
      state keeps changing from one test user to the next.
    * The min-max normalizations divide by (max - min) with no guard for the
      case where all values are equal.
    """

    # Data input
    totalData, train_set, test_set = adaptedData(dataEvents)
    # Data structures
    uDict,gDict,qDict,kcDict,kcsPuzzleDict = loadDataset(totalData)

    competency_ELO = pd.DataFrame()
    difficulty_ELO = pd.DataFrame()

    #initialize the metrics       
    difficulty_ELO['group'] = ''
    difficulty_ELO['task_id'] = ''
    difficulty_ELO['difficulty'] = np.nan
    competency_ELO['group'] = ''
    competency_ELO['user'] = ''
    competency_ELO['kc'] = ''
    competency_ELO['competency'] = np.nan


    # Initialize idComplet with the key (group + user + kcs)
    # NOTE(review): this pairs every group value with every user (not only the
    # user's own group) and idComplet is not read again in this function.
    idComplet = dict()
    for g in gDict.values():
        for u in gDict.keys():
            for k in kcs:
                iCom = g+'~'+u+'~'+k
                idComplet[iCom] = 0



    if(output == 'metrics'):

        # Per-puzzle answer counters for the model, all starting at 0
        question_counter_Model = dict() 
        for q in qDict.keys():
            if(q not in question_counter_Model.keys()):
                question_counter_Model[q]=dict()
                question_counter_Model[q]=0



        # Per-user competencies (0 for every kc) and answer counters (0)
        learner_competency_Model = dict() 
        response_counter_Model = dict()
        for user in uDict.keys():
            if(user not in learner_competency_Model.keys()):
                learner_competency_Model[user]=dict()
                response_counter_Model[user]=dict()
                response_counter_Model[user]=0
            for k in kcDict.keys():
                learner_competency_Model[user][k]=0


        question_difficulty = dict() 
        question_counter = dict() 
        concatedGroupTask = dict()

        # Initialize the question difficulty structure
        for q in qDict.keys():
            if(q not in question_difficulty.keys()):
                question_difficulty[q]=dict()
                question_counter[q]=dict()
                question_counter[q]=0
            for k in kcDict.keys():
                question_difficulty[q][k]=0          

        # Training pass over the train users
        learner_competency_train, question_difficulty_train,response_counter_train, question_counter_train, prob_train, ans_train,userPuzzles, completedPartialData = multiTopic_ELO(train_set, learner_competency_Model,question_difficulty, response_counter_Model, question_counter_Model, kcsPuzzleDict,gDict,gamma, beta)

        # Rows of test_set that belong to one puzzle (only used by the commented-out per-puzzle loop below)
        def new_df_puzzles(test_set, puzzle):
            test_set_new = []
            for index, row in test_set.iterrows():
                if row['task_id'] == puzzle:
                    test_set_new.append([row["group_user_task_att"], row["group"], row["user"], row["task_id"], row["n_completed"], row["kc"], row["initial timestamp"]])
            return pd.DataFrame(test_set_new, columns = ['group_user_task_att', 'group', 'user', 'task_id', 'n_completed', 'kc', 'initial timestamp'])

        users_all = dict()

        # Count the rows of each user in test_set (fills and returns the enclosing users_all dict)
        def record_users(test_set):
            for index, row in test_set.iterrows():
                if row['user'] not in users_all:
                    users_all[row["user"]] = 1
                else:
                    users_all[row["user"]] += 1
            return users_all

        # Rows of test_set that belong to one user
        def new_df_users(test_set, user):
            test_set_new = []
            for index, row in test_set.iterrows():
                if row['user'] == user:
                    #print('CORRECT')
                    #print(puzzle)
                    test_set_new.append([row["group_user_task_att"], row["group"], row["user"], row["task_id"], row["n_completed"], row["kc"], row["initial timestamp"]])
                #else:
                    #print('ERROR')
                    #print(row['task_id'])
            return pd.DataFrame(test_set_new, columns = ['group_user_task_att', 'group', 'user', 'task_id', 'n_completed', 'kc', 'initial timestamp'])


        users_all = record_users(test_set)

        acc_per_user = dict()
        acc_per_puzzle = dict()

        # One evaluation pass per test user
        for user in users_all:
            test_set2 = new_df_users(test_set, user)

        #for puzzle in mixPuzzles:
            #test_set2 = new_df_puzzles(test_set, puzzle)

            learner_competency_test, question_difficulty_test,response_counter_test, question_counter_test, prob_test, ans_test,userPuzzles, completedPartialData = multiTopic_ELO(test_set2, learner_competency_train,question_difficulty_train, response_counter_train, question_counter_train, kcsPuzzleDict,gDict,gamma, beta)


            # Quality metrics
            # NOTE(review): the loops below reuse the name `user`, rebinding the
            # outer loop variable. prob_test/ans_test only contain the user of
            # test_set2, so the value ends up the same, but this is fragile.
            group_prob_test = []
            contUser =0
            contT = 0
            for user in prob_test.keys():
                contUser+=1
                for task in prob_test[user].keys():
                    contT+=1
                    group_prob_test.append(prob_test[user][task])

            group_ans_test = []
            for user in ans_test.keys():
                for task in ans_test[user].keys():
                    group_ans_test.append(ans_test[user][task])        


            accuracy = accuracyFunction(group_ans_test, group_prob_test)    
            #auc = auc_roc(group_ans_test, group_prob_test)
            #rmse = rmseFunction(group_prob_test, group_ans_test, len(group_prob_test))
            #round_group_prob_test = [round(num) for num in group_prob_test]
            #f1 = f1_score(group_ans_test, round_group_prob_test, average='binary')

            acc_per_user[user]=accuracy
            #acc_per_puzzle[puzzle]= accuracy

        print('____')
        print(acc_per_user)

        # 'metrics' mode ends here; nothing below runs for it
        return acc_per_user#, acc_per_puzzle#, accuracy, auc, rmse, f1, 



    if(output == 'multiTopic'):

        question_difficulty = dict() 
        question_counter = dict() 
        concatedGroupTask = dict()

        # Initialize the question difficulty structure
        for q in qDict.keys():
            if(q not in question_difficulty.keys()):
                question_difficulty[q]=dict()
                question_counter[q]=dict()
                question_counter[q]=0
            for k in kcDict.keys():
                question_difficulty[q][k]=0    

        # Initialize the learner competency structure
        learner_competency = dict()  
        response_counter = dict() 
        for user in uDict.keys():
            if(user not in learner_competency.keys()):
                learner_competency[user]=dict()
                response_counter[user]=dict()
                response_counter[user]=0
            for k in kcDict.keys():
                learner_competency[user][k]=0

        # Multi-ELO algorithm (updates learner_competency and question_difficulty in place)
        learner_competency_train, question_difficulty_train, response_counter_train, question_counter_train, prob_train, ans_train, userPuzzles, completedPartialData = multiTopic_ELO(totalData, learner_competency, question_difficulty, response_counter, question_counter, kcsPuzzleDict,gDict,gamma, beta)

    # Only the MIX list is filled below; the other lists stay empty
    totalCompetencyGMD = []
    totalCompetencyCO5 = []
    totalCompetencyCO6 = []
    totalCompetencyMG1 = []
    totalCompetencyMIX = []

    # Records the competences by KC
    for user in learner_competency.keys():
        for x in learner_competency[user]:
            if(x == 'MIX'):
                totalCompetencyMIX.append(learner_competency[user][x]) 

    # Min and max competence by KC

    minCompetencyMIX = min(totalCompetencyMIX)   
    maxCompetencyMIX = max(totalCompetencyMIX)

    # Normalized the competency by kc (min-max scaling)
    normalized_learner_competency = dict()
    normalized_global_competency = dict()
    for user in learner_competency.keys():
        normalized_learner_competency[user]=dict()
        normalized_global_competency[user] = 0
        for x in learner_competency[user]:     
            if(x == 'MIX'):
                normalized_learner_competency[user][x]= (learner_competency[user][x]- minCompetencyMIX)/(maxCompetencyMIX-minCompetencyMIX)
                normalized_global_competency[user] += normalized_learner_competency[user][x]

    # Global normalization: average over the number of knowledge components
    for user in normalized_global_competency.keys():
        normalized_global_competency[user] = normalized_global_competency[user]/len(kcs)


    # Normalization Difficulty (only the MIX list is filled below)
    totalDiffGMD = []
    totalDiffCO5 = []
    totalDiffCO6 = []
    totalDiffMG1 = []
    totalDiffMIX = []

    # Records the difficulty by KC
    for puzzle in question_difficulty.keys():
        for x in question_difficulty[puzzle]:
            if(x == 'MIX'):
                totalDiffMIX.append(question_difficulty[puzzle][x]) 

    # Min and max difficulty by KC

    minDiffMIX = min(totalDiffMIX)   
    maxDiffMIX = max(totalDiffMIX)

    # Computed here but not written to the returned frame
    normalized_question_difficulty = dict()

    # Normalized the difficulty by kc
    for puzzle in question_difficulty.keys():
        normalized_question_difficulty[puzzle]=dict()
        for x in question_difficulty[puzzle]:
            if(x == 'MIX'):
                normalized_question_difficulty[puzzle][x]= (question_difficulty[puzzle][x]- minDiffMIX)/(maxDiffMIX-minDiffMIX)

    #group_prob_test = []
    #for user in prob_test.keys():
    #    for task in prob_test[user].keys():
    #        group_prob_test.append(prob_test[user][task])

    #group_ans_test = []
    #for user in ans_test.keys():
    #    for task in ans_test[user].keys():
    #        group_ans_test.append(ans_test[user][task])        

    #rmse = rmseFunction(group_prob_test, group_ans_test, len(group_prob_test))
    #auc = auc_roc(group_ans_test, group_prob_test)
    #accuracy = accuracyFunction(group_ans_test, group_prob_test)

    # Data output preparation
    # Keys have the form user~puzzle~kc~competency~difficulty (built in multiTopic_ELO)
    for i in completedPartialData.keys():
        key_split = i.split('~')
        competency_ELO.at[i, 'group'] = gDict[key_split[0]]    
        competency_ELO.at[i, 'user'] = key_split[0] 
        competency_ELO.at[i, 'task_id'] = key_split[1]
        competency_ELO.at[i, 'kc'] = key_split[2]
        # Normalized competency by kc
        competency_ELO.at[i, 'final_kc_competency'] = round(normalized_learner_competency[key_split[0]][key_split[2]],3)
        # Normalized average competency
        competency_ELO.at[i, 'final_global_competency'] = round(normalized_global_competency[key_split[0]],3)
        # Current competency (string taken from the key: the value before the update)
        competency_ELO.at[i, 'current_competency'] = key_split[3]
        # Probability
        competency_ELO.at[i, 'probability'] = round(completedPartialData[i]['prob'],3)
        # If the puzzle is completed
        competency_ELO.at[i, 'correct'] = completedPartialData[i]['correct']
        # KC weight
        competency_ELO.at[i, 'kcs_importance'] = round(completedPartialData[i]['kcs importance'],3)
        # Difficulty (string taken from the key: the value before the update)
        competency_ELO.at[i, 'difficulty'] = key_split[4]
        # Time stamp
        competency_ELO.at[i, 'timestamp'] = completedPartialData[i]['timestamp']
        # Update competency
        competency_ELO.at[i, 'change_competency'] = round(completedPartialData[i]['changeComp'],3)
        # Update difficulty
        competency_ELO.at[i, 'change_difficulty'] = round(completedPartialData[i]['changeDiff'],3)

    #data output preparation  
    difficulty_ELO = pd.DataFrame(difficulty_ELO, columns = ['group','task_id', 'difficulty'])
    competency_ELO = pd.DataFrame(competency_ELO, columns = ['group','user','task_id', 'timestamp','kc','kcs_importance','final_kc_competency', 'final_global_competency','current_competency','change_competency', 'probability', 'correct', 'difficulty', 'change_difficulty'])

    return competency_ELO#, rmse, accuracy, auc

In [654]:
#competency_ELO = run('multiTopic',1.8, 0.05)
# In 'metrics' mode run() returns its acc_per_user dict, so despite its name
# this variable maps user id -> accuracy, not puzzle -> accuracy.
acc_per_puzzle = run('metrics', 1.8, 0.05)

{'1. One Box': 269, '2. Separated Boxes': 259, '3. Rotate a Pyramid': 255, '4. Match Silhouettes': 252, 'Sugar Cones': 119, '8. Combine 2 Ramps': 201, '9. Scaling Round Objects': 195, 'Square Cross-Sections': 186, 'Bird Fez': 183, 'Pi Henge': 160, '45-Degree Rotations': 148, 'Pyramids are Strange': 152, 'Boxes Obscure Spheres': 164, 'Object Limits': 152, 'Tetromino': 11, 'Angled Silhouette': 122, 'Stranger Shapes': 128, 'Tall and Small': 100, '5. Removing Objects': 241, '6. Stretch a Ramp': 228, '7. Max 2 Boxes': 227, 'Ramp Up and Can It': 84, 'More Than Meets Your Eye': 78, 'Bear Market': 38, 'Not Bird': 91, 'Warm Up': 116, 'Unnecessary': 70, 'Zzz': 66, 'Bull Market': 54, 'Few Clues': 38, 'Orange Dance': 24}
____
{'953ea80f89aa793f2d8a17d212646301': 0.9545454545454546, '62201eb12dcba7f781f1f2e75482e1ef': 0.9375, 'bf0a784fe1efa01dc5a2e1ccd1f731fc': 0.8888888888888888, 'fb039fa921dd55d1b7eb210d8a5f75fc': 1.0, 'a86925711409ca9c76b1f1c1ef43d281': 1.0, 'cc9b0177c4af699f8c39c9f7126fa4a4': 1

In [655]:
# NOTE(review): no cell visible in this notebook assigns a top-level `acc`;
# the value shown comes from earlier kernel state and will not reproduce on
# Restart & Run All.
acc

0.7727272727272727

In [656]:
# NOTE(review): no cell visible in this notebook assigns a top-level `auc`
# (the auc computation inside run() is commented out); the value shown comes
# from earlier kernel state. It is also far below 0.5 - confirm how it was
# computed before relying on it.
auc

0.075

In [657]:
# NOTE(review): no cell visible in this notebook assigns a top-level `f1`
# (the f1 computation inside run() is commented out); the value shown comes
# from earlier kernel state.
f1

0.8717948717948718