In [None]:
#######################################################################################
#   Title: algo-tests.ipynb
#   Authors: Nadia Goralski, Stacey Koornneef
#   Code used to test accuracy of different KNN algorithms in Surprise
#   
#   References: 
#   https://surprise.readthedocs.io/en/stable/getting_started.html
#######################################################################################

import numpy as np
import pandas as pd
from sklearn.metrics import balanced_accuracy_score
from surprise import Dataset, KNNBaseline, KNNBasic, KNNWithMeans, KNNWithZScore, Reader
from tabulate import tabulate


# load the survey responses and the rating data used for training
survey = pd.read_csv('./survey_result.csv')
df = pd.read_csv('./random.csv')

# case difficulty levels are rated on a 0-4 scale
reader = Reader(rating_scale=(0, 4))

# build the Surprise dataset; load_from_df takes the columns in
# (user id, item id, rating) order
data = Dataset.load_from_df(
    df[["survey_result_id", "errors", "adjust_level"]],
    reader,
)

# cosine similarity computed between items rather than between users
options_dict = dict(name="cosine", user_based=False)

# one instance of every KNN variant under test, all sharing the same options
knn_baseline = KNNBaseline(sim_options=options_dict)
knn_basic = KNNBasic(sim_options=options_dict)
knn_means = KNNWithMeans(sim_options=options_dict)
knn_zscore = KNNWithZScore(sim_options=options_dict)

# list of {"name", "obj"} records pairing each algorithm with its display name
algs = [
    {"name": label, "obj": model}
    for label, model in (
        ("KNNBaseline", knn_baseline),
        ("KNNBasic", knn_basic),
        ("KNNWithMeans", knn_means),
        ("KNNWithZScore", knn_zscore),
    )
]

# error counts for which a prediction is requested
NO_ERROR, LOW_ERROR, HIGH_ERROR = 0, 2, 5

# adjust level function for manual adjusting
def adjust_level(level, error_count):
    """Return the manually adjusted difficulty level.

    The level is lowered by one when more than two errors were made,
    but never below 0; otherwise it is returned unchanged.
    """
    should_step_down = error_count > 2 and level > 0
    return level - 1 if should_step_down else level


predictions = []
trainset = data.build_full_trainset()

# Fit every algorithm exactly once. The training set is built a single time
# above and never changes, so fitting inside the survey loop only repeated
# the same work for every survey row.
for alg in algs:
    alg['obj'].fit(trainset)

# Collect one row per (survey result, algorithm), laid out as
# [survey id, algorithm name, start level, est. no errors, est. low errors, est. high errors].
# Later cells index into these rows by position, so the layout must not change.
for index, row in survey.iterrows():
    # named survey_id (not `id`) so the builtin id() is not shadowed in the kernel
    survey_id = row['id']

    for alg in algs:
        knn = alg['obj']

        prediction_none = knn.predict(survey_id, NO_ERROR)
        prediction_low = knn.predict(survey_id, LOW_ERROR)
        prediction_high = knn.predict(survey_id, HIGH_ERROR)

        predictions.append([
            survey_id,
            alg['name'],
            row['level'],
            prediction_none.est,
            prediction_low.est,
            prediction_high.est,
        ])






In [None]:

# ##################################
# Comparison Tables 
# per Algorithm & Prediction Type
# ##################################

# Each entry is [error count, table title]. The order must match the order of
# the three estimates stored at positions 3, 4 and 5 of every `predictions` row.
prediction_types = [
    [NO_ERROR, 'Prediction (No Errors)'],
    [LOW_ERROR, 'Prediction (Low Errors)'],
    [HIGH_ERROR, 'Prediction (High Errors)'],
]
table_header = ['Survey ID',  'Start Level']


for alg in algs:
    name = alg['name']
    # rows belonging to this algorithm; the same for every prediction type
    algo_predictions = [p for p in predictions if p[1] == name]

    for pt, ptype in enumerate(prediction_types):
        error_count, typename = ptype

        print(name, '-', typename, '\n==================================================================')

        # Build a fresh header row for every table. Appending to `table_header`
        # itself would add two more duplicate columns each time a table is printed.
        table = [table_header + ['Manual Suggested', 'Predicted']]
        for algp in algo_predictions:
            level = algp[2]
            table.append([
                algp[0],
                level,
                adjust_level(level, error_count),
                algp[pt + 3],  # estimate for this prediction type
            ])
        tab = tabulate(table, headers='firstrow', tablefmt='grid')
        print(tab)
    


In [None]:

# ##################################
# Accuracy Table
# ##################################
# Each entry is [error count, label]. The order must match the order of the
# three estimates stored at positions 3, 4 and 5 of every `predictions` row,
# and the order of the accuracy columns in `table_header`.
prediction_types = [
    [NO_ERROR, 'Prediction (No Errors)'],
    [LOW_ERROR, 'Prediction (Low Errors)'],
    [HIGH_ERROR, 'Prediction (High Errors)'],
]
table_header = ['Algo', 'Prediction (No Errors) Acc',  'Prediction (Low Errors) Acc', 'Prediction (High Errors) Acc']

# The header is used as-is. It is also row 0 of `table`, so mutating it inside
# the loops would add junk columns to the printed table.
table = [table_header]
for alg in algs:
    name = alg['name']
    # rows belonging to this algorithm; the same for every prediction type
    algo_predictions = [p for p in predictions if p[1] == name]

    accuracies = []
    for pt, ptype in enumerate(prediction_types):
        error_count = ptype[0]

        # manually suggested level vs. the algorithm's estimate for each survey result
        manual_vals = [adjust_level(p[2], error_count) for p in algo_predictions]
        predict_vals = [p[pt + 3] for p in algo_predictions]

        # round to get closest level
        predict_vals_round = np.round(np.array(predict_vals))
        accuracy = balanced_accuracy_score(np.array(manual_vals), predict_vals_round)
        accuracies.append(accuracy)

    # one row per algorithm: its name followed by one accuracy per prediction type
    table.append([name, *accuracies])

tab = tabulate(table, headers='firstrow')
print(tab)
    
        