In [1]:
import json
import time

In [2]:
# Load the raw train/test splits from disk.
# NOTE(review): presumably each file maps a user id to a list of
# [item, rating] pairs -- confirm against the data files.
# The handles get their own names so they are not confused with the parsed
# data, and the encoding is pinned because JSON text is UTF-8 by specification
# (the platform default encoding may differ).
with open("train_item.json", encoding="utf-8") as train_file:
    training = json.load(train_file)

with open("test_item.json", encoding="utf-8") as test_file:
    testing = json.load(test_file)

In [3]:
# Reshape each split from {user: [[item, rating], ...]} into a nested
# {user: {item: rating}} lookup table.
training_dict = {}
testing_dict = {}

for source, target in ((training, training_dict), (testing, testing_dict)):
    for user, pairs in source.items():
        for item, rating in pairs:
            # The user's entry is created on first use; a repeated
            # (user, item) pair keeps whichever rating appears last.
            target.setdefault(user, {})[item] = rating



In [4]:
def sub_row_mean(ratings_dict):
    """Mean-centre every user's ratings.

    Parameters
    ----------
    ratings_dict : dict
        Nested mapping ``{user: {item: rating}}``.

    Returns
    -------
    dict
        A new nested mapping with the same users and items, in which each
        rating has had that user's own mean rating subtracted. The input
        mapping is not modified. A user with no ratings maps to ``{}``.
    """
    centred = {}
    for user, user_ratings in ratings_dict.items():
        if not user_ratings:
            # There is no mean to subtract; keep the user with an empty entry
            # rather than dividing by zero.
            centred[user] = {}
            continue

        mean_rating = sum(user_ratings.values()) / len(user_ratings)
        centred[user] = {
            item: rating - mean_rating for item, rating in user_ratings.items()
        }

    return centred
        


In [5]:
import numpy as np
from numpy.linalg import norm
import math

def cosine_sim(ratings_dict, user):
    """Cosine similarity between ``user`` and every user in ``ratings_dict``.

    Parameters
    ----------
    ratings_dict : dict
        Nested mapping ``{user: {item: rating}}``. Items a user has not rated
        contribute 0 to the dot product.
    user : hashable
        Key of the target user; must be present in ``ratings_dict`` unless the
        mapping is empty.

    Returns
    -------
    dict
        ``{other_user: similarity}`` for every key of ``ratings_dict``
        (including ``user`` itself). Values are clamped to the range [0, 1];
        a zero norm on either side or a NaN result yields 0.

    Raises
    ------
    KeyError
        If ``ratings_dict`` is non-empty and ``user`` is not one of its keys.
    """
    if not ratings_dict:
        return {}

    user_ratings = ratings_dict[user]
    # The target user's norm does not depend on the other user, so compute it
    # once instead of once per loop iteration.
    user_norm = np.linalg.norm(list(user_ratings.values()))

    cosine_sim_val = {}
    for other_user, other_ratings in ratings_dict.items():
        dot_prod = sum(
            rating * other_ratings.get(item, 0)
            for item, rating in user_ratings.items()
        )
        norm_mul = user_norm * np.linalg.norm(list(other_ratings.values()))

        cos_sim = 0 if norm_mul == 0 else dot_prod / norm_mul
        if math.isnan(cos_sim):
            cos_sim = 0

        # Clamp into [0, 1]: negative similarities are treated as "no
        # similarity" and values above 1 are capped at 1.
        cosine_sim_val[other_user] = min(max(cos_sim, 0), 1)

    return cosine_sim_val

In [6]:

def predict_item_rating_avg(ratings_dict, cos_similarities_dict, k, item, user):
    """Predict ``user``'s rating of ``item`` as a plain neighbour average.

    The neighbours are the (at most) ``k`` most similar other users who have
    rated ``item``; their ratings of ``item`` are averaged without weighting.
    If there is no such neighbour, or the best neighbour has similarity 0,
    the prediction falls back to the mean of ``user``'s own ratings.

    Parameters
    ----------
    ratings_dict : dict
        Nested mapping ``{user: {item: rating}}``; must contain every key of
        ``cos_similarities_dict`` other than ``user``.
    cos_similarities_dict : dict
        ``{other_user: similarity}``. It is only read, never modified, and it
        does not have to contain ``user``.
    k : int
        Maximum number of neighbours to average over.
    item : hashable
        Item whose rating is predicted.
    user : hashable
        User the prediction is made for.

    Returns
    -------
    float
        The predicted rating.

    Raises
    ------
    KeyError
        If the fallback is needed and ``user`` is not in ``ratings_dict``.
    ZeroDivisionError
        If the fallback is needed and ``user`` has no ratings.
    """
    # Candidates keep the similarity dict's order, so ties are broken exactly
    # as a stable sort of that dict would break them.
    candidates = [
        (other_user, similarity)
        for other_user, similarity in cos_similarities_dict.items()
        if other_user != user and item in ratings_dict[other_user]
    ]
    neighbours = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:k]

    if neighbours and neighbours[0][1] != 0:
        # NOTE(review): neighbours with similarity 0 that still make the top k
        # are averaged in with the same weight as the others.
        total = 0
        for other_user, _ in neighbours:
            total += ratings_dict[other_user][item]
        return total / len(neighbours)

    # No usable neighbour: fall back to the user's own mean rating, taken from
    # the ratings that were passed in rather than from a notebook global.
    own_ratings = ratings_dict[user]
    return sum(own_ratings.values()) / len(own_ratings)


    
    

In [7]:


def predict_item_rating_sim(ratings_dict, cos_similarities_dict, k, item, user):
    """Predict ``user``'s rating of ``item`` as a similarity-weighted average.

    The neighbours are the (at most) ``k`` most similar other users who have
    rated ``item``; their ratings of ``item`` are averaged using the
    similarities as weights. If there is no such neighbour, or the best
    neighbour has similarity 0, the prediction falls back to the mean of
    ``user``'s own ratings.

    Parameters
    ----------
    ratings_dict : dict
        Nested mapping ``{user: {item: rating}}``; must contain every key of
        ``cos_similarities_dict`` other than ``user``.
    cos_similarities_dict : dict
        ``{other_user: similarity}``. It is only read, never modified, and it
        does not have to contain ``user``.
    k : int
        Maximum number of neighbours to use.
    item : hashable
        Item whose rating is predicted.
    user : hashable
        User the prediction is made for.

    Returns
    -------
    float
        The predicted rating. A result above 5.1 is reported on stdout and
        replaced by 4.99.

    Raises
    ------
    KeyError
        If the fallback is needed and ``user`` is not in ``ratings_dict``.
    ZeroDivisionError
        If the fallback is needed and ``user`` has no ratings.
    """
    # Candidates keep the similarity dict's order, so ties are broken exactly
    # as a stable sort of that dict would break them.
    candidates = [
        (other_user, similarity)
        for other_user, similarity in cos_similarities_dict.items()
        if other_user != user and item in ratings_dict[other_user]
    ]
    neighbours = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:k]

    if neighbours and neighbours[0][1] != 0:
        weighted_sum = 0
        weight_total = 0
        for other_user, similarity in neighbours:
            weighted_sum += ratings_dict[other_user][item] * similarity
            weight_total += similarity
        rating = weighted_sum / weight_total
    else:
        # No usable neighbour: fall back to the user's own mean rating, taken
        # from the ratings that were passed in rather than a notebook global.
        own_ratings = ratings_dict[user]
        rating = sum(own_ratings.values()) / len(own_ratings)

    if rating > 5.1:
        # NOTE(review): looks like a sanity check for a 1-5 rating scale; the
        # 5.1 threshold and the 4.99 replacement are kept as found -- confirm
        # whether clamping to 5 was intended.
        print("Flag")
        print("")
        rating = 4.99
    return rating

  
    

In [8]:
def predict(ratings_dict, user, items, num_neighbors=None):
    """Predict ``user``'s ratings for ``items`` with both predictors.

    Parameters
    ----------
    ratings_dict : dict
        Nested mapping ``{user: {item: rating}}`` used both to compute the
        similarities and to read the neighbours' ratings.
    user : hashable
        User the predictions are made for.
    items : iterable
        Items to predict.
    num_neighbors : int, optional
        Neighbourhood size. When omitted, the notebook-level ``k`` is used so
        existing calls keep working.

    Returns
    -------
    dict
        ``{item: (avg_prediction, sim_prediction)}`` with the results of
        ``predict_item_rating_avg`` and ``predict_item_rating_sim``.
    """
    if num_neighbors is None:
        num_neighbors = k

    # NOTE(review): the whole ratings table is mean-centred again on every
    # call; if this is called once per user, consider hoisting it to the caller.
    sub_mean_dict = sub_row_mean(ratings_dict)
    cos_sim_dict = cosine_sim(sub_mean_dict, user)

    predicted_ratings_dict = {}
    for item in items:
        # Each predictor receives its own copy so cos_sim_dict stays intact
        # even if a predictor modifies the mapping it is handed.
        avg_rating_prediction = predict_item_rating_avg(
            ratings_dict, cos_sim_dict.copy(), num_neighbors, item, user
        )
        sim_rating_prediction = predict_item_rating_sim(
            ratings_dict, cos_sim_dict.copy(), num_neighbors, item, user
        )
        predicted_ratings_dict[item] = (avg_rating_prediction, sim_rating_prediction)

    return predicted_ratings_dict

In [9]:
# Evaluate both predictors on the held-out ratings and report MAE / RMSE.
k = 10  # neighbourhood size read by predict()

start = time.time()

N = 0  # number of (user, item) test ratings scored so far
abs_err_avg = 0  # running sums; turned into MAE / RMSE after the loop
abs_err_sim = 0
sq_err_avg = 0
sq_err_sim = 0

for i, (user, orig_vals) in enumerate(testing_dict.items(), start=1):
    predicted_vals = predict(training_dict, user, list(orig_vals.keys()))

    for item, (avg_rat, sim_rat) in predicted_vals.items():
        corr_rat = orig_vals[item]
        abs_err_avg += abs(avg_rat - corr_rat)
        abs_err_sim += abs(sim_rat - corr_rat)
        sq_err_avg += (avg_rat - corr_rat) ** 2
        sq_err_sim += (sim_rat - corr_rat) ** 2
        N += 1

        # Progress report every 1000 scored ratings.
        # NOTE(review): this dumps the current user's full prediction dicts;
        # consider a one-line summary to keep the cell output small.
        if N % 1000 == 0:
            print("Item" + str(N))
            print("User" + str(i))
            print(time.time() - start)
            print("predicted vals: ")
            print(predicted_vals)
            print("original vals: ")
            print(orig_vals)

if N == 0:
    # Without this guard the divisions below fail with an unexplained
    # ZeroDivisionError.
    raise ValueError("testing_dict contains no ratings to evaluate")

MAE_avg = abs_err_avg / N
MAE_sim = abs_err_sim / N
RMSE_avg = math.sqrt(sq_err_avg / N)
RMSE_sim = math.sqrt(sq_err_sim / N)

print((MAE_avg, RMSE_avg))
print((MAE_sim, RMSE_sim))

end = time.time()

print(end - start)

Item1000
User399
147.97392892837524
predicted vals: 
{'A1UK17SNKI5O6X': (4.75, 4.75), 'AK1O1SSMJMUPE': (4.75, 4.75)}
original vals: 
{'A1UK17SNKI5O6X': 5.0, 'AK1O1SSMJMUPE': 5.0}
Item2000
User785
290.5098853111267
predicted vals: 
{'AJLXN6VP7YQM4': (4.863636363636363, 4.863636363636363), 'A2BH945DHGH5J7': (5.0, 5.000000000000001), 'AWXTOLU2K2PH4': (4.555555555555555, 5.000000000000001), 'A307KRGS93AWUV': (5.0, 5.0), 'A1W81WBRSGIA8L': (4.7, 3.907600732368588), 'A1TZO0LL87VBML': (4.863636363636363, 4.863636363636363), 'A2EQXM2FLHJZ83': (5.0, 5.0), 'A3H5KBH5VROQR8': (5.0, 5.000000000000001), 'A2ES02QY8PFTCV': (4.863636363636363, 4.863636363636363), 'AAPOQVDM5ZTHB': (5.0, 5.0), 'A35GOZFYGY37B1': (4.863636363636363, 4.863636363636363), 'A1GCE3BM88LFFE': (4.863636363636363, 4.863636363636363), 'A28P52NLQ6VC0X': (4.863636363636363, 4.863636363636363), 'AVCG4AA8W8NE1': (5.0, 5.0), 'A1NF6UMPRMFFC0': (4.888888888888889, 5.0), 'A1HA83IXCM4362': (5.0, 5.0)}
original vals: 
{'AJLXN6VP7YQM4': 5.0, '

Item11000
User2591
1013.7588562965393
predicted vals: 
{'A3QLVPX1P2ZYNB': (4.8, 4.999999999999999), 'A36EY3ZP83UC1M': (4.8, 4.8), 'A2VIB3S5P0J38C': (4.375, 4.999999999999999), 'A1EYTDCGBR54X3': (5.0, 5.000000000000001), 'A2HHA4PJGSRANX': (5.0, 5.0), 'A2XZWMTCQWOIX3': (4.7, 4.0), 'A3UBT4VHJF63KO': (4.8, 4.8), 'AC72K0D0USEI6': (5.0, 5.0), 'A2XKCRSSSXDY6Y': (4.571428571428571, 5.000000000000001), 'AG8M6D4XOUOPL': (5.0, 5.0), 'AMAH5NQ59ELIC': (4.8, 4.8)}
original vals: 
{'A3QLVPX1P2ZYNB': 5.0, 'A36EY3ZP83UC1M': 5.0, 'A2VIB3S5P0J38C': 5.0, 'A1EYTDCGBR54X3': 5.0, 'A2HHA4PJGSRANX': 5.0, 'A2XZWMTCQWOIX3': 3.0, 'A3UBT4VHJF63KO': 5.0, 'AC72K0D0USEI6': 5.0, 'A2XKCRSSSXDY6Y': 5.0, 'AG8M6D4XOUOPL': 5.0, 'AMAH5NQ59ELIC': 5.0}
Item12000
User2783
1090.1964638233185
predicted vals: 
{'AFH8JOLBLCXIQ': (5.0, 5.0), 'A1LIX5FCM49JH3': (4.4, 4.999999999999999), 'AI2WD20YMB1TS': (5.0, 5.0), 'AG51DQFJN7OF7': (4.901960784313726, 4.901960784313726), 'A1FBRG7TEDHF8J': (4.75, 5.0), 'A1QUSZ9E44QKM4': (3.8, 4.0), 'A

Item16000
User3485
1383.037339925766
predicted vals: 
{'A2ZEXIUAKNK5A1': (5.0, 5.0), 'A167IVEYZYP7T0': (5.0, 5.000000000000001), 'A32GDI2V20AFY3': (5.0, 5.0), 'A282MIMAK7B36H': (5.0, 5.000000000000001), 'AKARSXUCV7D1L': (5.0, 5.0), 'A2SUAPGZRUX8R2': (4.888888888888889, 4.888888888888889), 'A3LRDHF9N5DONK': (5.0, 5.0), 'ABDK9G6RNI91Y': (5.0, 5.0), 'A1X5A6IZNP1BPX': (4.888888888888889, 4.888888888888889), 'ADW3CXK82WO6Y': (4.888888888888889, 4.888888888888889), 'A1FGMO7R7ZGAO5': (5.0, 5.0), 'A3BZIXUPWNY4HH': (5.0, 5.0), 'A2OBGVP8J49XB4': (5.0, 5.0), 'A1CRM4LBHNP46A': (5.0, 5.0), 'A39T8UC0HUWU76': (5.0, 5.0), 'A2ZATQUUG47BML': (5.0, 5.0), 'A254N8K05ZWM4C': (5.0, 5.000000000000001), 'A1UVK9BJ1R4ARU': (4.888888888888889, 4.888888888888889), 'A2BIL657ZMN4F0': (5.0, 5.0), 'ADP4MVNXXWSQ3': (5.0, 5.0), 'AOZCT7C19SKVK': (5.0, 5.0), 'A3376LC0MCM58N': (5.0, 5.000000000000001), 'A2QYQKOUF7SRTR': (4.0, 4.0), 'AOB5C30NRJD6R': (4.333333333333333, 5.0), 'AN87P9433INVO': (5.0, 5.0)}
original vals: 
{'A2

Item26000
User6654
2573.2731153964996
predicted vals: 
{'A33RII6U1H9EV9': (4.888888888888889, 4.888888888888889), 'AKMI7S0HT2Y3N': (4.888888888888889, 4.888888888888889)}
original vals: 
{'A33RII6U1H9EV9': 5.0, 'AKMI7S0HT2Y3N': 4.0}
Item27000
User6941
2685.280715227127
predicted vals: 
{'A1D3WRT8FY3W8Q': (4.8, 4.8)}
original vals: 
{'A1D3WRT8FY3W8Q': 5.0}
Item28000
User7293
2817.1383724212646
predicted vals: 
{'AF9AKNFFW0LI4': (5.0, 4.999999999999999), 'A1SLYZ25XI9D5Z': (5.0, 5.0), 'A11B9ATI5JM9D5': (5.0, 5.000000000000002), 'A381IORSE8W4VJ': (5.0, 4.999999999999999), 'A3D0J36870DIAB': (4.653846153846154, 4.653846153846154), 'A31ISWUODTNL7K': (5.0, 5.0)}
original vals: 
{'AF9AKNFFW0LI4': 5.0, 'A1SLYZ25XI9D5Z': 5.0, 'A11B9ATI5JM9D5': 5.0, 'A381IORSE8W4VJ': 5.0, 'A3D0J36870DIAB': 5.0, 'A31ISWUODTNL7K': 5.0}
Item29000
User7624
2942.102630376816
predicted vals: 
{'A30VKWWBFJ59B8': (4.875, 4.875), 'A2FO0C6S2GZ78O': (4.875, 4.875), 'A3767JX1HM1AZM': (5.0, 5.000000000000001), 'A24Q0JIFYS5SUK'

Item34000
User8935
3465.145465373993
predicted vals: 
{'A2NUTM1FBPSTOE': (3.875, 3.435835761330642), 'A1YYILLAK9GH62': (4.375, 4.375), 'A2U3W2PBPO5YQ': (4.375, 4.375), 'A2N2C5CIR4G4TR': (4.375, 4.375), 'A2EL3990HXOMZB': (4.0, 3.0), 'A21A565W3SWX6Q': (3.5, 3.6349188609996133), 'A2PR7BETS2RIV0': (4.375, 4.375), 'A3OT19SQ2FEQQG': (3.6666666666666665, 5.0), 'A3ICJY1SYW0X7P': (5.0, 5.0), 'A3J8I8XLN6T0QU': (5.0, 5.0), 'A1XHFQG34HNUYP': (5.0, 5.0), 'A1XTBQEJQLMB1H': (2.6666666666666665, 5.0), 'A15VFMU0WO8ED2': (4.9, 5.0), 'AFT1VNGNMPWPZ': (4.375, 4.375), 'A2X2NZPZ5PJGC0': (4.375, 4.375), 'A1TTCJLMH1I26G': (4.0, 5.0), 'A382ZSI8ZQ6IRJ': (4.142857142857143, 4.703037569875445), 'A3SDCMI9Q0XJOO': (4.3, 4.028226955006943), 'AXTAEIG9Y1AB1': (4.0, 4.0), 'ACPDWB71LGWA0': (4.75, 4.0), 'A1VT7MNJR43YHX': (4.375, 4.375), 'A1HBYVJ4SRC8SY': (5.0, 5.0), 'A2GAB7I4DK6E0W': (3.0, 3.0), 'A2ZCB8LM9E61D': (4.375, 4.375), 'A3J49L4HQSOK0E': (5.0, 5.000000000000001), 'A12C6V8PUD5GIJ': (4.0, 3.0), 'A3ITX78MCTSAEJ': (5

Item40000
User10473
4081.606300354004
predicted vals: 
{'A1IY5CJK0LMC71': (4.5, 4.5), 'A34Y7FLP1ENSGH': (4.5, 4.5), 'AVSG2FSINBVBM': (4.0, 4.0), 'A3RSO2MXIZA9EB': (3.4, 1.0)}
original vals: 
{'A1IY5CJK0LMC71': 5.0, 'A34Y7FLP1ENSGH': 5.0, 'AVSG2FSINBVBM': 4.0, 'A3RSO2MXIZA9EB': 5.0}
Item41000
User10755
4192.280919551849
predicted vals: 
{'AIKI16QELDFG5': (5.0, 5.0), 'A3ONQ6H1DXL8A1': (4.538461538461538, 4.538461538461538), 'AO6TDGK8HQ89D': (4.538461538461538, 4.538461538461538), 'A25Z0X6484GU9N': (4.538461538461538, 4.538461538461538), 'A5X54ANBPDKT8': (5.0, 5.0), 'A3DVAXTUZQVNSN': (4.538461538461538, 4.538461538461538)}
original vals: 
{'AIKI16QELDFG5': 5.0, 'A3ONQ6H1DXL8A1': 5.0, 'AO6TDGK8HQ89D': 5.0, 'A25Z0X6484GU9N': 5.0, 'A5X54ANBPDKT8': 5.0, 'A3DVAXTUZQVNSN': 5.0}
Item42000
User11009
4295.692269325256
predicted vals: 
{'A34L6WYC4IXGKE': (5.0, 5.0), 'A13OYPEGL3GLIV': (5.0, 5.0), 'A1OOST2U853BUH': (5.0, 4.999999999999999), 'A2DSG9ATUJ5QUV': (4.872964169381108, 4.872964169381108), 'A

Item48000
User12625
4935.224516630173
predicted vals: 
{'A3KM8BTPCPB5YG': (4.818181818181818, 4.818181818181818), 'A174Q4HYYJW1TJ': (5.0, 5.000000000000001)}
original vals: 
{'A3KM8BTPCPB5YG': 5.0, 'A174Q4HYYJW1TJ': 5.0}
Item49000
User12900
5044.5302658081055
predicted vals: 
{'A3844EH20ZBIG2': (4.693548387096774, 4.693548387096774), 'A37S2ER3H2ONZC': (5.0, 5.0), 'A3DU0W2AW1ZQ4V': (4.666666666666667, 4.999999999999999), 'A2BEXZC8WHHTM5': (5.0, 5.0), 'A13DV0T55XBQNW': (4.693548387096774, 4.693548387096774), 'A2ZD5FMXTZ8OPO': (5.0, 5.0), 'A3O2JYPW9KYP21': (4.693548387096774, 4.693548387096774), 'A1HCCW38EQQBTY': (4.7, 4.611781386965827), 'AHBG3YDT36KKJ': (4.5, 4.0), 'A174XKI5HMXZN5': (4.333333333333333, 4.1513043070958435), 'A1G25LP6XKC6F1': (4.693548387096774, 4.693548387096774), 'A8SXPZNKVT8JH': (5.0, 5.0), 'A1FIDUHNI3CSDB': (4.693548387096774, 4.693548387096774), 'A3GSULO9NWMFN9': (5.0, 5.000000000000001), 'A2OF0BEG8YXG8N': (4.0, 4.0), 'A2B2KQ0Q6ASF6V': (4.693548387096774, 4.693548387

Item52000
User13790
5387.2725694179535
predicted vals: 
{'A2UQ0JKCDV5JSD': (3.3333333333333335, 3.7133135309963), 'A3F2HRA5D1629Q': (4.423076923076923, 4.423076923076923), 'A1YZCADBVKWC9X': (4.423076923076923, 4.423076923076923), 'A2H3JURQZOHVMB': (4.9, 4.924841792248942), 'A1GJVF86T61CBW': (4.423076923076923, 4.423076923076923), 'A12EX1HUQ67ROW': (4.423076923076923, 4.423076923076923), 'A3T04XVGTP4AFI': (4.423076923076923, 4.423076923076923), 'A16J690RW5ALML': (4.423076923076923, 4.423076923076923), 'A39K9GDTIZ0933': (3.8333333333333335, 3.957509256528047), 'AW88HMROMQBP1': (5.0, 4.999999999999999), 'A1M8VIQH1FL2C7': (5.0, 5.0), 'AF65C8QK9NNXP': (4.5, 4.9425645177250495), 'A2JEA6LNXLMQME': (4.9, 4.999999999999999)}
original vals: 
{'A2UQ0JKCDV5JSD': 5.0, 'A3F2HRA5D1629Q': 2.0, 'A1YZCADBVKWC9X': 5.0, 'A2H3JURQZOHVMB': 5.0, 'A1GJVF86T61CBW': 5.0, 'A12EX1HUQ67ROW': 4.0, 'A3T04XVGTP4AFI': 5.0, 'A16J690RW5ALML': 3.0, 'A39K9GDTIZ0933': 3.0, 'AW88HMROMQBP1': 5.0, 'A1M8VIQH1FL2C7': 5.0, 'AF65