In [1]:
import numpy as np
import pandas as pd

from collections import defaultdict
from surprise import Reader, Dataset, SVD, evaluate
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import cross_validate

# Column names applied to the tab-separated ratings file, which is read
# without a header row of its own.
r_cols = ['user_id', 'item_id', 'rating', 'timestamp']

data_df = pd.read_csv(
    'ml-100k/u.data',
    sep='\t',
    names=r_cols,
    encoding='latin-1',
)



In [2]:
# Fit Surprise's SVD algorithm on every rating in u.data (no hold-out split).

# (1, 5) is Reader's default rating scale; it is spelled out here so the
# assumption about the rating range is visible.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(
    data_df[['user_id', 'item_id', 'rating']],
    reader,
)
trainset = data.build_full_trainset()

algo = SVD()
# Whatever fit() returns is kept under the name `fit`.
fit = algo.fit(trainset)




In [4]:
#Build predictions for those userid, movieid pairs which aren't in u.data

# The anti-testset is derived from the full trainset, and algo.test() scores
# every entry in it; the result is a list of Prediction objects.
# NOTE(review): the r_ui carried by these entries is presumably a fill value
# rather than a real rating (the dump of `predictions` further down shows the
# same r_ui on every row) - confirm against the Surprise documentation.
testset = trainset.build_anti_testset()
predictions = algo.test(testset)

In [5]:
#Write the predicted ratings in a csv file

# One "uid,iid,rating" line is written per prediction, with the estimate
# passed through round() first. The with-block makes sure the file is flushed
# and closed when the loop ends (or if a write fails part-way through);
# previously the handle was left open, so buffered rows could be missing.
with open("ml-100k/new_predicted_colab.csv","w") as f:
    for i in predictions:
        f.write(str(i.uid)+","+str(i.iid)+","+str(round(i.est))+"\n")


In [66]:
def get_top_n(predictions, n=10):
    '''Collect the n best-scoring predictions for every user.

    Args:
        predictions(list of Prediction objects): Predictions as returned by
            the test method of an algorithm. Each one must unpack into five
            fields: (uid, iid, true rating, estimate, details).
        n(int): The number of recommendations to keep for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of
        [raw item id, rating estimation] pairs, highest estimate first,
        holding at most n entries. Users appear in the order they are first
        seen in predictions.
    '''

    # Bucket every [item, estimate] pair under the user it belongs to.
    per_user = {}
    for uid, iid, _true_r, est, _details in predictions:
        per_user.setdefault(uid, []).append([iid, est])

    # Rank each bucket by estimate (best first; ties keep input order) and
    # keep only the first n entries.
    return {
        uid: sorted(pairs, key=lambda pair: pair[1], reverse=True)[:n]
        for uid, pairs in per_user.items()
    }

top_n = get_top_n(predictions, n=10)

# Show each user's recommended item ids, one user per line.
for uid, user_ratings in top_n.items():
    print(uid, [entry[0] for entry in user_ratings])

196 [64, 174, 318, 114, 408, 603, 357, 185, 480, 189]
186 [165, 241, 604, 657, 114, 22, 511, 524, 496, 178]
22 [22, 100, 357, 98, 64, 408, 268, 179, 315, 318]
244 [127, 251, 474, 320, 187, 792, 285, 14, 12, 175]
166 [483, 603, 64, 408, 114, 488, 100, 1194, 169, 531]
298 [64, 169, 408, 657, 316, 480, 493, 251, 272, 1194]
115 [169, 408, 519, 488, 483, 474, 603, 57, 114, 318]
253 [408, 357, 172, 657, 169, 178, 174, 275, 603, 272]
305 [213, 23, 8, 132, 1194, 603, 604, 659, 185, 922]
6 [429, 114, 923, 709, 603, 654, 661, 657, 922, 150]
62 [175, 427, 408, 124, 205, 234, 185, 513, 169, 192]
286 [474, 427, 286, 114, 607, 318, 12, 659, 922, 480]
200 [12, 127, 603, 408, 480, 181, 64, 611, 302, 963]
210 [408, 169, 100, 474, 480, 318, 165, 511, 178, 316]
224 [50, 174, 96, 210, 172, 181, 144, 204, 143, 64]
303 [205, 515, 527, 114, 169, 513, 963, 89, 45, 644]
122 [169, 408, 483, 657, 114, 285, 641, 1449, 515, 134]
194 [484, 480, 169, 408, 430, 603, 1142, 302, 190, 487]
291 [408, 318, 169, 513, 165, 

65 [8, 408, 496, 169, 114, 166, 136, 515, 483, 174]
137 [302, 265, 194, 603, 515, 480, 479, 272, 483, 408]
257 [169, 474, 134, 408, 98, 318, 641, 657, 114, 515]
111 [408, 173, 172, 98, 50, 64, 169, 483, 515, 12]
285 [50, 513, 12, 169, 408, 603, 134, 174, 190, 172]
96 [408, 427, 480, 169, 12, 192, 191, 963, 603, 487]
116 [89, 64, 98, 513, 42, 12, 178, 69, 705, 168]
73 [483, 408, 654, 114, 178, 488, 98, 511, 427, 641]
221 [483, 114, 408, 474, 657, 169, 357, 654, 603, 513]
235 [114, 408, 513, 64, 98, 199, 654, 169, 528, 316]
164 [174, 272, 483, 64, 357, 169, 22, 251, 59, 488]
281 [169, 963, 64, 496, 174, 318, 483, 223, 659, 479]
182 [318, 64, 174, 511, 480, 483, 12, 357, 603, 408]
129 [408, 318, 178, 169, 474, 603, 483, 357, 14, 511]
45 [483, 169, 178, 408, 285, 174, 318, 192, 527, 511]
131 [89, 199, 50, 174, 603, 480, 178, 12, 511, 170]
230 [318, 272, 12, 59, 328, 483, 316, 921, 173, 169]
126 [174, 64, 12, 191, 50, 127, 195, 114, 187, 483]
231 [12, 408, 483, 64, 169, 603, 189, 480, 318, 

510 [22, 357, 272, 191, 50, 64, 12, 318, 423, 59]
524 [611, 656, 428, 57, 648, 512, 408, 1194, 59, 641]
501 [64, 285, 178, 12, 603, 408, 318, 114, 357, 89]
525 [169, 408, 50, 318, 173, 12, 195, 963, 96, 174]
521 [169, 408, 513, 64, 114, 313, 483, 199, 489, 59]
520 [408, 174, 127, 64, 318, 50, 172, 114, 483, 496]
519 [242, 302, 474, 86, 234, 98, 193, 194, 603, 181]
528 [169, 483, 408, 64, 114, 318, 313, 272, 963, 515]
532 [237, 174, 172, 69, 64, 173, 50, 190, 114, 657]
530 [657, 1194, 408, 318, 480, 190, 272, 169, 114, 515]
531 [169, 511, 318, 480, 98, 496, 408, 1449, 272, 251]
529 [313, 483, 64, 318, 480, 603, 178, 114, 484, 511]
517 [408, 169, 657, 60, 114, 59, 921, 87, 513, 285]
527 [198, 178, 483, 408, 258, 195, 478, 199, 166, 257]
485 [12, 50, 64, 169, 483, 657, 172, 408, 174, 251]
533 [1194, 166, 114, 136, 59, 79, 487, 1039, 272, 165]
535 [408, 659, 313, 169, 481, 199, 611, 251, 513, 191]
536 [64, 114, 520, 651, 173, 178, 313, 429, 659, 272]
526 [12, 173, 174, 178, 251, 132, 318, 

In [67]:
#save the top-10 movies for each user in a mat file

from scipy.io import savemat

# Map each user id (as a string key) to the list of that user's recommended
# item ids, dropping the estimates.
# NOTE(review): the keys are numeric strings, and MATLAB variable names cannot
# start with a digit - confirm the saved file loads as expected on the
# consuming side.
ntop = {
    str(uid): [pair[0] for pair in user_pairs]
    for uid, user_pairs in top_n.items()
}

# Write the file once, after the dict is complete. The call used to sit inside
# the loop, which rewrote top10.mat from scratch for every user.
savemat('top10.mat', ntop)

    

In [68]:
# Spot-check one estimate from the fitted model: user 46, item 1. The third
# argument (0) is echoed back as r_ui in the displayed Prediction.
algo.predict(46,1,0)

Prediction(uid=46, iid=1, r_ui=0, est=4.159077947612093, details={'was_impossible': False})

In [69]:
# Preview only the first few predictions; displaying the whole list renders
# one repr per anti-testset pair and floods the notebook output.
predictions[:10]

[Prediction(uid=196, iid=302, r_ui=3.52986, est=3.9422800794633246, details={'was_impossible': False}),
 Prediction(uid=196, iid=377, r_ui=3.52986, est=2.735720702703065, details={'was_impossible': False}),
 Prediction(uid=196, iid=51, r_ui=3.52986, est=3.5623508439507723, details={'was_impossible': False}),
 Prediction(uid=196, iid=346, r_ui=3.52986, est=3.236336741201037, details={'was_impossible': False}),
 Prediction(uid=196, iid=474, r_ui=3.52986, est=4.163418262012177, details={'was_impossible': False}),
 Prediction(uid=196, iid=265, r_ui=3.52986, est=3.9532128075758304, details={'was_impossible': False}),
 Prediction(uid=196, iid=465, r_ui=3.52986, est=3.5896442491936322, details={'was_impossible': False}),
 Prediction(uid=196, iid=451, r_ui=3.52986, est=3.55049852334756, details={'was_impossible': False}),
 Prediction(uid=196, iid=86, r_ui=3.52986, est=3.7448509777305032, details={'was_impossible': False}),
 Prediction(uid=196, iid=1014, r_ui=3.52986, est=3.1768155502701014, de

In [73]:
# Scratch check: int() drops the fractional part (3.9 -> 3), which is not the
# same as the round() used when the predictions were written to CSV.
int(3.9)

3