In [13]:
#importing necessary libs
import pandas as pd
from surprise import SVD
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate
from surprise.model_selection import GridSearchCV
from collections import defaultdict

In [2]:
#importing Dataset

df  = pd.read_csv('ml-100k/u.data', sep="\t")
df.columns = ['user_id', 'item_id', 'rating', 'timestamp']

reader = Reader(rating_scale=(1, 5))

# Loads Pandas dataframe
data = Dataset.load_from_df(df[["user_id", "item_id", "rating"]], reader)


In [3]:
#recommender.py

Algo = SVD()

In [4]:
#Cross Validate results

cross_validate(Algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9299  0.9353  0.9302  0.9438  0.9428  0.9364  0.0060  
MAE (testset)     0.7293  0.7392  0.7340  0.7415  0.7439  0.7376  0.0053  
Fit time          5.54    6.59    6.20    5.59    6.06    6.00    0.39    
Test time         0.18    0.14    0.33    0.25    0.17    0.21    0.07    


{'test_rmse': array([0.92993489, 0.93534385, 0.93016289, 0.94382711, 0.94275329]),
 'test_mae': array([0.7293167 , 0.73915018, 0.73396049, 0.74152146, 0.74394658]),
 'fit_time': (5.537494897842407,
  6.590005159378052,
  6.199488639831543,
  5.5890443325042725,
  6.063248872756958),
 'test_time': (0.18370652198791504,
  0.14380478858947754,
  0.32729172706604004,
  0.2501704692840576,
  0.16774320602416992)}

In [5]:
#Training prediction data
trainingSet = data.build_full_trainset()
Algo.fit(trainingSet)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fceb8085f10>

In [6]:
#predicting with user_id and item_id as input
prediction = Algo.predict(100, 40)
prediction.est

2.6129332479189777

In [9]:
#Parameter Tuning

param_grid = {'n_epochs': [5, 10], 'lr_all': [0.002, 0.005,0.007, 0.009],
              'reg_all': [0.2,0.3,0.4]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)

gs.fit(data)

# best RMSE score
print(gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])

0.949874752097418
{'n_epochs': 10, 'lr_all': 0.009, 'reg_all': 0.2}


In [21]:
#Top recommendations returning

def get_top_n(predictions, n=10):
    """Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    """

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

In [23]:
# Predict ratings for all pairs (u, i) that are NOT in the training set.
testset = trainingSet.build_anti_testset()
predictions = Algo.test(testset)

top_n = get_top_n(predictions, n=10)


In [24]:
# Print the recommended items for each user
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])

186 [265, 408, 496, 172, 1194, 483, 114, 242, 144, 316]
22 [12, 64, 178, 272, 318, 169, 56, 408, 135, 23]
244 [127, 427, 124, 483, 963, 14, 223, 474, 1449, 517]
166 [169, 408, 204, 357, 251, 320, 174, 513, 64, 12]
298 [408, 216, 169, 251, 64, 173, 12, 313, 170, 480]
115 [285, 134, 199, 169, 1449, 519, 191, 483, 484, 180]
253 [174, 515, 480, 136, 408, 603, 357, 191, 197, 165]
305 [57, 603, 114, 513, 8, 488, 498, 9, 661, 641]
6 [603, 114, 654, 172, 179, 60, 657, 48, 1137, 615]
62 [408, 23, 175, 185, 427, 484, 478, 223, 137, 792]
286 [8, 12, 302, 170, 427, 114, 813, 165, 478, 484]
200 [408, 736, 114, 64, 12, 251, 134, 513, 427, 659]
210 [285, 479, 191, 513, 603, 169, 963, 408, 12, 318]
224 [174, 64, 114, 12, 98, 79, 204, 408, 513, 1]
303 [178, 192, 169, 14, 180, 515, 285, 657, 197, 190]
122 [408, 515, 657, 603, 114, 483, 272, 98, 169, 479]
194 [114, 513, 42, 170, 493, 169, 45, 59, 603, 1449]
291 [474, 318, 313, 169, 480, 603, 185, 127, 408, 654]
234 [302, 114, 306, 514, 169, 408, 707, 269

591 [318, 483, 170, 427, 269, 408, 604, 480, 9, 197]
581 [169, 408, 528, 12, 318, 114, 194, 496, 498, 8]
592 [792, 517, 190, 511, 205, 693, 208, 209, 114, 651]
580 [12, 178, 408, 603, 134, 483, 272, 23, 474, 515]
590 [483, 318, 251, 408, 12, 169, 173, 302, 516, 199]
593 [114, 408, 205, 22, 169, 64, 603, 515, 707, 333]
583 [603, 408, 127, 114, 169, 479, 427, 474, 98, 302]
596 [318, 64, 408, 169, 12, 98, 515, 480, 114, 603]
570 [318, 50, 408, 173, 174, 12, 480, 427, 519, 64]
599 [98, 408, 64, 169, 186, 173, 168, 190, 513, 516]
589 [169, 408, 50, 114, 657, 513, 479, 603, 496, 483]
594 [64, 318, 114, 408, 169, 657, 511, 272, 603, 528]
597 [178, 408, 64, 12, 114, 657, 483, 169, 474, 357]
578 [408, 169, 318, 484, 251, 12, 603, 173, 483, 168]
601 [408, 169, 919, 114, 515, 474, 285, 137, 124, 209]
602 [496, 408, 178, 318, 64, 169, 12, 483, 603, 174]
600 [64, 12, 318, 135, 169, 251, 513, 191, 180, 223]
605 [603, 151, 197, 427, 178, 134, 514, 654, 515, 136]
603 [114, 513, 64, 318, 603, 178, 190,