In [73]:
import pandas as pd 
from surprise import Reader, Dataset, SVD, NormalPredictor, BaselineOnly, KNNBasic, NMF
from surprise.model_selection import KFold
from surprise.model_selection import cross_validate

In [74]:
movies = pd.read_csv('./movielens_dataset/u.item' , header = None , sep = "|" , encoding='latin-1')
movies.columns = ['movie_id' , 'movie_title' , 'release_date' , 'video_release_date' ,
             'IMDb_URL' , 'unknown' , 'Action' , 'Adventure' , 'Animation' ,
             'Childrens' , 'Comedy' , 'Crime' , 'Documentary' , 'Drama' , 'Fantasy' ,
             'Film_Noir' , 'Horror' , 'Musical' , 'Mystery' , 'Romance' , 'Sci_Fi' ,
             'Thriller' , 'War' , 'Western']

users = pd.read_csv('./movielens_dataset/u.user', header= None , sep = '|')
users.columns = ['user_id' , 'age' , 'gender' , 'occupation' , 'zip_code']

ratings = pd.read_csv('./movielens_dataset/u.data', header= None , sep = '\t')
ratings.columns = ['user_id' , 'movie_id' , 'rating' , 'timestamp']

In [75]:
data = ratings.merge(users , on='user_id')
data = data.merge(movies , on='movie_id')
data.head(3)

Unnamed: 0,user_id,movie_id,rating,timestamp,age,gender,occupation,zip_code,movie_title,release_date,...,Fantasy,Film_Noir,Horror,Musical,Mystery,Romance,Sci_Fi,Thriller,War,Western
0,196,242,3,881250949,49,M,writer,55105,Kolya (1996),24-Jan-1997,...,0,0,0,0,0,0,0,0,0,0
1,305,242,5,886307828,23,M,programmer,94086,Kolya (1996),24-Jan-1997,...,0,0,0,0,0,0,0,0,0,0
2,6,242,4,883268170,42,M,executive,98101,Kolya (1996),24-Jan-1997,...,0,0,0,0,0,0,0,0,0,0


In [76]:
reader = Reader()
dataset = Dataset.load_from_df(data[['user_id', 'movie_id', 'rating']], reader)
kf = KFold(n_splits=5)
kf.split(dataset)

<generator object KFold.split at 0x00000197C455F200>

In [77]:
algorithm = NormalPredictor()
cross_validate(algorithm, dataset, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm NormalPredictor on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.5229  1.5284  1.5183  1.5156  1.5160  1.5202  0.0048  
MAE (testset)     1.2215  1.2315  1.2164  1.2140  1.2180  1.2203  0.0061  
Fit time          0.07    0.08    0.08    0.08    0.08    0.08    0.00    
Test time         0.09    0.15    0.16    0.09    0.16    0.13    0.03    


{'test_rmse': array([1.52294606, 1.52838136, 1.51831713, 1.51556205, 1.51600808]),
 'test_mae': array([1.22147844, 1.23149056, 1.21635419, 1.21399394, 1.21803712]),
 'fit_time': (0.07198905944824219,
  0.08100032806396484,
  0.0820000171661377,
  0.0819997787475586,
  0.08299970626831055),
 'test_time': (0.08801126480102539,
  0.1510000228881836,
  0.1550002098083496,
  0.08800005912780762,
  0.15600037574768066)}

In [78]:
algorithm = SVD()
cross_validate(algorithm, dataset, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9457  0.9354  0.9318  0.9290  0.9325  0.9349  0.0058  
MAE (testset)     0.7465  0.7373  0.7323  0.7309  0.7348  0.7364  0.0055  
Fit time          3.13    3.17    3.16    3.16    3.18    3.16    0.02    
Test time         0.09    0.15    0.08    0.08    0.15    0.11    0.03    


{'test_rmse': array([0.94574893, 0.93540732, 0.93177185, 0.92904499, 0.93250049]),
 'test_mae': array([0.74645151, 0.73733656, 0.73232702, 0.73091338, 0.73483156]),
 'fit_time': (3.125091314315796,
  3.1650233268737793,
  3.1630706787109375,
  3.1560001373291016,
  3.1785430908203125),
 'test_time': (0.08600068092346191,
  0.1490004062652588,
  0.08400106430053711,
  0.08299994468688965,
  0.1500236988067627)}

In [79]:
algorithm = KNNBasic(k=20)
cross_validate(algorithm, dataset, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9804  0.9848  0.9775  0.9687  0.9690  0.9761  0.0063  
MAE (testset)     0.7727  0.7786  0.7693  0.7630  0.7644  0.7696  0.0057  
Fit time          0.29    0.30    0.29    0.30    0.30    0.30    0.01    
Test time         1.51    1.53    1.61    1.53    1.59    1.55    0.04    


{'test_rmse': array([0.98042128, 0.98475247, 0.97749558, 0.96867891, 0.96902811]),
 'test_mae': array([0.77271728, 0.77858831, 0.76928019, 0.76301643, 0.76435644]),
 'fit_time': (0.2850229740142822,
  0.29599785804748535,
  0.29102230072021484,
  0.30299997329711914,
  0.30099987983703613),
 'test_time': (1.5079994201660156,
  1.5270822048187256,
  1.6055645942687988,
  1.5250000953674316,
  1.5926175117492676)}

In [80]:
algorithm = KNNBasic(sim_options={'user_based': False} , k=20) # https://surprise.readthedocs.io/en/stable/prediction_algorithms.html#similarity-measure-configuration
cross_validate(algorithm, dataset, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9816  0.9868  0.9787  0.9835  0.9826  0.9826  0.0026  
MAE (testset)     0.7727  0.7803  0.7752  0.7739  0.7728  0.7750  0.0028  
Fit time          0.45    0.44    0.44    0.44    0.44    0.44    0.00    
Test time         1.90    1.85    1.93    1.84    1.93    1.89    0.04    


{'test_rmse': array([0.98160089, 0.98680028, 0.97866611, 0.98351078, 0.98256068]),
 'test_mae': array([0.77271794, 0.78026861, 0.77516933, 0.77388929, 0.77276998]),
 'fit_time': (0.44700026512145996,
  0.4419999122619629,
  0.4370005130767822,
  0.44100069999694824,
  0.4400005340576172),
 'test_time': (1.8990130424499512,
  1.8520228862762451,
  1.9305610656738281,
  1.8421080112457275,
  1.9340109825134277)}

In [81]:
algorithm = NMF()
cross_validate(algorithm, dataset, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9669  0.9694  0.9717  0.9522  0.9611  0.9642  0.0070  
MAE (testset)     0.7602  0.7643  0.7632  0.7474  0.7551  0.7580  0.0062  
Fit time          3.66    3.69    3.74    3.66    3.69    3.69    0.03    
Test time         0.07    0.07    0.14    0.14    0.07    0.10    0.03    


{'test_rmse': array([0.96688685, 0.9693849 , 0.97168073, 0.9522049 , 0.96106785]),
 'test_mae': array([0.76015394, 0.76433808, 0.76323788, 0.74736406, 0.75514833]),
 'fit_time': (3.655035972595215,
  3.6861138343811035,
  3.7422823905944824,
  3.6611011028289795,
  3.686162233352661),
 'test_time': (0.07300043106079102,
  0.07402396202087402,
  0.13899970054626465,
  0.14100027084350586,
  0.07300019264221191)}

In [82]:
trainset = dataset.build_full_trainset()
algorithm = SVD(n_factors = 200 , lr_all = 0.005 , reg_all = 0.02 , n_epochs = 40 , init_std_dev = 0.05)
algorithm.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x197c4543ac0>

In [85]:
def get_recommendations(uid = None , iid = None):
    predictions = []
    if uid is None:
        for ui in trainset.all_users():
            predictions.append(algorithm.predict(ui, iid, verbose = False))
        return predictions
    
    if iid is None:
        for ii in trainset.all_items():
            ii = trainset.to_raw_iid(ii)
            predictions.append(algorithm.predict(uid, ii, verbose = False))
        return predictions
    return predictins.append(algorithm.predict(uid,iid,verbose = False))

In [88]:
predictions = get_recommendations(uid = 1)
predictions.sort(key=lambda x: x.est, reverse=True)
for pred in predictions[:50]:
    print('Movie -> {} with Score-> {}'.format(pred.iid , pred.est))

Movie -> 1512 with Score-> 5
Movie -> 867 with Score-> 5
Movie -> 1642 with Score-> 5
Movie -> 1643 with Score-> 5
Movie -> 119 with Score-> 4.994810648641642
Movie -> 408 with Score-> 4.9430236942390025
Movie -> 1064 with Score-> 4.9234805204586385
Movie -> 1398 with Score-> 4.849279971660167
Movie -> 1131 with Score-> 4.788783114825057
Movie -> 963 with Score-> 4.78555261336351
Movie -> 1175 with Score-> 4.776576037845915
Movie -> 1293 with Score-> 4.76076284400158
Movie -> 1449 with Score-> 4.753636104981325
Movie -> 958 with Score-> 4.740100003292113
Movie -> 1499 with Score-> 4.730037201924717
Movie -> 302 with Score-> 4.681102957873973
Movie -> 1203 with Score-> 4.675639510946902
Movie -> 427 with Score-> 4.620665099599474
Movie -> 113 with Score-> 4.615095842377263
Movie -> 357 with Score-> 4.610855988958445
Movie -> 1500 with Score-> 4.609063710136722
Movie -> 1592 with Score-> 4.596531732891613
Movie -> 626 with Score-> 4.58274552443589
Movie -> 251 with Score-> 4.572792415705