# Recommendation System using Python 

# Using scikit-surprise library

In [22]:
# Installing scikit-surprise library to work with surprise library
!pip install scikit-surprise



In [23]:
# Importing surprise library
import surprise

In [24]:
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import GridSearchCV
from surprise.model_selection import cross_validate


# Loading Movielens Dataset
data = Dataset.load_builtin('ml-100k')

# Setting the parameters grid for training our algorithm
param_grid = {'n_epochs': [5,10], 'lr_all': [0.002, 0.005],
              'reg_all': [0.4, 0.6]}

# Considering the famous SVD Algorithm
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=5)

gs.fit(data)

print(gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])
algo = gs.best_estimator['rmse']

# Running a 5-fold Cross Validation for given data
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

0.9608676773828609
{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9655  0.9658  0.9574  0.9566  0.9619  0.9614  0.0039  
MAE (testset)     0.7732  0.7769  0.7661  0.7672  0.7695  0.7706  0.0040  
Fit time          2.54    2.51    2.36    2.46    2.51    2.48    0.06    
Test time         0.17    0.21    0.13    0.20    0.13    0.17    0.03    


{'test_rmse': array([0.96549389, 0.96578378, 0.95741471, 0.95660736, 0.96191005]),
 'test_mae': array([0.77315665, 0.77691729, 0.76614114, 0.7672294 , 0.76945515]),
 'fit_time': (2.5402731895446777,
  2.505462408065796,
  2.3625986576080322,
  2.464632511138916,
  2.5140182971954346),
 'test_time': (0.17345952987670898,
  0.20538806915283203,
  0.12983942031860352,
  0.20197033882141113,
  0.12574195861816406)}

cv = 5

That is Given data set is split into a K(5) number of sections/folds where each fold is used as a testing set at some point.

In [32]:
from surprise import accuracy

#Building the Training Dataset
trainset = data.build_full_trainset()

#Training the algorithm with the Training Set
algo.fit(trainset)


#Building the Testing DataSet 
testset = trainset.build_testset()

#Applying our Algo to Testing Dataset
predictions = algo.test(testset)

In [33]:
print(predictions[435])

user: 244        item: 317        r_ui = 5.00   est = 3.94   {'was_impossible': False}


# Return the top-N recommendation for each user

get_top_n function ::

Paramenters   : predictions - list of Prediction objects
                      n     - number of Recommendations needed 
Return Values : A dict where keys   are user (raw) ids and
                             values are lists of tuples:
                                            raw_item id, rating estimation), ... of size n.

In [38]:
from collections import defaultdict

def get_top_n(predictions, n=10):
   
    # Mapping predictions to each User 
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and get the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n



# Than predict ratings for all pairs (u, i) that are NOT in the training set.
testset = trainset.build_anti_testset()
predictions = algo.test(testset)

top_n = get_top_n(predictions, n=10)

# Print the recommended items for 5 users
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])

196 ['408', '169', '318', '64', '483', '12', '114', '603', '178', '50']
186 ['408', '169', '318', '64', '483', '114', '603', '178', '50', '513']
22 ['408', '169', '318', '64', '483', '114', '12', '603', '178', '513']
244 ['408', '483', '12', '603', '178', '513', '480', '127', '272', '657']
166 ['408', '169', '318', '64', '114', '483', '12', '603', '50', '178']
298 ['408', '169', '64', '114', '12', '513', '480', '272', '657', '515']
115 ['408', '169', '318', '64', '483', '114', '603', '513', '480', '272']
253 ['408', '169', '114', '603', '178', '513', '480', '272', '357', '174']
305 ['114', '603', '513', '657', '515', '313', '316', '488', '22', '493']
6 ['114', '603', '657', '251', '313', '963', '172', '316', '923', '190']
62 ['408', '169', '603', '178', '513', '480', '272', '657', '515', '251']
286 ['318', '64', '114', '12', '603', '178', '513', '480', '657', '98']
200 ['408', '64', '114', '12', '603', '178', '513', '480', '127', '272']
210 ['408', '169', '318', '64', '12', '603', '178

# Prediction of Rating for User 113 to Movie 12

In [39]:
algo.predict(113,12)

Prediction(uid=113, iid=12, r_ui=None, est=3.52986, details={'was_impossible': False})

# Recommending top 10 movies to user 113 

In [40]:
for iid, rating in top_n['123']:
    print(iid,rating,sep="\t")

408	4.2842841823934465
318	4.247647830267165
169	4.244825935603339
12	4.180928396861669
114	4.17929913001937
603	4.152786134981188
178	4.1390959255637005
513	4.125781672495516
272	4.109066816597037
357	4.108269376557744


# Accuracy 

In [31]:
accuracy.rmse(predictions, verbose=True)

RMSE: 0.4056


0.40559380609612533

In [41]:
accuracy.mse(predictions, verbose=True)

MSE: 0.1644


0.16438899205384433

In [42]:
accuracy.mae(predictions, verbose=True)

MAE:  0.3166


0.3165691546884007