In [1]:
!pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/772.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.0/772.0 kB[0m [31m1.3 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m768.0/772.0 kB[0m [31m12.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3163482 sha256=7d929a1f92931fe4dae683d5eef476c8b056a91b9a79d78f0fa4d73f7fb6bd8b
  Stored in directory: /root/.cache/pip/wheels

In [2]:
import pandas as pd
import numpy as np
import surprise
import os

In [3]:
os.chdir("/content/drive/MyDrive/Cases/filmtrust")
ratings = pd.read_csv("ratings.txt",sep=' ',names = ['uid','iid','rating'])
ratings.head()

Unnamed: 0,uid,iid,rating
0,1,1,2.0
1,1,2,4.0
2,1,3,3.5
3,1,4,3.0
4,1,5,4.0


In [4]:
lowest_rating = ratings['rating'].min()
highest_rating = ratings['rating'].max()
print("Ratings range between {0} and {1}".format(lowest_rating,highest_rating))

Ratings range between 0.5 and 4.0


In [5]:
reader = surprise.Reader(rating_scale = (lowest_rating,highest_rating))
data = surprise.Dataset.load_from_df(ratings,reader)
type(data)

surprise.dataset.DatasetAutoFolds

Tuning for best parameters

In [6]:
from surprise.model_selection import GridSearchCV
from surprise.model_selection.split import KFold

param_grid = {'lr_all':np.linspace(0.001,1,3), 'reg_all': np.linspace(0.01,0.8,5),
              'n_factors':[40,30]}
kfold = KFold(random_state=23,n_splits=5,shuffle=True)
gs = GridSearchCV(surprise.SVD, param_grid,joblib_verbose =3, measures=['rmse', 'mae'], cv=kfold,n_jobs=-1)
gs.fit(data)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed:   12.6s
[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed:   36.8s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:   44.8s finished


Best Score:

In [7]:
print(gs.best_score['rmse'])

0.828838165001413


Best Parameter:

In [8]:
print(gs.best_params['rmse'])

{'lr_all': 0.001, 'reg_all': 0.01, 'n_factors': 40}


We can now use the algorithm that yields the best rmse:

In [9]:
algo = gs.best_estimator['rmse']
algo.fit(data.build_full_trainset())

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7ee6881f87c0>

The recommendations can be generated for any user with the object **algo**.

Total Items:

In [10]:
iids = ratings['iid'].unique()
print(iids)

[   1    2    3 ... 2069 2070 2071]


The list of items rated by user 50:

In [11]:
u_iid = ratings[ratings['uid']==50]['iid'].unique()
print(u_iid)

[  8 211   3   2 219 234  12 254 250 207  11 253 236  84  10   7 233  13
   1   5   6 252 241 216 257 206   4 217   9 215 213  17 255 220 121 245
 239 251 235]


List of the items not rated by user 50:

In [12]:
iids_to_predict = np.setdiff1d(iids, u_iid)
print(iids_to_predict)

[  14   15   16 ... 2069 2070 2071]


Extracting the estimated rating from iids_to_predict

In [13]:
testset = [[50,iid,0.] for iid in iids_to_predict]
predictions = algo.test(testset)

In [14]:
predictions[0]

Prediction(uid=50, iid=14, r_ui=0.0, est=3.2946298940647933, details={'was_impossible': False})

In [15]:
pred_ratings = np.array([pred.est for pred in predictions])

Getting the item with highest expected rating

In [16]:
i_max = np.argmax(pred_ratings)
print("Item:",iids_to_predict[i_max],"is the item with highest expected rating as",pred_ratings[i_max])

Item: 218 is the item with highest expected rating as 3.7001850839220407


In [17]:
exp_ratings = pd.DataFrame({'Item_ID': iids_to_predict, 'Exp_Rating':pred_ratings})
exp_ratings.sort_values(by=['Exp_Rating','Item_ID'], ascending=[False, True], inplace=True)
exp_ratings.head(10)

Unnamed: 0,Item_ID,Exp_Rating
194,218,3.700185
472,512,3.610494
246,286,3.608207
1133,1173,3.581184
765,805,3.57391
635,675,3.57264
302,342,3.569024
210,242,3.568918
360,400,3.562906
278,318,3.553457
