In [None]:
!pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357281 sha256=6d92bfa84af57c46dfe377ecfe3706b70b40f1b666614415019f3a9436a42e52
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4


In [None]:
import pandas as pd
import numpy as np
import surprise

In [None]:
# Read the space-separated ratings file. Column names are supplied explicitly
# via `names=`, giving one row per (user id, item id, rating).
ratings = pd.read_csv(
    "ratings.txt",
    sep=' ',
    names=['uid', 'iid', 'rating'],
)
ratings.head()

Unnamed: 0,uid,iid,rating
0,1,1,2.0
1,1,2,4.0
2,1,3,3.5
3,1,4,3.0
4,1,5,4.0


In [None]:
# Observed bounds of the rating column; these are reused as the Reader's rating scale.
rating_column = ratings['rating']
lowest_rating = rating_column.min()
highest_rating = rating_column.max()
print("Ratings range between {0} and {1}".format(lowest_rating, highest_rating))

Ratings range between 0.5 and 4.0


In [None]:
# Describe the rating scale to Surprise, then wrap the DataFrame as a Surprise dataset.
# NOTE(review): the frame is passed as-is, so its column order (uid, iid, rating)
# is what Surprise will see -- confirm this matches what load_from_df expects.
rating_bounds = (lowest_rating, highest_rating)
reader = surprise.Reader(rating_scale=rating_bounds)
data = surprise.Dataset.load_from_df(df=ratings, reader=reader)
type(data)

In [None]:
# User-user KNN with cosine similarity; k is left at its default (40).
similarity_options = dict(name='cosine', user_based=True)
algo = surprise.KNNBasic(sim_options=similarity_options)

# Train on every available rating (no hold-out split at this stage).
full_trainset = data.build_full_trainset()
output = algo.fit(full_trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


List of User IDs

In [None]:
# Distinct user ids present in the ratings table.
pd.unique(ratings['uid'])

array([   1,    2,    3, ..., 1506, 1507, 1508])

Expected rating for user 100 for item 217:

In [None]:
# Raw ids must have the same type as in the ratings DataFrame (integers here).
# Passing them as strings ('100', '900') makes Surprise treat the user/item as
# unknown, so the prediction is flagged impossible and only a fallback estimate
# is returned. The item id is 217, matching the caption of this cell.
pred = algo.predict(uid=100, iid=217)
print(pred.est)

3.0028030537791928


In [None]:
# Display the full Prediction object; its `details` field reports whether the
# estimate could actually be computed (`was_impossible`) and, if not, why.
pred

Prediction(uid='100', iid='900', r_ui=None, est=3.0028030537791928, details={'was_impossible': True, 'reason': 'User and/or item is unknown.'})

Total Items:

In [None]:
# Distinct item ids that appear anywhere in the ratings table.
iids = pd.unique(ratings['iid'])
print(iids)

[   1    2    3 ... 2069 2070 2071]


The list of items rated by user 100:

In [None]:
# Items user 100 has already rated.
is_user_100 = ratings['uid'] == 100
u_iid = ratings.loc[is_user_100, 'iid'].unique()
print("List of items rated by user 100:", u_iid)
print("No. of items rated by user {0}: {1}".format(100, len(u_iid)))

List of items rated by user 100: [215]
No. of items rated by user 100: 1


List of the items not rated by user 100:

In [None]:
# Candidate items = every known item minus the ones user 100 already rated.
iids_to_predict = np.setdiff1d(iids, u_iid)
print(
    "Items not rated by 100 or those items for which the expected ratings are to be predicted:",
    iids_to_predict,
)

Items not rated by 100 or those items for which the expected ratings are to be predicted: [   1    2    3 ... 2069 2070 2071]


In [None]:
# Number of candidate items to score for user 100.
iids_to_predict.size

2070

Extracting the estimated rating for iids_to_predict

In [None]:
# One [user, item, rating] row per candidate item for user 100; the 0. only
# fills the rating slot, since no true rating is known for these items.
testset = [[100, item_id, 0.] for item_id in iids_to_predict]
predictions = algo.test(testset)

# Keep just the item id and estimated rating, then show the highest estimates.
exp_ratings = pd.DataFrame(
    [(prediction.iid, prediction.est) for prediction in predictions],
    columns=['iid', 'est_rating'],
)
exp_ratings.sort_values(by='est_rating', ascending=False).head()

Unnamed: 0,iid,est_rating
1862,1864,4.0
1369,1371,4.0
1805,1807,4.0
1800,1802,4.0
1361,1363,4.0


Tuning for best K

In [None]:
from surprise.model_selection import GridSearchCV
from surprise.model_selection.split import KFold

### User-Based Filtering

In [None]:
# Search grid for user-based KNN.
# `user_based` is a similarity option, so it has to be nested under
# 'sim_options'. As a top-level key it is handed to KNNBasic as a stray keyword
# argument and silently ignored, leaving the algorithm on its defaults.
# 'msd' is spelled out because it is the similarity the search was already
# using implicitly (see the "Computing the msd similarity matrix" log lines).
param_grid = {
    'k': np.arange(30, 70, 10),
    'sim_options': {'name': ['msd'], 'user_based': [True]},
}
param_grid

{'k': array([30, 40, 50, 60]), 'user_based': [True]}

In [None]:
# 5-fold shuffled cross-validation with a fixed seed, so the folds are the same on every run.
kfold = KFold(n_splits=5, shuffle=True, random_state=23)

# Grid search over `param_grid` for KNNBasic, scored on both RMSE and MAE.
gs = GridSearchCV(
    algo_class=surprise.KNNBasic,
    param_grid=param_grid,
    measures=['rmse', 'mae'],
    cv=kfold,
)

In [None]:
# Run the grid search on the full dataset. Judging by the log, a similarity
# matrix is recomputed for each parameter combination and fold, so this is the slow cell.
gs.fit(data)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computi

Best Score & Parameter:

In [None]:
# Best cross-validated RMSE and the parameter combination that achieved it.
best_rmse = gs.best_score['rmse']
best_rmse_params = gs.best_params['rmse']
print(best_rmse)
print(best_rmse_params)

0.8641633357915124
{'k': 60, 'user_based': True}


We can now use the algorithm that yields the best rmse:

In [None]:
# Take the estimator configured with the best-RMSE parameters and train it on
# every available rating before using it for predictions.
full_trainset = data.build_full_trainset()
algo = gs.best_estimator['rmse']
algo.fit(full_trainset)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x785630590550>

The recommendations can be generated for any user with the object **algo**.

### Item-Based Filtering

In [None]:
# Search grid for item-based KNN.
# `user_based` is a similarity option, so it has to be nested under
# 'sim_options'. As a top-level key it is handed to KNNBasic as a stray keyword
# argument and silently ignored, so the search would run user-based (the
# default) despite asking for item-based similarities.
# 'msd' is spelled out because it is the similarity the search was already
# using implicitly (see the "Computing the msd similarity matrix" log lines).
param_grid = {
    'k': np.arange(30, 70, 10),
    'sim_options': {'name': ['msd'], 'user_based': [False]},
}
param_grid

{'k': array([30, 40, 50, 60]), 'user_based': [False]}

In [None]:
# Same CV setup as the user-based search: 5 shuffled folds with a fixed seed.
kfold = KFold(n_splits=5, shuffle=True, random_state=23)

# Grid search over the item-based `param_grid`, scored on both RMSE and MAE.
gs = GridSearchCV(
    algo_class=surprise.KNNBasic,
    param_grid=param_grid,
    measures=['rmse', 'mae'],
    cv=kfold,
)

In [None]:
# Run the item-based grid search on the full dataset. Judging by the log, a similarity
# matrix is recomputed for each parameter combination and fold, so this is the slow cell.
gs.fit(data)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computi

Best Score:

In [None]:
# Best cross-validated RMSE found by this search.
best_rmse = gs.best_score['rmse']
print(best_rmse)

0.8641633357915124


Best Parameter:

In [None]:
# Parameter combination that achieved the best RMSE.
best_rmse_params = gs.best_params['rmse']
print(best_rmse_params)

{'k': 60, 'user_based': False}


We can now use the algorithm that yields the best rmse:

In [None]:
# Take the estimator configured with the best-RMSE parameters and train it on
# every available rating before using it for predictions.
full_trainset = data.build_full_trainset()
algo = gs.best_estimator['rmse']
algo.fit(full_trainset)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x78561c1dd960>

The recommendations can be generated for any user with the object **algo**.