In [1]:
!pip install numpy==1.26.4



In [None]:
!pip install scikit-surprise

In [4]:
import pandas as pd
import numpy as np
import surprise
from surprise.model_selection import GridSearchCV
from surprise.model_selection.split import KFold

In [5]:
# Load the space-separated ratings file; the three column names are supplied
# explicitly: user id, item id, rating.
ratings_path = "/content/drive/MyDrive/Cases/filmtrust/ratings.txt"
column_names = ['uid', 'iid', 'rating']
ratings = pd.read_csv(ratings_path, sep=' ', names=column_names)
ratings.head()

Unnamed: 0,uid,iid,rating
0,1,1,2.0
1,1,2,4.0
2,1,3,3.5
3,1,4,3.0
4,1,5,4.0


In [6]:
# Observed bounds of the rating column; reused below as the rating scale.
rating_column = ratings['rating']
lowest_rating, highest_rating = rating_column.min(), rating_column.max()
print("Ratings range between {0} and {1}".format(lowest_rating, highest_rating))

Ratings range between 0.5 and 4.0


In [7]:
# Wrap the ratings frame in a Surprise dataset. The reader carries the
# (min, max) rating bounds computed from the data.
rating_bounds = (lowest_rating, highest_rating)
reader = surprise.Reader(rating_scale=rating_bounds)
data = surprise.Dataset.load_from_df(ratings, reader)
type(data)

In [None]:
# User-based KNN with cosine similarity; k is left at its default (40).
similarity_options = {'name': 'cosine', 'user_based': True}
algo = surprise.KNNBasic(sim_options=similarity_options)

# Fit on every available rating (no hold-out split).
full_trainset = data.build_full_trainset()
output = algo.fit(full_trainset)

List of User IDs

In [None]:
# Distinct user ids, in order of first appearance in the ratings frame.
pd.unique(ratings['uid'])

Expected rating for any specific user for a specific item:

In [None]:
# Raw ids must have the same type as the ids the model was trained on.
# The dataset was built from a DataFrame whose uid/iid columns are integers,
# so integer ids are required here. The string ids '20' / '101' are not found
# in the trainset and the estimate falls back to the global mean rating.
pred = algo.predict(uid=20, iid=101)
print(pred.est)

In [None]:
# Show the whole prediction object returned by algo.predict, not only its .est field.
pred

Total Items:

In [None]:
# Every distinct item id that appears in the ratings frame.
iids = pd.unique(ratings['iid'])
print(iids)

The list of items rated by user 60:

In [None]:
# Select the rows belonging to user 60 and keep only their item ids.
items_of_user_60 = ratings.loc[ratings['uid'] == 60, 'iid']
u_iid = items_of_user_60.tolist()
print("List of items rated by user 60:", u_iid)
print("No. of items rated by user {0}: {1}".format(60, len(u_iid)))

`np.setdiff1d(a, b)` returns the sorted, unique values of `a` that are not present in `b`:

In [None]:
# Toy example: the values of `a` that do not occur in `b`.
a = [2, 4, 6, 7, 8]
b = [4, 8]
np.setdiff1d(ar1=a, ar2=b)

List of the items not rated by user 60:

In [None]:
# All item ids minus the ones user 60 has already rated.
iids_to_predict = np.setdiff1d(ar1=iids, ar2=u_iid)
print(
    "Items not rated by 60 or those items for which the expected ratings are to be predicted:",
    iids_to_predict,
)

In [None]:
# Number of items for which a rating has to be estimated for user 60.
iids_to_predict.size

Extracting the estimated rating for iids_to_predict

In [None]:
# Build one (user, item, rating) entry per unrated item for user 60.
# The 0. rating is only a placeholder value passed along with each entry.
testset = [[60, unseen_iid, 0.] for unseen_iid in iids_to_predict]
predictions = algo.test(testset)

# Keep the item id and its estimated rating, then show the ten highest estimates.
predictions_frame = pd.DataFrame(predictions)
exp_ratings = predictions_frame.loc[:, ['iid', 'est']]
exp_ratings.sort_values(by='est', ascending=False).head(10)

## Tuning for best K

### User-Based Filtering

In [None]:
# Search grid for user-based filtering: k in {30, 40, 50, 60}.
# `user_based` is a similarity option, so it has to be nested under
# 'sim_options' (each option given as a list of candidate values). As a
# top-level key it is not a parameter of the KNN algorithms and has no effect.
param_grid = {
    'k': np.arange(30, 70, 10),
    'sim_options': {'user_based': [True]},
}
param_grid

In [None]:
# Seeded, shuffled 5-fold splitter so the cross-validation folds are repeatable.
kfold = KFold(n_splits=5, shuffle=True, random_state=25)

# Grid search over param_grid for KNNWithZScore, scored on RMSE and MAE.
scoring_measures = ['rmse', 'mae']
gs = GridSearchCV(
    surprise.KNNWithZScore,
    param_grid,
    measures=scoring_measures,
    cv=kfold,
)

In [None]:
# Run the grid search on the full dataset: each parameter combination in the
# grid is cross-validated with the 5-fold splitter passed to GridSearchCV.
gs.fit(data)

Best Score & Parameter:

In [None]:
# Print the best RMSE first, then the parameter combination that achieved it.
for summary in (gs.best_score, gs.best_params):
    print(summary['rmse'])

In [None]:
# Tabulate the full cross-validation results of the grid search.
pd.DataFrame.from_dict(gs.cv_results)

We can now use the algorithm that yields the best rmse:

In [None]:
# Take the estimator configured with the best-RMSE parameters and fit it on
# every available rating.
best_estimators = gs.best_estimator
algo = best_estimators['rmse']
all_ratings_trainset = data.build_full_trainset()
algo.fit(all_ratings_trainset)

The recommendations can be generated for any user with the object **algo**.

### Item-Based Filtering

In [None]:
# Search grid for item-based filtering: k in {30, 40, 50, 60}.
# `user_based` is a similarity option and must be nested under 'sim_options'
# (each option given as a list of candidate values). As a top-level key it is
# ignored, and the search silently falls back to the default user-based
# similarities instead of item-based ones.
param_grid = {
    'k': np.arange(30, 70, 10),
    'sim_options': {'user_based': [False]},
}
param_grid

In [None]:
# Same seeded 5-fold splitter as for the user-based search, so both searches
# are configured with identical cross-validation settings.
kfold = KFold(n_splits=5, shuffle=True, random_state=25)

# Grid search over the current param_grid for KNNWithZScore (RMSE and MAE).
gs = GridSearchCV(
    algo_class=surprise.KNNWithZScore,
    param_grid=param_grid,
    measures=['rmse', 'mae'],
    cv=kfold,
)

In [None]:
# Run the grid search on the full dataset: each parameter combination in the
# grid is cross-validated with the 5-fold splitter passed to GridSearchCV.
gs.fit(data)

Best Score:

In [None]:
# Lowest cross-validated RMSE found by the grid search.
best_rmse = gs.best_score['rmse']
print(best_rmse)

Best Parameter:

In [None]:
# Parameter combination that produced the lowest RMSE.
best_rmse_params = gs.best_params['rmse']
print(best_rmse_params)

We can now use the algorithm that yields the best rmse:

In [None]:
# Pick the estimator built with the best-RMSE parameters, then fit it on the
# complete set of ratings.
algo = gs.best_estimator['rmse']
complete_trainset = data.build_full_trainset()
algo.fit(complete_trainset)

The recommendations can be generated for any user with the object **algo**.