In [1]:
#importing necessary libs
import pandas as pd
from surprise import Dataset
from surprise import Reader
from surprise import KNNWithMeans
from surprise.model_selection import GridSearchCV

In [2]:
#importing Dataset

df  = pd.read_csv('ml-100k/u.data', sep="\t")
df.columns = ['user_id', 'item_id', 'rating', 'timestamp']

reader = Reader(rating_scale=(1, 5))

# Loads Pandas dataframe
data = Dataset.load_from_df(df[["user_id", "item_id", "rating"]], reader)


In [3]:
#dataframe description
df.count()
df.info()
df.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99999 entries, 0 to 99998
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   user_id    99999 non-null  int64
 1   item_id    99999 non-null  int64
 2   rating     99999 non-null  int64
 3   timestamp  99999 non-null  int64
dtypes: int64(4)
memory usage: 3.1 MB


Unnamed: 0,user_id,item_id,rating,timestamp
count,99999.0,99999.0,99999.0,99999.0
mean,462.487415,425.531965,3.529865,883528900.0
std,266.614421,330.799501,1.125678,5343878.0
min,1.0,1.0,1.0,874724700.0
25%,254.0,175.0,3.0,879448700.0
50%,447.0,322.0,4.0,882826900.0
75%,682.0,631.0,4.0,888260000.0
max,943.0,1682.0,5.0,893286600.0


In [4]:
# recommender.py

# To use item-based cosine similarity
config = {
    "name": "cosine",
    "user_based": False,  # To Compute  similarities between items
}
Algo = KNNWithMeans(sim_options=config)

In [5]:
#Training prediction data
trainingSet = data.build_full_trainset()
Algo.fit(trainingSet)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x7fea249c0400>

In [6]:
#predicting with user_id and item_id as input
prediction = Algo.predict(100, 40)
prediction.est

2.635766187275516

In [7]:
# Parameter Tuning
sim_options = {
    "name": ["msd", "cosine"],
    "min_support": [3, 4, 5],
    "user_based": [False, True],
}

param_grid = {"sim_options": sim_options}

gs = GridSearchCV(KNNWithMeans, param_grid, measures=["rmse", "mae"], cv=3)
gs.fit(data)

print(gs.best_score["rmse"])
print(gs.best_params["rmse"])

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computi