In [7]:
!pip install scikit-surprise



In [8]:
import pandas as pd
from surprise import Dataset
from surprise import Reader
from surprise import KNNWithMeans
from surprise.model_selection import train_test_split
from surprise.model_selection import cross_validate
from surprise.model_selection import GridSearchCV

In [9]:
ratings = pd.read_csv('ecommerce_rating.csv', nrows=1000)

In [10]:
rating_scale = (1, 5)

In [11]:
reader = Reader(rating_scale=rating_scale)

In [12]:
data = Dataset.load_from_df(ratings[['UserId', 'ProductId', 'Rating']], reader)

In [13]:
trainset, testset = train_test_split(data, test_size=.2)

In [14]:
sim_options = {'name': 'pearson_baseline', 'user_based': False}
algo = KNNWithMeans(k=50, sim_options=sim_options)

In [15]:
results = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    1.4125  1.3991  1.4538  1.4218  0.0233  
MAE (testset)     1.1138  1.1442  1.1866  1.1482  0.0298  
Fit time          0.02    0.02    0.01    0.01    0.00    
Test time         0.01    0.00    0.00    0.00    0.00    


In [17]:
algo.fit(trainset)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x791c33421f90>

In [18]:
def get_top_n(user_id, n):
    user_ratings = ratings[ratings['UserId'] == user_id]
    purchased_products = user_ratings['ProductId'].unique()
    top_n = []
    for product_id in ratings['ProductId'].unique():
        if product_id not in purchased_products:
            prediction = algo.predict(user_id, product_id)
            top_n.append((product_id, prediction.est))
    top_n.sort(key=lambda x: x[1], reverse=True)
    return top_n[:n]

In [19]:
top_10 = get_top_n(1, 10)
print(top_10)

[('0205616461', 4.14875), ('0558925278', 4.14875), ('0733001998', 4.14875), ('0737104473', 4.14875), ('0762451459', 4.14875), ('1304139212', 4.14875), ('1304139220', 4.14875), ('130414089X', 4.14875), ('130414643X', 4.14875), ('1304146537', 4.14875)]


In [26]:
param_grid = {'k': [20, 30, 40],
              'sim_options': {'name': ['msd', 'cosine'],
                              'min_support': [1, 5],
                              'user_based': [False]}
              }

grid_search = GridSearchCV(KNNWithMeans, param_grid, measures=['RMSE', 'MAE'], cv=3)
grid_search.fit(data)

print(grid_search.best_params)
print(grid_search.best_score)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matr

In [29]:
def get_product_details(product_id):
    product_details = ratings[ratings['ProductId'] == product_id]
    return product_details

for product_id, rating in top_10:
    product_details = get_product_details(product_id)
    print(f"Product ID: {product_id}, Rating: {rating}")

Product ID: 0205616461, Rating: 4.14875
Product ID: 0558925278, Rating: 4.14875
Product ID: 0733001998, Rating: 4.14875
Product ID: 0737104473, Rating: 4.14875
Product ID: 0762451459, Rating: 4.14875
Product ID: 1304139212, Rating: 4.14875
Product ID: 1304139220, Rating: 4.14875
Product ID: 130414089X, Rating: 4.14875
Product ID: 130414643X, Rating: 4.14875
Product ID: 1304146537, Rating: 4.14875
