In [1]:
import pandas as pd
from surprise import Dataset
from surprise import Reader
from surprise import SVD, SVDpp, NMF, SlopeOne, KNNBasic, KNNBaseline, KNNWithMeans, KNNWithZScore, CoClustering, BaselineOnly
from surprise.model_selection import cross_validate, train_test_split
from surprise import accuracy

In [2]:
# Load data from Review.csv
df = pd.read_csv('Files/Review.csv')

In [3]:
# Define the Reader object
reader = Reader(rating_scale=(1, 5))

In [4]:
# Load data into the Surprise Dataset format
data = Dataset.load_from_df(df[['customer_id', 'product_id', 'customer_rating']], reader)

In [5]:
# Split the data into training and testing sets
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

In [6]:
# Train the SVDpp algorithm on the training set
algo = SVDpp()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVDpp at 0x1e1e736e280>

In [7]:
# Make predictions on the test set
predictions = algo.test(testset)

In [8]:
# Calculate and print the RMSE and MAE
print('SVDpp -- RMSE:', accuracy.rmse(predictions))
print('SVDpp -- MAE:', accuracy.mae(predictions))

RMSE: 0.9814
SVDpp -- RMSE: 0.9814110697005336
MAE:  0.6602
SVDpp -- MAE: 0.6602386330999442


In [9]:
def recommend_products(customer_id, num_recommendations=10):
    # Get a list of all product IDs
    product_ids = df['product_id'].unique()

    # Create a list of tuples containing the product ID and predicted rating for the given customer
    predictions = [(iid, algo.predict(customer_id, iid).est) for iid in product_ids]

    # Sort the predictions by predicted rating (highest first) and select the top N recommendations
    recommendations = sorted(predictions, key=lambda x: x[1], reverse=True)[:num_recommendations]

    # Extract the product IDs from the recommendation tuples and create a new DataFrame with product names and ratings
    recommended_products = pd.DataFrame(recommendations, columns=['product_id', 'rating'])
    products_df = pd.read_csv('Files/Product.csv')
    recommended_products = recommended_products.merge(products_df[['product_id', 'product_name']], on='product_id')
    recommended_products = recommended_products[['product_id', 'product_name', 'rating']]

    return recommended_products

In [10]:
customer_id = 'A3HPCRD9RX351S'
recommended_products = recommend_products(customer_id)

In [11]:
recommended_products.head()

Unnamed: 0,product_id,product_name,rating
0,74558946,iPad Pro 11 inch (2020) 128GB Wifi - Hàng Ch...,4.922984
1,54582248,Bộ Vi Xử Lý CPU AMD Ryzen 3 3100 Processors - ...,4.899782
2,53056161,Phần mềm Microsoft 365 Family English APAC EM ...,4.897587
3,4679003,Giá Treo Màn Máy Tính Góc Xoay Đa Hướng Nhập K...,4.895543
4,71896003,Đồng Hồ Thông Minh Apple Watch Series 6 LTE GP...,4.883856
