In [95]:
from surprise import accuracy
from surprise import *
from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise import Reader
import pandas as pd
from surprise import NMF
from surprise.model_selection import train_test_split
from surprise import accuracy
from collections import defaultdict

# Load the data from a CSV file
df = pd.read_csv("selected_item_based.csv")
ratings = df.drop("Unnamed: 0", axis=1)
ratings.head()

# Define a reader with the rating scale
reader = Reader(rating_scale=(1, 5))  # Assuming the ratings are from 1 to 5

def get_top_n(predictions, n=10):
    # Create a dictionary to store recommendations for each user
    top_n = defaultdict(list)

    # Group the predictions by user ID
    for user_id, item_id, true_rating, estimated_rating, _ in predictions:
        top_n[user_id].append((item_id, estimated_rating))

    # Sort the items for each user and retrieve the top-N recommendations
    for user_id, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[user_id] = user_ratings[:n]
    print()
    print("*"*100)
    return top_n

# Load the data from the DataFrame into the surprise dataset
dataset = Dataset.load_from_df(ratings[['reviewerID', 'asin', 'overall']], reader)

trainset, testset = train_test_split(dataset, test_size=0.4)

model = SVD()

model.fit(dataset.build_full_trainset())

import pickle

with open('Model/svd_model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [96]:
rat = ratings["reviewerID"].value_counts()
rat1 = rat.reset_index()
print(rat1.shape)
rat1["count"] = rat1["count"].astype(int)
rat1 = rat1[rat1["count"] > 0]
rat1.shape

(272034, 2)


(272034, 2)

In [97]:
rat2 = rat1[rat1["count"] == 1]
rat2

Unnamed: 0,reviewerID,count
135815,A3UC1OXZKWOU4Y,1
135816,A1UO6L82IJH46H,1
135817,A11AV3E7SOVWB9,1
135818,A2GW2PQSA9ZVZX,1
135819,A2EYZJBTX8X9XU,1
...,...,...
272029,A1936PL893C0BF,1
272030,A15I8JXQYS3YKJ,1
272031,AYF2R8J20JKM9,1
272032,A8VOWY1K80AJV,1


In [98]:
ratings_new = pd.merge(ratings, rat2, on="reviewerID", how="inner")
ratings_new.shape

(136219, 4)

In [99]:
rat1.head()

Unnamed: 0,reviewerID,count
0,A3OA4DV4L81N1D,158
1,A5JLAU2ARJ0BO,136
2,A3OXHLG6DIBRW8,115
3,A2AY4YUOX2N1BQ,89
4,A11KZ906QD08C5,88


In [100]:
ratings.head()
ratings.shape

(640235, 3)

In [101]:
rr = ratings[ratings['reviewerID'] == "A3UC1OXZKWOU4Y"].sort_values(by="overall", ascending=False)
rr

Unnamed: 0,asin,overall,reviewerID
504594,B0073HSJ24,4,A3UC1OXZKWOU4Y


In [102]:
items = ratings["reviewerID"].unique()
test = [['A3UC1OXZKWOU4Y', iid, 3] for iid in items]
predictions = model.test(test)
pred = pd.DataFrame(predictions)
pred.head()

Unnamed: 0,uid,iid,r_ui,est,details
0,A3UC1OXZKWOU4Y,A3VVMIMMTYQV5F,3,4.273291,{'was_impossible': False}
1,A3UC1OXZKWOU4Y,A1XSPKZ8HHSBX2,3,4.273291,{'was_impossible': False}
2,A3UC1OXZKWOU4Y,A2ZEQ0WBLNQN7O,3,4.273291,{'was_impossible': False}
3,A3UC1OXZKWOU4Y,A2IC3RIPB6HKSQ,3,4.273291,{'was_impossible': False}
4,A3UC1OXZKWOU4Y,A1JHNR71TPEETW,3,4.273291,{'was_impossible': False}


In [103]:
top_n = get_top_n(predictions, n=10)

top_n


****************************************************************************************************


defaultdict(list,
            {'A3UC1OXZKWOU4Y': [('A3VVMIMMTYQV5F', 4.273291409318378),
              ('A1XSPKZ8HHSBX2', 4.273291409318378),
              ('A2ZEQ0WBLNQN7O', 4.273291409318378),
              ('A2IC3RIPB6HKSQ', 4.273291409318378),
              ('A1JHNR71TPEETW', 4.273291409318378),
              ('AESDP3GI6GYQD', 4.273291409318378),
              ('A3P18XS3FMKMGI', 4.273291409318378),
              ('AU7SO8B4M5UBA', 4.273291409318378),
              ('A1O9IEPW5RZRI3', 4.273291409318378),
              ('A27N2CH8M3O5JS', 4.273291409318378)]})

In [105]:
user_id = "A3UC1OXZKWOU4Y"
recommendations = top_n[user_id]
print("Top 10 recommendations for user '{}':".format(user_id))
for item_id, rating in recommendations:
    print(f"- Item ID: {item_id}, Estimated Rating: {rating}")

Top 10 recommendations for user 'A3UC1OXZKWOU4Y':
- Item ID: A3VVMIMMTYQV5F, Estimated Rating: 4.273291409318378
- Item ID: A1XSPKZ8HHSBX2, Estimated Rating: 4.273291409318378
- Item ID: A2ZEQ0WBLNQN7O, Estimated Rating: 4.273291409318378
- Item ID: A2IC3RIPB6HKSQ, Estimated Rating: 4.273291409318378
- Item ID: A1JHNR71TPEETW, Estimated Rating: 4.273291409318378
- Item ID: AESDP3GI6GYQD, Estimated Rating: 4.273291409318378
- Item ID: A3P18XS3FMKMGI, Estimated Rating: 4.273291409318378
- Item ID: AU7SO8B4M5UBA, Estimated Rating: 4.273291409318378
- Item ID: A1O9IEPW5RZRI3, Estimated Rating: 4.273291409318378
- Item ID: A27N2CH8M3O5JS, Estimated Rating: 4.273291409318378


In [17]:
pred = pred.sort_values(by="est", ascending=False)
pred.head(20)

Unnamed: 0,uid,iid,r_ui,est,details
0,A2ZEQ0WBLNQN7O,A3VVMIMMTYQV5F,3,4.345005,{'was_impossible': False}
181360,A2ZEQ0WBLNQN7O,A1WH7IXM8O4RAM,3,4.345005,{'was_impossible': False}
181346,A2ZEQ0WBLNQN7O,AHLNHGHPHVD9D,3,4.345005,{'was_impossible': False}
181347,A2ZEQ0WBLNQN7O,A209OZBOGROXR9,3,4.345005,{'was_impossible': False}
181348,A2ZEQ0WBLNQN7O,AIHMY4H2KK1DM,3,4.345005,{'was_impossible': False}
181349,A2ZEQ0WBLNQN7O,A2LDY0FOVKLUJH,3,4.345005,{'was_impossible': False}
181350,A2ZEQ0WBLNQN7O,A2ENP55O98C471,3,4.345005,{'was_impossible': False}
181351,A2ZEQ0WBLNQN7O,A2HCCCK7DKIB0K,3,4.345005,{'was_impossible': False}
181352,A2ZEQ0WBLNQN7O,ALIMT31706C2J,3,4.345005,{'was_impossible': False}
181353,A2ZEQ0WBLNQN7O,A3HSQFZMJL0BP4,3,4.345005,{'was_impossible': False}


In [14]:
pred = pred.sort_values(by="est", ascending=True)
pred.head(20)

Unnamed: 0,uid,iid,r_ui,est,details
0,A2ZEQ0WBLNQN7O,A3VVMIMMTYQV5F,4,4.345005,{'was_impossible': False}
64,A2ZEQ0WBLNQN7O,A25FL6VLD7S23S,4,4.345005,{'was_impossible': False}
34,A2ZEQ0WBLNQN7O,A2O7BAVY4B8YWE,4,4.345005,{'was_impossible': False}
33,A2ZEQ0WBLNQN7O,AHL3ES7DCWZJC,4,4.345005,{'was_impossible': False}
32,A2ZEQ0WBLNQN7O,A6YO1F6CSB8JT,4,4.345005,{'was_impossible': False}
31,A2ZEQ0WBLNQN7O,A3CIBA6QZK2262,4,4.345005,{'was_impossible': False}
2,A2ZEQ0WBLNQN7O,A2ZEQ0WBLNQN7O,4,4.345005,{'was_impossible': False}
3,A2ZEQ0WBLNQN7O,A2IC3RIPB6HKSQ,4,4.345005,{'was_impossible': False}
4,A2ZEQ0WBLNQN7O,A1JHNR71TPEETW,4,4.345005,{'was_impossible': False}
5,A2ZEQ0WBLNQN7O,AESDP3GI6GYQD,4,4.345005,{'was_impossible': False}


In [19]:
import pickle
import numpy as np

In [21]:
with open('Model/svd_model.pkl', 'rb') as fp:
    model = pickle.load(fp)

predictions = model.test(testset)

top_n = get_top_n(predictions, n=10)

top_n

defaultdict(list,
            {'A2B3T42QBKDFFX': [('B0077E493G', 4.474923625388678),
              ('B001TOD3JQ', 4.32988195914964)],
             'A2XJX9LP6VJB88': [('B004M8SWBU', 4.620006443853191)],
             'A1C8NYDVHEBEVU': [('B008H08HLE', 4.721078672616966),
              ('B003WD2SLQ', 4.286170986429988),
              ('B005BHAYGM', 4.12702485873497),
              ('B0024NKHHM', 2.9730150730758247)],
             'A1GKO7BP27PDC0': [('B001MRZLZ4', 3.1951141231047475)],
             'A3T5RGTJO1LY9E': [('B001CRBTDW', 4.165926445062882)],
             'A1EE1A1DBKLEAQ': [('B00CDS9EKE', 5)],
             'A2RCLDM8ANAV3I': [('B005Z4ROIW', 4.759071486665881),
              ('B009EC41SW', 4.32262612428546)],
             'A2SKEQT0WTB954': [('B004Y1AYAC', 5),
              ('B000PIZVBU', 5),
              ('B006U1YUZE', 5),
              ('B0083WYBHU', 5),
              ('B00004SABB', 4.983815980015564),
              ('B005HNNJXA', 4.909929926887222),
              ('B0083W35LI', 4

In [23]:
user_id = "A2ZEQ0WBLNQN7O"
recommendations = top_n[user_id]
print("Top 10 recommendations for user '{}':".format(user_id))
for item_id, rating in recommendations:
    print(f"- Item ID: {item_id}, Estimated Rating: {rating}")


Top 10 recommendations for user 'A2ZEQ0WBLNQN7O':


In [67]:
ratings_new.sort_values(by="count", ascending=False)

Unnamed: 0,asin,overall,reviewerID,count
11212,B002CGSYM6,5,A3OA4DV4L81N1D,158
11188,B0016J1EIQ,4,A3OA4DV4L81N1D,158
11194,B001CCAISE,5,A3OA4DV4L81N1D,158
11193,B001C6JA2A,5,A3OA4DV4L81N1D,158
11192,B001AXKY70,5,A3OA4DV4L81N1D,158
...,...,...,...,...
191387,B001QSVF9K,5,AD5G39PJUJVJ,5
103813,B0093IIGZQ,5,A38YOIR2H7TDDX,5
103822,B00009R6TA,5,A2BRJXUX4RI1SZ,5
103823,B001QK9I0Q,4,A2BRJXUX4RI1SZ,5


In [None]:
ratings_new[""]

In [70]:
items = ratings["reviewerID"].unique()
test = [['A3OA4DV4L81N1D', iid, 4] for iid in items]

In [72]:

# Get the user's profile
# user_profile = np.array([4.0, 5.0, 3.0])
user_profile = [(4.0, 5.0, 3.0)]

# Get the item vectors
# item_vectors = model.components_
# item_vectors
# Transform the user's profile using the SVD model
user_vector = model.test(test)

def get_top_n(predictions, n=10):
    # Create a dictionary to store recommendations for each user
    top_n = defaultdict(list)

    # Group the predictions by user ID
    for user_id, item_id, true_rating, estimated_rating, _ in predictions:
        top_n[user_id].append((item_id, estimated_rating))
    
    print()
    # Sort the items for each user and retrieve the top-N recommendations
    for user_id, user_ratings in top_n.items():
#         print(user_id, user_ratings)
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[user_id] = user_ratings
    print()
    print("*"*100)
    return top_n

top_n = get_top_n(predictions, n=10)

top_n

user_id = "A3OA4DV4L81N1D"
recommendations = top_n[user_id]
print("Top 10 recommendations for user '{}':".format(user_id))
for item_id, rating in recommendations:
    print(f"- Item ID: {item_id}, Estimated Rating: {rating}")

# # Compute the cosine similarity between the user's vector and the vectors of all items
# item_vectors = model.components_
# item_scores = np.dot(user_vector, item_vectors.T)

# # Recommend the items with the highest scores
# recommended_items = np.argsort(item_scores)[-5:]

# # Print the recommended items
# print(recommended_items)



****************************************************************************************************
Top 10 recommendations for user 'A3OA4DV4L81N1D':


In [74]:
top_n["A2ZEQ0WBLNQN7O"]

[('A3VVMIMMTYQV5F', 4.331492523579733),
 ('A1XSPKZ8HHSBX2', 4.331492523579733),
 ('A2ZEQ0WBLNQN7O', 4.331492523579733),
 ('A2IC3RIPB6HKSQ', 4.331492523579733),
 ('A1JHNR71TPEETW', 4.331492523579733),
 ('AESDP3GI6GYQD', 4.331492523579733),
 ('A3P18XS3FMKMGI', 4.331492523579733),
 ('AU7SO8B4M5UBA', 4.331492523579733),
 ('A1O9IEPW5RZRI3', 4.331492523579733),
 ('A27N2CH8M3O5JS', 4.331492523579733),
 ('A3RS9C3RQLPRLU', 4.331492523579733),
 ('A2QQ09V4YOI90Y', 4.331492523579733),
 ('A2TNU7LSDHLIUR', 4.331492523579733),
 ('A11YGH316KJD3U', 4.331492523579733),
 ('A1T19R7OYFIHK7', 4.331492523579733),
 ('A2VZU7KPQMF3UJ', 4.331492523579733),
 ('AHGXPY9C0HJ5N', 4.331492523579733),
 ('A1X6761FMCSES2', 4.331492523579733),
 ('ABGD7DTR98ZHC', 4.331492523579733),
 ('A2TGQLJZ8BGO34', 4.331492523579733),
 ('A3G7Y8TENKTB6Y', 4.331492523579733),
 ('A1A9K9PPJYQS7D', 4.331492523579733),
 ('A11ZG4LM9IUEEV', 4.331492523579733),
 ('AHLC1B2T1T0FM', 4.331492523579733),
 ('A2CFPEXCG9K6E2', 4.331492523579733),
 ('A3

In [10]:
%%sh
python --version

Python 3.11.5


In [36]:
predictions

[Prediction(uid='A2ZEQ0WBLNQN7O', iid='A3VVMIMMTYQV5F', r_ui=3, est=4.331492523579733, details={'was_impossible': False}),
 Prediction(uid='A2ZEQ0WBLNQN7O', iid='A1XSPKZ8HHSBX2', r_ui=3, est=4.331492523579733, details={'was_impossible': False}),
 Prediction(uid='A2ZEQ0WBLNQN7O', iid='A2ZEQ0WBLNQN7O', r_ui=3, est=4.331492523579733, details={'was_impossible': False}),
 Prediction(uid='A2ZEQ0WBLNQN7O', iid='A2IC3RIPB6HKSQ', r_ui=3, est=4.331492523579733, details={'was_impossible': False}),
 Prediction(uid='A2ZEQ0WBLNQN7O', iid='A1JHNR71TPEETW', r_ui=3, est=4.331492523579733, details={'was_impossible': False}),
 Prediction(uid='A2ZEQ0WBLNQN7O', iid='AESDP3GI6GYQD', r_ui=3, est=4.331492523579733, details={'was_impossible': False}),
 Prediction(uid='A2ZEQ0WBLNQN7O', iid='A3P18XS3FMKMGI', r_ui=3, est=4.331492523579733, details={'was_impossible': False}),
 Prediction(uid='A2ZEQ0WBLNQN7O', iid='AU7SO8B4M5UBA', r_ui=3, est=4.331492523579733, details={'was_impossible': False}),
 Prediction(uid='A

In [107]:
%%sh
python --version

Python 3.12.0
