In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Read the cleaned Amazon product data; the path is relative, so the CSV
# must be present in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="amazon_cleaned.csv")

# Preview the first five rows as plain text
print(df.head(n=5))

   product_id                                       product_name  \
0  B07JW9H4J1  Wayona Nylon Braided USB to Lightning Fast Cha...   
1  B098NS6PVG  Ambrane Unbreakable 60W / 3A Fast Charging 1.5...   
2  B096MSW6CT  Sounce Fast Phone Charging Cable & Data Sync U...   
3  B08HDJ86NZ  boAt Deuce USB 300 2 in 1 Type-C & Micro USB S...   
4  B08CF3B7N1  Portronics Konnect L 1.2M Fast Charging 3A 8 P...   

                                            category  discounted_price  \
0  Computers&Accessories|Accessories&Peripherals|...             399.0   
1  Computers&Accessories|Accessories&Peripherals|...             199.0   
2  Computers&Accessories|Accessories&Peripherals|...             199.0   
3  Computers&Accessories|Accessories&Peripherals|...             329.0   
4  Computers&Accessories|Accessories&Peripherals|...             154.0   

   actual_price  discount_percentage  rating  rating_count  \
0        1099.0                   64     4.2         24269   
1         349.0       

In [3]:
# Narrow the frame to the three columns the recommender uses and discard
# every row that is missing any of them.
# NOTE(review): the user IDs printed further down look like comma-joined
# lists of IDs, so a "user" here may really be a group of reviewers —
# TODO confirm and consider splitting them before pivoting.
rating_columns = ['user_id', 'product_name', 'rating']
df = df.loc[:, rating_columns].dropna()

In [4]:
# Pivot into a users x products grid of ratings. Duplicate (user, product)
# pairs are averaged, and pairs with no rating are stored as 0.
user_item_matrix = pd.pivot_table(
    df,
    index='user_id',
    columns='product_name',
    values='rating',
    aggfunc='mean',
).fillna(0)

In [5]:
# Pairwise cosine similarity between the rating rows of every pair of users
similarity_matrix = cosine_similarity(user_item_matrix)

# Label both axes with the user IDs so similarities can be looked up by user
user_ids = user_item_matrix.index
similar_users_df = pd.DataFrame(
    data=similarity_matrix,
    index=user_ids,
    columns=user_ids,
)

In [6]:
def recommend_products(user_id, top_n=5, n_neighbors=5):
    """Recommend products for ``user_id`` from the ratings of similar users.

    Reads the module-level ``similar_users_df`` (user-by-user similarity
    table) and ``user_item_matrix`` (users x products, where a value of 0 is
    treated as "not rated").

    Parameters
    ----------
    user_id : label
        Label of the target user in ``similar_users_df``.
    top_n : int, default 5
        Maximum number of products to return.
    n_neighbors : int, default 5
        Number of most-similar users whose ratings are averaged.

    Returns
    -------
    pandas.Series or list
        Series indexed by product holding the neighbours' mean rating, sorted
        from highest to lowest and limited to ``top_n`` entries. When
        ``user_id`` is unknown, a message is printed and an empty list is
        returned.
    """
    if user_id not in similar_users_df.index:
        print("User not found in dataset.")
        return []

    # Exclude the target user by label rather than by position: with tied
    # similarities (or a zero self-similarity) the user is not guaranteed to
    # sort first, so slicing off position 0 could keep the user as their own
    # neighbour and discard a genuine one.
    neighbor_similarities = (
        similar_users_df[user_id]
        .drop(labels=user_id)
        .sort_values(ascending=False)
        .head(n_neighbors)
    )

    # Mean rating per product across the neighbours. Zeros (unrated entries)
    # are included in the mean, so scores shrink when few neighbours rated
    # a product.
    neighbor_ratings = user_item_matrix.loc[neighbor_similarities.index]
    avg_ratings = neighbor_ratings.mean(axis=0)

    # Keep only products the target user has not rated (stored as 0)
    rated_products = user_item_matrix.loc[user_id]
    unrated_products = avg_ratings[rated_products == 0]

    # Highest predicted scores first
    return unrated_products.sort_values(ascending=False).head(top_n)


In [10]:
# Show the first five user IDs so a valid one can be picked
sample_user_ids = user_item_matrix.index[:5].tolist()
print("Available users:", sample_user_ids)

# Use the first user in the matrix (swap in any other valid ID)
user_id = user_item_matrix.index[0]

# Run the recommender for that user
recommendations = recommend_products(user_id)

# Print one line per recommended product with its predicted score
print(f"\n🎯 Top Recommendations for user {user_id}:")
for product, score in recommendations.items():
    print(f"{product} — Predicted Score: {score:.2f}")


Available users: ['AE22Y3KIS7SE6LI3HE2VS6WWPU4Q,AHWEYO2IJ5I5GDWZAHJK6NGYHFMA,AGYURQ3476BNT4D2O46THXEUY3SA,AFPMBSBIEX45OQ6UCQWPDG55GWLQ,AGWJU3WUQBDQYPSYAJSR3AKBLCOA,AEOVUNFCIFV223O536GVW5JHZKOA', 'AE23RS3W7GZO7LHYKJU6KSKVM4MQ,AEQUNEY6GQOTEGUMS6KRUEYNXJSQ,AGYPIE5BICV44WEEEPJVEFQOCJSQ,AFR7CEQKWZE53IHHOWBIPAMYKL4Q,AGBV7FBP4SEITF6UKRFKTV7O32IA,AHQVOY54QKPIQZIJ57JKCGQPVV3Q,AEMCVRRD3XQRGFHC2VFCXHJEMESQ,AFBWXU7DUWCIK5MRDCLBXWTWN7ZQ', 'AE242TR3GQ6TYC6W4SJ5UYYKBTYQ', 'AE27UOZENYSWCQVQRRUQIV2ZM7VA,AGMYSLV6NNOAYES25JDTJPCZY47A,AFHS33MWRQGSS64EETZJGCBWXXXA,AHYXZVXUY3QTBP7IBFIUBSZVH2XQ,AH2SHWYEWDAK6A5Y2ZBEMZ2KIG3A,AEYMOGP2CYRKYZ7TIDNLGR5QPZ4Q,AGPGDCCXPI3EACMNJKBCNT57DVFA,AFPBMRYRSMD3PP3CBKLFF7EKOCXA', 'AE2JTMRKTUOIVIZWS2WDGTMNTU4Q,AF4QXCB32VC2DVE7O3DGFNQVFFNQ,AGAFYHMPFGVPR3MOS4QAZLAWPW3A,AGNNWLEF6V57TKIFJM7SWHNFAIQQ,AFVIPOPKMOCVCX3CMXUJHMWDIMGA,AH6MFUU725GG4KA3XTALSTU2ILHA,AGQYTSKE2UBYARZYRBADQMX6BJPQ,AG7F66F724JZ2HIJQY7NOU5M5D2Q']

🎯 Top Recommendations for user AE22Y3KIS7SE6LI3HE2VS6WWPU4Q,AHWEYO2