# Amazon Product Recommendation System

This notebook implements various recommendation approaches for Amazon products:
1. Content-based filtering
2. Item-based collaborative filtering
3. Hybrid recommendations

## 1. Data Preparation

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Set display options: passing None removes the cap on how many columns
# pandas shows when a DataFrame is rendered.
pd.set_option('display.max_columns', None)

# Download the NLTK resources used by preprocess_text further down:
# 'punkt' backs word_tokenize and 'stopwords' backs stopwords.words('english').
# NOTE(review): newer NLTK releases may additionally require 'punkt_tab' for
# word_tokenize - confirm against the installed NLTK version.
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\nouha\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\nouha\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [2]:
# Load the dataset.
# The path is relative, so it is resolved against the kernel's working
# directory (presumably the folder containing this notebook - verify).
df = pd.read_csv('../data/cleaned_amazon_data.csv')
print("Dataset shape:", df.shape)
# Last expression of the cell: shows the first rows as the cell's rich output.
df.head()

Dataset shape: (1465, 17)


Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link,main_category
0,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,399.0,1099.0,64.0,4.2,24269.0,High Compatibility : Compatible With iPhone 12...,"AG3D6O4STAQKAY2UVGEUV46KN35Q,AHMY5CWJMMK5BJRBB...","Manav,Adarsh gupta,Sundeep,S.Sayeed Ahmed,jasp...","R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","Satisfied,Charging is really fast,Value for mo...",Looks durable Charging is fine tooNo complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,Computers&Accessories
1,B098NS6PVG,Ambrane Unbreakable 60W / 3A Fast Charging 1.5...,Computers&Accessories|Accessories&Peripherals|...,199.0,349.0,43.0,4.0,43994.0,"Compatible with all Type C enabled devices, be...","AECPFYFQVRUWC3KGNLJIOREFP5LQ,AGYYVPDD7YG7FYNBX...","ArdKn,Nirbhay kumar,Sagar Viswanathan,Asp,Plac...","RGIQEG07R9HS2,R1SMWZQ86XIN8U,R2J3Y1WL29GWDE,RY...","A Good Braided Cable for Your Type C Device,Go...",I ordered this cable to connect my phone to An...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Ambrane-Unbreakable-Char...,Computers&Accessories
2,B096MSW6CT,Sounce Fast Phone Charging Cable & Data Sync U...,Computers&Accessories|Accessories&Peripherals|...,199.0,1899.0,90.0,3.9,7928.0,【 Fast Charger& Data Sync】-With built-in safet...,"AGU3BBQ2V2DDAMOAKGFAWDDQ6QHA,AESFLDV2PT363T2AQ...","Kunal,Himanshu,viswanath,sai niharka,saqib mal...","R3J3EQQ9TZI5ZJ,R3E7WBGK7ID0KV,RWU79XKQ6I1QF,R2...","Good speed for earlier versions,Good Product,W...","Not quite durable and sturdy,https://m.media-a...",https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Sounce-iPhone-Charging-C...,Computers&Accessories
3,B08HDJ86NZ,boAt Deuce USB 300 2 in 1 Type-C & Micro USB S...,Computers&Accessories|Accessories&Peripherals|...,329.0,699.0,53.0,4.2,94363.0,The boAt Deuce USB 300 2 in 1 cable is compati...,"AEWAZDZZJLQUYVOVGBEUKSLXHQ5A,AG5HTSFRRE6NL3M5S...","Omkar dhale,JD,HEMALATHA,Ajwadh a.,amar singh ...","R3EEUZKKK9J36I,R3HJVYCLYOY554,REDECAZ7AMPQC,R1...","Good product,Good one,Nice,Really nice product...","Good product,long wire,Charges good,Nice,I bou...",https://m.media-amazon.com/images/I/41V5FtEWPk...,https://www.amazon.in/Deuce-300-Resistant-Tang...,Computers&Accessories
4,B08CF3B7N1,Portronics Konnect L 1.2M Fast Charging 3A 8 P...,Computers&Accessories|Accessories&Peripherals|...,154.0,399.0,61.0,4.2,16905.0,[CHARGE & SYNC FUNCTION]- This cable comes wit...,"AE3Q6KSUK5P75D5HFYHCRAOLODSA,AFUGIFH5ZAFXRDSZH...","rahuls6099,Swasat Borah,Ajay Wadke,Pranali,RVK...","R1BP4L2HH9TFUP,R16PVJEXKV6QZS,R2UPDB81N66T4P,R...","As good as original,Decent,Good one for second...","Bought this instead of original apple, does th...",https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Portronics-Konnect-POR-1...,Computers&Accessories


### 1.1 Data Preprocessing

In [3]:
_ENGLISH_STOP_WORDS = None  # filled on first use by _english_stop_words()


def _english_stop_words():
    """Return the NLTK English stop-word set, loading it from the corpus only once."""
    global _ENGLISH_STOP_WORDS
    if _ENGLISH_STOP_WORDS is None:
        _ENGLISH_STOP_WORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOP_WORDS


def preprocess_text(text):
    """Normalize free text into a space-separated string of content words.

    The text is lower-cased and tokenized with ``word_tokenize``; tokens that
    are not purely alphabetic or that are English stop words are dropped.

    Parameters
    ----------
    text : object
        Value to clean. Anything that is not a ``str`` (e.g. ``None`` or a
        NaN float coming from a DataFrame) is treated as missing.

    Returns
    -------
    str
        The kept tokens joined by single spaces, or ``''`` for non-string
        input.
    """
    if not isinstance(text, str):
        return ''

    # The stop-word list does not change between calls, so it is cached
    # instead of being rebuilt for every row this function is applied to.
    stop_words = _english_stop_words()
    tokens = word_tokenize(text.lower())
    return ' '.join(
        token for token in tokens
        if token.isalpha() and token not in stop_words
    )

## 2. Recommendation Models

### 2.1 Content-Based Filtering

In [4]:
class ContentBasedRecommender:
    """Recommend products whose text is most similar to a query product.

    Text columns are concatenated per product, vectorized with TF-IDF
    (English stop words removed) and compared with cosine similarity.
    Products are looked up by the *index labels* of the DataFrame passed to
    ``fit``, so that frame is expected to be indexed by product id.
    """

    def __init__(self):
        self.tfidf = TfidfVectorizer(stop_words='english')
        # TF-IDF matrix with one row per row of ``self.products`` (set by fit).
        self.product_features = None
        # The DataFrame handed to fit; recommendations are rows of it.
        self.products = None

    def fit(self, products, feature_columns):
        """Build the TF-IDF matrix from the given text columns.

        Parameters
        ----------
        products : pandas.DataFrame
            One row per product; its index labels are the ids accepted by
            ``recommend``.
        feature_columns : str or list of str
            Column(s) whose values are concatenated (space separated) into
            one document per product. Missing values become empty strings
            and non-string values are converted with ``str``.
        """
        if isinstance(feature_columns, str):
            # A bare column name would otherwise select a Series, for which
            # the row-wise join below is not defined.
            feature_columns = [feature_columns]

        self.products = products
        # Combine all text features; astype(str) keeps the join from failing
        # on numeric columns such as prices or ratings.
        combined_features = (
            products[list(feature_columns)]
            .fillna('')
            .astype(str)
            .apply(' '.join, axis=1)
        )
        # Create TF-IDF matrix
        self.product_features = self.tfidf.fit_transform(combined_features)

    def recommend(self, product_id, n_recommendations=5):
        """Return the rows most similar to ``product_id``, best match first.

        Parameters
        ----------
        product_id : hashable
            Index label of the query product in the fitted DataFrame.
        n_recommendations : int, default 5
            Maximum number of rows to return; values below zero are treated
            as zero.

        Returns
        -------
        pandas.DataFrame
            Up to ``n_recommendations`` rows of the fitted frame, ordered by
            decreasing cosine similarity. Rows carrying the queried label are
            never included. Ties keep the original row order.

        Raises
        ------
        KeyError
            If ``product_id`` is not an index label of the fitted frame.
        """
        index = self.products.index
        loc = index.get_loc(product_id)
        # get_loc yields an int for a unique label, but a slice or boolean
        # mask when the label is repeated; normalize to an array of positions.
        if isinstance(loc, slice):
            query_positions = np.arange(len(index))[loc]
        elif isinstance(loc, np.ndarray):
            query_positions = np.flatnonzero(loc)
        else:
            query_positions = np.array([loc])
        query_pos = int(query_positions[0])

        # Calculate similarity scores against every product
        scores = np.asarray(
            cosine_similarity(
                self.product_features[query_pos:query_pos + 1],
                self.product_features,
            )
        ).ravel()

        # Rank best-first, then drop the query explicitly: it is not
        # guaranteed to sort last (exact duplicates tie with it, and an
        # all-zero TF-IDF row has self-similarity 0).
        ranked = np.argsort(-scores, kind='stable')
        ranked = ranked[~np.isin(ranked, query_positions)]

        return self.products.iloc[ranked[:max(n_recommendations, 0)]]

### 2.2 Item-Based Collaborative Filtering

In [5]:
class ItemBasedCollaborativeFilter:
    """Item-to-item recommender driven by a precomputed similarity matrix.

    ``fit`` computes the cosine similarity between the columns of the given
    matrix; ``recommend`` ranks all other columns against the queried one.
    """

    def __init__(self):
        self.products = None
        self.item_similarity_matrix = None

    def fit(self, user_item_matrix):
        """Compute pairwise item similarities.

        ``user_item_matrix`` must expose ``.T`` and ``.columns`` (e.g. a
        pandas DataFrame); each column is treated as one item.
        Presumably rows are users and values are ratings - confirm with
        the caller.
        """
        # Transpose so that items become the rows being compared.
        self.item_similarity_matrix = cosine_similarity(user_item_matrix.T)
        self.products = user_item_matrix.columns

    def recommend(self, product_id, n_recommendations=5):
        """Return the ``n_recommendations`` items most similar to ``product_id``.

        The result is a list of ``(item_label, similarity)`` tuples sorted by
        decreasing similarity; the queried item itself is left out and items
        with equal similarity keep their column order. ``list.index`` raises
        ``ValueError`` when ``product_id`` is not a known item.
        """
        labels = self.products
        target = list(labels).index(product_id)
        row = self.item_similarity_matrix[target]

        candidates = []
        for position in range(len(labels)):
            if position == target:
                continue  # never recommend the item to itself
            candidates.append((labels[position], row[position]))

        ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)
        return ranked[:n_recommendations]

### 2.3 Hybrid Recommendations

In [6]:
class HybridRecommender:
    """Blend content-based and item-based collaborative similarity.

    For a query product every candidate gets the score
    ``content_weight * content_similarity
    + (1 - content_weight) * collaborative_similarity`` and the best-scoring
    rows of the product frame are returned.

    The product frame passed to ``fit`` is expected to be indexed by the same
    product ids that label the columns of ``user_item_matrix``.
    """

    def __init__(self, content_weight=0.5):
        self.content_recommender = ContentBasedRecommender()
        self.collaborative_recommender = ItemBasedCollaborativeFilter()
        # Share of the final score taken from content similarity; meant to be
        # in [0, 1] (1.0 = content only, 0.0 = collaborative only).
        self.content_weight = content_weight

    def fit(self, products, feature_columns, user_item_matrix):
        """Fit both underlying recommenders on their respective inputs."""
        self.content_recommender.fit(products, feature_columns)
        self.collaborative_recommender.fit(user_item_matrix)

    def recommend(self, product_id, n_recommendations=5):
        """Return the rows with the highest weighted similarity to ``product_id``.

        Parameters
        ----------
        product_id : hashable
            Index label of the query product in the fitted product frame.
        n_recommendations : int, default 5
            Maximum number of rows to return; values below zero are treated
            as zero.

        Returns
        -------
        pandas.DataFrame
            Rows of the fitted product frame ordered by decreasing combined
            score, never including the queried product. Ties keep the
            original row order.

        Raises
        ------
        KeyError
            If ``product_id`` is not an index label of the product frame.

        Notes
        -----
        Candidates without interaction data get a collaborative similarity of
        0. If the *query* itself has no interaction data the ranking falls
        back to content similarity alone instead of failing.
        """
        content = self.content_recommender
        collaborative = self.collaborative_recommender
        products = content.products
        index = products.index

        # Positions of the query in the product frame (KeyError if unknown).
        loc = index.get_loc(product_id)
        if isinstance(loc, slice):
            query_positions = np.arange(len(index))[loc]
        elif isinstance(loc, np.ndarray):
            query_positions = np.flatnonzero(loc)
        else:
            query_positions = np.array([loc])
        query_pos = int(query_positions[0])

        # Content similarity of the query against every product row.
        content_scores = np.asarray(
            cosine_similarity(
                content.product_features[query_pos:query_pos + 1],
                content.product_features,
            ),
            dtype=float,
        ).ravel()

        # Map each collaborative item label to its first row/column position
        # in the item-item similarity matrix.
        first_position = {}
        for position, label in enumerate(collaborative.products):
            first_position.setdefault(label, position)

        collaborative_scores = np.zeros(len(index), dtype=float)
        if product_id in first_position:
            weight = self.content_weight
            similarity_row = np.asarray(
                collaborative.item_similarity_matrix[first_position[product_id]],
                dtype=float,
            ).ravel()
            # Align collaborative similarities with the product frame's rows;
            # -1 marks products that have no interaction data.
            aligned = np.array(
                [first_position.get(label, -1) for label in index], dtype=int
            )
            known = aligned >= 0
            collaborative_scores[known] = similarity_row[aligned[known]]
        else:
            # Cold start: no interaction data for the query product.
            weight = 1.0

        combined = weight * content_scores + (1.0 - weight) * collaborative_scores

        ranked = np.argsort(-combined, kind='stable')
        ranked = ranked[~np.isin(ranked, query_positions)]
        return products.iloc[ranked[:max(n_recommendations, 0)]]

## 3. Evaluation and Testing

In [7]:
def evaluate_recommendations(true_items, predicted_items):
    """Score a recommendation list against the relevant items.

    Both arguments are reduced to sets, so duplicates and ordering are
    ignored. Returns a dict with the keys ``'precision'``, ``'recall'`` and
    ``'f1'``; a metric whose denominator would be zero is reported as 0.
    """
    relevant = set(true_items)
    recommended = set(predicted_items)
    hits = len(relevant & recommended)

    # Share of the recommended items that are relevant.
    if recommended:
        precision = hits / len(recommended)
    else:
        precision = 0

    # Share of the relevant items that were recommended.
    if relevant:
        recall = hits / len(relevant)
    else:
        recall = 0

    # Harmonic mean of the two, guarded against 0 / 0.
    if (precision + recall) > 0:
        f1 = 2 * (precision * recall) / (precision + recall)
    else:
        f1 = 0

    return dict(precision=precision, recall=recall, f1=f1)

### 3.1 Visualization of Results

In [8]:
def plot_recommendation_metrics(metrics_dict):
    """Draw a grouped bar chart of precision, recall and F1 per approach.

    ``metrics_dict`` maps an approach name to a dict with the keys
    ``'precision'``, ``'recall'`` and ``'f1'``. Each approach gets one group
    of three bars on the x axis; the figure is shown immediately.
    """
    plt.figure(figsize=(10, 6))

    group_positions = np.arange(len(metrics_dict))
    bar_width = 0.25
    per_approach = list(metrics_dict.values())

    # (metric key, legend label, horizontal shift of the bar within its group)
    series = (
        ('precision', 'Precision', -bar_width),
        ('recall', 'Recall', 0),
        ('f1', 'F1', bar_width),
    )
    for metric_key, legend_label, shift in series:
        heights = [scores[metric_key] for scores in per_approach]
        plt.bar(group_positions + shift, heights, bar_width, label=legend_label)

    plt.xlabel('Recommendation Approach')
    plt.ylabel('Score')
    plt.title('Comparison of Recommendation Approaches')
    plt.xticks(group_positions, metrics_dict.keys())
    plt.legend()

    plt.tight_layout()
    plt.show()