# Recommendation Model
1. [Import Libraries](#1)
2. [Load Processed Data](#2)
3. [Content-Based Filtering](#3)
4. [Collaborative Filtering](#4)
5. [Save Models](#5)

<a name="1"></a>
## 1. Import Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
import pickle

<a name = "2" ></a>
## 2. Load Processed Data

In [2]:
df = pd.read_csv('processed_products.csv')

<a name="3"></a>
## 3. Content-Based Filtering

In [3]:
class ContentBasedRecommender:
    """Recommend products whose feature vectors are closest to a given product.

    Similarity is the pairwise cosine similarity between rows of ``data``,
    computed over a fixed list of feature columns.

    Args:
        data: DataFrame with a ``'Product Name'`` column plus the feature
            columns listed in :meth:`prepare_similarity_matrix`.
            NOTE(review): the feature columns are presumably already numeric
            (encoded/scaled upstream) -- confirm against the preprocessing step.
    """

    def __init__(self, data):
        self.df = data
        # Square (n_rows x n_rows) similarity matrix; built lazily on first use.
        self.similarity_matrix = None

    def prepare_similarity_matrix(self):
        """Compute and cache the row-by-row cosine similarity matrix."""
        features = ['Price', 'Rating', 'Main Category',  'Sub Category', 'Discount', 'Popularity']
        feature_matrix = self.df[features]
        self.similarity_matrix = cosine_similarity(feature_matrix)

    def recommend_products(self, product_name, n=5):
        """Return the ``n`` rows most similar to the product called ``product_name``.

        Args:
            product_name: Exact value to look up in the ``'Product Name'`` column.
                If several rows share the name, the first one is used.
            n: Maximum number of recommendations to return.

        Returns:
            DataFrame slice of ``self.df`` ordered from most to least similar,
            never containing the query row itself.

        Raises:
            IndexError: If no row has the requested product name.
        """
        if self.similarity_matrix is None:
            self.prepare_similarity_matrix()

        # Work with *positions*, not index labels: the similarity matrix and
        # ``iloc`` are both positional, so a label taken from ``df.index`` is
        # only correct when the frame happens to have a default RangeIndex.
        name_hits = np.flatnonzero((self.df['Product Name'] == product_name).to_numpy())
        if name_hits.size == 0:
            raise IndexError(f"Product not found: {product_name!r}")
        query_pos = int(name_hits[0])

        scores = np.asarray(self.similarity_matrix[query_pos])
        # A stable sort keeps tied scores in their original row order.
        ranked = np.argsort(-scores, kind='stable')
        # Drop the query row explicitly; slicing off the top entry is wrong
        # when another row ties with it (e.g. an identical feature vector).
        ranked = ranked[ranked != query_pos][:max(n, 0)]
        return self.df.iloc[ranked]
    
# Initialize the content-based recommender on the loaded product table and
# build its similarity matrix up front, so the object pickled later already
# carries the matrix instead of computing it on first use.
content_rec = ContentBasedRecommender(df)
content_rec.prepare_similarity_matrix()

## Testing Content-Based Recommendation

In [4]:
from difflib import get_close_matches

def get_matching_product(user_query, df, top_n=1):
    """Find product names matching free-text user input.

    A case-insensitive *literal* substring search is tried first; if nothing
    matches, a fuzzy match (``difflib.get_close_matches``) is used instead.

    Args:
        user_query: Text typed by the user. Surrounding whitespace is ignored.
        df: DataFrame with a ``'Product Name'`` column.
        top_n: Maximum number of names to return.

    Returns:
        A list of product names exactly as they appear in ``df`` (possibly
        empty). A blank query returns an empty list.
    """
    query = user_query.strip().lower()
    if not query:
        # An empty substring is contained in every name; treat it as no match.
        return []

    names = df['Product Name']
    lowered = names.str.lower()

    # regex=False: user input such as "c++" or "(" is not a valid pattern.
    # na=False: rows with a missing name simply do not match.
    hit_mask = lowered.str.contains(query, regex=False, na=False)
    if hit_mask.any():
        return names[hit_mask].tolist()[:top_n]

    # Fallback: fuzzy match. Compare against lowercased names so case
    # differences do not lower the score, then map back to the original names.
    original_by_lower = {}
    for low, original in zip(lowered, names):
        if isinstance(low, str):
            original_by_lower.setdefault(low, original)
    close = get_close_matches(query, list(original_by_lower), n=top_n, cutoff=0.3)
    return [original_by_lower[low] for low in close]

In [5]:
# Interactive demo: resolve the typed text to one catalogue product, then
# print the content-based recommendations for it.
user_input = input("Enter a product name to get recommendation: ")   # e.g : Headphones

matching_products = get_matching_product(user_input, df, top_n=1)

# len() rather than truthiness: the helper may hand back a NumPy array, whose
# truth value is ambiguous when it holds more than one element.
if len(matching_products) > 0:
    print(f"\nShowing recommendations based on: {matching_products[0]}")
    recommendations = content_rec.recommend_products(matching_products[0], n=5)
    print("\nTop 5 Recommended Products: ")
    print(recommendations['Product Name'])
else:
    print("No matching product found. Try searching with different words.")


Showing recommendations based on: Chander-Brahmbhatt Numquam Pro Headphones 7

Top 5 Recommended Products: 
2848                 Khalsa Ltd Illum Ultra Headphones 15
1409           Upadhyay LLC Repudiandae Lite Headphones 9
3073                Vora-Iyengar Quidem Plus Headphones 6
1515                Doshi-Kuruvilla Cum Lite Headphones 1
2128    Chauhan, Chakrabarti and Patil Nemo Ultra Head...
Name: Product Name, dtype: object


<a name="4"></a>
## 4. Collaborative Filtering (User-Item Matrix approach)

In [39]:
class CollaborativeFilteringRecommender:
    """User-based collaborative filtering over a synthetic user-item matrix.

    The product table has no real purchase history, so interactions are
    generated from each product's ``'Number of Buyers'`` and ``'Rating'``
    columns, and a cosine nearest-neighbour model is fitted on the resulting
    user-by-product rating matrix.

    Args:
        data: DataFrame with ``'Product Name'``, ``'Number of Buyers'`` and
            ``'Rating'`` columns.
    """

    def __init__(self, data):
        self.df = data
        self.model = None
        # Dense user-by-product ratings (DataFrame); 0 means "no interaction".
        self.user_item_matrix = None
        # CSR copy of ``user_item_matrix`` used for fitting and querying.
        self.sparse_matrix = None

    def create_user_item_matrix(self):
        """Generate synthetic ratings and build the user-item matrices.

        Each product gets ``max(1, buyers // 1000)`` raters named ``user_0``,
        ``user_1``, ... so low-numbered users rate the most products. Each
        rating is ``Rating * 5`` plus Gaussian noise (sd 0.5), clamped to
        the range 1 to 5.
        NOTE(review): multiplying by 5 presumably assumes ``Rating`` was
        scaled to 0-1 upstream -- confirm against the preprocessing step.
        """
        records = []
        for _, row in self.df.iterrows():
            num_users = max(1, int(row['Number of Buyers'] / 1000))  # scale down
            for user_num in range(num_users):
                noisy_rating = row['Rating'] * 5 + np.random.normal(0, 0.5)
                records.append(
                    (f"user_{user_num}", row['Product Name'], max(1, min(5, noisy_rating)))
                )

        interactions = pd.DataFrame(records, columns=['user_id', 'product_id', 'rating'])

        # Pivot to users x products; duplicate (user, product) pairs are averaged.
        self.user_item_matrix = interactions.pivot_table(
            index='user_id',
            columns='product_id',
            values='rating'
        ).fillna(0)

        # Convert to sparse matrix
        self.sparse_matrix = csr_matrix(self.user_item_matrix.values)

    def train_model(self):
        """Fit a brute-force cosine nearest-neighbour model on the user rows."""
        if self.user_item_matrix is None:
            self.create_user_item_matrix()

        # KNN model for user-based collaborative filtering
        self.model = NearestNeighbors(metric='cosine', algorithm='brute')
        self.model.fit(self.sparse_matrix)

    def recommend_products(self, user_id, n=5):
        """Recommend up to ``n`` products based on the ``n`` most similar users.

        Args:
            user_id: Row label in the user-item matrix (e.g. ``"user_0"``).
            n: Number of neighbours to consult and maximum number of products
                to return.

        Returns:
            DataFrame with one row of ``self.df`` per recommended product,
            ordered by the neighbours' mean rating (highest first).

        Raises:
            KeyError: If ``user_id`` is not present in the user-item matrix.
            ValueError: If ``n`` is negative.
        """
        if n < 0:
            raise ValueError(f"n must be non-negative, got {n}")
        if self.model is None:
            self.train_model()
        if user_id not in self.user_item_matrix.index:
            raise KeyError(f"Unknown user_id: {user_id!r}")

        user_idx = self.user_item_matrix.index.get_loc(user_id)

        # Ask for one extra neighbour (the user itself), but never more than
        # the number of fitted users, which the neighbour search rejects.
        n_users = self.user_item_matrix.shape[0]
        _, indices = self.model.kneighbors(
            self.sparse_matrix[user_idx], n_neighbors=min(n + 1, n_users)
        )

        # Remove the querying user by identity, not by position: with tied
        # distances it is not guaranteed to come back first.
        neighbour_positions = [int(i) for i in indices[0] if i != user_idx][:n]
        if not neighbour_positions:
            return self.df.iloc[0:0]

        # Score each product by its mean rating across the similar users.
        recommendations = self.user_item_matrix.iloc[neighbour_positions].mean(axis=0)
        recommendations = recommendations.sort_values(ascending=False)[:n]

        # Look up the first catalogue row for each recommended product name.
        recommended_products = [
            self.df[self.df['Product Name'] == product_id].iloc[0]
            for product_id in recommendations.index
        ]
        return pd.DataFrame(recommended_products)


# Initialize the collaborative-filtering recommender on the loaded product
# table and fit it now; train_model() also builds the user-item matrix
# because none exists yet.
collab_rec = CollaborativeFilteringRecommender(df)
collab_rec.train_model()


<a name="5"></a>
## 5. Save Models

In [41]:
# Persist the content-based recommender (including its precomputed similarity
# matrix) so it can be reloaded without recomputing.
with open('content_based_model.pkl', 'wb') as model_file:
    pickle.dump(content_rec, model_file)

In [42]:
# Persist the trained collaborative-filtering recommender (user-item matrix
# and fitted neighbour model) to disk.
with open('collab_filter_model.pkl', 'wb') as model_file:
    pickle.dump(collab_rec, model_file)