<a href="https://colab.research.google.com/github/rashanjotkaur/Recommendation-System/blob/main/Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Sample dataset of products.
# Each key holds a list with one entry per product, so all lists must have the same length.
products_data = {
    'product_id': [1, 2, 3, 4, 5, 6],
    'name': [
        'Wireless Headphones',
        'Wireless Headphones 1',
        'Bluetooth Speaker',
        'Wired Headphones',
        'Noise Cancelling Headphones',
        'Portable Charger',
    ],
    'description': [
        'Wireless sound system',
        'Wireless sound system 1',
        'High-quality bluetooth speaker',
        'Durable wired headphones',
        'Noise cancelling with Bluetooth',
        'Compact and powerful charger',
    ],
    'price': [220, 120, 180, 300, 700, 1000],
    # A stray backtick after this list previously made the whole cell a SyntaxError.
    'review_score': [4.5, 4.5, 4.8, 4.2, 4.9, 4.7],
}

# Convert to DataFrame
products_df = pd.DataFrame(products_data)
print(products_df)

# Sample user profile
user_profile = {
    'user_id': 1,
    'name': 'Rashan',
    'spending_range': (100, 550)  # User generally spends between $100 and $550
}

In [None]:
# Function to recommend products with refined similarity calculation
def recommend_products(keyword, user_profile, products_df):
    """Rank the products that match a search keyword for one user.

    The product 'name' and 'description' are joined into one text per product,
    vectorised with TF-IDF (unigrams and bigrams, English stop words removed),
    and compared to the keyword with cosine similarity. Products whose
    similarity is 0 are dropped.

    Args:
        keyword: Search text entered by the user.
        user_profile: Mapping that must contain 'spending_range' as a
            (min_price, max_price) pair.
        products_df: DataFrame with 'name', 'description', 'price' and
            'review_score' columns. It is not modified.

    Returns:
        A new DataFrame with the columns 'name', 'price', 'review_score',
        'within_range' and 'similarity_score', sorted by similarity_score
        (descending), review_score (descending), within_range (True first)
        and finally price (ascending). Empty when nothing matches or when
        products_df has no rows.
    """
    output_columns = ['name', 'price', 'review_score', 'within_range', 'similarity_score']

    # Work on a copy so the caller's DataFrame does not silently gain the
    # helper columns ('combined_features', 'similarity_score').
    candidates = products_df.copy()

    if candidates.empty:
        # TfidfVectorizer cannot be fitted on an empty corpus (ValueError);
        # an empty catalogue simply means there is nothing to recommend.
        return candidates.assign(within_range=False, similarity_score=0.0)[output_columns]

    # Combine 'name' and 'description' for better keyword matching
    candidates['combined_features'] = candidates['name'] + " " + candidates['description']

    # TF-IDF vectorization; n-grams capture short phrases as well as single words
    tfidf = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))
    tfidf_matrix = tfidf.fit_transform(candidates['combined_features'])

    # Cosine similarity between the search keyword and every product text
    query_vec = tfidf.transform([keyword])
    candidates['similarity_score'] = cosine_similarity(query_vec, tfidf_matrix).flatten()

    # Keep only products that share at least one term with the keyword
    matches = candidates[candidates['similarity_score'] > 0].copy()

    # Flag products whose price falls inside the user's spending range (inclusive)
    min_price, max_price = user_profile['spending_range']
    matches['within_range'] = matches['price'].between(min_price, max_price)

    products_sorted = matches.sort_values(
        by=['similarity_score', 'review_score', 'within_range', 'price'],
        ascending=[False, False, False, True],
    )

    return products_sorted[output_columns]


In [None]:
# Search term typed by the example user
keyword_search = "headphones"

# Rank the catalogue for that term using the sample profile defined above
recommendations = recommend_products(
    keyword=keyword_search,
    user_profile=user_profile,
    products_df=products_df,
)

# Show the ranked table
print(recommendations)

The cell below builds recommendations by combining a keyword similarity score, content-based filtering, and collaborative filtering.

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Sample dataset of products.
# Each key holds a list with one entry per product, so all lists must have the same length.
products_data = {
    'product_id': [1, 2, 3, 4, 5],
    'name': [
        'Wireless Headphones',
        'Bluetooth Speaker',
        'Wired Headphones',
        'Noise Cancelling Headphones',
        'Portable Charger',
    ],
    'description': [
        'Wireless sound system',
        'High-quality bluetooth speaker',
        'Durable wired headphones',
        'Noise cancelling with Bluetooth',
        'Compact and powerful charger',
    ],
    'price': [120, 180, 300, 700, 1000],
    'review_score': [4.5, 4.8, 4.2, 4.9, 4.7],
}

# Convert to DataFrame
products_df = pd.DataFrame(products_data)

# Sample user profile
user_profile = {
    'user_id': 101,
    'name': 'Rashan',
    'spending_range': (100, 550),  # User generally spends between $100 and $550
    'liked_product_ids': [1, 3]  # Products that the user has liked or interacted with
}

# Sample likes from several users, stored as parallel lists (one entry per like).
# The entries were previously not separated by commas, which is a SyntaxError.
# NOTE(review): the original listed three user_ids against four names/likes.
# The second 'R' row is assumed to belong to user 101 again - confirm the intended data.
users = {
    'user_id': [101, 102, 103, 101],
    'name': ['R', 'A', 'B', 'R'],
    'liked_product_ids': [1, 1, 4, 5]  # Products that the user has liked or interacted with
}


# Function to compute TF-IDF similarity for product descriptions
def compute_tfidf_similarity(keyword, products_df):
    """Score every product against a search keyword with TF-IDF cosine similarity.

    Args:
        keyword: Search text entered by the user.
        products_df: DataFrame with 'name' and 'description' columns.
            It is not modified.

    Returns:
        A copy of products_df with two extra columns:
        'combined_features' (name and description joined by a space) and
        'similarity_score' (cosine similarity between the keyword and the
        product text). No rows are removed.
    """
    # Work on a copy so the caller's DataFrame does not silently gain columns.
    scored_df = products_df.copy()

    if scored_df.empty:
        # TfidfVectorizer cannot be fitted on an empty corpus (ValueError);
        # return the empty frame with the expected columns instead.
        return scored_df.assign(combined_features='', similarity_score=0.0)

    # Combine 'name' and 'description' for better keyword matching
    scored_df['combined_features'] = scored_df['name'] + " " + scored_df['description']

    # TF-IDF vectorization over unigrams and bigrams, ignoring English stop words
    tfidf = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))
    tfidf_matrix = tfidf.fit_transform(scored_df['combined_features'])

    # Cosine similarity between the search keyword and every product text
    query_vec = tfidf.transform([keyword])
    scored_df['similarity_score'] = cosine_similarity(query_vec, tfidf_matrix).flatten()

    return scored_df

# Function to compute content-based similarity using user preferences
def content_based_filtering(user_profile, products_df):
    """Score each product by how closely its text resembles the user's liked products.

    Product texts are vectorised with TF-IDF (unigrams and bigrams, English
    stop words removed). A product's score is the mean cosine similarity
    between its vector and the vectors of the products listed in
    user_profile['liked_product_ids']. A liked product is compared with
    itself as well, so liked products tend to receive high scores.

    Args:
        user_profile: Mapping that may contain 'liked_product_ids', a
            collection of product ids. A missing key is treated as "no likes".
        products_df: DataFrame with a 'product_id' column and either a
            'combined_features' column or 'name' and 'description' columns.
            It is not modified.

    Returns:
        A copy of products_df with an added 'content_similarity' column.
        The score is 0.0 for every product when the user has no liked
        products in the catalogue.
    """
    scored_df = products_df.copy()

    liked_ids = user_profile.get('liked_product_ids', [])
    liked_rows = scored_df['product_id'].isin(liked_ids).to_numpy().nonzero()[0]

    if len(liked_rows) == 0:
        # Nothing to compare against (also covers an empty catalogue).
        scored_df['content_similarity'] = 0.0
        return scored_df

    # Reuse the combined text when an earlier step already built it.
    if 'combined_features' in scored_df.columns:
        product_text = scored_df['combined_features']
    else:
        product_text = scored_df['name'] + " " + scored_df['description']

    tfidf = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))
    tfidf_matrix = tfidf.fit_transform(product_text)

    # Rows: every product; columns: each liked product.
    similarity_to_liked = cosine_similarity(tfidf_matrix, tfidf_matrix[liked_rows])
    scored_df['content_similarity'] = similarity_to_liked.mean(axis=1)

    return scored_df

def collaborative_based_filtering(user_profile, products_df):
    """Placeholder for collaborative filtering; returns products_df unchanged.

    The rows for the user's liked products are looked up, but the result is
    not used yet, so no score is added and no rows are removed.

    Args:
        user_profile: Mapping containing 'liked_product_ids'.
        products_df: DataFrame with a 'product_id' column.

    Returns:
        The same products_df object that was passed in.
    """
    liked_ids = user_profile['liked_product_ids']
    is_liked = products_df['product_id'].isin(liked_ids)
    # TODO: liked_products is computed but not used yet.
    liked_products = products_df[is_liked]
    return products_df


# Function to recommend products
def recommend_products(keyword, user_profile, products_df, keyword_weight=0.6, content_weight=0.4):
    """Rank products for a user by keyword relevance and content similarity.

    Args:
        keyword: Search text entered by the user.
        user_profile: Mapping that must contain 'spending_range' as a
            (min_price, max_price) pair; it is also passed to
            content_based_filtering.
        products_df: DataFrame with 'product_id', 'name', 'description',
            'price' and 'review_score' columns. It is not modified.
        keyword_weight: Weight of 'similarity_score' in the combined score.
        content_weight: Weight of 'content_similarity' in the combined score.

    Returns:
        A new DataFrame with the columns 'name', 'price', 'review_score',
        'within_range', 'similarity_score', 'content_similarity' and
        'combined_score', sorted by combined_score, review_score and
        within_range (all descending). Products with a keyword similarity of
        0 are excluded. Empty when products_df has no rows.
    """
    output_columns = ['name', 'price', 'review_score', 'within_range',
                      'similarity_score', 'content_similarity', 'combined_score']

    if products_df.empty:
        # An empty catalogue cannot be vectorised; there is nothing to recommend.
        return products_df.assign(
            within_range=False,
            similarity_score=0.0,
            content_similarity=0.0,
            combined_score=0.0,
        )[output_columns]

    # Compute TF-IDF similarity for the search keyword. A copy is passed so the
    # caller's DataFrame never gains helper columns.
    scored = compute_tfidf_similarity(keyword, products_df.copy())

    # Compute content-based similarity based on user preferences
    scored = content_based_filtering(user_profile, scored)

    if 'content_similarity' not in scored.columns:
        # Fall back to a neutral score when the filtering step did not add a
        # 'content_similarity' column, instead of failing with a KeyError below.
        scored = scored.assign(content_similarity=0.0)

    # Filter out products with zero similarity score
    matches = scored[scored['similarity_score'] > 0].copy()

    # Flag products whose price falls inside the user's spending range (inclusive)
    min_price, max_price = user_profile['spending_range']
    matches['within_range'] = matches['price'].between(min_price, max_price)

    # Weighted sum of keyword (TF-IDF) similarity and content-based similarity
    matches['combined_score'] = (
        keyword_weight * matches['similarity_score']
        + content_weight * matches['content_similarity']
    )

    # Sort by the combined score, then review score, then within-range products first
    products_sorted = matches.sort_values(
        by=['combined_score', 'review_score', 'within_range'],
        ascending=[False, False, False],
    )

    return products_sorted[output_columns]

# Search term typed by the example user
keyword_search = "headphones"

# Rank the catalogue for that term using the sample profile defined above
recommendations = recommend_products(
    keyword=keyword_search,
    user_profile=user_profile,
    products_df=products_df,
)

# Show the ranked table
print(recommendations)
