In [12]:
import pandas as pd
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load data
expo_data = pd.read_excel(r"C:\Users\Moksh\Dropbox\PC\Downloads\Expo Master.xlsx")

# Preprocessing
expo_data['cleaned_description'] = expo_data['description'].astype(str).str.replace("_x000D_\n", " ").str.strip()
expo_data['combined_text'] = expo_data['name'] + " " + expo_data['cleaned_description'] + " " + expo_data['categories'].astype(str)

# Simulate user search history and user ratings
def generate_search_terms():
    terms = ['durable', 'eco-friendly', 'stylish', 'modern', 'vintage', 'high-quality', 'cheap', 'luxury', 'compact', 'spacious']
    return random.sample(terms, random.randint(1, 5))

expo_data['user_search_history'] = [generate_search_terms() for _ in range(len(expo_data))]
expo_data['user_rating'] = [random.randint(1, 5) for _ in range(len(expo_data))]

# TF-IDF Vectorization
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.85)
tfidf_matrix = tfidf_vectorizer.fit_transform(expo_data['combined_text'])

# Content-Based Filtering
def get_user_profile(search_history):
    profile = " ".join(expo_data[expo_data['combined_text'].str.contains('|'.join(search_history))]['combined_text'])
    return profile

def content_based_recommendations(search_history):
    user_profile = get_user_profile(search_history)
    user_vector = tfidf_vectorizer.transform([user_profile])
    cosine_sim_user = linear_kernel(user_vector, tfidf_matrix).flatten()
    recommended_indices = cosine_sim_user.argsort()[-10:][::-1]
    return recommended_indices

# Collaborative Filtering
def adjust_by_ratings(recommended_indices, ratings):
    recommended_ratings = ratings[recommended_indices]
    sorted_indices = [index for _, index in sorted(zip(recommended_ratings, recommended_indices), reverse=True)]
    return sorted_indices

# Hybrid Recommendation System
def hybrid_recommendations(search_history, num_recommendations=5):
    recommended_indices = content_based_recommendations(search_history)
    final_indices = adjust_by_ratings(recommended_indices, expo_data['user_rating'].values)
    return expo_data['name'].iloc[final_indices[:num_recommendations]]

# Test the system
test_search_history = expo_data['user_search_history'].iloc[1]
print(test_search_history)
recommended_products = hybrid_recommendations(test_search_history)
print(recommended_products)

['vintage', 'luxury', 'spacious', 'compact', 'durable']
6192        Kadai
6183    Casserole
6182    Casserole
6181    Casserole
6210         Tawa
Name: name, dtype: object


In [13]:
import pandas as pd
import numpy as np
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load the updated data
updated_expo_data = pd.read_excel(r"C:\Users\Moksh\Dropbox\PC\Downloads\Updated_Expo_Master_v3.xlsx")

# TF-IDF Vectorization
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.85)
tfidf_matrix = tfidf_vectorizer.fit_transform(updated_expo_data['combined_text'])

# Define the function to simulate user search history
def generate_search_terms():
    terms = ['durable', 'eco-friendly', 'stylish', 'modern', 'vintage', 'high-quality', 'cheap', 'luxury', 'compact', 'spacious']
    return random.sample(terms, random.randint(1, 5))

# Function to build user profile based on search history
def get_user_profile(search_history):
    profile = " ".join(updated_expo_data[updated_expo_data['combined_text'].str.contains('|'.join(search_history))]['combined_text'])
    return profile

# Content-Based Filtering incorporating Click Through Rate
def content_based_recommendations(search_history):
    user_profile = get_user_profile(search_history)
    user_vector = tfidf_vectorizer.transform([user_profile])
    cosine_sim_user = linear_kernel(user_vector, tfidf_matrix).flatten()
    
    # Introduce a CTR-based boost
    cosine_sim_user = cosine_sim_user * updated_expo_data['click_through_rate'].values
    
    # Get top 10 product indices based on adjusted similarity
    recommended_indices = cosine_sim_user.argsort()[-10:][::-1]
    return recommended_indices

# Collaborative Filtering adjusted by ratings and conversion rate
def adjust_by_ratings_and_conversion(recommended_indices):
    recommended_ratings = updated_expo_data['user_rating'].values[recommended_indices]
    recommended_conversion = updated_expo_data['conversion_rate'].values[recommended_indices]
    
    # Weighted sum of ratings and conversion rates to get a combined score for each product
    combined_scores = recommended_ratings + recommended_conversion
    
    # Sort product indices based on combined scores
    sorted_indices = [index for _, index in sorted(zip(combined_scores, recommended_indices), reverse=True)]
    return sorted_indices

# Hybrid Recommendation System
def updated_hybrid_recommendations(search_history, num_recommendations=5):
    recommended_indices = content_based_recommendations(search_history)
    final_indices = adjust_by_ratings_and_conversion(recommended_indices)
    return updated_expo_data['name'].iloc[final_indices[:num_recommendations]]


In [14]:
# Test the updated system
test_search_history = updated_expo_data['user_search_history'].iloc[5]
updated_recommended_products = updated_hybrid_recommendations(test_search_history)
updated_recommended_products

10698     Cello Creeper Dinner Set
10729    Blooming Daisy Dinner Set
10579      Blue Creeper Dinner Set
10743    Amazon Creeper Dinner Set
4360         Blue Swirl Dinner Set
Name: name, dtype: object