# How It Works

Preprocessing: Combines each product's title, brand, category, and specifications into a single lower-cased text column (clean_text).

Embedding Generation: Creates vector representations of product descriptions using Sentence-BERT.

Cosine Similarity: Embeds the user query with the same model and scores it against every product embedding using cosine similarity.

Recommendation: Returns the top-k recommendations based on similarity scores.

# Load Dataset

In [1]:
import pandas as pd

# Read the product catalogue CSV into a DataFrame and preview its first rows.
df = pd.read_csv(filepath_or_buffer="/content/data.csv")
df.head(n=5)

ModuleNotFoundError: No module named 'pandas'

# Preprocessing & Cleaning

In [16]:
import ast


def _parse_specifications(raw):
    """Turn a ``specifications`` cell into a dict without executing any code.

    The cell is parsed with ``ast.literal_eval``, which only accepts Python
    literals, instead of ``eval``, which would run whatever expression the
    data file happens to contain. A value that is already a dict is returned
    unchanged; anything that cannot be parsed, or that parses to something
    other than a dict, yields an empty dict so one bad row does not abort the
    whole preprocessing pass.
    """
    if isinstance(raw, dict):
        return raw
    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, SyntaxError, TypeError):
        return {}
    return parsed if isinstance(parsed, dict) else {}


# Combine relevant features into a single text column
def clean_text(row):
    """Build the lower-cased text used to embed one product.

    Args:
        row: A mapping (e.g. a DataFrame row) with the keys ``title``,
            ``brand``, ``category`` and ``specifications``. Missing values
            (as judged by ``pd.notna``) are treated as empty strings.

    Returns:
        ``"<title> <brand> <category> <specs>"`` lower-cased, where ``specs``
        is the space-joined ``key:value`` pairs of the specifications whose
        value is truthy. Empty fields keep their separating spaces.
    """
    title = row['title'] if pd.notna(row['title']) else ""
    brand = row['brand'] if pd.notna(row['brand']) else ""
    category = row['category'] if pd.notna(row['category']) else ""

    specs = ""
    if pd.notna(row['specifications']):
        spec_items = _parse_specifications(row['specifications']).items()
        # Skip entries with empty/falsy values; they add no searchable text.
        specs = ' '.join(f"{k}:{v}" for k, v in spec_items if v)

    return f"{title} {brand} {category} {specs}".lower()

df['clean_text'] = df.apply(clean_text, axis=1)
# Drop rows without descriptions. clean_text always returns a string, so a
# plain dropna() never removes anything; rows with no usable fields come back
# as whitespace-only text and have to be filtered on content instead.
has_text = df['clean_text'].fillna('').str.strip() != ''
# .copy() makes the filtered frame independent, so adding columns to it later
# does not trigger pandas' SettingWithCopyWarning.
df = df[has_text].copy()


# Create Embeddings

In [17]:
from sentence_transformers import SentenceTransformer

# Load a pre-trained Sentence-BERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Generate embeddings for product descriptions.
# The whole column is passed to encode() in a single call so the model can
# batch the texts, instead of running one separate encode() call per row via
# Series.apply. list(...) splits the result back into one vector per row so
# each cell of the 'embeddings' column still holds a single embedding.
df['embeddings'] = list(model.encode(df['clean_text'].tolist()))


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

# Save and Load Embeddings

In [18]:
# Persist the DataFrame (including the 'embeddings' column) to disk
df.to_pickle(path='product_embeddings.pkl')

# Reload it later without recomputing the embeddings.
# NOTE: unpickling can execute arbitrary code; only load files you created.
df = pd.read_pickle('product_embeddings.pkl')

# Recommendation Engine

In [21]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Function to get recommendations based on a user query
def recommend_products(query, top_k=5):
    """Return the ``top_k`` products most similar to a free-text query.

    The query is lower-cased, embedded with the module-level ``model`` and
    compared against every vector in ``df['embeddings']`` using cosine
    similarity.

    Args:
        query: Free-text search string.
        top_k: Number of rows to return (passed to ``DataFrame.head``).

    Returns:
        A new DataFrame with the columns ``title``, ``brand``, ``category``,
        ``similarity`` and ``imgs``, sorted by descending similarity. The
        module-level ``df`` is not modified.
    """
    columns = ['title', 'brand', 'category', 'similarity', 'imgs']

    # Lower-case and embed the query; reshape to a single-row 2-D array,
    # which is the input shape cosine_similarity expects.
    query_embedding = np.asarray(model.encode(query.lower())).reshape(1, -1)

    if df.empty:
        # np.vstack raises on an empty sequence, so score nothing explicitly.
        scores = np.empty(0, dtype=query_embedding.dtype)
    else:
        # Stack the per-row vectors into one matrix and score every product
        # in a single call rather than one cosine_similarity call per row.
        embedding_matrix = np.vstack(df['embeddings'].to_numpy())
        scores = cosine_similarity(query_embedding, embedding_matrix)[0]

    # assign() works on a copy, so repeated queries do not write a
    # 'similarity' column into the shared catalogue frame.
    ranked = df.assign(similarity=scores).sort_values(by='similarity', ascending=False)
    return ranked.head(top_k)[columns]

# Example usage: rank the catalogue against a free-text query
query = "8GB RAM smartphone"
recommendations = recommend_products(query, top_k=5)
recommendations

Unnamed: 0,title,brand,category,similarity,imgs
524,Samsung Galaxy Tab S8 Ultra 12GB (RAM) + 256GB...,Samsung,Mobile,0.59769,['https://www.czone.com.pk/images/thumbnails-l...
516,"Samsung Galaxy Tab S8 11"" 128GB | Czone.com.pk",Samsung,Mobile,0.550169,['https://www.czone.com.pk/images/thumbnails-l...
131,Samsung Galaxy A23 6GB Ram 128GB Storage LTE P...,Samsung,Mobile,0.542552,['https://www.mega.pk/items_images/Samsung+Gal...
130,Samsung Galaxy A53 8GB Ram 128GB Storage 5G PT...,Samsung,Mobile,0.540563,['https://www.mega.pk/items_images/Samsung+Gal...
488,"Samsung Galaxy Tab A7 Lite 8.7"" - 32GB",Samsung,Mobile,0.538458,['https://www.czone.com.pk/images/thumbnails-l...


In [22]:
# Example usage: a brand-oriented query
query = "Oppo Smart phone"
recommendations = recommend_products(query, top_k=5)
recommendations

Unnamed: 0,title,brand,category,similarity,imgs
1358,Oppo A16e (Activated),,Mobile,0.725794,['https://images.priceoye.pk/oppo-a16e-activat...
1457,Oppo A54 (Activated),,Mobile,0.720386,['https://images.priceoye.pk/oppo-a54-activate...
1347,Oppo A16e,,Mobile,0.713075,['https://images.priceoye.pk/oppo-a16e-pakista...
1359,Oppo A54,,Mobile,0.710593,['https://images.priceoye.pk/oppo-a54-pakistan...
1489,Oppo A16,,Mobile,0.709044,['https://images.priceoye.pk/oppo-a16-pakistan...
