In [1]:
import ast

import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Location of the product catalogue CSV (absolute path inside the runtime).
DATA_PATH = '/content/data.csv'

# Load the catalogue and preview the first five rows.
df = pd.read_csv(DATA_PATH)
df.head(n=5)

Unnamed: 0,id,slug,title,imgs,brand,category,vendor,used,address,availability,...,discounted_price,specifications,description,delivery_fee,delivery_details,warranty,warranty_type,average_rating,num_ratings,reviews
0,0,https://www.mega.pk/mobiles_products/23522/Not...,Nothing Phone 1 8GB RAM 256GB Storage Non PTA ...,['https://www.mega.pk/items_images/Nothing+Pho...,,Mobile,MEGA.PK,0,"Office 11, 12, 14 Basement Ahmed Center, I-8 M...",,...,,"{'RAM': '8GB', 'Memory quantity': '', 'Interna...",,,,,,,,[]
1,1,https://www.mega.pk/mobiles_products/23458/Opp...,Oppo F21 Pro 8GB Ram 128GB Storage 5G PTA Appr...,['https://www.mega.pk/items_images/Oppo+F21+Pr...,OPPO,Mobile,MEGA.PK,0,"Office 11, 12, 14 Basement Ahmed Center, I-8 M...",,...,,"{'RAM': '8gb', 'Memory quantity': '', 'Interna...",,,,,,,,[]
2,2,https://www.mega.pk/mobiles_products/24393/Tec...,Tecno Spark 10,['https://www.mega.pk/items_images/Tecno+Spark...,Tecno,Mobile,MEGA.PK,0,"Office 11, 12, 14 Basement Ahmed Center, I-8 M...",Coming Soon,...,,"{'RAM': '4GB,8GB', 'Memory quantity': '', 'Int...",,,,1 year,,,,[]
3,3,https://www.mega.pk/mobiles_products/24259/Viv...,Vivo V27 5G,['https://www.mega.pk/items_images/Vivo+V27+5G...,Vivo,Mobile,MEGA.PK,0,"Office 11, 12, 14 Basement Ahmed Center, I-8 M...",Coming Soon,...,,"{'RAM': '8GB,12GB', 'Memory quantity': '', 'In...",,,,1 year,,,,[]
4,4,https://www.mega.pk/mobiles_products/24204/App...,Apple Iphone 15 Pro Max,['https://www.mega.pk/items_images/Apple+Iphon...,Apple,Mobile,MEGA.PK,0,"Office 11, 12, 14 Basement Ahmed Center, I-8 M...",Coming Soon,...,,"{'RAM': '8GB', 'Memory quantity': '', 'Interna...",,,,,,,,[]


In [3]:
# Per-column count of missing values.
df.isna().sum()

Unnamed: 0,0
id,0
slug,0
title,0
imgs,0
brand,893
category,0
vendor,0
used,0
address,1122
availability,587


In [4]:
# Look at the raw 'specifications' value of the row labelled 0.
df.loc[0, 'specifications']

"{'RAM': '8GB', 'Memory quantity': '', 'Internal storage space': '256GB', 'Main Camera Pixels': '50 MP, f/1.9, 24mm (wide), 1/1.56', 'Battery capacity': 'Li-Po 4500 mAh, non-removable', 'Screen size': '6.55 Inches', '5G Support': 'Yes', 'Finger Print': 'Yes', 'Display technology': 'OLED, 1B colors, 120Hz, HDR10+, 500 nits (typ), 700 nits (peak)', 'Display': '6.55 inches OLED, 1B colors', 'Number of colours': '1B', 'Scratch resistant display': '', 'Screen resolution': '1080 x 2400 pixels', 'Pixel density': '402 PPI', 'Dual screens': '', 'SD Card': 'Yes', 'SDIO': '', 'Compatible memory cards': '128GB 8GB RAM, 256GB 8GB RAM, 256GB 12GB RAM', 'Maximum amount of memory': '', 'Built-in camera': 'Yes', 'Auto focus': 'Yes', 'Built-in flash': 'Yes', 'Digital zoom (still image)': 'Yes', 'Maximum resolution (still)': '1080p 30fps', 'Number of cameras': '3', 'Front Camera Resolution': '16 MP, f/2.5, (wide), 1/3.1', 'Optical zoom': 'Yes', 'Video recorder': 'Yes', 'Digital zoom (video)': 'Yes', 'Max

In [5]:
def clean_text(row):
  """Build the text that gets embedded for one product row.

  Joins title, brand, category and the non-empty specification entries
  (formatted as ``key:value``) with single spaces. A missing (NaN/None)
  field contributes an empty string, so the result can contain
  consecutive or trailing spaces.

  Args:
    row: Mapping-like row (e.g. a DataFrame row) with the keys 'title',
      'brand', 'category' and 'specifications'. 'specifications' is the
      string repr of a dict (or an actual dict).

  Returns:
    str: ``"{title} {brand} {category} {spec}"``.

  Notes:
    The specification string is parsed with ``ast.literal_eval`` rather
    than ``eval`` so that text coming from the data file can never be
    executed as code. A value that is not a valid dict literal yields an
    empty spec part instead of raising.
  """
  def text_or_empty(value):
    # Missing cells become "" so they do not show up as the text "nan".
    return value if pd.notna(value) else ""

  title = text_or_empty(row['title'])
  brand = text_or_empty(row['brand'])
  category = text_or_empty(row['category'])

  raw_spec = row['specifications']
  spec = ""
  if isinstance(raw_spec, dict):
    parsed = raw_spec
  elif pd.notna(raw_spec):
    try:
      # literal_eval only accepts Python literals; no calls or names.
      parsed = ast.literal_eval(raw_spec)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
      parsed = None
  else:
    parsed = None

  if isinstance(parsed, dict):
    # Skip entries with empty values; they carry no information.
    spec = ' '.join(f"{k}:{v}" for k, v in parsed.items() if v)

  return f"{title} {brand} {category} {spec}"

In [6]:
# Row-wise: derive the text to embed for every product.
df['clean_text'] = df.apply(clean_text, axis='columns')

In [7]:
# Preview the first five rows, now including the 'clean_text' column.
df.head(n=5)

Unnamed: 0,id,slug,title,imgs,brand,category,vendor,used,address,availability,...,specifications,description,delivery_fee,delivery_details,warranty,warranty_type,average_rating,num_ratings,reviews,clean_text
0,0,https://www.mega.pk/mobiles_products/23522/Not...,Nothing Phone 1 8GB RAM 256GB Storage Non PTA ...,['https://www.mega.pk/items_images/Nothing+Pho...,,Mobile,MEGA.PK,0,"Office 11, 12, 14 Basement Ahmed Center, I-8 M...",,...,"{'RAM': '8GB', 'Memory quantity': '', 'Interna...",,,,,,,,[],Nothing Phone 1 8GB RAM 256GB Storage Non PTA ...
1,1,https://www.mega.pk/mobiles_products/23458/Opp...,Oppo F21 Pro 8GB Ram 128GB Storage 5G PTA Appr...,['https://www.mega.pk/items_images/Oppo+F21+Pr...,OPPO,Mobile,MEGA.PK,0,"Office 11, 12, 14 Basement Ahmed Center, I-8 M...",,...,"{'RAM': '8gb', 'Memory quantity': '', 'Interna...",,,,,,,,[],Oppo F21 Pro 8GB Ram 128GB Storage 5G PTA Appr...
2,2,https://www.mega.pk/mobiles_products/24393/Tec...,Tecno Spark 10,['https://www.mega.pk/items_images/Tecno+Spark...,Tecno,Mobile,MEGA.PK,0,"Office 11, 12, 14 Basement Ahmed Center, I-8 M...",Coming Soon,...,"{'RAM': '4GB,8GB', 'Memory quantity': '', 'Int...",,,,1 year,,,,[],"Tecno Spark 10 Tecno Mobile RAM:4GB,8GB Inter..."
3,3,https://www.mega.pk/mobiles_products/24259/Viv...,Vivo V27 5G,['https://www.mega.pk/items_images/Vivo+V27+5G...,Vivo,Mobile,MEGA.PK,0,"Office 11, 12, 14 Basement Ahmed Center, I-8 M...",Coming Soon,...,"{'RAM': '8GB,12GB', 'Memory quantity': '', 'In...",,,,1 year,,,,[],"Vivo V27 5G Vivo Mobile RAM:8GB,12GB Internal..."
4,4,https://www.mega.pk/mobiles_products/24204/App...,Apple Iphone 15 Pro Max,['https://www.mega.pk/items_images/Apple+Iphon...,Apple,Mobile,MEGA.PK,0,"Office 11, 12, 14 Basement Ahmed Center, I-8 M...",Coming Soon,...,"{'RAM': '8GB', 'Memory quantity': '', 'Interna...",,,,,,,,[],Apple Iphone 15 Pro Max Apple Mobile RAM:8GB ...


In [9]:
# Per-column count of missing values, re-checked after adding 'clean_text'.
df.isna().sum()

Unnamed: 0,0
id,0
slug,0
title,0
imgs,0
brand,893
category,0
vendor,0
used,0
address,1122
availability,587


In [10]:
# Name of the sentence-embedding model used for both products and queries.
MODEL_NAME = 'all-MiniLM-L6-v2'
model = SentenceTransformer(MODEL_NAME)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [11]:
# Encode all product texts in one batched call instead of one model
# invocation per row; encode() returns one embedding row per input text.
embedding_matrix = model.encode(df['clean_text'].tolist(), show_progress_bar=True)

# Store one 1-D vector per product so each cell holds a single embedding.
df['embeddings'] = list(embedding_matrix)

In [12]:
# Preview the first five rows, now including the 'embeddings' column.
df.head(n=5)

Unnamed: 0,id,slug,title,imgs,brand,category,vendor,used,address,availability,...,description,delivery_fee,delivery_details,warranty,warranty_type,average_rating,num_ratings,reviews,clean_text,embeddings
0,0,https://www.mega.pk/mobiles_products/23522/Not...,Nothing Phone 1 8GB RAM 256GB Storage Non PTA ...,['https://www.mega.pk/items_images/Nothing+Pho...,,Mobile,MEGA.PK,0,"Office 11, 12, 14 Basement Ahmed Center, I-8 M...",,...,,,,,,,,[],Nothing Phone 1 8GB RAM 256GB Storage Non PTA ...,"[-0.031639054, 0.024927748, 0.059493024, -0.07..."
1,1,https://www.mega.pk/mobiles_products/23458/Opp...,Oppo F21 Pro 8GB Ram 128GB Storage 5G PTA Appr...,['https://www.mega.pk/items_images/Oppo+F21+Pr...,OPPO,Mobile,MEGA.PK,0,"Office 11, 12, 14 Basement Ahmed Center, I-8 M...",,...,,,,,,,,[],Oppo F21 Pro 8GB Ram 128GB Storage 5G PTA Appr...,"[-0.02028133, 0.02233131, 0.01556493, -0.01725..."
2,2,https://www.mega.pk/mobiles_products/24393/Tec...,Tecno Spark 10,['https://www.mega.pk/items_images/Tecno+Spark...,Tecno,Mobile,MEGA.PK,0,"Office 11, 12, 14 Basement Ahmed Center, I-8 M...",Coming Soon,...,,,,1 year,,,,[],"Tecno Spark 10 Tecno Mobile RAM:4GB,8GB Inter...","[-0.057045326, -0.048345394, -0.040879004, -0...."
3,3,https://www.mega.pk/mobiles_products/24259/Viv...,Vivo V27 5G,['https://www.mega.pk/items_images/Vivo+V27+5G...,Vivo,Mobile,MEGA.PK,0,"Office 11, 12, 14 Basement Ahmed Center, I-8 M...",Coming Soon,...,,,,1 year,,,,[],"Vivo V27 5G Vivo Mobile RAM:8GB,12GB Internal...","[-0.010266217, -0.039680757, 0.0074030417, -0...."
4,4,https://www.mega.pk/mobiles_products/24204/App...,Apple Iphone 15 Pro Max,['https://www.mega.pk/items_images/Apple+Iphon...,Apple,Mobile,MEGA.PK,0,"Office 11, 12, 14 Basement Ahmed Center, I-8 M...",Coming Soon,...,,,,,,,,[],Apple Iphone 15 Pro Max Apple Mobile RAM:8GB ...,"[-0.0023281914, 0.0074312524, 0.043554112, -0...."


In [13]:
# Single source of truth for where the embedded catalogue is stored.
EMBEDDINGS_PATH = 'product_embeddings.pkl'

# Save the frame (including the embedding vectors) ...
df.to_pickle(EMBEDDINGS_PATH)

# ... and load it back. NOTE: only unpickle files you created yourself.
df = pd.read_pickle(EMBEDDINGS_PATH)

In [14]:
def recommended_products(query , top_k = 5):
  """Return the products whose embeddings are closest to a text query.

  The query is lower-cased, embedded with the notebook-level ``model`` and
  compared by cosine similarity against every vector in
  ``df['embeddings']``.

  Args:
    query: Free-text search string.
    top_k: Number of best matches to return (default 5).

  Returns:
    pandas.DataFrame with the columns 'title', 'brand', 'category',
    'similarity' and 'imgs', sorted by descending similarity and limited
    to ``top_k`` rows. The frame is empty if ``df`` has no rows.

  Notes:
    Scores are computed on a local copy; the notebook-level ``df`` is not
    modified, so repeated calls do not leave a stale 'similarity' column.
  """
  result_columns = ['title', 'brand', 'category', 'similarity', 'imgs']
  candidates = df[['title', 'brand', 'category', 'imgs']]

  if candidates.empty:
    # Nothing to rank; np.vstack would raise on an empty sequence.
    return candidates.assign(similarity=pd.Series(dtype='float64'))[result_columns]

  # One (1, dim) query row against the full (n_products, dim) matrix:
  # a single vectorized call instead of one call per product.
  query_embedding = np.asarray(model.encode(query.lower())).reshape(1, -1)
  embedding_matrix = np.vstack(df['embeddings'].to_numpy())
  scores = cosine_similarity(query_embedding, embedding_matrix)[0]

  ranked = (
    candidates
    .assign(similarity=scores)
    .sort_values(by='similarity', ascending=False)
    .head(top_k)
  )
  return ranked[result_columns]

In [16]:
# Example: try the recommender with a free-text product query. With no
# top_k argument the function's default of 5 results is used.
q = 'samsung galaxy s22 ultra'
recommended_products(q)

Unnamed: 0,title,brand,category,similarity,imgs
1382,Samsung Galaxy S22 Ultra,,Mobile,0.745557,['https://images.priceoye.pk/samsung-galaxy-s2...
1429,Samsung Galaxy S22,,Mobile,0.681177,['https://images.priceoye.pk/samsung-galaxy-s2...
1534,Samsung Galaxy S23 Ultra,,Mobile,0.673594,['https://images.priceoye.pk/samsung-galaxy-s2...
145,Samsung Galaxy S22 Ultra 12GB Ram 256GB Storag...,Samsung,Mobile,0.654621,['https://www.mega.pk/items_images/Samsung+Gal...
128,Samsung Galaxy S22 Ultra 12GB Ram 512GB Storag...,Samsung,Mobile,0.647524,['https://www.mega.pk/items_images/Samsung+Gal...
