In [86]:
from pandas import pandas as pd
import numpy as np
import torch

In [68]:
## Load the raw product data from the local CSV file "amazon.csv"
df = pd.read_csv("amazon.csv")

In [69]:
# Preview the first rows of the raw data to inspect the available columns
df.head()

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
0,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,₹399,"₹1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,"AG3D6O4STAQKAY2UVGEUV46KN35Q,AHMY5CWJMMK5BJRBB...","Manav,Adarsh gupta,Sundeep,S.Sayeed Ahmed,jasp...","R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","Satisfied,Charging is really fast,Value for mo...",Looks durable Charging is fine tooNo complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
1,B098NS6PVG,Ambrane Unbreakable 60W / 3A Fast Charging 1.5...,Computers&Accessories|Accessories&Peripherals|...,₹199,₹349,43%,4.0,43994,"Compatible with all Type C enabled devices, be...","AECPFYFQVRUWC3KGNLJIOREFP5LQ,AGYYVPDD7YG7FYNBX...","ArdKn,Nirbhay kumar,Sagar Viswanathan,Asp,Plac...","RGIQEG07R9HS2,R1SMWZQ86XIN8U,R2J3Y1WL29GWDE,RY...","A Good Braided Cable for Your Type C Device,Go...",I ordered this cable to connect my phone to An...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Ambrane-Unbreakable-Char...
2,B096MSW6CT,Sounce Fast Phone Charging Cable & Data Sync U...,Computers&Accessories|Accessories&Peripherals|...,₹199,"₹1,899",90%,3.9,7928,【 Fast Charger& Data Sync】-With built-in safet...,"AGU3BBQ2V2DDAMOAKGFAWDDQ6QHA,AESFLDV2PT363T2AQ...","Kunal,Himanshu,viswanath,sai niharka,saqib mal...","R3J3EQQ9TZI5ZJ,R3E7WBGK7ID0KV,RWU79XKQ6I1QF,R2...","Good speed for earlier versions,Good Product,W...","Not quite durable and sturdy,https://m.media-a...",https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Sounce-iPhone-Charging-C...
3,B08HDJ86NZ,boAt Deuce USB 300 2 in 1 Type-C & Micro USB S...,Computers&Accessories|Accessories&Peripherals|...,₹329,₹699,53%,4.2,94363,The boAt Deuce USB 300 2 in 1 cable is compati...,"AEWAZDZZJLQUYVOVGBEUKSLXHQ5A,AG5HTSFRRE6NL3M5S...","Omkar dhale,JD,HEMALATHA,Ajwadh a.,amar singh ...","R3EEUZKKK9J36I,R3HJVYCLYOY554,REDECAZ7AMPQC,R1...","Good product,Good one,Nice,Really nice product...","Good product,long wire,Charges good,Nice,I bou...",https://m.media-amazon.com/images/I/41V5FtEWPk...,https://www.amazon.in/Deuce-300-Resistant-Tang...
4,B08CF3B7N1,Portronics Konnect L 1.2M Fast Charging 3A 8 P...,Computers&Accessories|Accessories&Peripherals|...,₹154,₹399,61%,4.2,16905,[CHARGE & SYNC FUNCTION]- This cable comes wit...,"AE3Q6KSUK5P75D5HFYHCRAOLODSA,AFUGIFH5ZAFXRDSZH...","rahuls6099,Swasat Borah,Ajay Wadke,Pranali,RVK...","R1BP4L2HH9TFUP,R16PVJEXKV6QZS,R2UPDB81N66T4P,R...","As good as original,Decent,Good one for second...","Bought this instead of original apple, does th...",https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Portronics-Konnect-POR-1...


## Preprocess data

In [70]:
## Convert to lowercase
def preprocess_text(text):
    """Return ``text`` lower-cased, tolerating missing or non-string cells.

    Parameters
    ----------
    text : object
        Usually a ``str``. Missing scalars (``None``, ``NaN``, ``pd.NA``) and
        other non-string values are accepted as well.

    Returns
    -------
    str
        The lower-cased text; an empty string for a missing value; the
        lower-cased ``str()`` form for any other non-string value.
    """
    if isinstance(text, str):
        return text.lower()
    # Empty CSV cells are read as NaN (a float), which has no .lower();
    # map missing values to "" so string concatenation downstream still works.
    if pd.api.types.is_scalar(text) and pd.isna(text):
        return ""
    return str(text).lower()

# Work on an independent copy of just the three text columns. Selecting a
# column subset without .copy() leaves a frame pandas may still link to the
# raw data, so adding columns to it later can raise SettingWithCopyWarning.
text_columns = ['product_name', 'category', 'about_product']
df = df[text_columns].copy()

# Lower-case every text column
for column in text_columns:
    df[column] = df[column].apply(preprocess_text)

# Combine product name, category and description into one string per product
df['combined_text'] = df['product_name'] + ' ' + df['category'] + ' ' + df['about_product']


In [71]:
# Preview the first two rows after lower-casing and building combined_text
df.head(2)

Unnamed: 0,product_name,category,about_product,combined_text
0,wayona nylon braided usb to lightning fast cha...,computers&accessories|accessories&peripherals|...,high compatibility : compatible with iphone 12...,wayona nylon braided usb to lightning fast cha...
1,ambrane unbreakable 60w / 3a fast charging 1.5...,computers&accessories|accessories&peripherals|...,"compatible with all type c enabled devices, be...",ambrane unbreakable 60w / 3a fast charging 1.5...


## Create embeddings using sentence transformer

In [72]:
from sentence_transformers import SentenceTransformer
# Load a pre-trained sentence transformer model (e.g., 'bert-base-nli-mean-tokens') https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens
model = SentenceTransformer('bert-base-nli-mean-tokens')

# Encode every combined text in a single call so the model can batch the
# inputs internally; calling encode() once per row via .apply() runs one
# forward pass per product and is much slower.
combined_text_embeddings = model.encode(df['combined_text'].tolist())

# Store one embedding vector per row, aligned explicitly to df's index
df['combined_embeddings'] = pd.Series(list(combined_text_embeddings), index=df.index)


In [88]:
# Preview the frame now that the combined_embeddings column has been added
df.head()

Unnamed: 0,product_name,category,about_product,combined_text,combined_embeddings
0,wayona nylon braided usb to lightning fast cha...,computers&accessories|accessories&peripherals|...,high compatibility : compatible with iphone 12...,wayona nylon braided usb to lightning fast cha...,"[-0.47553006, 0.76456255, 0.2630146, 0.1100741..."
1,ambrane unbreakable 60w / 3a fast charging 1.5...,computers&accessories|accessories&peripherals|...,"compatible with all type c enabled devices, be...",ambrane unbreakable 60w / 3a fast charging 1.5...,"[-0.5932936, 0.9773711, 0.04942418, 0.2643483,..."
2,sounce fast phone charging cable & data sync u...,computers&accessories|accessories&peripherals|...,【 fast charger& data sync】-with built-in safet...,sounce fast phone charging cable & data sync u...,"[-0.43358973, 0.7435666, 0.27349097, 0.2739624..."
3,boat deuce usb 300 2 in 1 type-c & micro usb s...,computers&accessories|accessories&peripherals|...,the boat deuce usb 300 2 in 1 cable is compati...,boat deuce usb 300 2 in 1 type-c & micro usb s...,"[-0.5192799, 0.46103123, -0.21505368, 0.534678..."
4,portronics konnect l 1.2m fast charging 3a 8 p...,computers&accessories|accessories&peripherals|...,[charge & sync function]- this cable comes wit...,portronics konnect l 1.2m fast charging 3a 8 p...,"[-0.4897875, 0.38213462, -0.009049115, 0.23148..."


In [89]:
# Inspect the embeddings column on its own (one vector per product row)
df['combined_embeddings']

0       [-0.47553006, 0.76456255, 0.2630146, 0.1100741...
1       [-0.5932936, 0.9773711, 0.04942418, 0.2643483,...
2       [-0.43358973, 0.7435666, 0.27349097, 0.2739624...
3       [-0.5192799, 0.46103123, -0.21505368, 0.534678...
4       [-0.4897875, 0.38213462, -0.009049115, 0.23148...
                              ...                        
1460    [-0.61356914, 0.9325824, 0.32966587, 0.1548301...
1461    [-0.4910463, 1.3314799, -0.36481798, 0.2186922...
1462    [-0.25516573, 1.1415099, -0.052242536, 0.29591...
1463    [-0.106983595, 1.0955564, -0.17530212, 0.23177...
1464    [-0.5415292, 0.89888835, -0.74178505, 0.113858...
Name: combined_embeddings, Length: 1465, dtype: object

In [111]:
# Cosine similarity is a metric used to measure the similarity of two vectors.
from sentence_transformers import util

# Function to get similar items based on the combined product text
def get_similar_items(combined_input, df, top_n=5):
    """Return the rows of ``df`` whose embeddings are closest to a text query.

    The query is encoded with the notebook-level ``model`` and compared by
    cosine similarity against every vector in ``df['combined_embeddings']``.

    Parameters
    ----------
    combined_input : str
        Free-text query passed to ``model.encode``.
    df : pandas.DataFrame
        Must contain a ``combined_embeddings`` column plus the
        ``product_name``, ``category`` and ``about_product`` columns.
    top_n : int, default 5
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows (``product_name``, ``category``,
        ``about_product``), ordered from most to least similar. Empty when
        ``df`` is empty or ``top_n`` is not positive.
    """
    result_columns = ['product_name', 'category', 'about_product']

    # Nothing to rank: return an empty frame with the expected columns
    if df.empty or top_n <= 0:
        return df.iloc[0:0][result_columns]

    query_embedding = model.encode(combined_input)

    # Stack the per-row vectors into one 2-D numeric array. Handing the
    # object-dtype Series itself to torch relies on implicit element-wise
    # conversion, which is slow and fragile.
    corpus_embeddings = np.stack(df['combined_embeddings'].to_numpy())

    # Calculate cosine similarity between the query and all combined texts
    similarities = util.pytorch_cos_sim(query_embedding, corpus_embeddings)

    # Positions of the top N most similar rows, as plain Python ints so that
    # .iloc receives an ordinary list rather than a tensor
    ranked_positions = torch.argsort(similarities[0], descending=True)
    top_positions = ranked_positions[:top_n].tolist()

    # Retrieve the similar items from the DataFrame
    return df.iloc[top_positions][result_columns]
    

In [112]:
# Search the embedded catalogue with a free-text query; top_n is left at its
# default, so at most five rows come back.
product_info_to_search = "usb cable 1 meter"

similar_items = get_similar_items(product_info_to_search, df)


In [113]:
# Show the query, then the name of every matching product
print(f"Product: {product_info_to_search}")
print("\nSimilar Products:")
for product_name in similar_items['product_name']:
    print(f"Product : {product_name}")

Product: usb cable 1 meter

Similar Products:
Product : samsung original type c to c cable - 3.28 feet (1 meter), white
Product : samsung original type c to c cable - 3.28 feet (1 meter), white
Product : zebronics, zeb-nc3300 usb powered laptop cooling pad with dual fan, dual usb port and blue led lights
Product : mi usb type-c cable smartphone (black)
Product : mi usb type-c cable smartphone (black)


## Save embeddings

In [80]:
import pickle

In [114]:
# Persist the frame (text columns plus embeddings). The context manager
# guarantees the file handle is flushed and closed, even if dump() fails.
with open('data_embeddings.pkl', 'wb') as pickle_file:
    pickle.dump(df, pickle_file)

In [116]:
## Load pickle
# NOTE: unpickling can execute arbitrary code, so only load files that were
# produced by a trusted source. The context manager closes the file handle.
with open('data_embeddings.pkl', 'rb') as pickle_file:
    data = pickle.load(pickle_file)

In [117]:
# Preview the reloaded frame to confirm it round-tripped through the pickle
data.head()

Unnamed: 0,product_name,category,about_product,combined_text,combined_embeddings
0,wayona nylon braided usb to lightning fast cha...,computers&accessories|accessories&peripherals|...,high compatibility : compatible with iphone 12...,wayona nylon braided usb to lightning fast cha...,"[-0.47553006, 0.76456255, 0.2630146, 0.1100741..."
1,ambrane unbreakable 60w / 3a fast charging 1.5...,computers&accessories|accessories&peripherals|...,"compatible with all type c enabled devices, be...",ambrane unbreakable 60w / 3a fast charging 1.5...,"[-0.5932936, 0.9773711, 0.04942418, 0.2643483,..."
2,sounce fast phone charging cable & data sync u...,computers&accessories|accessories&peripherals|...,【 fast charger& data sync】-with built-in safet...,sounce fast phone charging cable & data sync u...,"[-0.43358973, 0.7435666, 0.27349097, 0.2739624..."
3,boat deuce usb 300 2 in 1 type-c & micro usb s...,computers&accessories|accessories&peripherals|...,the boat deuce usb 300 2 in 1 cable is compati...,boat deuce usb 300 2 in 1 type-c & micro usb s...,"[-0.5192799, 0.46103123, -0.21505368, 0.534678..."
4,portronics konnect l 1.2m fast charging 3a 8 p...,computers&accessories|accessories&peripherals|...,[charge & sync function]- this cable comes wit...,portronics konnect l 1.2m fast charging 3a 8 p...,"[-0.4897875, 0.38213462, -0.009049115, 0.23148..."


In [120]:
# Run a second free-text search, this time against the frame reloaded from
# the pickle file rather than the in-memory df.
product_info_to_search = "iphone 12"

similar_items = get_similar_items(product_info_to_search, data)

In [121]:
# Show the query, then the name of every matching product
print(f"Product: {product_info_to_search}")
print("\nSimilar Products:")
for product_name in similar_items['product_name']:
    print(f"Product : {product_name}")

Product: iphone 12

Similar Products:
Product : swapkart fast charging cable and data sync usb cable compatible for iphone 6/6s/7/7+/8/8+/10/11, 12, 13 pro max ipad air/mini, ipod and ios devices (white)
Product : swapkart fast charging cable and data sync usb cable compatible for iphone 6/6s/7/7+/8/8+/10/11, 12, 13 pro max ipad air/mini, ipod and ios devices (white)
Product : duracell chhota power aa battery set of 10 pcs
Product : eveready red 1012 aaa batteries - pack of 10
Product : samsung galaxy m13 5g (aqua green, 6gb, 128gb storage) | 5000mah battery | upto 12gb ram with ram plus
