In [1]:
import pandas as pd
import numpy as np
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle

In [4]:
# Load the product catalogue into a DataFrame straight from the zipped CSV.
# NOTE(review): the absolute `/content/...` path looks Colab-specific — adjust when running elsewhere.
products = pd.read_csv("/content/All Electronics.csv.zip")

In [5]:
# Remove the column-width limit so long product names and URLs are displayed in full.
pd.set_option('display.max_colwidth', None)

In [9]:
# Preview the first row to see which columns the catalogue provides.
products.head(1)

Unnamed: 0,name,main_category,sub_category,image,link,ratings,no_of_ratings,discount_price,actual_price
0,"Redmi 10 Power (Power Black, 8GB RAM, 128GB Storage)","tv, audio & cameras",All Electronics,https://m.media-amazon.com/images/I/81eM15lVcJL._AC_UL320_.jpg,https://www.amazon.in/Redmi-Power-Black-128GB-Storage/dp/B09Y64H8VS/ref=sr_1_4?qid=1679133649&s=electronics&sr=1-4,4.0,965,"₹10,999","₹18,999"


In [10]:
# Summarise column dtypes and non-null counts.
products.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9600 entries, 0 to 9599
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   name            9600 non-null   object
 1   main_category   9600 non-null   object
 2   sub_category    9600 non-null   object
 3   image           9600 non-null   object
 4   link            9600 non-null   object
 5   ratings         9505 non-null   object
 6   no_of_ratings   9505 non-null   object
 7   discount_price  9116 non-null   object
 8   actual_price    9530 non-null   object
dtypes: object(9)
memory usage: 675.1+ KB


In [11]:
# Count missing values per column.
products.isna().sum()

Unnamed: 0,0
name,0
main_category,0
sub_category,0
image,0
link,0
ratings,95
no_of_ratings,95
discount_price,484
actual_price,70


In [12]:
# Count rows that are exact duplicates of an earlier row (all columns compared).
products.duplicated().sum()

np.int64(0)

In [13]:
# Seed the '#keywords' feature column with the lower-cased product name.
products['#keywords'] = products['name'].str.lower()

In [14]:
# Check that the new '#keywords' column appears next to the original columns.
products.head(2)

Unnamed: 0,name,main_category,sub_category,image,link,ratings,no_of_ratings,discount_price,actual_price,#keywords
0,"Redmi 10 Power (Power Black, 8GB RAM, 128GB Storage)","tv, audio & cameras",All Electronics,https://m.media-amazon.com/images/I/81eM15lVcJL._AC_UL320_.jpg,https://www.amazon.in/Redmi-Power-Black-128GB-Storage/dp/B09Y64H8VS/ref=sr_1_4?qid=1679133649&s=electronics&sr=1-4,4.0,965,"₹10,999","₹18,999","redmi 10 power (power black, 8gb ram, 128gb storage)"
1,"OnePlus Nord CE 2 Lite 5G (Blue Tide, 6GB RAM, 128GB Storage)","tv, audio & cameras",All Electronics,https://m.media-amazon.com/images/I/71AvQd3VzqL._AC_UL320_.jpg,https://www.amazon.in/OnePlus-Nord-Lite-128GB-Storage/dp/B09WQYFLRX/ref=sr_1_5?qid=1679133649&s=electronics&sr=1-5,4.3,113956,"₹18,999","₹19,999","oneplus nord ce 2 lite 5g (blue tide, 6gb ram, 128gb storage)"


In [15]:
# Inspect the lower-cased keyword strings (punctuation is still present at this stage).
products['#keywords']

Unnamed: 0,#keywords
0,"redmi 10 power (power black, 8gb ram, 128gb storage)"
1,"oneplus nord ce 2 lite 5g (blue tide, 6gb ram, 128gb storage)"
2,"oneplus bullets z2 bluetooth wireless in ear earphones with mic, bombastic bass - 12.4 mm drivers, 10 mins charge - 20 hrs..."
3,"samsung galaxy m33 5g (mystique green, 6gb, 128gb storage) | 6000mah battery | upto 12gb ram with ram plus | travel adapte..."
4,"oneplus nord ce 2 lite 5g (black dusk, 6gb ram, 128gb storage)"
...,...
9595,"palay® bts backpack for boys kpop bts bangtan school backback for student with cable vent, backpack travel bag backpack la..."
9596,"moca ipad air 5th generation case 2022 / ipad air 4th 2020 case 10.9 inch, slim stand hard back shell protective smart cov..."
9597,"zebronics zeb-sound bomb 5 tws v5.0 bluetooth truly wireless in ear earbuds with up to 22h backup, flash connect, splash p..."
9598,linqs® original nxp chip | waterproof nfc tag sticker (set of 5) | smartrac circus ntag213 chip | for all nfc phones | vin...


In [16]:
# List the column labels currently in the frame.
products.columns

Index(['name', 'main_category', 'sub_category', 'image', 'link', 'ratings',
       'no_of_ratings', 'discount_price', 'actual_price', '#keywords'],
      dtype='object')

In [17]:
# Print the value distribution of each category column, one after the other.
for category_column in ('main_category', 'sub_category'):
    print(products[category_column].value_counts())

main_category
tv, audio & cameras    9600
Name: count, dtype: int64
sub_category
All Electronics    9600
Name: count, dtype: int64


In [18]:
# Both category columns hold one constant value for every row, so they add
# nothing to the recommender. Rebinding (instead of `inplace=True`) together
# with `errors='ignore'` keeps this cell safe to re-run after the columns are
# already gone.
products = products.drop(columns=['main_category', 'sub_category'], errors='ignore')

In [19]:
# Display the frame after removing the category columns.
products

Unnamed: 0,name,image,link,ratings,no_of_ratings,discount_price,actual_price,#keywords
0,"Redmi 10 Power (Power Black, 8GB RAM, 128GB Storage)",https://m.media-amazon.com/images/I/81eM15lVcJL._AC_UL320_.jpg,https://www.amazon.in/Redmi-Power-Black-128GB-Storage/dp/B09Y64H8VS/ref=sr_1_4?qid=1679133649&s=electronics&sr=1-4,4.0,965,"₹10,999","₹18,999","redmi 10 power (power black, 8gb ram, 128gb storage)"
1,"OnePlus Nord CE 2 Lite 5G (Blue Tide, 6GB RAM, 128GB Storage)",https://m.media-amazon.com/images/I/71AvQd3VzqL._AC_UL320_.jpg,https://www.amazon.in/OnePlus-Nord-Lite-128GB-Storage/dp/B09WQYFLRX/ref=sr_1_5?qid=1679133649&s=electronics&sr=1-5,4.3,113956,"₹18,999","₹19,999","oneplus nord ce 2 lite 5g (blue tide, 6gb ram, 128gb storage)"
2,"OnePlus Bullets Z2 Bluetooth Wireless in Ear Earphones with Mic, Bombastic Bass - 12.4 Mm Drivers, 10 Mins Charge - 20 Hrs...",https://m.media-amazon.com/images/I/51UhwaQXCpL._AC_UL320_.jpg,https://www.amazon.in/Oneplus-Bluetooth-Wireless-Earphones-Bombastic/dp/B09TVVGXWS/ref=sr_1_6?qid=1679133649&s=electronics&sr=1-6,4.2,90304,"₹1,999","₹2,299","oneplus bullets z2 bluetooth wireless in ear earphones with mic, bombastic bass - 12.4 mm drivers, 10 mins charge - 20 hrs..."
3,"Samsung Galaxy M33 5G (Mystique Green, 6GB, 128GB Storage) | 6000mAh Battery | Upto 12GB RAM with RAM Plus | Travel Adapte...",https://m.media-amazon.com/images/I/81I3w4J6yjL._AC_UL320_.jpg,https://www.amazon.in/Samsung-Mystique-Storage-Purchased-Separately/dp/B09TWGDY4W/ref=sr_1_7?qid=1679133649&s=electronics&sr=1-7,4.1,24863,"₹15,999","₹24,999","samsung galaxy m33 5g (mystique green, 6gb, 128gb storage) | 6000mah battery | upto 12gb ram with ram plus | travel adapte..."
4,"OnePlus Nord CE 2 Lite 5G (Black Dusk, 6GB RAM, 128GB Storage)",https://m.media-amazon.com/images/I/71V--WZVUIL._AC_UL320_.jpg,https://www.amazon.in/OnePlus-Nord-Black-128GB-Storage/dp/B09WQY65HN/ref=sr_1_8?qid=1679133649&s=electronics&sr=1-8,4.3,113956,"₹18,999","₹19,999","oneplus nord ce 2 lite 5g (black dusk, 6gb ram, 128gb storage)"
...,...,...,...,...,...,...,...,...
9595,"PALAY® BTS Backpack For Boys Kpop BTS Bangtan School Backback For Student with Cable Vent, Backpack Travel Bag Backpack La...",https://m.media-amazon.com/images/W/IMAGERENDERING_521856-T2/images/I/51WpV43F1ML._AC_UL320_.jpg,https://www.amazon.in/PALAY%C2%AE-Backpack-Bangtan-Backback-Student/dp/B09XMFBMCR/ref=sr_1_9598?qid=1679134236&s=electronics&sr=1-9598,4.1,37,"₹1,710","₹2,491","palay® bts backpack for boys kpop bts bangtan school backback for student with cable vent, backpack travel bag backpack la..."
9596,"MOCA iPad Air 5th Generation Case 2022 / iPad Air 4th 2020 Case 10.9 Inch, Slim Stand Hard Back Shell Protective Smart Cov...",https://m.media-amazon.com/images/W/IMAGERENDERING_521856-T2/images/I/31lmoZ3XWQL._AC_UL320_.jpg,https://www.amazon.in/MOCA-Protective-Generation-Support-Charging/dp/B08SHY6FQV/ref=sr_1_9599?qid=1679134236&s=electronics&sr=1-9599,4.2,1133,₹475,"₹1,999","moca ipad air 5th generation case 2022 / ipad air 4th 2020 case 10.9 inch, slim stand hard back shell protective smart cov..."
9597,"ZEBRONICS Zeb-Sound Bomb 5 TWS V5.0 Bluetooth Truly Wireless in Ear Earbuds with Up to 22H Backup, Flash Connect, Splash P...",https://m.media-amazon.com/images/W/IMAGERENDERING_521856-T2/images/I/61O7fCXFipL._AC_UL320_.jpg,https://www.amazon.in/Zebronics-Zeb-Sound-Bluetooth-Wireless-Earbuds/dp/B09NNNLBVD/ref=sr_1_9600?qid=1679134236&s=electronics&sr=1-9600,3.5,3177,₹962,"₹3,999","zebronics zeb-sound bomb 5 tws v5.0 bluetooth truly wireless in ear earbuds with up to 22h backup, flash connect, splash p..."
9598,LINQS® Original NXP Chip | Waterproof NFC Tag Sticker (Set of 5) | Smartrac Circus NTAG213 chip | for All NFC Phones | Vin...,https://m.media-amazon.com/images/W/IMAGERENDERING_521856-T2/images/I/71Sg1Qsf+4L._AC_UL320_.jpg,https://www.amazon.in/LINQS-Sticker-Smartrac-Circus-NTAG213/dp/B011CD6OVM/ref=sr_1_9601?qid=1679134236&s=electronics&sr=1-9601,3.6,210,₹400,"₹1,000",linqs® original nxp chip | waterproof nfc tag sticker (set of 5) | smartrac circus ntag213 chip | for all nfc phones | vin...


In [20]:
# Delete every character that is neither a word character nor whitespace.
# The pattern is a raw string so the backslash escapes reach the regex engine
# untouched (a plain string literal triggers an invalid-escape warning).
# `\w` already matches digits, so a separate `\d` is not needed.
# NOTE(review): characters are removed, not replaced by a space, so "12.4"
# becomes "124" and "zeb-sound" becomes "zebsound" — confirm this is intended.
products['#keywords'] = products['#keywords'].str.replace(r'[^\w\s]', '', regex=True)

  products['#keywords'] = products['#keywords'].str.replace('''[^\w\d\s]''','', regex = True)


In [21]:
# Inspect the keyword strings after punctuation removal.
products['#keywords']

Unnamed: 0,#keywords
0,redmi 10 power power black 8gb ram 128gb storage
1,oneplus nord ce 2 lite 5g blue tide 6gb ram 128gb storage
2,oneplus bullets z2 bluetooth wireless in ear earphones with mic bombastic bass 124 mm drivers 10 mins charge 20 hrs
3,samsung galaxy m33 5g mystique green 6gb 128gb storage 6000mah battery upto 12gb ram with ram plus travel adapte
4,oneplus nord ce 2 lite 5g black dusk 6gb ram 128gb storage
...,...
9595,palay bts backpack for boys kpop bts bangtan school backback for student with cable vent backpack travel bag backpack la
9596,moca ipad air 5th generation case 2022 ipad air 4th 2020 case 109 inch slim stand hard back shell protective smart cov
9597,zebronics zebsound bomb 5 tws v50 bluetooth truly wireless in ear earbuds with up to 22h backup flash connect splash p
9598,linqs original nxp chip waterproof nfc tag sticker set of 5 smartrac circus ntag213 chip for all nfc phones vin


In [22]:
# One shared Porter stemmer instance, used by `stemming` for every word.
stemmer = PorterStemmer()

In [23]:
def stemming(text, stem_fn=None):
    """Return ``text`` with every whitespace-separated token stemmed.

    Tokens are split on any run of whitespace (spaces, tabs, newlines), so
    repeated or leading/trailing whitespace never produces empty tokens, and
    the stemmed tokens are joined back with single spaces. Punctuation is not
    separated from words; clean the text beforehand if that matters.

    Parameters
    ----------
    text : str
        The string to process.
    stem_fn : callable, optional
        Function mapping one token to its stem. Defaults to the ``stem``
        method of the notebook-level ``stemmer``.

    Returns
    -------
    str
        The stemmed tokens joined by single spaces ('' for blank input).
    """
    if stem_fn is None:
        # Resolved at call time so the notebook-level stemmer is only needed
        # when the caller does not supply a stemming function.
        stem_fn = stemmer.stem
    return ' '.join(stem_fn(word) for word in text.split())

In [24]:
# Sanity-check `stemming` on a sentence with several inflections of "dance".
# Punctuation stays attached to its word because tokens are split on spaces only.
text = "Dancer dancing in the dance floor. while dancing he fell on the floor and dancer's dance in stopped"
stemming(text)

"dancer danc in the danc floor. while danc he fell on the floor and dancer' danc in stop"

In [25]:
# Replace each cleaned keyword string with its stemmed version.
products['#keywords'] = products['#keywords'].apply(stemming)

In [26]:
# Inspect the stemmed keyword strings that will be vectorised.
products['#keywords']

Unnamed: 0,#keywords
0,redmi 10 power power black 8gb ram 128gb storag
1,oneplu nord ce 2 lite 5g blue tide 6gb ram 128gb storag
2,oneplu bullet z2 bluetooth wireless in ear earphon with mic bombast bass 124 mm driver 10 min charg 20 hr
3,samsung galaxi m33 5g mystiqu green 6gb 128gb storag 6000mah batteri upto 12gb ram with ram plu travel adapt
4,oneplu nord ce 2 lite 5g black dusk 6gb ram 128gb storag
...,...
9595,palay bt backpack for boy kpop bt bangtan school backback for student with cabl vent backpack travel bag backpack la
9596,moca ipad air 5th gener case 2022 ipad air 4th 2020 case 109 inch slim stand hard back shell protect smart cov
9597,zebron zebsound bomb 5 tw v50 bluetooth truli wireless in ear earbud with up to 22h backup flash connect splash p
9598,linq origin nxp chip waterproof nfc tag sticker set of 5 smartrac circu ntag213 chip for all nfc phone vin


In [27]:
# Bag-of-words vectoriser: keep at most 5000 terms, drop English stop words,
# and store counts as uint8 (a per-document term count above 255 would not fit).
# NOTE(review): stop-word filtering runs on already-stemmed tokens, so stemmed
# forms that differ from the stop list are presumably not filtered — confirm.
cv = CountVectorizer(max_features = 5000, stop_words = 'english', dtype = np.uint8)

In [28]:
# Learn the vocabulary from the stemmed keyword strings.
cv.fit(products['#keywords'])

In [29]:
# Term-count matrix, one row per product. It is kept in the sparse format
# returned by the vectoriser: `cosine_similarity` accepts sparse input and
# still returns a dense array, so converting to a dense array first only
# costs memory.
vector = cv.transform(products['#keywords'])

In [30]:
# Expect (number of products, vocabulary size).
vector.shape

(9600, 5000)

In [31]:
# Pairwise cosine similarity between the keyword vectors of all products.
# NOTE(review): the result is a dense products x products matrix; at 9600 rows
# and (presumably) float64 that is roughly 737 MB — confirm the memory budget.
similarity = cosine_similarity(vector)

In [32]:
# The feature matrix and the helper text column are no longer needed once the
# similarity matrix exists: release the former and remove the latter in place.
del vector
products.drop(columns=['#keywords'], inplace=True)

In [33]:
# Expect a square matrix with one row and one column per product.
similarity.shape

(9600, 9600)

In [34]:
# Similarity scores of product 0 against every product (entry 0 is its self-similarity).
similarity[0]

array([1.        , 0.28603878, 0.07106691, ..., 0.        , 0.        ,
       0.0836242 ])

In [36]:
# Draw 10 product names with a fixed seed to pick a query for testing the recommender.
products['name'].sample(10, random_state=5)

Unnamed: 0,name
8414,"ONUMTZ Newly Launched Smart Watch Charging Cable, Watch Holder Magnetic 24k Gold Plated 2 pin Cable, Compatible with T55, ..."
2716,"boAt Stone 620 Bluetooth Speaker with 12W RMS Stereo Sound, 10HRS Playtime, TWS Feature, IPX4, Multi-Compatibility Modes(G..."
1360,"VIHM 7 in 1 Electronic Cleaner kit, Cleaning Kit for Monitor Keyboard Airpods MacBook iPad iPhone iPod, Screen Dust Brush ..."
1214,"Apple Lightning to USB Camera Adapter, USB 3.0 OTG Cable for iPhone/iPad to Connect Card Reader, USB Flash Drive, U Disk, ..."
420,"boAt Rockerz 335 Bluetooth in Ear Neckband with Qualcomm aptX & CVC, Upto 30 Hours Playback, ASAP Charge, Signature Sound,..."
9269,ENVOUS Newly Launched Fire Bolt Ring Smartwatch Charging Cable USB Fast Charger Magnetic Charging Cable Adapter (Charger o...
1994,"DIGITEK (DTR-320 FT) Flexible Gorillapod Tripod for All Cameras and Mobile Phones with 360° Ball Head and Phone Holder, Ma..."
5828,"Lava A1 (Candy Blue), Number Talker, Smart AI Battery, 4 Days Battery Backup, Military Grade Certified, Keypad Mobile"
7166,Kangaro Stapler DS 45L - Colour May Vary
7544,"Sellingal Hard Disk Drive Pouch case for 2.5"" HDD Cover WD Seagate Slim Sony Dell Toshiba (Black)"


In [37]:
# Look up the full row of one sampled product by exact name match.
product = "Lava A1 (Candy Blue), Number Talker, Smart AI Battery, 4 Days Battery Backup, Military Grade Certified, Keypad Mobile"
products[products['name'] == product]

Unnamed: 0,name,image,link,ratings,no_of_ratings,discount_price,actual_price
5828,"Lava A1 (Candy Blue), Number Talker, Smart AI Battery, 4 Days Battery Backup, Military Grade Certified, Keypad Mobile",https://m.media-amazon.com/images/I/71llUMCdfZL._AC_UL320_.jpg,https://www.amazon.in/Lava-Notfication-recoding-Military-Certified/dp/B09F3J498L/ref=sr_1_5834?qid=1679134016&s=electronics&sr=1-5834,3.8,3384,"₹1,087","₹1,299"


In [38]:
def recommender(product, top_n=10):
    """Print the catalogue entries most similar to ``product``.

    For each recommendation the row label is printed on one line and the
    product name on the next, ordered from most to least similar.

    Parameters
    ----------
    product : str
        Exact product name; compared with ``==`` against ``products['name']``.
        When several rows share the name, the first one is used as the query.
    top_n : int, optional
        Maximum number of recommendations to print (default 10).

    Raises
    ------
    IndexError
        If no row of ``products`` carries that name.
    """
    matches = products[products['name'] == product].index
    if len(matches) == 0:
        raise IndexError(f"product not found in products['name']: {product!r}")
    product_index = matches[0]

    # Row labels are used as positions into ``similarity``; this relies on
    # ``products`` keeping its default 0..n-1 index.
    ranked = sorted(enumerate(similarity[product_index]),
                    key=lambda pair: pair[1], reverse=True)

    # Exclude the query by index rather than by slicing off position 0: an
    # item with identical keywords and a lower index ties at the top score and
    # sorts ahead of the query, which would otherwise leave the query itself
    # in the recommendations.
    recommended = [idx for idx, _ in ranked if idx != product_index][:top_n]
    for idx in recommended:
        print(idx)
        print(products.loc[idx, 'name'])

In [39]:
# Print recommendations for the sampled Lava A1 keypad phone.
recommender("Lava A1 (Candy Blue), Number Talker, Smart AI Battery, 4 Days Battery Backup, Military Grade Certified, Keypad Mobile")

3343
Lava A1 2021(Blue Silver), Bluetooth Support, Smart AI Battery, Military Grade Certified,4 Days Battery Backup, Keypad Mobile
9194
Lava A1 2021(Black Gold), Bluetooth Support, Smart AI Battery, Military Grade Certified,4 Days Battery Backup, Keypad Mobile
7687
Lava A1 Josh 21(Blue Silver) -Dual Sim,Call Blink Notification,Military Grade Certified with 4 Day Battery Backup, Keypad ...
5275
Lava A5 (Gold), Military Grade Certified with 3 Days Battery Backup, Sound Leakage Resistance, Super Battery Mode, Keypad ...
5305
Lava Hero Punch(Charcoal Grey),Stylish Design, 3 Day Battery Backup, FM with Recording, Keypad Mobile, Basic Mobile
2504
Lava A3 Power (Aqua Blue),Military Grade Certified, Jumbo Battery with 10 Days Backup, Dedicated Music Buttons, Keypad Mob...
1950
Lava Gem (Blue Gold), Speaker with Amplifier, PMMA 2.5D Glass, Military Grade Certified,1.3 MP Camera, Keypad Mobile
1498
Lava Flip, Blue - Dual Sim Keypad GSM Mobile with Unique Design, Notification LED and Number Talke

In [40]:
# Persist the similarity matrix. The `with` block guarantees the file is
# flushed and closed even if pickling fails part-way.
with open('similarity.pkl', 'wb') as similarity_file:
    pickle.dump(similarity, similarity_file)

In [42]:
# Persist the product table (without the helper keyword column). The `with`
# block guarantees the file is flushed and closed even if pickling fails.
with open('data.pkl', 'wb') as data_file:
    pickle.dump(products, data_file)