In [78]:
import numpy as np
import pandas as pd
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle

In [79]:
electronics = pd.read_csv('/content/All Electronics.csv')

In [80]:
pd.set_option('display.max_columns', None)

In [81]:
electronics.head(2)

Unnamed: 0,name,main_category,sub_category,image,link,ratings,no_of_ratings,discount_price,actual_price
0,"Redmi 10 Power (Power Black, 8GB RAM, 128GB St...","tv, audio & cameras",All Electronics,https://m.media-amazon.com/images/I/81eM15lVcJ...,https://www.amazon.in/Redmi-Power-Black-128GB-...,4.0,965,"₹10,999","₹18,999"
1,"OnePlus Nord CE 2 Lite 5G (Blue Tide, 6GB RAM,...","tv, audio & cameras",All Electronics,https://m.media-amazon.com/images/I/71AvQd3Vzq...,https://www.amazon.in/OnePlus-Nord-Lite-128GB-...,4.3,113956,"₹18,999","₹19,999"


In [82]:
electronics.shape

(9600, 9)

In [83]:
electronics.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9600 entries, 0 to 9599
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   name            9600 non-null   object
 1   main_category   9600 non-null   object
 2   sub_category    9600 non-null   object
 3   image           9600 non-null   object
 4   link            9600 non-null   object
 5   ratings         9505 non-null   object
 6   no_of_ratings   9505 non-null   object
 7   discount_price  9116 non-null   object
 8   actual_price    9530 non-null   object
dtypes: object(9)
memory usage: 675.1+ KB


In [84]:
electronics.duplicated().sum()

0

In [85]:
electronics.isnull().sum()

Unnamed: 0,0
name,0
main_category,0
sub_category,0
image,0
link,0
ratings,95
no_of_ratings,95
discount_price,484
actual_price,70


In [86]:
electronics['#keywords'] = electronics['name'].str.lower()

In [87]:
electronics.head(2)

Unnamed: 0,name,main_category,sub_category,image,link,ratings,no_of_ratings,discount_price,actual_price,#keywords
0,"Redmi 10 Power (Power Black, 8GB RAM, 128GB St...","tv, audio & cameras",All Electronics,https://m.media-amazon.com/images/I/81eM15lVcJ...,https://www.amazon.in/Redmi-Power-Black-128GB-...,4.0,965,"₹10,999","₹18,999","redmi 10 power (power black, 8gb ram, 128gb st..."
1,"OnePlus Nord CE 2 Lite 5G (Blue Tide, 6GB RAM,...","tv, audio & cameras",All Electronics,https://m.media-amazon.com/images/I/71AvQd3Vzq...,https://www.amazon.in/OnePlus-Nord-Lite-128GB-...,4.3,113956,"₹18,999","₹19,999","oneplus nord ce 2 lite 5g (blue tide, 6gb ram,..."


In [88]:
electronics['#keywords']

Unnamed: 0,#keywords
0,"redmi 10 power (power black, 8gb ram, 128gb st..."
1,"oneplus nord ce 2 lite 5g (blue tide, 6gb ram,..."
2,oneplus bullets z2 bluetooth wireless in ear e...
3,"samsung galaxy m33 5g (mystique green, 6gb, 12..."
4,"oneplus nord ce 2 lite 5g (black dusk, 6gb ram..."
...,...
9595,palay® bts backpack for boys kpop bts bangtan ...
9596,moca ipad air 5th generation case 2022 / ipad ...
9597,zebronics zeb-sound bomb 5 tws v5.0 bluetooth ...
9598,linqs® original nxp chip | waterproof nfc tag ...


In [89]:
electronics.columns

Index(['name', 'main_category', 'sub_category', 'image', 'link', 'ratings',
       'no_of_ratings', 'discount_price', 'actual_price', '#keywords'],
      dtype='object')

In [90]:
print(electronics['main_category'].value_counts())
print(electronics['sub_category'].value_counts())

main_category
tv, audio & cameras    9600
Name: count, dtype: int64
sub_category
All Electronics    9600
Name: count, dtype: int64


In [91]:
electronics.drop(['main_category','sub_category'],axis=1,inplace=True)

In [92]:
electronics.head(2)

Unnamed: 0,name,image,link,ratings,no_of_ratings,discount_price,actual_price,#keywords
0,"Redmi 10 Power (Power Black, 8GB RAM, 128GB St...",https://m.media-amazon.com/images/I/81eM15lVcJ...,https://www.amazon.in/Redmi-Power-Black-128GB-...,4.0,965,"₹10,999","₹18,999","redmi 10 power (power black, 8gb ram, 128gb st..."
1,"OnePlus Nord CE 2 Lite 5G (Blue Tide, 6GB RAM,...",https://m.media-amazon.com/images/I/71AvQd3Vzq...,https://www.amazon.in/OnePlus-Nord-Lite-128GB-...,4.3,113956,"₹18,999","₹19,999","oneplus nord ce 2 lite 5g (blue tide, 6gb ram,..."


In [93]:
electronics['#keywords'] = electronics['#keywords'].str.replace('''[^\w\d\s]''','',regex=True)

In [94]:
electronics['#keywords']

Unnamed: 0,#keywords
0,redmi 10 power power black 8gb ram 128gb storage
1,oneplus nord ce 2 lite 5g blue tide 6gb ram 12...
2,oneplus bullets z2 bluetooth wireless in ear e...
3,samsung galaxy m33 5g mystique green 6gb 128gb...
4,oneplus nord ce 2 lite 5g black dusk 6gb ram 1...
...,...
9595,palay bts backpack for boys kpop bts bangtan s...
9596,moca ipad air 5th generation case 2022 ipad a...
9597,zebronics zebsound bomb 5 tws v50 bluetooth tr...
9598,linqs original nxp chip waterproof nfc tag st...


In [95]:
stemmer = PorterStemmer()

In [96]:
def stemming(text):
  words = []
  for i in text.split():
    words.append(stemmer.stem(i))
  return ' '.join(words)

In [97]:
text = "hello worLd"
stemming(text)

'hello world'

In [98]:
electronics['#keywords'] = electronics['#keywords'].apply(stemming)

In [99]:
electronics['#keywords']

Unnamed: 0,#keywords
0,redmi 10 power power black 8gb ram 128gb storag
1,oneplu nord ce 2 lite 5g blue tide 6gb ram 128...
2,oneplu bullet z2 bluetooth wireless in ear ear...
3,samsung galaxi m33 5g mystiqu green 6gb 128gb ...
4,oneplu nord ce 2 lite 5g black dusk 6gb ram 12...
...,...
9595,palay bt backpack for boy kpop bt bangtan scho...
9596,moca ipad air 5th gener case 2022 ipad air 4th...
9597,zebron zebsound bomb 5 tw v50 bluetooth truli ...
9598,linq origin nxp chip waterproof nfc tag sticke...


In [100]:
cv = CountVectorizer(max_features=5000,stop_words='english', dtype=np.uint8)

In [101]:
cv.fit(electronics['#keywords'])

In [102]:
vector = cv.transform(electronics['#keywords']).toarray()

In [103]:
vector.shape

(9600, 5000)

In [104]:
similarity = cosine_similarity(vector)

In [105]:
del(vector)
electronics.drop(['#keywords'],axis=1,inplace=True)

In [106]:
similarity.shape

(9600, 9600)

In [107]:
similarity[0]

array([1.        , 0.28603878, 0.07106691, ..., 0.        , 0.        ,
       0.0836242 ])

In [110]:
electronics['name'].sample(10,random_state=5)

Unnamed: 0,name
8414,ONUMTZ Newly Launched Smart Watch Charging Cab...
2716,boAt Stone 620 Bluetooth Speaker with 12W RMS ...
1360,"VIHM 7 in 1 Electronic Cleaner kit, Cleaning K..."
1214,"Apple Lightning to USB Camera Adapter, USB 3.0..."
420,boAt Rockerz 335 Bluetooth in Ear Neckband wit...
9269,ENVOUS Newly Launched Fire Bolt Ring Smartwatc...
1994,DIGITEK (DTR-320 FT) Flexible Gorillapod Tripo...
5828,"Lava A1 (Candy Blue), Number Talker, Smart AI ..."
7166,Kangaro Stapler DS 45L - Colour May Vary
7544,"Sellingal Hard Disk Drive Pouch case for 2.5"" ..."


In [112]:
product = "Lava A1 (Candy Blue), Number Talker, Smart AI Battery, 4 Days Battery Backup, Military Grade Certified, Keypad Mobile"
electronics[electronics['name'] == product]

Unnamed: 0,name,image,link,ratings,no_of_ratings,discount_price,actual_price
5828,"Lava A1 (Candy Blue), Number Talker, Smart AI ...",https://m.media-amazon.com/images/I/71llUMCdfZ...,https://www.amazon.in/Lava-Notfication-recodin...,3.8,3384,"₹1,087","₹1,299"


In [114]:
def recommender(product):
    product_index = electronics[electronics['name'] == product].index[0]
    similarity_list = list(enumerate(similarity[product_index]))
    top_10_similar_product = sorted(similarity_list, key=lambda x : x[1], reverse = True)[1:11]
    for idx, similary in top_10_similar_product:
        print(idx)
        print(electronics.loc[idx]['name'])

In [115]:
recommender("Lava A1 (Candy Blue), Number Talker, Smart AI Battery, 4 Days Battery Backup, Military Grade Certified, Keypad Mobile")

3343
Lava A1 2021(Blue Silver), Bluetooth Support, Smart AI Battery, Military Grade Certified,4 Days Battery Backup, Keypad Mobile
9194
Lava A1 2021(Black Gold), Bluetooth Support, Smart AI Battery, Military Grade Certified,4 Days Battery Backup, Keypad Mobile
7687
Lava A1 Josh 21(Blue Silver) -Dual Sim,Call Blink Notification,Military Grade Certified with 4 Day Battery Backup, Keypad ...
5275
Lava A5 (Gold), Military Grade Certified with 3 Days Battery Backup, Sound Leakage Resistance, Super Battery Mode, Keypad ...
5305
Lava Hero Punch(Charcoal Grey),Stylish Design, 3 Day Battery Backup, FM with Recording, Keypad Mobile, Basic Mobile
2504
Lava A3 Power (Aqua Blue),Military Grade Certified, Jumbo Battery with 10 Days Backup, Dedicated Music Buttons, Keypad Mob...
1498
Lava Flip, Blue - Dual Sim Keypad GSM Mobile with Unique Design, Notification LED and Number Talker
1950
Lava Gem (Blue Gold), Speaker with Amplifier, PMMA 2.5D Glass, Military Grade Certified,1.3 MP Camera, Keypad Mobil

In [116]:
pickle.dump(similarity, open('similarity.pkl', 'wb'))

In [117]:
pickle.dump(electronics, open('data.pkl', 'wb'))