# BigBasket Product  Recommendation System

# Importing Libraries

In [14]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Loading Dataset

In [15]:
products_data = pd.read_csv('/content/BigBasket Products.csv')

In [16]:
#To display top 5 row in dataset
products_data.head()

Unnamed: 0,index,product,category,sub_category,brand,sale_price,market_price,type,rating,description
0,1,Garlic Oil - Vegetarian Capsule 500 mg,Beauty & Hygiene,Hair Care,Sri Sri Ayurveda,220.0,220.0,Hair Oil & Serum,4.1,This Product contains Garlic Oil that is known...
1,2,Water Bottle - Orange,"Kitchen, Garden & Pets",Storage & Accessories,Mastercook,180.0,180.0,Water & Fridge Bottles,2.3,"Each product is microwave safe (without lid), ..."
2,3,"Brass Angle Deep - Plain, No.2",Cleaning & Household,Pooja Needs,Trm,119.0,250.0,Lamp & Lamp Oil,3.4,"A perfect gift for all occasions, be it your m..."
3,4,Cereal Flip Lid Container/Storage Jar - Assort...,Cleaning & Household,Bins & Bathroom Ware,Nakoda,149.0,176.0,"Laundry, Storage Baskets",3.7,Multipurpose container with an attractive desi...
4,5,Creme Soft Soap - For Hands & Body,Beauty & Hygiene,Bath & Hand Wash,Nivea,162.0,162.0,Bathing Bars & Soaps,4.4,Nivea Creme Soft Soap gives your skin the best...


In [17]:
#To display Bottom 5 rows
products_data.tail()

Unnamed: 0,index,product,category,sub_category,brand,sale_price,market_price,type,rating,description
27550,27551,"Wottagirl! Perfume Spray - Heaven, Classic",Beauty & Hygiene,Fragrances & Deos,Layerr,199.2,249.0,Perfume,3.9,Layerr brings you Wottagirl Classic fragrant b...
27551,27552,Rosemary,Gourmet & World Food,Cooking & Baking Needs,Puramate,67.5,75.0,"Herbs, Seasonings & Rubs",4.0,Puramate rosemary is enough to transform a dis...
27552,27553,Peri-Peri Sweet Potato Chips,Gourmet & World Food,"Snacks, Dry Fruits, Nuts",FabBox,200.0,200.0,Nachos & Chips,3.8,We have taken the richness of Sweet Potatoes (...
27553,27554,Green Tea - Pure Original,Beverages,Tea,Tetley,396.0,495.0,Tea Bags,4.2,"Tetley Green Tea with its refreshing pure, ori..."
27554,27555,United Dreams Go Far Deodorant,Beauty & Hygiene,Men's Grooming,United Colors Of Benetton,214.53,390.0,Men's Deodorants,4.5,The new mens fragrance from the United Dreams ...


In [18]:
# Checking size(rows,columns) of dataset
products_data.shape

(27555, 10)

In [19]:
#checking for null values
products_data.isnull().sum()

index              0
product            1
category           0
sub_category       0
brand              1
sale_price         0
market_price       0
type               0
rating          8626
description      115
dtype: int64

In [20]:
# Selecting the relevant features for recommendation
selected_features = ['product', 'category', 'sub_category', 'brand', 'type', 'rating', 'description']
print(selected_features)

['product', 'category', 'sub_category', 'brand', 'type', 'rating', 'description']


In [21]:
# Replacing the null values with an empty string
for feature in selected_features:
    products_data[feature] = products_data[feature].fillna('').astype(str)

In [22]:
products_data.isnull().sum()

index           0
product         0
category        0
sub_category    0
brand           0
sale_price      0
market_price    0
type            0
rating          0
description     0
dtype: int64

In [23]:
# Combining all the selected features
combined_features = products_data['product'] + ' ' + products_data['category'] + ' ' + products_data['sub_category'] + ' ' + products_data['brand'] + ' ' + products_data['type'] + ' ' + products_data['rating']

In [24]:
combined_features

0        Garlic Oil - Vegetarian Capsule 500 mg Beauty ...
1        Water Bottle - Orange Kitchen, Garden & Pets S...
2        Brass Angle Deep - Plain, No.2 Cleaning & Hous...
3        Cereal Flip Lid Container/Storage Jar - Assort...
4        Creme Soft Soap - For Hands & Body Beauty & Hy...
                               ...                        
27550    Wottagirl! Perfume Spray - Heaven, Classic Bea...
27551    Rosemary Gourmet & World Food Cooking & Baking...
27552    Peri-Peri Sweet Potato Chips Gourmet & World F...
27553    Green Tea - Pure Original Beverages Tea Tetley...
27554    United Dreams Go Far Deodorant Beauty & Hygien...
Length: 27555, dtype: object

In [25]:
# converting the text data to feature vectors

vectorizer = TfidfVectorizer()

In [26]:
feature_vectors = vectorizer.fit_transform(combined_features)

In [27]:
print(feature_vectors)

  (0, 9784)	0.21866249918965044
  (0, 1462)	0.19128795170024968
  (0, 10371)	0.5242538256451449
  (0, 2549)	0.11077996184135593
  (0, 5181)	0.32847595842329275
  (0, 5524)	0.08935823855886982
  (0, 1837)	0.09040483913357748
  (0, 7132)	0.3054800297754058
  (0, 612)	0.2640110993380006
  (0, 2523)	0.3391186592652382
  (0, 11495)	0.3343925207244564
  (0, 7925)	0.23736907692488057
  (0, 4791)	0.24133620016562649
  (1, 2207)	0.3024386072607223
  (1, 4668)	0.30752676808767787
  (1, 6956)	0.3990815006939438
  (1, 894)	0.20221319543639463
  (1, 10481)	0.2252332146740695
  (1, 8433)	0.16979626649846952
  (1, 4787)	0.1677297402146897
  (1, 6165)	0.16788022107545597
  (1, 7994)	0.3069783831868423
  (1, 2206)	0.28947666847623155
  (1, 11693)	0.5476236648619582
  (2, 6377)	0.6063288446092481
  :	:
  (27552, 10131)	0.12762067433753715
  (27552, 3899)	0.17044114422610937
  (27552, 4579)	0.11330925170024515
  (27552, 11847)	0.11753234208669373
  (27552, 5025)	0.1172176437922079
  (27553, 10887)	0.4537

# Cosine Similarity

In [28]:
# getting the similarity scores using cosine similarity

similarity = cosine_similarity(feature_vectors)

In [29]:
print(similarity)

[[1.         0.         0.02795849 ... 0.         0.         0.01454585]
 [0.         1.         0.         ... 0.         0.         0.        ]
 [0.02795849 0.         1.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 1.         0.         0.        ]
 [0.         0.         0.         ... 0.         1.         0.        ]
 [0.01454585 0.         0.         ... 0.         0.         1.        ]]


In [30]:
print(similarity.shape)

(27555, 27555)


Getting the movie name from the user

In [31]:
# Getting the product name from the user
product_name = input('Enter your favorite product name: ')

Enter your favorite product name: Green Tea - Pure Original


In [32]:
# Creating a list with all the product names given in the dataset
list_of_all_products = products_data['product'].tolist()

In [33]:
print(list_of_all_products)

['Garlic Oil - Vegetarian Capsule 500 mg', 'Water Bottle - Orange', 'Brass Angle Deep - Plain, No.2', 'Cereal Flip Lid Container/Storage Jar - Assorted Colour', 'Creme Soft Soap - For Hands & Body', 'Germ - Removal Multipurpose Wipes', 'Multani Mati', 'Hand Sanitizer - 70% Alcohol Base', 'Biotin & Collagen Volumizing Hair Shampoo + Biotin & Collagen Hair Conditioner', 'Scrub Pad - Anti- Bacterial, Regular', 'Wheat Grass Powder - Raw', 'Butter Cookies Gold Collection', 'Face Wash - Oil Control, Active', 'Mold & Mildew Remover with Bleach', 'Just Spray - Mosquito Repellent Room Spray', 'Dove Plastic Soap Case - Assorted Colour', 'Smooth Skin Oil - For Dry Skin', 'Salted Pumpkin', 'Flax Seeds - Roasted', 'Organic Tofu - Soy Paneer', 'Ceramic Barrel Brush - Colour May Vary', 'Instant Noodles - Chicken Satay Flavor', 'Chia Seeds', 'Cleanse Green Tea - Whole Leaf Loose Tea', 'Veggie Cutter', 'Insulated Hot Fresh Casserole For Roti/Chapati - White', 'Granola - Happy Berries', 'Flaxseed - Pest

In [34]:
# Finding the close match for the product name given by the user
find_close_match = difflib.get_close_matches(product_name, list_of_all_products)
close_match = find_close_match[0]

In [35]:
close_match = find_close_match[0]
print(close_match)

Green Tea - Pure Original


In [36]:
if find_close_match:
    close_match = find_close_match[0]
    # Finding the index of the product with the title
    index_of_the_product = products_data[products_data['product'] == close_match].index[0]
    print("Index of the product:", index_of_the_product)






Index of the product: 27553


In [37]:
# Getting a list of similar products
similarity_score = list(enumerate(similarity[index_of_the_product]))
print(similarity_score)

[(0, 0.0), (1, 0.0), (2, 0.0), (3, 0.0), (4, 0.0), (5, 0.0), (6, 0.0), (7, 0.0), (8, 0.0), (9, 0.0), (10, 0.0), (11, 0.0), (12, 0.0), (13, 0.0), (14, 0.0), (15, 0.0), (16, 0.0), (17, 0.0), (18, 0.0), (19, 0.0), (20, 0.0), (21, 0.0), (22, 0.0), (23, 0.5876045742714227), (24, 0.0), (25, 0.0), (26, 0.0), (27, 0.0), (28, 0.0), (29, 0.0), (30, 0.03797056306160617), (31, 0.0), (32, 0.0), (33, 0.5391159324237097), (34, 0.0), (35, 0.0), (36, 0.0), (37, 0.0), (38, 0.0), (39, 0.0), (40, 0.04526169094100968), (41, 0.15549211778162284), (42, 0.0), (43, 0.0), (44, 0.05208735348553958), (45, 0.0), (46, 0.0), (47, 0.0), (48, 0.0), (49, 0.0), (50, 0.0), (51, 0.042275440143355894), (52, 0.0), (53, 0.0), (54, 0.05014083207039829), (55, 0.0), (56, 0.0), (57, 0.0), (58, 0.0), (59, 0.0), (60, 0.0), (61, 0.0), (62, 0.0), (63, 0.0), (64, 0.0), (65, 0.0), (66, 0.0), (67, 0.0), (68, 0.0), (69, 0.0), (70, 0.0), (71, 0.08174934673077705), (72, 0.0), (73, 0.0), (74, 0.04529484896004843), (75, 0.0), (76, 0.0), (77

In [38]:
len(similarity_score)

27555

In [39]:
# Sorting the products based on their similarity score
sorted_similar_products = sorted(similarity_score, key=lambda x: x[1], reverse=True)
print(sorted_similar_products)

[(27553, 1.0000000000000002), (4148, 0.8650111270734231), (385, 0.8449857362632726), (16861, 0.832880917224662), (21056, 0.7971570803527347), (24004, 0.7961975737102077), (1200, 0.7717905151640142), (13592, 0.7475462933072243), (26523, 0.7442889707140941), (20783, 0.7407313408484734), (24590, 0.7175318286218338), (13513, 0.7154834608778411), (16883, 0.6930642986502435), (6673, 0.6790729368922974), (18140, 0.6758883202019992), (17908, 0.6711346930000045), (813, 0.6699279535847242), (20678, 0.6699279535847242), (21385, 0.654141875321486), (17379, 0.6541153479144236), (17545, 0.6503603189555975), (12835, 0.650155533367563), (15359, 0.649058156507718), (16484, 0.649058156507718), (15749, 0.6476450289242972), (25705, 0.6476450289242972), (16849, 0.6451693851320436), (26942, 0.643646036405215), (20308, 0.6385138344745036), (11345, 0.6383287671208664), (17259, 0.6383287671208664), (26930, 0.63754784588054), (10441, 0.6371207681294434), (15754, 0.6371207681294434), (2737, 0.6351898875969327), 

In [40]:
# Print the names of similar products based on the index
print('Products suggested for you:\n')
i = 1
for product in sorted_similar_products:
  index = product[0]
  product_from_index = products_data.iloc[index]['product']
  if i < 30:
    print(i, '.', product_from_index)
    i += 1


Products suggested for you:

1 . Green Tea - Pure Original
2 . Masala Tea
3 . Black Tea
4 . Green Tea - Lemon & Honey
5 . Green Tea - Regular
6 . Elaichi Tea
7 . Green Tea - Long Leaf
8 . Earl Grey Tea
9 . Long Leaf Green Tea - Lemon
10 . Tea - Ginger Zing
11 . Tea Bags - Pure Green Tea
12 . Green Tea
13 . Green Tea  - Pure & Light
14 . Green Tea & Lemon
15 . Green Tea - Pure
16 . Pure Green Tea Bags
17 . 100% Green Tea
18 . 100% Green Tea
19 . Green Tea Immune With Added Vitamin C - Natural Mango
20 . Green Tea - Pure, Leaf
21 . Green Tea Immune With Added Vitamin C, Lemon & Honey
22 . Pure Green Tea, Imported
23 . Green Tea With Tulsi
24 . Green Tea With Tulsi
25 . Green Tea - Pure & Light
26 . Green Tea - Pure & Light
27 . Pure & Light Green Tea Bags
28 . Real Tulsi Leaves With Green Tea - Bags
29 . Green Tea With Lemon & Honey


# **Product Recommendation Sytem**

In [41]:
product_name = input('Enter your favorite product name: ')

list_of_all_products = products_data['product'].tolist()

find_close_match = difflib.get_close_matches(product_name, list_of_all_products)
close_match = find_close_match[0]

if find_close_match:
    close_match = find_close_match[0]
    # Finding the index of the product with the title
    index_of_the_product = products_data[products_data['product'] == close_match].index[0]

similarity_score = list(enumerate(similarity[index_of_the_product]))

sorted_similar_products = sorted(similarity_score, key=lambda x: x[1], reverse=True)

print('Products suggested for you:\n')
i = 1
for product in sorted_similar_products:
  index = product[0]
  product_from_index = products_data.iloc[index]['product']
  if i < 30:
    print(i, '.', product_from_index)
    i += 1

Enter your favorite product name: Peri-Peri Sweet Potato Chips
Products suggested for you:

1 . Peri-Peri Sweet Potato Chips
2 . Peri-Peri Beetroot Chips
3 . Sweet Potato Chips - Peri-Peri
4 . Nacho Chips - Peri-Peri
5 . Nacho Chips - Peri Peri
6 . Salsa Sweet Potato Chips
7 . Beetroot Chips - Peri-Peri
8 . Nacho Crisps - Peri Peri
9 . Nacho Chips Party Pack- Jalapeno + Cheese + Peri-Peri + Sweet Chilli (60 g)
10 . Nacho Chips - Peri Peri
11 . Nachos Peri Peri
12 . On The Go - Peri Peri Nachos & Salsa Dip
13 . Chia Seeds Chips
14 . Peri Peri Chickpeas
15 . Peri Peri Pumpkin Seeds
16 . Mexican Quinoa Chips
17 . High Protein Soya Chips
18 . Salsa Sweet Potato Chips
19 . Potato Chips - Pizza
20 . Barbeque Beetroot Chips
21 . Chips - Peri Peri
22 . Chips - Peri Peri
23 . Potato Chips - Honey & Mustard
24 . Potato Chips - Cheddar Cheese
25 . Nacho Chips - Sweet Chilli
26 . Potato Chips - Ranch
27 . Potato Chips - Sweet Chilli with Lime & Basil
28 . Protein Packed Nachos - Peri Peri Masala
2