# **PRODUCT RECOMMENDATION SYSTEM**

### **OBJECTIVE:** Build a product recommendation system that recommends products similar to the one purchased by the customer.


**MODEL USED:** K-Nearest Neighbors (scikit-learn `NearestNeighbors` with cosine distance)


**MAIN COLUMNS USED:** product_id (ASIN), main_cat, brand, category (similarity features); rating (used to filter the recommendations)

In [None]:
# Mount Google Drive (Colab only) so the dataset stored under /content/drive can be read
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Importing Libraries
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas import DataFrame 
import nltk

from sklearn.neighbors import NearestNeighbors
from sklearn import neighbors
#from scipy.spatial.distance import cosine
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
#from sklearn.feature_selection import SelectKBest
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import LabelEncoder,StandardScaler
from scipy.sparse import csr_matrix

import re
import string
from wordcloud import WordCloud, STOPWORDS
from sklearn.metrics import mean_squared_error
from datetime import date

In [None]:
# Load the final review dataset from the mounted Drive and preview the first rows
final_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/A_final_data.csv')
final_df.head()

Unnamed: 0,reviewerID,product_id,reviewerName,verified,reviewText,rating,date,summary,brand,category,main_cat
0,A2YCGMW0ZKPYZ2,788602144,Bernard Perusse,True,"The answer, of course, is no. A lot of time ha...",4,2002-07-08,Nostalgia: overrated?,Patrick Adiarte,"CDs & Vinyl,Classic Rock,British Invasion",Movies & TV
1,A5N992ESD6UHW,967680654,Jonny Lee,True,"A bit outdated, from a special-effects/technol...",4,2016-06-21,Would be Nice to See This Effective Concept Re...,Daniel Kruse,"CDs & Vinyl,Christian,Pop & Contemporary",Movies & TV
2,A35L775VMC0BKL,967680654,Fm,True,Thank you,5,2015-11-18,Five Stars,Daniel Kruse,"CDs & Vinyl,Christian,Pop & Contemporary",Movies & TV
3,A1ZJJKOC38Q622,967680654,mitzi,True,very good,5,2015-01-21,Five Stars,Daniel Kruse,"CDs & Vinyl,Christian,Pop & Contemporary",Movies & TV
4,A2S78BQKIPAMT7,967680654,faithful,True,This is good for someone who is trying to make...,5,2013-02-02,Escape from Hell,Daniel Kruse,"CDs & Vinyl,Christian,Pop & Contemporary",Movies & TV


In [None]:
# Dimensions of the loaded review table as (rows, columns)
final_df.shape

(1373582, 11)

In [None]:
# Headline counts: one row per rating, plus distinct reviewers and distinct products
n_ratings = len(final_df)
n_users = np.unique(final_df['reviewerID']).size
n_products = np.unique(final_df['product_id']).size

print("Total data of verified reviews ")
print("-"*50)
print("\nTotal no of ratings :", n_ratings)
print("Total No of Users   :", n_users)
print("Total No of products  :", n_products)

Total data of verified reviews 
--------------------------------------------------

Total no of ratings : 1373582
Total No of Users   : 181049
Total No of products  : 96631


In [None]:
# Narrow the review table to the product attributes and the rating used by the recommender
data = final_df.loc[:, ['product_id','brand','main_cat','rating','category']]

In [None]:
# Preview the selected columns
data.head()

Unnamed: 0,product_id,brand,main_cat,rating,category
0,788602144,Patrick Adiarte,Movies & TV,4,"CDs & Vinyl,Classic Rock,British Invasion"
1,967680654,Daniel Kruse,Movies & TV,4,"CDs & Vinyl,Christian,Pop & Contemporary"
2,967680654,Daniel Kruse,Movies & TV,5,"CDs & Vinyl,Christian,Pop & Contemporary"
3,967680654,Daniel Kruse,Movies & TV,5,"CDs & Vinyl,Christian,Pop & Contemporary"
4,967680654,Daniel Kruse,Movies & TV,5,"CDs & Vinyl,Christian,Pop & Contemporary"


In [None]:
# Count the missing values in each selected column
data.isnull().sum()

product_id    0
brand         0
main_cat      0
rating        0
category      0
dtype: int64

In [None]:
# Average the review ratings per product; reset_index turns the grouped Series
# into a two-column frame (product_id, rating)
rating = data.groupby('product_id')['rating'].mean().reset_index()
rating.columns

Index(['product_id', 'rating'], dtype='object')

In [None]:
# Swap the per-review rating for the per-product mean computed in `rating`
data = data.drop(columns='rating').merge(rating, on='product_id', how='inner')

In [None]:
# Shape after attaching the per-product mean rating; `rating` has one row per
# product_id, so the inner merge keeps one output row per input row
data.shape

(1373582, 5)

In [None]:
# Keep the first occurrence of every fully identical row and discard the repeats
data = data.loc[~data.duplicated(keep='first')]

In [None]:
# Shape of the dataset after de-duplication
data.shape

(96631, 5)

In [None]:
# Label-encode the categorical columns.
# Some values are HTML-escaped in the raw data (e.g. "Toys &amp; Games" next to
# "Toys & Games"); left as-is they become two different categories with two
# different codes, so the entity is normalised before encoding.
# Each column gets its own encoder so every mapping can be inverted later;
# a single shared encoder only remembers the last column it was fitted on.
label_encoders = {}
for col in ['main_cat', 'brand', 'category']:
    data[col] = (
        data[col]
        .astype(str)
        .str.replace('&amp;', '&', regex=False)
        .astype('category')
    )
    encoder = LabelEncoder()
    data[col + '_encode'] = encoder.fit_transform(data[col])
    label_encoders[col] = encoder

# Backward compatibility: `label_encoder` used to end up fitted on `category`
label_encoder = label_encoders['category']

In [None]:
# Use product_id (the ASIN) as the index so the feature matrix and neighbour table can be keyed by product
data=data.set_index('product_id')
data.head()

Unnamed: 0_level_0,brand,main_cat,category,rating,main_cat_encode,brand_encode,category_encode
product_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
788602144,Patrick Adiarte,Movies & TV,"CDs & Vinyl,Classic Rock,British Invasion",4.666667,24,16825,86
967680654,Daniel Kruse,Movies & TV,"CDs & Vinyl,Christian,Pop & Contemporary",4.6,24,5418,78
972179526,Dr. Harvey Karp,Movies & TV,"CDs & Vinyl,Special Interest,Instructional",4.470588,24,6454,358
1566054109,Pat Benatar,Movies & TV,"CDs & Vinyl,Classic Rock,Album-Oriented Rock (...",5.0,24,16803,84
3937406875,Dusty Springfield,Books,"CDs & Vinyl,World Music,Europe",4.583333,9,6564,395


In [None]:
# Take a real copy: plain assignment would only create a second name for the
# same DataFrame, so any later in-place change to `data` would also change `data_copy`
data_copy = data.copy()

In [None]:
# Optional (disabled): keep only well-rated products before scaling, e.g. data = data[data.rating > 3]

In [None]:
# Standardise the three encoded feature columns and keep the product_id index
feature_cols = ['main_cat_encode','brand_encode','category_encode']
scaler = StandardScaler()
scaled_values = scaler.fit_transform(data[feature_cols])
d_scaled = pd.DataFrame(scaled_values, columns=feature_cols, index=data.index)

In [None]:
# Preview the standardised feature matrix (indexed by product_id)
d_scaled.head()

Unnamed: 0_level_0,main_cat_encode,brand_encode,category_encode
product_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
788602144,1.230858,0.58662,-0.871596
967680654,1.230858,-1.030622,-0.895845
972179526,1.230858,-0.883741,-0.047134
1566054109,1.230858,0.583501,-0.877659
3937406875,-1.393009,-0.868146,0.065017


In [None]:
# Unsupervised nearest-neighbour model over the scaled features,
# using cosine distance with brute-force search
model_knn = NearestNeighbors(metric = 'cosine', algorithm = 'brute')

model_knn.fit(d_scaled)

NearestNeighbors(algorithm='brute', metric='cosine')

In [None]:
# Query the 5 nearest neighbours of every product. The query set is the same
# matrix the model was fitted on, so each product's own row position shows up
# among its neighbours, and it is not necessarily listed first.
# `indices` holds row positions within d_scaled, not product ids.
distances, indices = model_knn.kneighbors(d_scaled, n_neighbors = 5)
print('distances: ', distances.shape, 'indices: ', indices.shape)

distances:  (96631, 5) indices:  (96631, 5)


In [None]:
# Neighbour row positions: one row per product, 5 columns
indices

array([[    0,     3,    15,    39,   655],
       [    1,  1005,   250,   363,  1161],
       [    2,    10,   384,   856,   318],
       ...,
       [96628, 80266, 74505, 79677, 86460],
       [96629, 77305, 89099, 70690, 70696],
       [96630, 84693, 85118, 82503, 80126]])

In [None]:
# Pair each product id with its neighbour array. This builds a 2-row frame
# (row 0 = product ids from d_scaled.index, row 1 = one neighbour array per product);
# the following cell transposes it into one row per product.
a=pd.DataFrame([d_scaled.index,indices])

In [None]:
# Flip to one row per product and give the two positional columns readable names
a = a.T.rename(columns={0:'product_id',1:'Neighbors'})

In [None]:
# Preview: each product_id with the row positions of its 5 nearest neighbours
a.head(10)

Unnamed: 0,product_id,Neighbors
0,0788602144,"[0, 3, 15, 39, 655]"
1,0967680654,"[1, 1005, 250, 363, 1161]"
2,0972179526,"[2, 10, 384, 856, 318]"
3,1566054109,"[3, 0, 15, 39, 655]"
4,3937406875,"[4, 68147, 43639, 83001, 94116]"
5,5552622685,"[5, 24141, 24627, 31716, 16773]"
6,630025075X,"[6, 1087, 756, 759, 14]"
7,6301248252,"[7, 578, 825, 1135, 1079]"
8,6301236947,"[8, 108, 1094, 57, 112]"
9,6301334175,"[9, 29, 501, 900, 987]"


In [None]:
# Descriptive attributes to attach to every product. `data_copy` is indexed by
# product_id, so the merge key is matched against that index on the right side.
cat = data_copy.loc[:, ['brand','main_cat','rating','category']]

# Left join onto the neighbour table so every row of `a` is kept
b = a.merge(cat, how='left', on='product_id')
b.head()

Unnamed: 0,product_id,Neighbors,brand,main_cat,rating,category
0,788602144,"[0, 3, 15, 39, 655]",Patrick Adiarte,Movies & TV,4.666667,"CDs & Vinyl,Classic Rock,British Invasion"
1,967680654,"[1, 1005, 250, 363, 1161]",Daniel Kruse,Movies & TV,4.6,"CDs & Vinyl,Christian,Pop & Contemporary"
2,972179526,"[2, 10, 384, 856, 318]",Dr. Harvey Karp,Movies & TV,4.470588,"CDs & Vinyl,Special Interest,Instructional"
3,1566054109,"[3, 0, 15, 39, 655]",Pat Benatar,Movies & TV,5.0,"CDs & Vinyl,Classic Rock,Album-Oriented Rock (..."
4,3937406875,"[4, 68147, 43639, 83001, 94116]",Dusty Springfield,Books,4.583333,"CDs & Vinyl,World Music,Europe"


In [None]:
# Shape of the merged neighbour/attribute table
b.shape

(96631, 6)

In [None]:
# Shape of the neighbour table, for comparison with the row count of `b`
a.shape

(96631, 2)

In [None]:
# Shape of the merged table again (repeats the earlier check)
b.shape

(96631, 6)

In [None]:
# Preview the merged table: product id, neighbour positions and product attributes
b.head(20)

Unnamed: 0,product_id,Neighbors,brand,main_cat,rating,category
0,0788602144,"[0, 3, 15, 39, 655]",Patrick Adiarte,Movies & TV,4.666667,"CDs & Vinyl,Classic Rock,British Invasion"
1,0967680654,"[1, 1005, 250, 363, 1161]",Daniel Kruse,Movies & TV,4.6,"CDs & Vinyl,Christian,Pop & Contemporary"
2,0972179526,"[2, 10, 384, 856, 318]",Dr. Harvey Karp,Movies & TV,4.470588,"CDs & Vinyl,Special Interest,Instructional"
3,1566054109,"[3, 0, 15, 39, 655]",Pat Benatar,Movies & TV,5.0,"CDs & Vinyl,Classic Rock,Album-Oriented Rock (..."
4,3937406875,"[4, 68147, 43639, 83001, 94116]",Dusty Springfield,Books,4.583333,"CDs & Vinyl,World Music,Europe"
5,5552622685,"[5, 24141, 24627, 31716, 16773]",Bruce Springsteen,Books,4.852941,"CDs & Vinyl,Classic Rock,Album-Oriented Rock (..."
6,630025075X,"[6, 1087, 756, 759, 14]",Michael Kamen,Movies & TV,4.44186,"CDs & Vinyl,Rock,Progressive,Progressive Rock"
7,6301248252,"[7, 578, 825, 1135, 1079]",The Police,Movies & TV,4.5,"CDs & Vinyl,Alternative Rock,New Wave & Post-P..."
8,6301236947,"[8, 108, 1094, 57, 112]",Bono,Movies & TV,4.333333,"CDs & Vinyl,Pop,Adult Alternative"
9,6301334175,"[9, 29, 501, 900, 987]",Pink Floyd,Movies & TV,4.47619,"CDs & Vinyl,Rock,Progressive,Progressive Rock"


In [None]:
# Number of products per main category.
# NOTE(review): the counts list HTML-escaped variants (e.g. "Toys &amp; Games")
# separately from their unescaped twins ("Toys & Games"), so those categories are split in two.
b['main_cat'].value_counts()

Digital Music                    68553
Office Products                  19954
All Electronics                   2613
Amazon Home                       1299
Movies & TV                       1253
Industrial & Scientific            566
Arts, Crafts & Sewing              512
Computers                          377
Tools & Home Improvement           368
Toys & Games                       238
Health & Personal Care             208
Books                              177
Sports & Outdoors                  107
Home Audio & Theater                52
Cell Phones & Accessories           51
All Beauty                          44
Automotive                          42
Camera & Photo                      36
Amazon Fashion                      35
Industrial &amp; Scientific         22
Baby                                22
Tools &amp; Home Improvement        20
Arts, Crafts &amp; Sewing           19
Toys &amp; Games                    16
Pet Supplies                         8
Musical Instruments      

In [None]:
# Defining function for the recommendation system
def recommended_products(product_id, catalog=None, min_rating=3):
  """Return the products recommended for a purchased product.

  The purchased product's details are printed, then its precomputed nearest
  neighbours are looked up and filtered.

  Parameters
  ----------
  product_id : str
      Identifier of the purchased product, as stored in the ``product_id`` column.
  catalog : pandas.DataFrame, optional
      Table with the columns ``product_id``, ``Neighbors``, ``brand``,
      ``main_cat``, ``rating`` and ``category``. ``Neighbors`` must hold, for
      each row, the positional row indices of its nearest neighbours within
      this same table. Defaults to the notebook-level table ``b``.
  min_rating : float, optional
      Only neighbours whose rating (rounded to 2 decimals) is strictly greater
      than this value are returned. Defaults to 3.

  Returns
  -------
  pandas.DataFrame
      Columns ``product_id``, ``main_cat``, ``category``, ``brand``, ``rating``;
      one row per recommended product, in neighbour order, with a fresh 0..n-1 index.

  Raises
  ------
  KeyError
      If ``product_id`` is not present in the catalog.
  """
  if catalog is None:
    catalog = b  # notebook-level neighbour/attribute table

  matches = catalog[catalog['product_id'] == product_id]
  if matches.empty:
    raise KeyError(f'Unknown product_id: {product_id!r}')
  purchased = matches.iloc[0]

  print('Purchased product is:',product_id,'\nMain Category:',purchased['main_cat'],'\nCategory:',purchased['category'],'\nBrand:',purchased['brand'],'\nRating:',purchased['rating'])
  print('Recommended Products are:')

  # Neighbours are stored as row positions, so they must be resolved with iloc
  neighbor_positions = [int(pos) for pos in purchased['Neighbors']]
  columns = ['product_id','main_cat','category','brand','rating']
  neighbors = catalog.iloc[neighbor_positions][columns]

  # The product is always one of its own nearest neighbours (not necessarily
  # the first, because identical feature vectors tie), so drop it by id
  # instead of by position. Recommendations stay within the same main category.
  keep = (neighbors['product_id'] != product_id) & (neighbors['main_cat'] == purchased['main_cat'])
  recommendations = neighbors[keep].copy()

  # Round first, then filter, so the threshold applies to the displayed value
  recommendations['rating'] = recommendations['rating'].astype(float).round(2)
  recommendations = recommendations[recommendations['rating'] > min_rating]

  return recommendations.reset_index(drop=True)

In [None]:
# Example: look up the recommendations for one product id; `x` holds the returned table
product_id = '0439394058'
x = recommended_products(product_id)
x

Purchased product is: 0439394058 
Main Category: Office Products 
Category: Office Products,Office &amp; School Supplies,Presentation Boards,Bulletin Boards 
Brand: Scholastic 
Rating: 4.625
Recommended Products are:


Unnamed: 0,product_id,main_cat,category,brand,rating
0,439509769,Office Products,"Office Products,Office &amp; School Supplies,P...",Scholastic,4.83
1,439731593,Office Products,"Office Products,Office &amp; School Supplies,P...",Scholastic,4.62
2,545040116,Office Products,"Office Products,Office &amp; School Supplies,P...",Scholastic,4.89
3,439394058,Office Products,"Office Products,Office &amp; School Supplies,P...",Scholastic,4.62
4,545118239,Office Products,"Office Products,Office &amp; School Supplies,P...",Scholastic,4.4


In [None]:
# Example with a low-rated purchased product: its own rating is only printed,
# while the rating filter is applied to the recommended rows

product_id = 'B0012Z9OOA'
x = recommended_products(product_id)
x

Purchased product is: B0012Z9OOA 
Main Category: All Electronics 
Category: Office Products,Office Electronics,Telephones & Accessories,Landline Phones,Cordless Telephones 
Brand: Uniden 
Rating: 2.0
Recommended Products are:


Unnamed: 0,product_id,main_cat,category,brand,rating
0,B0050MMHIG,All Electronics,"Office Products,Office Electronics,Telephones ...",Uniden,3.73
1,B000L00AUO,All Electronics,"Office Products,Office Electronics,Telephones ...",Uniden,4.0
2,B001W0Y4E0,All Electronics,"Office Products,Office Electronics,Telephones ...",Uniden,3.6
3,B001W0Y4GI,All Electronics,"Office Products,Office Electronics,Telephones ...",Uniden,4.75
4,B0012Z9OPE,All Electronics,"Office Products,Office Electronics,Telephones ...",Uniden,5.0


Thus, products similar to the one purchased by the customer can be recommended using this recommendation system.