In [None]:
import os
import json
import gzip
import pandas as pd
from urllib.request import urlopen
from tqdm import tqdm

In [None]:
# -nc (no-clobber): skip the download when Appliances.json.gz already exists,
# so re-running the cell neither re-fetches the archive nor leaves an
# 'Appliances.json.gz.1' copy next to it.
! wget -nc http://deepyeti.ucsd.edu/jianmo/amazon/categoryFiles/Appliances.json.gz

--2022-05-09 03:12:22--  http://deepyeti.ucsd.edu/jianmo/amazon/categoryFiles/Appliances.json.gz
Resolving deepyeti.ucsd.edu (deepyeti.ucsd.edu)... 169.228.63.50
Connecting to deepyeti.ucsd.edu (deepyeti.ucsd.edu)|169.228.63.50|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 69677301 (66M) [application/octet-stream]
Saving to: ‘Appliances.json.gz’


2022-05-09 03:12:24 (34.6 MB/s) - ‘Appliances.json.gz’ saved [69677301/69677301]



In [None]:
# The review dump is gzipped JSON-lines: decode one JSON object per line.
with gzip.open('Appliances.json.gz') as f:
    data = [json.loads(raw_line.strip()) for raw_line in f]
# One DataFrame row per decoded review record.
reviews = pd.DataFrame.from_dict(data)

In [None]:
# -nc (no-clobber): skip the download when meta_Appliances.json.gz already
# exists, so re-running the cell neither re-fetches the archive nor leaves a
# 'meta_Appliances.json.gz.1' copy next to it.
! wget -nc http://deepyeti.ucsd.edu/jianmo/amazon/metaFiles2/meta_Appliances.json.gz

--2022-05-09 03:12:26--  http://deepyeti.ucsd.edu/jianmo/amazon/metaFiles2/meta_Appliances.json.gz
Resolving deepyeti.ucsd.edu (deepyeti.ucsd.edu)... 169.228.63.50
Connecting to deepyeti.ucsd.edu (deepyeti.ucsd.edu)|169.228.63.50|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 59884788 (57M) [application/octet-stream]
Saving to: ‘meta_Appliances.json.gz’


2022-05-09 03:12:28 (33.6 MB/s) - ‘meta_Appliances.json.gz’ saved [59884788/59884788]



In [None]:
# Product metadata is gzipped JSON-lines as well: one JSON object per line.
with gzip.open('meta_Appliances.json.gz') as f:
    data = [json.loads(raw_line.strip()) for raw_line in f]
# One DataFrame row per product metadata record.
network = pd.DataFrame.from_dict(data)

In [None]:
# Distinct product ASINs, in order of first appearance in the metadata.
nodes = pd.unique(network['asin'])

In [None]:
# Inspect the metadata frame: column names, non-null counts and dtypes.
network.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30445 entries, 0 to 30444
Data columns (total 19 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   category         30445 non-null  object
 1   tech1            30445 non-null  object
 2   description      30445 non-null  object
 3   fit              30445 non-null  object
 4   title            30445 non-null  object
 5   also_buy         30445 non-null  object
 6   tech2            30445 non-null  object
 7   brand            30445 non-null  object
 8   feature          30445 non-null  object
 9   rank             30445 non-null  object
 10  also_view        30445 non-null  object
 11  details          30445 non-null  object
 12  main_cat         30445 non-null  object
 13  similar_item     30445 non-null  object
 14  date             30445 non-null  object
 15  price            30445 non-null  object
 16  asin             30445 non-null  object
 17  imageURL         30445 non-null

In [None]:
import networkx as nx
# Undirected product graph: ASINs are nodes; the "also_buy" / "also_view"
# lists from the metadata are added as weighted edges further down.
G = nx.Graph()

In [None]:
# Seed the graph with every catalogued ASIN so that products without any
# also_buy / also_view link still exist as (isolated) nodes.
G.add_nodes_from(nodes)

In [None]:
# Add one edge per "also bought" (weight 1) and "also viewed" (weight 2) link.
#
# The metadata is walked once instead of running two full boolean scans of
# `network` per node. Keeping only the first row of each ASIN reproduces the
# previous `.iloc[0]` lookup and the previous node order.
#
# NOTE(review): adding an existing edge overwrites its weight, so a pair that
# is linked through both lists (or from both endpoints) keeps whichever weight
# was written last - confirm this is the intended precedence.
first_rows = network.drop_duplicates(subset='asin', keep='first')
for n, also_buy, also_view in zip(first_rows['asin'],
                                  first_rows['also_buy'],
                                  first_rows['also_view']):
  for p in also_buy:
    if p is not None:
      G.add_edge(n, p, weight = 1)
  for p in also_view:
    if p is not None:
      G.add_edge(n, p, weight = 2)

In [None]:
# Network-based candidate generation.
#
# For every node, each neighbour is scored as
#     degree_centrality(neighbour) / edge_weight(node, neighbour)
# and the five best-scoring neighbours are kept as [asin, score] pairs.
# A node without neighbours maps to the sentinel string "No related product!".
#
# Every non-empty entry now has the same [asin, score] shape. Previously nodes
# with one or two neighbours stored bare ASIN strings, so consumers that index
# `entry[0]` received the first *character* of the ASIN instead of the ASIN.
degree_centrality = nx.degree_centrality(G)
bar = list(G.nodes)
recommendations = {}
for node in tqdm(bar):
  # A product that lists itself creates a self-loop; never recommend a
  # product to itself.
  connections = [con_node for con_node in G.neighbors(node) if con_node != node]
  if not connections:
    recommendations[node] = "No related product!"
    continue
  scores = [
    [con_node, degree_centrality[con_node] / G.get_edge_data(node, con_node)['weight']]
    for con_node in connections
  ]
  scores = pd.DataFrame(scores, columns= ["Items","Scores"])
  scores = scores.sort_values(by="Scores",ascending = False)
  # head() keeps at most five rows, so short neighbour lists pass through whole.
  recommendations[node] = scores.head().values.tolist()

100%|██████████| 82394/82394 [00:20<00:00, 4009.49it/s] 


In [None]:

import torch

In [None]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
# The stop-word corpus must be present before it is read on the next line.
nltk.download('stopwords')
# English stop words as a set; used later to filter review tokens.
stopWords = set(stopwords.words('english'))
# Tokenizer data - presumably what word_tokenize relies on; verify against the
# installed NLTK version.
nltk.download('punkt')

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/rhea2809/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/rhea2809/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Lexicon download for the VADER analyzer instantiated in a later cell.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/rhea2809/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [None]:
# One VADER analyzer instance, reused for every review that gets scored.
sid = SentimentIntensityAnalyzer()
# `df` is a second name for the reviews frame (plain assignment, no copy).
df = reviews

In [None]:
# Re-rank each product's network-based candidates by the mean VADER compound
# score of the candidate's reviews (0 when the candidate has no reviews).
#
# Differences from the naive per-candidate scan:
#   * review texts are grouped by ASIN once, instead of filtering the whole
#     review frame for every candidate;
#   * each ASIN is scored once and cached, since popular products are
#     candidates for many different keys;
#   * the notebook-level `reviews` frame is no longer overwritten by a local
#     list, so the cell can be re-run without reloading the data.

# asin -> list of review texts, in original row order.
reviews_by_asin = df.groupby('asin', sort=False)['reviewText'].apply(list).to_dict()
# asin -> mean compound score, filled lazily.
sentiment_cache = {}


def _mean_review_sentiment(asin):
  """Return the mean VADER compound score over all reviews of `asin`.

  Returns 0 when the product has no reviews. Results are memoised in
  `sentiment_cache`.
  """
  if asin not in sentiment_cache:
    texts = reviews_by_asin.get(asin, [])
    review_score = 0
    for text in texts:
      # NOTE(review): NLTK's English stop-word list appears to include
      # negations such as "not"/"no", which VADER relies on; dropping them
      # before scoring may skew the result. Behaviour kept as-is - confirm.
      words_filtered = [w for w in word_tokenize(str(text)) if w not in stopWords]
      review_score += sid.polarity_scores(' '.join(words_filtered))['compound']
    if len(texts) > 0:
      review_score /= len(texts)
    sentiment_cache[asin] = review_score
  return sentiment_cache[asin]


final_recommendations_dict={}
for key,value in tqdm(recommendations.items()):
  if value == "No related product!":
    final_recommendations_dict[key]=["No related product!"]
    continue

  final_recommendations=[]
  for recommended_product in value:
    # A candidate may be a bare ASIN string rather than an [asin, score]
    # pair; indexing a string with [0] would yield its first character, so
    # wrap it (network score unknown -> None).
    if isinstance(recommended_product, str):
      recommended_product = [recommended_product, None]
    review_score = _mean_review_sentiment(recommended_product[0])
    final_recommendations.append((recommended_product,review_score))
  # Best sentiment first.
  final_recommendations.sort(key = lambda x: x[1], reverse = True)
  final_recommendations_dict[key] = final_recommendations

100%|██████████| 82394/82394 [6:22:21<00:00,  3.59it/s]       


In [None]:
# Preview only the first few entries: the dictionary holds one entry per graph
# node, and dumping all of them bloats the notebook output.
list(final_recommendations_dict.values())[:10]

[['No related product!'],
 [('B004RUGHJW', 0)],
 [('B01HCAVSLK', 0), ('1632206579', 0)],
 ['No related product!'],
 ['No related product!'],
 [(['B000BQ8D7A', 0.001407886592307599], 0.4613542857142858),
  (['B000FPDO4Y', 0.0012986540118699404], 0.3877875),
  (['B000FP8LK6', 0.0012622431517240542], 0.3746416666666667),
  (['B00FI6U0GU', 0.0011530105712863957], 0.36324999999999996),
  (['B00NPZH820', 0.000618984622480065], 0.245375)],
 [(['B000GAQG5C', 0.001250106198342092], 0.5604333333333333),
  (['B000BQ8D7A', 0.001407886592307599], 0.4613542857142858),
  (['B004LYLP7I', 0.0011408736179044335], 0.4181677966101696),
  (['B000FP8LK6', 0.0012622431517240542], 0.3746416666666667),
  (['B00IPUA5ZW', 0.0009709562705569648], 0.34224848484848486)],
 ['No related product!'],
 [('B00002N7IL', 0)],
 [(['B000BQ8D7A', 0.0007039432961537995], 0.4613542857142858),
  (['B00MHOFTLM', 0.0006675324360079132], 0.39068260869565224),
  (['B000FP8LK6', 0.0006311215758620271], 0.3746416666666667),
  (['B00IP

In [None]:
import json

In [None]:
# Persist the re-ranked recommendations; tuples are serialised as JSON arrays.
with open('recommendations_dict.json', mode="w") as out_file:
  json.dump(final_recommendations_dict, out_file)

In [None]:
# Read the saved recommendations back to check the JSON round trip.
with open("recommendations_dict.json", mode="r") as in_file:
  data = json.loads(in_file.read())

In [None]:
# Sixth key of the reloaded mapping (iterating a dict yields its keys).
list(data)[5]

'B00002N7HY'

In [None]:
# Preview only the first few reloaded entries instead of dumping the whole
# mapping into the notebook output.
list(data.values())[:10]

In [None]:
def get_recommendation(product,info=False,path="recommendations_dict.json",catalog=None):
  """Print and return the stored recommendations for one product.

  Args:
    product: ASIN of the product to look up.
    info: when truthy, also print each recommendation's sentiment score and
      network-based score.
    path: JSON file holding the ``{asin: recommendations}`` mapping.
    catalog: DataFrame with ``asin``, ``title`` and ``price`` columns;
      defaults to the notebook-level ``network`` frame.

  Returns:
    ``-1`` when `product` is not in the catalog; the string
    ``'No similar products recommended'`` when nothing is stored for it;
    otherwise the stored list of ``[item, sentiment]`` entries, where `item`
    is either ``[asin, network_score]`` or a bare ASIN string.
  """
  if catalog is None:
    catalog = network

  def title_and_price(asin):
    """Return (title, price) of the first catalog row for `asin`, or None."""
    rows = catalog.loc[catalog['asin'] == asin, ['title', 'price']]
    if rows.empty:
      return None
    return rows['title'].values[0], rows['price'].values[0]

  # Validate the product first, so an unknown ASIN yields the
  # 'No such product' result instead of a KeyError on the mapping lookup.
  chosen = title_and_price(product)
  if chosen is None:
    print('No such product')
    return -1
  print('\nChosen Product:',chosen[0],chosen[1])

  with open(path, "r") as f:
    data = json.load(f)
  d = data.get(product)
  if not d or d[0]=='No related product!':
    return 'No similar products recommended'

  for entry in d:
    item, sentiment = entry[0], entry[1]
    # Entries may carry a bare ASIN instead of an [asin, score] pair;
    # indexing the string would return single characters.
    if isinstance(item, str):
      asin, network_score = item, None
    else:
      asin, network_score = item[0], item[1]

    if info:
      print('\nProduct asin: ',asin,' sentiment score: ',sentiment,' network based score: ',network_score)
    else:
      print('\nProduct asin: ',asin)

    rec = title_and_price(asin)
    if rec is not None:
      print(rec[0],rec[1])
  return d

In [None]:
# Compact listing for one product: titles and prices only, no scores.
r = get_recommendation('B00006JKZX', info=False)


Chosen Product: Sanyo BC1206 Kegerator Beer Cooler 

Product asin:  B0170O0D82
EdgeStar KC2000TWIN Full Size Dual Tap Kegerator &amp; Draft Beer Dispenser - Black $539.00

Product asin:  B00655HJJE
EdgeStar KC2000SSTWIN Full Size Stainless Steel Dual Tap Kegerator &amp; Draft Beer Dispenser - Stainless Steel $56.62

Product asin:  B014LGBJVC
EdgeStar KC1000SS Craft Brew Kegerator for 1/6 Barrel and Cornelius Kegs $35.68

Product asin:  B0170NWHDM
EdgeStar KC2000 Full Size Kegerator and Keg Beer Cooler $34.45

Product asin:  B000HJVYDQ
Nostalgia KRS2100 5.1 Cu.Ft. Full Size Kegorator Draft Beer Dispenser $34.45


In [None]:
# Strip surrounding whitespace from the typed ASIN: a stray space or newline
# would otherwise make the lookup fail for a valid product.
a = input('Enter Product Asin ').strip()
r = get_recommendation(a, True)

Enter Product Asin B00002N7HY

Chosen Product: Leviton 5050 B01-0-000 Electrical Receptacle, 125/250 Vac, 50 A, 3 Pole, 3 Wire, Pack of 1, Black $6.98

Product asin:  B000BQ8D7A  sentiment score:  0.4613542857142858  network based score:  0.001407886592307599
Southwire 09016 50-Amp 3-Wire Range Power Cord, 6-Foot $13.87

Product asin:  B000FPDO4Y  sentiment score:  0.3877875  network based score:  0.0012986540118699404
EATON WD125 3-Pole 3-Wire 30-Amp 125-Volt Surface Mount Dryer Power Receptacle, Black $9.04

Product asin:  B000FP8LK6  sentiment score:  0.3746416666666667  network based score:  0.0012622431517240542
EATON WD112 50-Amp 125-Volts 2-Pole 3-Wire Surface Mount Range Power Receptacle, Black $9.49

Product asin:  B00FI6U0GU  sentiment score:  0.36324999999999996  network based score:  0.0011530105712863957
General Electric WX09X10006 3 Wire 40amp Range Cord, 4-Feet $15.15

Product asin:  B00NPZH820  sentiment score:  0.245375  network based score:  0.000618984622480065
GE WX

## ***DEMO***

In [None]:
# Strip surrounding whitespace from the typed ASIN: a stray space or newline
# would otherwise make the lookup fail for a valid product.
a = input('Enter Product Asin ').strip()
r = get_recommendation(a, True)

Enter Product Asin B00006JKZX

Chosen Product: Sanyo BC1206 Kegerator Beer Cooler 

Product asin:  B0170O0D82  sentiment score:  0.612653846153846  network based score:  0.0010923258043765854
EdgeStar KC2000TWIN Full Size Dual Tap Kegerator &amp; Draft Beer Dispenser - Black $539.00

Product asin:  B00655HJJE  sentiment score:  0.6008254237288134  network based score:  0.0011105312344495285
EdgeStar KC2000SSTWIN Full Size Stainless Steel Dual Tap Kegerator &amp; Draft Beer Dispenser - Stainless Steel $56.62

Product asin:  B014LGBJVC  sentiment score:  0.5693756756756758  network based score:  0.0008313813066644011
EdgeStar KC1000SS Craft Brew Kegerator for 1/6 Barrel and Cornelius Kegs $35.68

Product asin:  B0170NWHDM  sentiment score:  0.553162910798122  network based score:  0.0006796693893898753
EdgeStar KC2000 Full Size Kegerator and Keg Beer Cooler $34.45

Product asin:  B000HJVYDQ  sentiment score:  0.4332967611336033  network based score:  0.0006432585292439892
Nostalgia KRS21