In [None]:
!curl https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_Electronics_v1_00.tsv.gz --output amazon_reviews_us_Electronics_v1_00.tsv.gz

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  666M  100  666M    0     0  14.9M      0  0:00:44  0:00:44 --:--:-- 16.0M


In [None]:
# Standard library
import warnings

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.pyplot import figure

# Notebook-wide settings: seaborn "whitegrid" plot style, then silence every
# Python warning for the rest of the session (this also hides deprecation notices).
sns.set_style("whitegrid")
warnings.filterwarnings('ignore')

In [None]:
# Load only the columns the recommender needs from the gzip-compressed, tab-separated dump.
# Explicit dtypes avoid per-column type inference on a multi-million-row file.
#
# on_bad_lines='skip' drops rows with too many fields instead of aborting the load.
# It replaces error_bad_lines=False, which was deprecated in pandas 1.3 and removed in
# pandas 2.0 (where it raises TypeError). Requires pandas >= 1.3.
df = pd.read_csv(
    'amazon_reviews_us_Electronics_v1_00.tsv.gz',
    sep='\t',
    compression='gzip',
    on_bad_lines='skip',
    usecols=['customer_id', 'product_id', 'star_rating', 'product_title', 'verified_purchase'],
    dtype={'customer_id': 'int64', 'product_id': 'str', 'star_rating': 'float32', 'product_title': 'str'},
)

In [None]:
# Sanity check: (rows, columns) of the loaded review frame.
print(df.shape)

(3091103, 5)


In [None]:
# Preview the first rows to confirm the selected columns were parsed as expected.
df.head()

Unnamed: 0,customer_id,product_id,product_title,star_rating,verified_purchase
0,41409413,B00428R89M,yoomall 5M Antenna WIFI RP-SMA Female to Male ...,5.0,Y
1,49668221,B000068O48,"Hosa GPM-103 3.5mm TRS to 1/4"" TRS Adaptor",5.0,Y
2,12338275,B000GGKOG8,Channel Master Titan 2 Antenna Preamplifier,5.0,Y
3,38487968,B000NU4OTA,LIMTECH Wall charger + USB Hotsync & Charging ...,1.0,Y
4,23732619,B00JOQIO6S,Skullcandy Air Raid Portable Bluetooth Speaker,5.0,Y


In [None]:
# Reviews without a product title cannot be grouped by title, so drop them first.
combine_product_rating = df.dropna(subset=['product_title'])

# Count the non-null star ratings each product title received and expose the
# result as a two-column frame: product_title, totalRatingCount.
product_ratingCount = (
    combine_product_rating
    .groupby(['product_title'])['star_rating']
    .count()
    .rename('totalRatingCount')
    .reset_index()
    [['product_title', 'totalRatingCount']]
)
product_ratingCount.head()

Unnamed: 0,product_title,totalRatingCount
0,Burst Variable Voltage Mod Battery,2
1,Model A RBX10 Programmable End Of Transmissi...,1
2,# 12ft 3.5mm Plug Jack to 2 RCA Male Stereo Au...,1
3,# EX5162-G13 Deluxe High Back Black Glove Soft...,1
4,#1 BEST SELLING Waterproof Bluetooth Speaker w...,50


In [None]:
# Attach each title's total rating count to every review row.
# A left join keeps every review from combine_product_rating.
rating_with_totalRatingCount = pd.merge(
    combine_product_rating,
    product_ratingCount,
    on='product_title',
    how='left',
)
rating_with_totalRatingCount.head()

Unnamed: 0,customer_id,product_id,product_title,star_rating,verified_purchase,totalRatingCount
0,41409413,B00428R89M,yoomall 5M Antenna WIFI RP-SMA Female to Male ...,5.0,Y,121
1,49668221,B000068O48,"Hosa GPM-103 3.5mm TRS to 1/4"" TRS Adaptor",5.0,Y,149
2,12338275,B000GGKOG8,Channel Master Titan 2 Antenna Preamplifier,5.0,Y,409
3,38487968,B000NU4OTA,LIMTECH Wall charger + USB Hotsync & Charging ...,1.0,Y,776
4,23732619,B00JOQIO6S,Skullcandy Air Raid Portable Bluetooth Speaker,5.0,Y,122


In [None]:
# Show floats with three decimals from here on, then summarise how many ratings
# a product title typically receives.
pd.options.display.float_format = '{:.3f}'.format
print(product_ratingCount['totalRatingCount'].describe())

count   167865.000
mean        18.414
std        158.627
min          1.000
25%          1.000
50%          2.000
75%          6.000
max      24810.000
Name: totalRatingCount, dtype: float64


In [None]:
# Minimum number of ratings a product title must have to be considered, so that
# sparsely rated products (e.g. a single 5-star review) do not drive recommendations.
popularity_threshold = 500

# Keep reviews of sufficiently rated titles, and of those only ratings of 3 stars or more.
rating_popular_product = rating_with_totalRatingCount.loc[
    (rating_with_totalRatingCount['totalRatingCount'] >= popularity_threshold)
    & (rating_with_totalRatingCount['star_rating'] >= 3)
]
rating_popular_product.head()

Unnamed: 0,customer_id,product_id,product_title,star_rating,verified_purchase,totalRatingCount
15,38472651,B00B5QNGN6,JBL Ultra-Portable Speaker with Built-In Bass ...,5.0,Y,1486
21,13007636,B00FZ1AAUE,MOUNT FACTORY Articulating Swivel Full Motion ...,4.0,Y,513
26,40862185,B007SP2CO2,Monoprice 108323 Premium Hi-Fi DJ Style Over-t...,3.0,Y,799
32,7899951,B00HNKTVB4,JBL Flip 2 Portable Bluetooth Speaker,5.0,Y,1279
35,52886527,B005K2TXMO,FiiO D3 (D03K) Digital to Analog Audio Convert...,5.0,Y,1526


In [None]:
# How many review rows survive the popularity and star-rating filter.
rating_popular_product.shape

(961069, 6)

In [None]:
# Build the item-by-user rating table: one row per product title, one column per
# customer_id, each cell holding that customer's star rating for the title
# (pivot_table's default aggregation is applied when a pair occurs more than once).
# Pairs with no rating are filled with 0.
# Reference: https://stackoverflow.com/questions/27738398/simple-pivot-table-of-pandas-dataframe
product_features_df = pd.pivot_table(
    rating_popular_product,
    index='product_title',
    columns='customer_id',
    values='star_rating',
).fillna(0)
product_features_df.head()

customer_id,10036,10048,10223,10276,10315,10348,10390,10449,10469,10610,10644,10684,10730,10734,10746,10981,10998,11008,11077,11153,11193,11237,11497,11498,11804,11813,11822,12009,12024,12116,12193,12713,12743,12799,12845,12885,12917,12933,13147,13242,...,53095542,53095605,53095652,53095656,53095659,53095663,53095669,53095685,53095704,53095720,53095725,53095774,53095826,53095835,53095873,53095879,53095883,53095887,53095912,53095916,53095927,53095929,53095958,53095970,53096055,53096117,53096191,53096194,53096229,53096234,53096338,53096363,53096367,53096379,53096397,53096454,53096471,53096482,53096520,53096567
product_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
(Create a generic Title per Amazons guidelines),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
(NEW) AYL Portable Bluetooth Wireless Version 4.0 Rechargeable Mini Speaker System for PC / Cell Phone / Tablet / MP3 Player + 3 Year Warranty,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1000 pcs White CD DVD Paper Sleeves Envelopes with Flap and Clear Window,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18650 3000mAh 3.7V Rechargeable Li-Ion Battery + Charger Combo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"1byone Amplified HDTV Antenna, with Detachable Amplifier Signal Booster for the Highest Performance and 10 Feet Coaxial Cable-Black",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# The pivot table is stored as a compressed sparse row matrix for the model.
product_features_df_matrix = csr_matrix(product_features_df.values)

# Brute-force nearest-neighbour search over product rows using cosine distance,
# on all available cores. fit() returns the estimator itself.
model_knn = NearestNeighbors(
    algorithm='brute',
    metric='cosine',
    n_jobs=-1,
).fit(product_features_df_matrix)
model_knn

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=-1, n_neighbors=5, p=2, radius=1.0)

In [None]:
# (number of product titles, number of distinct customers) in the item-by-user table.
product_features_df.shape

(828, 792685)

In [None]:
# Pick the product to generate recommendations for.
# A seeded generator makes the pick (and the recommendations printed from it)
# repeatable on Restart & Run All. Set QUERY_SEED = None to draw a different
# product on every run.
QUERY_SEED = 42
# Number of recommendations wanted for the queried product.
N_RECOMMENDATIONS = 5

query_rng = np.random.RandomState(QUERY_SEED)
query_index = query_rng.choice(product_features_df.shape[0])
print(query_index)

# Ask for one neighbour more than needed: the queried product is part of the
# fitted data, so it is normally returned as its own nearest neighbour.
query_vector = product_features_df.iloc[query_index, :].values.reshape(1, -1)
distances, indices = model_knn.kneighbors(query_vector, n_neighbors=N_RECOMMENDATIONS + 1)

666


In [None]:
# Re-display the first rows of the item-by-user table (same view as shown right after it was built).
product_features_df.head()

customer_id,10036,10048,10223,10276,10315,10348,10390,10449,10469,10610,10644,10684,10730,10734,10746,10981,10998,11008,11077,11153,11193,11237,11497,11498,11804,11813,11822,12009,12024,12116,12193,12713,12743,12799,12845,12885,12917,12933,13147,13242,...,53095542,53095605,53095652,53095656,53095659,53095663,53095669,53095685,53095704,53095720,53095725,53095774,53095826,53095835,53095873,53095879,53095883,53095887,53095912,53095916,53095927,53095929,53095958,53095970,53096055,53096117,53096191,53096194,53096229,53096234,53096338,53096363,53096367,53096379,53096397,53096454,53096471,53096482,53096520,53096567
product_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
(Create a generic Title per Amazons guidelines),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
(NEW) AYL Portable Bluetooth Wireless Version 4.0 Rechargeable Mini Speaker System for PC / Cell Phone / Tablet / MP3 Player + 3 Year Warranty,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1000 pcs White CD DVD Paper Sleeves Envelopes with Flap and Clear Window,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18650 3000mAh 3.7V Rechargeable Li-Ion Battery + Charger Combo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"1byone Amplified HDTV Antenna, with Detachable Amplifier Signal Booster for the Highest Performance and 10 Feet Coaxial Cable-Black",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Print the nearest neighbours of the queried product as a ranked list.
# Flatten once up front instead of on every loop iteration.
neighbor_indices = indices.flatten()
neighbor_distances = distances.flatten()

# One slot of the result is reserved for the queried product itself.
max_recommendations = len(neighbor_indices) - 1

if len(neighbor_indices) > 0:
    print('Recommendations for {0}:\n'.format(product_features_df.index[query_index]))

rank = 0
for neighbor_index, neighbor_distance in zip(neighbor_indices, neighbor_distances):
    # Skip the queried product by index rather than by position: when another
    # product ties with it on distance, it is not guaranteed to come back first.
    if neighbor_index == query_index:
        continue
    if rank == max_recommendations:
        break
    rank += 1
    print('{0}: {1}, with distance of {2}:'.format(rank, product_features_df.index[neighbor_index], neighbor_distance))

Recommendations for Sony ICF-C318 Automatic Time Set Clock Radio with Dual Alarm (White) (Discontinued by Manufacturer):

1: Sony ICF-C318 Clock Radio with Dual Alarm (Black) (Discontinued by Manufacturer), with distance of 0.9901924729347229:
2: Koss KTXPRO1 Titanium Portable Headphones with Volume Control, with distance of 0.9921896457672119:
3: Sony ICFC218 Dream Machine Clock Radio (Black), with distance of 0.994217574596405:
4: Sangean RCR-5 Digital AM/FM Clock Radio, with distance of 0.9945687651634216:
5: SanDisk Sansa m230 512 MB MP3 Player, with distance of 0.9946478009223938:
