In [1]:
import os
import ujson
import gzip
import pandas as pd
import gc
from surprise import Reader
from surprise import Dataset
from surprise.model_selection import GridSearchCV
from surprise import SVD
from collections import defaultdict
import operator
from surprise.model_selection import KFold
from surprise import accuracy
from surprise import KNNWithMeans

In [2]:
# Location of the pickled review DataFrame. Set the ALDA_DATA_PATH environment
# variable to run the notebook on another machine; the default is the
# original author's local path.
DATA_PATH = os.environ.get(
    "ALDA_DATA_PATH",
    "/Users/sidhantarora/work/ALDA/Project/data_nov_8.pkl",
)

# NOTE: unpickling can execute arbitrary code -- only load files you trust.
data = pd.read_pickle(DATA_PATH)

In [3]:
# List the columns available in the loaded frame.
data.columns

Index(['overall', 'reviewerID', 'asin', 'unixReviewTime', 'category',
       'description', 'title', 'also_buy', 'brand', 'feature', 'main_cat',
       'date', 'price', 'review_summary_combined', 'rating_category', 'time'],
      dtype='object')

In [4]:
# Peek at the `description` value of the first row.
data.description.iloc[0]

['The videosecu TV mount is a mounting solution for most 22"-47" LCD LED Plasma TV and some LED up to 55" with VESA 600x400mm (24"x16"), 400x400mm (16"x16"),600x300mm(24"x12"), 400x200mm (16"x8"),300x300mm (12"x12"),300x200mm(12"x8"),200x200mm (8"x8"),200x100mm (8"x4") mounting hole pattern .Heavy gauge steel construction provides safety loading up to 66lbs display .It can tilt 15 degree forward or backward and swivel 180 degree. The removable VESA plate can be taken off for easy installation. Post-installation level adjustment allows the TV to perfectly level. The on arm cable management ring system design, guides wires and prevent cable pinching. Standard hardware and user manual included. <br />Notice: This mount fits most 22-47" TVs (VESA 200x200mm (8x8")/200x100mm(8x4")/100x100mm(4x4") without extender, fits VESA 600x400(24x16")/400x400mm(16x16")/600x300mm(24x12")/400x200mm(16x8")/300x300mm(12x12")/300x200mm(12x8")with 4 plate extender), some up to 50 55 inch TVs manufactured in r

In [5]:
# Peek at the `feature` value of the first row.
data.feature.iloc[0]

['Fits most 22" to 47" HDTV and some up to 55" LED TV (check VESA and weight)',
 'Fits VESA (mounting hole patterns) 100x100/200x100/200x200mm without extender, and also fits VESA 600x400/400x400/600x300/400x200/300x300/300x200mm with 4 plate extender',
 'Heavy-duty steel construction, loading capacity up to 66 lbs',
 'Adjustable tilt +/-15 degrees, swivel 180 degrees, extends 24" from the wall',
 'VESA plate can be taken off, quick release design for easy installation; Post-installation level adjustment allows the TV to perfectly level']

In [6]:
# Independent copy holding only the item, user and rating columns.
newData = data.loc[:, ['asin', 'reviewerID', 'overall']].copy()


In [7]:
# Show the first five rows (five is the default for head()).
newData.head()

Unnamed: 0,asin,reviewerID,overall
246,972683275,A1KECIKIWDB4HH,4.0
247,972683275,A2MQ47BBL7F6B6,5.0
248,972683275,ANWW7PT6Z6MHD,5.0
249,972683275,A2DEU0B3AUINV9,5.0
250,972683275,AE8R1JSMJYIU,4.0


In [8]:
# Distinct rating values present in the data.
set(newData.overall)

{1.0, 2.0, 3.0, 4.0, 5.0}

In [9]:
# Switch to the itemID / userID / rating column names used from here on.
newData = newData.rename(
    columns=dict(asin='itemID', reviewerID='userID', overall='rating')
)

In [10]:
# Display the renamed frame (the rendered output elides the middle rows).
newData

Unnamed: 0,itemID,userID,rating
246,0972683275,A1KECIKIWDB4HH,4.0
247,0972683275,A2MQ47BBL7F6B6,5.0
248,0972683275,ANWW7PT6Z6MHD,5.0
249,0972683275,A2DEU0B3AUINV9,5.0
250,0972683275,AE8R1JSMJYIU,4.0
...,...,...,...
3368220,B01HISA452,AG8SMUXFYGYH8,4.0
3368221,B01HISA452,AB443G89K25H2,5.0
3368222,B01HISA452,A1X6MABURKFRQA,5.0
3368223,B01HISA452,A3NJEEYRMW3FJR,5.0


In [11]:
# Ratings in this dataset lie on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))

# Build the Surprise dataset from the pandas frame; the columns are passed
# in user, item, rating order.
surprise_data = Dataset.load_from_df(
    newData.loc[:, ["userID", "itemID", "rating"]],
    reader,
)

### Finding the best model for item-item filtering

In [12]:
# Similarity options to search over. Each key maps to a list of candidate
# values; every list here holds a single value, so the grid has one point.
# NOTE(review): the heading above says "item-item", but user_based=[True]
# requests user-user similarities -- confirm which one is intended.
param = {
    "name": ["cosine"],
    "min_support": [3],
    "user_based": [True],
}
# The similarity options are nested under the "sim_options" key of the grid.
param_grid = {"sim_options": param}

In [13]:
# Grid search over KNNWithMeans with 5-fold cross-validation, scored by RMSE.
memory_based = GridSearchCV(KNNWithMeans, param_grid, measures=["rmse"], cv=5)

In [None]:
# Run the cross-validated search on the ratings dataset.
memory_based.fit(surprise_data)

In [None]:
# Best RMSE found by the search, then the parameters that produced it.
print(
    memory_based.best_score["rmse"],
    memory_based.best_params["rmse"],
    sep="\n",
)

In [None]:
# Tabulate the cross-validation results, one column per key of cv_results.
evaluation = pd.DataFrame(memory_based.cv_results)

In [None]:
# Display the grid-search results table.
evaluation

### Testing on Common Data

In [None]:
# Fixed configuration for this comparison: MSD similarity, min_support of 12,
# and item-based neighbourhoods (user_based=False).
sim_options = {'name': 'msd', 'min_support': 12, 'user_based': False}

In [None]:
# KNNWithMeans instance using the fixed similarity options above.
test_model = KNNWithMeans(sim_options=sim_options)

In [None]:
# 5-fold cross-validation of test_model; the same instance is refit per fold.
kf = KFold(n_splits=5)

for trainset, testset in kf.split(surprise_data):
    # Fit on this fold's training split, then predict its held-out split.
    test_model.fit(trainset)
    predictions = test_model.test(testset)

    # Report this fold's Root Mean Squared Error.
    accuracy.rmse(predictions, verbose=True)

### Using Best Parameters

In [None]:
# Estimator configured with the parameters that gave the best RMSE in the search.
best = memory_based.best_estimator['rmse']

In [None]:
# Refit the selected estimator on the full trainset built from every rating.
best.fit(surprise_data.build_full_trainset())

In [None]:
# Predict the rating for one pair; arguments are the user ID, then the item ID.
best.predict("A28T6TZRAJF7J5","B01HIY64XM")

### Generating Recommendations

In [None]:
# Unique item IDs across all ratings, and how many there are.
all_items = list(set(newData.itemID))
len(all_items)

In [None]:
def get_rating_predictions(user_id):
    """Estimate ``user_id``'s rating for every item in ``all_items``.

    Returns a ``defaultdict(int)`` mapping each item ID to the ``est`` field
    of ``best.predict(user_id, item)``.
    """
    estimates = defaultdict(int)
    estimates.update(
        (item_id, best.predict(user_id, item_id).est) for item_id in all_items
    )
    return estimates

In [None]:
# Number of distinct reviewers.
len(set(data['reviewerID']))

In [None]:
# Number of distinct items.
len(set(data['asin']))

In [None]:
# Total number of rows in the loaded frame.
len(data)

In [None]:
def reviewed_items(user_id):
    """Return the set of item IDs (``asin``) that ``user_id`` has reviewed.

    Args:
        user_id: Value to match against the ``reviewerID`` column of ``data``.

    Returns:
        set: The ``asin`` values of all matching rows; empty when the user
        has no rows in ``data``.
    """
    # One vectorized comparison replaces the Python loop that performed two
    # ``iloc`` row lookups for every row of ``data``.
    is_user_row = data['reviewerID'] == user_id
    return set(data.loc[is_user_row, 'asin'])

In [None]:
# Example: the items reviewed by one user.
reviewed_items("A28T6TZRAJF7J5")

### Mapping Item ID to Product Name

In [None]:
# Lookup from item ID (asin) to product title; unknown IDs yield "".
# Built in one pass over the two columns instead of per-row ``iloc`` lookups.
# When an asin appears on several rows, the title from the last row wins,
# exactly as with the original row-by-row assignment.
itemID_to_name = defaultdict(str, zip(data['asin'], data['title']))

In [None]:
def get_recommendation(user_id, n=10):
    """Return titles of the top predicted items the user has not reviewed.

    Predicted ratings for every item come from ``get_rating_predictions``;
    items returned by ``reviewed_items`` for this user are skipped.

    Args:
        user_id: Reviewer ID to recommend for.
        n: Maximum number of titles to return (default 10).

    Returns:
        list: Up to ``n`` product titles from ``itemID_to_name``, ordered by
        predicted rating, highest first.
    """
    item_rating = get_rating_predictions(user_id)
    already_bought = reviewed_items(user_id)
    sorted_items = sorted(item_rating.items(), key=operator.itemgetter(1), reverse=True)
    # Diagnostic output kept from the original: the raw top 10, before
    # already-reviewed items are filtered out.
    print(sorted_items[:10])

    items_to_suggest = []

    for item_id, _predicted_rating in sorted_items:
        if len(items_to_suggest) >= n:
            break

        # Compare the item ID itself. The original tested the whole
        # (item_id, rating) tuple against a set of ID strings, which never
        # matches, so already-reviewed items were never removed.
        if item_id not in already_bought:
            items_to_suggest.append(itemID_to_name[item_id])

    return items_to_suggest

#### Items purchased by user A3AKVALGT4Y02G

In [None]:
# Print the title of each item this user has already reviewed.
bought_items = reviewed_items("A3AKVALGT4Y02G")
for bought_id in bought_items:
    print(itemID_to_name[bought_id])

In [None]:
# Recommended product titles for the same user.
get_recommendation("A3AKVALGT4Y02G")