# Sample Code

## 基礎建設

In [76]:
import pandas as pd
import gzip, json

def parse(path):
    g = gzip.open(path, 'rb')
    for l in g:
        yield json.loads(l)

def getDF(path):
    i = 0
    df = {}
    for d in parse(path):
        df[i] = d
        i += 1
    return pd.DataFrame.from_dict(df, orient='index')

## 載入資料

In [77]:
!wget http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/All_Beauty.csv
!wget http://deepyeti.ucsd.edu/jianmo/amazon/metaFiles2/meta_All_Beauty.json.gz

--2021-12-23 02:08:04--  http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/All_Beauty.csv
Resolving deepyeti.ucsd.edu (deepyeti.ucsd.edu)... 169.228.63.50
Connecting to deepyeti.ucsd.edu (deepyeti.ucsd.edu)|169.228.63.50|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 15499476 (15M) [application/octet-stream]
Saving to: ‘All_Beauty.csv.1’


2021-12-23 02:08:06 (8.24 MB/s) - ‘All_Beauty.csv.1’ saved [15499476/15499476]

--2021-12-23 02:08:06--  http://deepyeti.ucsd.edu/jianmo/amazon/metaFiles2/meta_All_Beauty.json.gz
Resolving deepyeti.ucsd.edu (deepyeti.ucsd.edu)... 169.228.63.50
Connecting to deepyeti.ucsd.edu (deepyeti.ucsd.edu)|169.228.63.50|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10329961 (9.9M) [application/octet-stream]
Saving to: ‘meta_All_Beauty.json.gz.1’


2021-12-23 02:08:08 (6.00 MB/s) - ‘meta_All_Beauty.json.gz.1’ saved [10329961/10329961]



## 資料整理

In [78]:
metadata = getDF('/content/meta_All_Beauty.json.gz')
ratings = pd.read_csv('/content/All_Beauty.csv', names=['asin', 'reviewerID', 'overall', 'unixReviewTime'], header=None)

In [79]:
metadata.head()

Unnamed: 0,category,tech1,description,fit,title,also_buy,tech2,brand,feature,rank,also_view,details,main_cat,similar_item,date,price,asin,imageURL,imageURLHighRes
0,[],,[Loud 'N Clear Personal Sound Amplifier allows...,,Loud 'N Clear&trade; Personal Sound Amplifier,[],,idea village,[],"2,938,573 in Beauty & Personal Care (",[],{'ASIN: ': '6546546450'},All Beauty,,,,6546546450,[],[]
1,[],,[No7 Lift & Luminate Triple Action Serum 50ml ...,,No7 Lift &amp; Luminate Triple Action Serum 50...,"[B01E7LCSL6, B008X5RVME]",,,[],"872,854 in Beauty & Personal Care (",[],"{'Shipping Weight:': '0.3 ounces (', 'ASIN: ':...",All Beauty,"class=""a-bordered a-horizontal-stripes a-spa...",,$44.99,7178680776,[],[]
2,[],,[No7 Stay Perfect Foundation now stays perfect...,,No7 Stay Perfect Foundation Cool Vanilla by No7,[],,No7,[],"956,696 in Beauty & Personal Care (","[B01B8BR0O8, B01B8BR0NO, B014MHXXM8]","{'Shipping Weight:': '3.5 ounces (', 'ASIN: ':...",All Beauty,,,$28.76,7250468162,[],[]
3,[],,[],,Wella Koleston Perfect Hair Colour 44/44 Mediu...,[B0041PBXX8],,,[],"1,870,258 in Beauty & Personal Care (",[],"{'  Item Weight: ': '1.76 ounces', 'Sh...",All Beauty,,,,7367905066,[https://images-na.ssl-images-amazon.com/image...,[https://images-na.ssl-images-amazon.com/image...
4,[],,[Lacto Calamine Skin Balance Daily Nourishing ...,,Lacto Calamine Skin Balance Oil control 120 ml...,[],,Pirmal Healthcare,[],"67,701 in Beauty & Personal Care (","[3254895630, B007VL1D9S, B00EH9A0RI, B0773MBG4...","{'Shipping Weight:': '12 ounces (', 'ASIN: ': ...",All Beauty,,,$12.15,7414204790,[https://images-na.ssl-images-amazon.com/image...,[https://images-na.ssl-images-amazon.com/image...


In [80]:
ratings.head()

Unnamed: 0,asin,reviewerID,overall,unixReviewTime
0,143026860,A1V6B6TNIC10QE,1.0,1424304000
1,143026860,A2F5GHSXFQ0W6J,4.0,1418860800
2,143026860,A1572GUYS7DGSR,4.0,1407628800
3,143026860,A1PSGLFK1NSVO,5.0,1362960000
4,143026860,A6IKXKZMTKGSC,5.0,1324771200


In [81]:
ratings['DATE'] = pd.to_datetime(ratings['unixReviewTime'], unit='s')

## 資料切分

In [82]:
ratings_trainings = ratings[
    (ratings['DATE'] < '2018-09-01')
]
ratings_testings = ratings[
    (ratings['DATE'] >= '2018-09-01') & 
    (ratings['DATE'] <= '2018-09-30')
]
ratings_testings_by_user = ratings_testings.groupby('reviewerID').agg(list).reset_index()[['reviewerID', 'asin']].to_dict('records')
ratings_testings_by_user = { rating['reviewerID']: rating['asin'] for rating in ratings_testings_by_user }
users = list(ratings_testings_by_user.keys())

## 產生推薦

In [83]:
# rule1: recommend top 10 review numbers product in training time period to
#        all users
def recommender_1(training_data, users=[], metadata=None):
    recommendations = {}
    ratings_trainings = training_data
    # find top 10 review numbers products
    top10_reviews = ratings_trainings.groupby('asin').size() \
                            .sort_values(ascending=False).index.to_list()[0:10]
    # recommend
    recommendations = {user: top10_reviews for user in users}
    return recommendations

In [84]:
# rule2: recommend top 10 review numbers products in past 180 days to all users
def recommender_2(training_data, users=[], metadata=None):
    recommendations = {}
    # set time period
    import datetime
    end_date = training_data['DATE'].max()
    start_date = end_date - datetime.timedelta(180)
    # filter data in selected time
    ratings_trainings = training_data[
        (ratings['DATE'] >= start_date) & 
        (ratings['DATE'] <= end_date)
    ]
    # find top 10 review numbers products
    top10_reviews = ratings_trainings.groupby('asin').size() \
                            .sort_values(ascending=False).index.to_list()[0:10]
    # recommend
    recommendations = {user: top10_reviews for user in users}
    return recommendations

In [85]:
# rule3: recommend products' also_view or also_buy to users who had review those ones.
def recommender_3(training_data, users=[], metadata=None):
    recommendations = {}
    ratings_trainings = training_data
    # asin versus also_view and also_buy
    also_view_or_buy = metadata[['asin', 'also_buy', 'also_view']]
    also_view_or_buy['view_or_buy'] = also_view_or_buy['also_buy'] + also_view_or_buy['also_view']
    # users versus also_view_or_buy
    ratings_view_or_buy = ratings_trainings[['asin', 'reviewerID']]
    ratings_view_or_buy = ratings_view_or_buy.merge(also_view_or_buy, how='left', on='asin')
    ratings_view_or_buy.drop(['asin', 'also_view', 'also_buy'], 1, inplace=True)
    ratings_view_or_buy.dropna(inplace=True)
    ratings_view_or_buy_groupby_user = ratings_view_or_buy.groupby('reviewerID').agg({'view_or_buy': 'sum'})
    # recommend products to user depend on also_view_or_buy table
    for user in users:
        try:
            recommendations[user] = ratings_view_or_buy_groupby_user.loc[user]['view_or_buy']
        except:
            pass
    return recommendations

## 結果評估

In [86]:
def evaluate(ratings_testings_by_user={}, ratings_by_user={}, method=None):
    '''
    * ratings_testings_by_user: dict 真實被購買的商品資料（2018-09-01 以後資料）
    * ratings_by_user: dict 利用訓練資料學習的推薦商品
    * method: str
    * score: float
    '''
    total = 0
    for d in ratings_testings_by_user:
        if d in ratings_by_user:
            total += len(set(ratings_by_user[d]) & set(ratings_testings_by_user[d]))

    score = total / len(ratings_testings)
    return score

In [87]:
evaluate_result = {}
n = 1
for recommender in [recommender_1, recommender_2, recommender_3]:
    ratings_by_user = recommender(ratings_trainings, users, metadata)
    evaluate_result[str(n)] = evaluate(ratings_testings_by_user, ratings_by_user)
    n += 1
result = pd.DataFrame(list(evaluate_result.items()), columns=['recommender', 'score'])
result

  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


Unnamed: 0,recommender,score
0,1,0.083051
1,2,0.09661
2,3,0.001695
