<a href="https://colab.research.google.com/github/tcglarry/good_stuff/blob/master/recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Recommendation

## Import all necessary libraries

In [0]:
#!fc-list :lang=zh family

In [0]:
import os
import glob

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

#pd.set_option('display.max_rows', 10)
plt.rcParams['font.sans-serif'] = ['Noto Sans Mono CJK TC', 'sans-serif'] 
plt.rcParams['axes.unicode_minus'] = False

%matplotlib inline

## Load Data

In [34]:
try:
    from google.colab import drive

    # Mount the folder "drive" on google drive to Colab Notebook
    drive.mount('/content/drive')
    #path = '/content/drive/My Drive/wids-taipei/2020-WiDS-Taipei-MLCC-Workshop/dataset/*.csv'
    path = 'drive/My Drive/WiDS Workshop Document for 學員/code/data/*.csv'

except ModuleNotFoundError:
    path = '../data/*.csv'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Read data

In [36]:
# Read data
print (path)
filenames = glob.glob(path)
pd_dict = {}
print (filenames)

for filename in filenames:
    name = filename.split("/")[-1].split(".")[0]
    pd_dict[name] = pd.read_csv(os.path.join(filename))

#pd_dict.keys()
purchase_data = pd_dict['customer_purchase_dataset']    

drive/My Drive/WiDS Workshop Document for 學員/code/data/*.csv
['drive/My Drive/WiDS Workshop Document for 學員/code/data/customers_dataset.csv', 'drive/My Drive/WiDS Workshop Document for 學員/code/data/order_reviews_dataset.csv', 'drive/My Drive/WiDS Workshop Document for 學員/code/data/customer_purchase_dataset.csv', 'drive/My Drive/WiDS Workshop Document for 學員/code/data/customer_analysis_input.csv', 'drive/My Drive/WiDS Workshop Document for 學員/code/data/orders_dataset.csv', 'drive/My Drive/WiDS Workshop Document for 學員/code/data/order_payments_dataset.csv', 'drive/My Drive/WiDS Workshop Document for 學員/code/data/kmeans_sample_dataset.csv']


## Item-based Recommendation

In [37]:
purchase_data.head(5)

Unnamed: 0,customer_unique_id,product_sub_category,product_main_category,price,count
0,7ad04c71bfca958e6f2ec44bce34e2da,books_technical,書籍,24.0,1
1,4e58455924b97da4c44477abb0b030a5,cool_stuff,休閒生活,44.9,1
2,1f44054faaecb5ba43ca49625fb81767,auto,3C,24.8,1
3,d8764626d0d43e3f4fb34db9021a46ef,kitchen_dining_laundry_garden_furniture,家居生活,9.6,1
4,960438e3b93de6c449d45491534855a2,computers_accessories,3C,89.8,1


In [38]:
customer_product_data = purchase_data[['customer_unique_id', 'product_main_category', 'count']]
customer_product_data

Unnamed: 0,customer_unique_id,product_main_category,count
0,7ad04c71bfca958e6f2ec44bce34e2da,書籍,1
1,4e58455924b97da4c44477abb0b030a5,休閒生活,1
2,1f44054faaecb5ba43ca49625fb81767,3C,1
3,d8764626d0d43e3f4fb34db9021a46ef,家居生活,1
4,960438e3b93de6c449d45491534855a2,3C,1
...,...,...,...
236830,305adb7f869f2d07fa2170b042abefdf,家居生活,1
236831,49edfe73ea287d715eebfced06b0bea9,3C,1
236832,d87cc9520e3fd47ef88f7098e51afe8a,保健,1
236833,738ffcf1017b584e9d2684b36e07469c,服飾/配件,1


### pivot table

In [39]:
purchase_data_sum = customer_product_data.groupby(['customer_unique_id', 'product_main_category'],
                                                 as_index=False).count()
purchase_data_sum

Unnamed: 0,customer_unique_id,product_main_category,count
0,0000366f3b9a7992bf8c76cfdf3221e2,家居生活,1
1,0000b849f77a49e4a4ce2b2a4ca5be3f,保健,1
2,0000b849f77a49e4a4ce2b2a4ca5be3f,書籍,1
3,0000f46a3911fa3c0805444483337064,保健,1
4,0000f46a3911fa3c0805444483337064,文具,1
...,...,...,...
212524,ffff371b4d645b6ecea244b27531430a,美食,1
212525,ffff5962728ec6157033ef9805bacc48,休閒生活,1
212526,ffffd2657e2aad2907e67c3e9daecbeb,3C,1
212527,ffffd2657e2aad2907e67c3e9daecbeb,休閒生活,1


In [40]:
customer_product_pivot = pd.pivot(purchase_data_sum, 
                                  index='product_main_category',
                                  columns='customer_unique_id',
                                  values='count').fillna(0)
customer_product_pivot

customer_unique_id,0000366f3b9a7992bf8c76cfdf3221e2,0000b849f77a49e4a4ce2b2a4ca5be3f,0000f46a3911fa3c0805444483337064,0000f6ccb0745a6a4b88665a16c9f078,0004aac84e0df4da2b147fca70cf8255,0004bd2a26a76fe21f786e4fbd80607f,00050ab1314c0e55a6ca13cf7181fecf,00053a61a98854899e70ed204dd4bafe,0005e1862207bf6ccc02e4228effd9a0,0005ef4cd20d2893f0d9fbd94d3c0d97,0006fdc98a402fceb4eb0ee528f6a8d4,00082cbe03e478190aadbea78542e933,00090324bbad0e9342388303bb71ba0a,000949456b182f53c18b68d6babc79c1,000a5ad9c4601d2bbdd9ed765d5213b3,000bfa1d2f1a41876493be685390d6d3,000c8bdb58a29e7115cfc257230fb21b,000d460961d6dbfa3ec6c9f5805769e1,000de6019bb59f34c099a907c151d855,000e309254ab1fc5ba99dd469d36bdb4,000ec5bff359e1c0ad76a81a45cb598f,000ed48ceeb6f4bf8ad021a10a3c7b43,000fbf0473c10fc1ab6f8d2d286ce20c,0010a452c6d13139e50b57f19f52e04e,0010fb34b966d44409382af9e8fd5b77,001147e649a7b1afd577e873841632dd,00115fc7123b5310cf6d3a3aa932699e,0011805441c0d1b68b48002f1d005526,0011857aff0e5871ce5eb429f21cdaf5,0011c98589159d6149979563c504cb21,0012929d977a8d7280bb277c1e5f589d,0014a5a58da615f7b01a4f5e194bf5ea,0015752e079902b12cd00b9b7596276b,00172711b30d52eea8b313a7f2cced02,00191a9719ef48ebb5860b130347bf33,001926cef41060fae572e2e7b30bd2a4,001928b561575b2821c92254a2327d06,00196c4c9a3af7dd2ad10eade69c926f,00196fdb2bf9edfc35e88ebfbcf8d781,0019da6aa6bcb27cc32f1249bd12da05,...,ffe4c3aa14d4702fb11fe5b3c4dd755c,ffe4d6bcc48632f0bb1bedf53702cc4b,ffe6efca3c7e6a06bad0a6a883280a93,ffe76cb2f4bb39384c432d65ece67441,ffe7752edcf14b5819058b1948e02f3a,ffe780a8995715d9560ca10f3351710f,ffe8f2fc0cee48f79934bd2c506fafc0,ffe9102bb78a76921ba0ff3c4659616a,ffe96201d466b0e0dc8139850be29d5d,ffe96c782a5bc522bd8bad3bc638981a,ffe9be10b9a58c5464d833e8b1b2c632,ffe9e41fbd14db4a7361347c56af5447,ffeb904468642a1ce663a322629801cb,ffebb6424578e7bb153322da9d65634f,ffec10ad4229ba46818560e1c8b40a68,ffec490ab531184a483efe2eedd68908,ffecceca389973ef16660d58696f281e,ffeddf8aa7cdecf403e77b2e9a99e2ea,ffedff0547d809c90c05c2691c51f9b7,ffee94d548cef05b146d825a7648dab4,ffeefd086fc667aaf6595c8fe3d22d54,ffef0ffa736c7b3d9af741611089729b,fff1afc79f6b5db1e235a4a6c30ceda7,fff1bdd5c5e37ca79dd74deeb91aa5b6,fff22793223fe80c97a8fd02ac5c6295,fff2ae16b99c6f3c785f0e052f2a9cfb,fff3a9369e4b7102fab406a334a678c3,fff3e1d7bc75f11dc7670619b2e61840,fff5eb4918b2bf4b2da476788d42051c,fff699c184bcc967d62fa2c6171765f7,fff7219c86179ca6441b8f37823ba3d3,fff96bc586f78b1f070da28c4977e810,fffa431dd3fcdefea4b1777d114144f2,fffb09418989a0dbff854a28163e47c6,fffbf87b7a1a6fa8b03f081c5f51a201,fffcf5a5ff07b0908bd4e2dbc735a684,fffea47cd6d3cc0a88bd621562a9d061,ffff371b4d645b6ecea244b27531430a,ffff5962728ec6157033ef9805bacc48,ffffd2657e2aad2907e67c3e9daecbeb
product_main_category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
3C,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
休閒生活,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0
保健,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0
其他,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
商業用途,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
嬰兒用品,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
安全配件,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
家居生活,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,2.0,0.0,0.0,0.0,1.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
家電,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
文具,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [41]:
customer_product_matrix = csr_matrix(customer_product_pivot.values) # size is same as pivot table
model_knn = NearestNeighbors(n_neighbors=5, algorithm='brute', metric='cosine')
model_knn.fit(customer_product_matrix)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

In [42]:
# Select an item that we want to calculate similarity
# it repreasents that 'suppose an item is bought, ......'
#query_index = np.random.choice(customer_product_pivot.shape[0])
query_index = 0

distances, indices = model_knn.kneighbors(
    customer_product_pivot.iloc[query_index,:].values.reshape(1, -1), 
    n_neighbors=4)

for i in range(0, len(distances.flatten())):
    if i == 0:
        print('Recommendations for {0}:'.format(customer_product_pivot.index[query_index]))     
    else:
        print('{0}: {1}, with distance of {2}:'.format(i, customer_product_pivot.index[indices.flatten()[i]], distances.flatten()[i]))

Recommendations for 3C:
1: 休閒生活, with distance of 0.401609152919837:
2: 服飾/配件, with distance of 0.6214577231454532:
3: 書籍, with distance of 0.7380923681040784:
