# Lazada Item Recommendation

In [1]:
import pandas as pd
import numpy as np
import math
import re
from fuzzywuzzy import fuzz

from sklearn.metrics import mean_squared_error
from sklearn.metrics import pairwise_distances
from sklearn.neighbors import NearestNeighbors

import warnings
from IPython.display import clear_output
warnings.filterwarnings('ignore')

In [2]:
df_reviews = pd.read_csv('datasets/item-reviews.csv')

## Dataset of User Reviews

In [3]:
df_reviews.head()

Unnamed: 0.1,Unnamed: 0,itemId,category,name,rating,reviewContent,likeCount,upVotes,downVotes,helpful,relevanceScore,boughtDate,clientType,retrievedDate
0,0,100002528,beli-harddisk-eksternal,Kamal U.,5,bagus mantap dah sesui pesanan,0,0,0,True,26.51,09 Apr 2019,androidApp,2019-10-02
1,1,100002528,beli-harddisk-eksternal,yofanca m.,4,"Bagus, sesuai foto",0,0,0,True,22.49,24 Sep 2017,androidApp,2019-10-02
2,2,100002528,beli-harddisk-eksternal,Lazada Customer,5,okkkkk mantaaaaaaapppp ... goood,0,0,0,True,21.5,04 Apr 2018,androidApp,2019-10-02
3,3,100002528,beli-harddisk-eksternal,Lazada Customer,4,bagus sesuai,0,0,0,True,20.51,22 Sep 2017,androidApp,2019-10-02
4,4,100003785,beli-harddisk-eksternal,Fadjar B.,1,baru 10 bulan layarnya dah bergaris,0,0,0,True,21.49,06 Apr 2017,androidApp,2019-10-02


### Data Preprocessing

#### Check Missing Values

In [4]:
df_reviews.isnull().sum()

Unnamed: 0        0
itemId            0
category          0
name              0
rating            0
reviewContent     0
likeCount         0
upVotes           0
downVotes         0
helpful           0
relevanceScore    0
boughtDate        0
clientType        0
retrievedDate     0
dtype: int64

#### Features Selection

In [5]:
df_reviews = df_reviews[['itemId', 'category', 'name', 'rating', 'relevanceScore', 'clientType']]

In [6]:
df_reviews.describe()

Unnamed: 0,itemId,rating,relevanceScore
count,100564.0,100564.0,100564.0
mean,292283000.0,4.539189,30.901271
std,164648700.0,1.071755,9.363444
min,19946.0,1.0,11.28
25%,160022800.0,5.0,24.51
50%,363646900.0,5.0,29.01
75%,406197000.0,5.0,35.83
max,724217000.0,5.0,76.5


#### Encode Categorical Columns

In [7]:
df_reviews['category'].unique()

array(['beli-harddisk-eksternal', 'beli-laptop', 'beli-smart-tv',
       'jual-flash-drives', 'shop-televisi-digital'], dtype=object)

In [8]:
df_reviews['clientType'].unique()

array(['androidApp', 'mobile', 'desktop', 'iosApp', 'mobile-app'],
      dtype=object)

In [9]:
df_reviews

Unnamed: 0,itemId,category,name,rating,relevanceScore,clientType
0,100002528,beli-harddisk-eksternal,Kamal U.,5,26.51,androidApp
1,100002528,beli-harddisk-eksternal,yofanca m.,4,22.49,androidApp
2,100002528,beli-harddisk-eksternal,Lazada Customer,5,21.50,androidApp
3,100002528,beli-harddisk-eksternal,Lazada Customer,4,20.51,androidApp
4,100003785,beli-harddisk-eksternal,Fadjar B.,1,21.49,androidApp
...,...,...,...,...,...,...
100559,9467887,shop-televisi-digital,Irsam J.,4,32.01,androidApp
100560,9467887,shop-televisi-digital,Lazada Customer,4,21.29,desktop
100561,9467898,shop-televisi-digital,Frank S.,1,28.01,desktop
100562,9467899,shop-televisi-digital,Isharyanto S.,5,24.63,desktop


In [10]:
encoder_cat = {"beli-harddisk-eksternal": 0, "beli-laptop": 1, 'beli-smart-tv': 2, 
                               'jual-flash-drives': 3, 'shop-televisi-digital': 4}
encoder_cli = {"androidApp": 0, "mobile": 1, "desktop": 2, "iosApp": 3,
                                  "mobile-app": 4}

df_reviews['category'].replace(encoder_cat, inplace=True)
df_reviews['clientType'].replace(encoder_cli, inplace=True)

In [11]:
df_reviews

Unnamed: 0,itemId,category,name,rating,relevanceScore,clientType
0,100002528,0,Kamal U.,5,26.51,0
1,100002528,0,yofanca m.,4,22.49,0
2,100002528,0,Lazada Customer,5,21.50,0
3,100002528,0,Lazada Customer,4,20.51,0
4,100003785,0,Fadjar B.,1,21.49,0
...,...,...,...,...,...,...
100559,9467887,4,Irsam J.,4,32.01,0
100560,9467887,4,Lazada Customer,4,21.29,2
100561,9467898,4,Frank S.,1,28.01,2
100562,9467899,4,Isharyanto S.,5,24.63,2


In [12]:
df_reviews[df_reviews['itemId'] == 100002528]

Unnamed: 0,itemId,category,name,rating,relevanceScore,clientType
0,100002528,0,Kamal U.,5,26.51,0
1,100002528,0,yofanca m.,4,22.49,0
2,100002528,0,Lazada Customer,5,21.5,0
3,100002528,0,Lazada Customer,4,20.51,0
42330,100002528,2,Kamal U.,5,26.51,0
42331,100002528,2,yofanca m.,4,22.49,0
42332,100002528,2,Lazada Customer,5,21.5,0
42333,100002528,2,Lazada Customer,4,20.51,0
58598,100002528,3,Kamal U.,5,26.51,0
58599,100002528,3,yofanca m.,4,22.49,0


## Dataset of Items

In [13]:
df_items = pd.read_csv('datasets/20191002-items.csv')

### Data Preprocessing


In [14]:
df_items.head()

Unnamed: 0,itemId,category,name,brandName,url,price,averageRating,totalReviews,retrievedDate
0,100002528,beli-harddisk-eksternal,"TOSHIBA Smart HD LED TV 32"" - 32L5650VJ Free B...",Toshiba,https://www.lazada.co.id/products/toshiba-smar...,2499000,4,8,2019-10-02
1,100003785,beli-harddisk-eksternal,"TOSHIBA Full HD Smart LED TV 40"" - 40L5650VJ -...",Toshiba,https://www.lazada.co.id/products/toshiba-full...,3788000,3,3,2019-10-02
2,100004132,beli-harddisk-eksternal,Samsung 40 Inch Full HD Flat LED Digital TV 4...,LG,https://www.lazada.co.id/products/samsung-40-i...,3850000,3,2,2019-10-02
3,100004505,beli-harddisk-eksternal,"Sharp HD LED TV 24"" - LC-24LE175I - Hitam",Sharp,https://www.lazada.co.id/products/sharp-hd-led...,1275000,3,11,2019-10-02
4,100005037,beli-harddisk-eksternal,Lenovo Ideapad 130-15AST LAPTOP MULTIMEDIA I A...,Lenovo,https://www.lazada.co.id/products/lenovo-ideap...,3984100,5,1,2019-10-02


In [15]:
df_items['name'] = df_items['name'].str.strip()
df_items['name'] = df_items['name'].str.replace(r'\[.*?\]','')
df_items['name'] = df_items['name'].map(lambda x: re.sub(' +',' ',x))
df_items['name'] = df_items['name'].str.strip()

In [16]:
df_items['name']

0        TOSHIBA Smart HD LED TV 32" - 32L5650VJ Free B...
1        TOSHIBA Full HD Smart LED TV 40" - 40L5650VJ -...
2        Samsung 40 Inch Full HD Flat LED Digital TV 40...
3                Sharp HD LED TV 24" - LC-24LE175I - Hitam
4        Lenovo Ideapad 130-15AST LAPTOP MULTIMEDIA I A...
                               ...                        
10937    Toshiba 32L3750VJ Digital Tv DVB-T2 LED TV 32"...
10938    Samsung 43K5002AK Televisi LED - Khusus JABODE...
10939    Sharp 32LE180i AQUOS LED TV 32 " Khusus JABODE...
10940    Akari LED TV LE-50D88 50" FULL HD (Hitam) - Kh...
10941    Akari LED TV LE-25B88 25" HD READY (Hitam) - K...
Name: name, Length: 10942, dtype: object

In [17]:
df_items['name'].iloc[5000]

'New Series Laptop Dell 3180 Desain & Gaming Android - A9 9420 4GB 500GB AMD R5 11.6 inch Dual OS ( Win 10 dan Android OS ) Garansi Resmi Jangan Samakan Dengan Seri Yang Hanya 1 OS ( Fiturnya Beda Jauh )'

#### Check Missing Values

In [18]:
df_items.isnull().sum()

itemId           0
category         0
name             0
brandName        2
url              0
price            0
averageRating    0
totalReviews     0
retrievedDate    0
dtype: int64

In [19]:
df_items.dropna(axis=0, how='any', inplace=True)
df_items

Unnamed: 0,itemId,category,name,brandName,url,price,averageRating,totalReviews,retrievedDate
0,100002528,beli-harddisk-eksternal,"TOSHIBA Smart HD LED TV 32"" - 32L5650VJ Free B...",Toshiba,https://www.lazada.co.id/products/toshiba-smar...,2499000,4,8,2019-10-02
1,100003785,beli-harddisk-eksternal,"TOSHIBA Full HD Smart LED TV 40"" - 40L5650VJ -...",Toshiba,https://www.lazada.co.id/products/toshiba-full...,3788000,3,3,2019-10-02
2,100004132,beli-harddisk-eksternal,Samsung 40 Inch Full HD Flat LED Digital TV 40...,LG,https://www.lazada.co.id/products/samsung-40-i...,3850000,3,2,2019-10-02
3,100004505,beli-harddisk-eksternal,"Sharp HD LED TV 24"" - LC-24LE175I - Hitam",Sharp,https://www.lazada.co.id/products/sharp-hd-led...,1275000,3,11,2019-10-02
4,100005037,beli-harddisk-eksternal,Lenovo Ideapad 130-15AST LAPTOP MULTIMEDIA I A...,Lenovo,https://www.lazada.co.id/products/lenovo-ideap...,3984100,5,1,2019-10-02
...,...,...,...,...,...,...,...,...,...
10937,9467887,shop-televisi-digital,"Toshiba 32L3750VJ Digital Tv DVB-T2 LED TV 32""...",Toshiba,https://www.lazada.co.id/products/toshiba-32l3...,1990000,4,12,2019-10-02
10938,9467898,shop-televisi-digital,Samsung 43K5002AK Televisi LED - Khusus JABODE...,Samsung,https://www.lazada.co.id/products/samsung-43k5...,4590000,1,1,2019-10-02
10939,9467899,shop-televisi-digital,"Sharp 32LE180i AQUOS LED TV 32 "" Khusus JABODE...",Sharp,https://www.lazada.co.id/products/sharp-32le18...,1990000,4,2,2019-10-02
10940,9548087,shop-televisi-digital,"Akari LED TV LE-50D88 50"" FULL HD (Hitam) - Kh...",Akari,https://www.lazada.co.id/products/akari-led-tv...,4790000,5,1,2019-10-02


#### Features Selection

In [20]:
df_items = df_items[['itemId', 'category', 'name', 'price', 'averageRating', 'totalReviews']]
df_items

Unnamed: 0,itemId,category,name,price,averageRating,totalReviews
0,100002528,beli-harddisk-eksternal,"TOSHIBA Smart HD LED TV 32"" - 32L5650VJ Free B...",2499000,4,8
1,100003785,beli-harddisk-eksternal,"TOSHIBA Full HD Smart LED TV 40"" - 40L5650VJ -...",3788000,3,3
2,100004132,beli-harddisk-eksternal,Samsung 40 Inch Full HD Flat LED Digital TV 40...,3850000,3,2
3,100004505,beli-harddisk-eksternal,"Sharp HD LED TV 24"" - LC-24LE175I - Hitam",1275000,3,11
4,100005037,beli-harddisk-eksternal,Lenovo Ideapad 130-15AST LAPTOP MULTIMEDIA I A...,3984100,5,1
...,...,...,...,...,...,...
10937,9467887,shop-televisi-digital,"Toshiba 32L3750VJ Digital Tv DVB-T2 LED TV 32""...",1990000,4,12
10938,9467898,shop-televisi-digital,Samsung 43K5002AK Televisi LED - Khusus JABODE...,4590000,1,1
10939,9467899,shop-televisi-digital,"Sharp 32LE180i AQUOS LED TV 32 "" Khusus JABODE...",1990000,4,2
10940,9548087,shop-televisi-digital,"Akari LED TV LE-50D88 50"" FULL HD (Hitam) - Kh...",4790000,5,1


In [21]:
df_items['name'] = df_items['name'].str.lower()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_items['name'] = df_items['name'].str.lower()


#### Encode Categorical Columns

In [22]:
df_items['category'].replace(encoder_cat, inplace=True)
df_items

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().replace(


Unnamed: 0,itemId,category,name,price,averageRating,totalReviews
0,100002528,0,"toshiba smart hd led tv 32"" - 32l5650vj free b...",2499000,4,8
1,100003785,0,"toshiba full hd smart led tv 40"" - 40l5650vj -...",3788000,3,3
2,100004132,0,samsung 40 inch full hd flat led digital tv 40...,3850000,3,2
3,100004505,0,"sharp hd led tv 24"" - lc-24le175i - hitam",1275000,3,11
4,100005037,0,lenovo ideapad 130-15ast laptop multimedia i a...,3984100,5,1
...,...,...,...,...,...,...
10937,9467887,4,"toshiba 32l3750vj digital tv dvb-t2 led tv 32""...",1990000,4,12
10938,9467898,4,samsung 43k5002ak televisi led - khusus jabode...,4590000,1,1
10939,9467899,4,"sharp 32le180i aquos led tv 32 "" khusus jabode...",1990000,4,2
10940,9548087,4,"akari led tv le-50d88 50"" full hd (hitam) - kh...",4790000,5,1


0: External HDD, 1: Laptops, 2: Smart TVs, 3: Flashdisks, 4: Digital Television

## Features Engineering

### Merge data based on item ID

In [23]:
df_combined = df_reviews.merge(df_items, on="itemId", how = 'right')
df_combined

Unnamed: 0,itemId,category_x,name_x,rating,relevanceScore,clientType,category_y,name_y,price,averageRating,totalReviews
0,100002528,0.0,Kamal U.,5.0,26.51,0.0,0,"toshiba smart hd led tv 32"" - 32l5650vj free b...",2499000,4,8
1,100002528,0.0,yofanca m.,4.0,22.49,0.0,0,"toshiba smart hd led tv 32"" - 32l5650vj free b...",2499000,4,8
2,100002528,0.0,Lazada Customer,5.0,21.50,0.0,0,"toshiba smart hd led tv 32"" - 32l5650vj free b...",2499000,4,8
3,100002528,0.0,Lazada Customer,4.0,20.51,0.0,0,"toshiba smart hd led tv 32"" - 32l5650vj free b...",2499000,4,8
4,100002528,2.0,Kamal U.,5.0,26.51,0.0,0,"toshiba smart hd led tv 32"" - 32l5650vj free b...",2499000,4,8
...,...,...,...,...,...,...,...,...,...,...,...
318811,9830166,,,,,,0,eva case shockproof case bag for external hdd ...,45000,5,3
318812,9973399,,,,,,0,sandisk cruzer blade usb flash drive 32gb,203000,2,1
318813,9973399,,,,,,3,sandisk cruzer blade usb flash drive 32gb,203000,2,1
318814,9973500,,,,,,0,transformer ravage usb 2.0 flash drive,180000,3,4


In [24]:
df_combined.drop(['category_x', 'clientType'], 1, inplace=True)
df_combined = df_combined.rename(columns={'itemId': 'item_id',
                                          'name_x': 'user_name',
                                          'relevanceScore': 'relevance_score', 
                                          'category_y': 'item_category',
                                          'name_y': 'item_name',
                                          'averageRating': 'average_rating',
                                          'totalReviews': 'total_reviews'})
df_combined.reset_index(drop=True, inplace=True)

In [25]:
df_combined

Unnamed: 0,item_id,user_name,rating,relevance_score,item_category,item_name,price,average_rating,total_reviews
0,100002528,Kamal U.,5.0,26.51,0,"toshiba smart hd led tv 32"" - 32l5650vj free b...",2499000,4,8
1,100002528,yofanca m.,4.0,22.49,0,"toshiba smart hd led tv 32"" - 32l5650vj free b...",2499000,4,8
2,100002528,Lazada Customer,5.0,21.50,0,"toshiba smart hd led tv 32"" - 32l5650vj free b...",2499000,4,8
3,100002528,Lazada Customer,4.0,20.51,0,"toshiba smart hd led tv 32"" - 32l5650vj free b...",2499000,4,8
4,100002528,Kamal U.,5.0,26.51,0,"toshiba smart hd led tv 32"" - 32l5650vj free b...",2499000,4,8
...,...,...,...,...,...,...,...,...,...
318811,9830166,,,,0,eva case shockproof case bag for external hdd ...,45000,5,3
318812,9973399,,,,0,sandisk cruzer blade usb flash drive 32gb,203000,2,1
318813,9973399,,,,3,sandisk cruzer blade usb flash drive 32gb,203000,2,1
318814,9973500,,,,0,transformer ravage usb 2.0 flash drive,180000,3,4


0: External HDD, 1: Laptops, 2: Smart TVs, 3: Flashdisks, 4: Digital Television

In [26]:
df_combined.isnull().sum()

item_id               0
user_name          3428
rating             3428
relevance_score    3428
item_category         0
item_name             0
price                 0
average_rating        0
total_reviews         0
dtype: int64

In [27]:
df_combined.dropna(axis=0, how='any', inplace=True)
df_combined.reset_index(drop=True, inplace=True)

In [28]:
df_combined.sort_values('user_name')

Unnamed: 0,item_id,user_name,rating,relevance_score,item_category,item_name,price,average_rating,total_reviews
85810,160037307,'Aaiiu R.,5.0,24.51,0,sandisk ultra micro sdhc 98mb/s 32gb class 10 ...,157500,4,1405
54042,160022809,-,5.0,19.50,4,philips 32 inch led hd tv - hitam (model 32pha...,2069000,4,1577
48771,160022809,-,5.0,19.50,2,philips 32 inch led hd tv - hitam (model 32pha...,2069000,4,1577
54795,160022809,-,5.0,19.50,4,philips 32 inch led hd tv - hitam (model 32pha...,2069000,4,1577
55548,160022809,-,5.0,19.50,4,philips 32 inch led hd tv - hitam (model 32pha...,2069000,4,1577
...,...,...,...,...,...,...,...,...,...
20443,156289469,ラオー ピ.,5.0,26.07,1,lenovo ideapad ip 130 amd a4 -9125 / windows 1...,3699000,4,113
20227,156289469,ラオー ピ.,5.0,26.07,0,lenovo ideapad ip 130 amd a4 -9125 / windows 1...,3699000,4,113
20281,156289469,ラオー ピ.,5.0,26.07,1,lenovo ideapad ip 130 amd a4 -9125 / windows 1...,3699000,4,113
21307,156289469,ラオー ピ.,5.0,26.07,4,lenovo ideapad ip 130 amd a4 -9125 / windows 1...,3699000,4,113


In [29]:
# Replace Non-ASCII Letters (Japanese, Chinese, Korean) in the User Name Columns with Whitespace

# df_combined['user_name'] = df_combined["user_name"].apply(lambda x: ''.join([i if ord(i) < 128 else 'Username' for i in x]))
# df_combined.reset_index(drop=True, inplace=True)

In [30]:
df_combined.sort_values('user_name')

Unnamed: 0,item_id,user_name,rating,relevance_score,item_category,item_name,price,average_rating,total_reviews
85810,160037307,'Aaiiu R.,5.0,24.51,0,sandisk ultra micro sdhc 98mb/s 32gb class 10 ...,157500,4,1405
54042,160022809,-,5.0,19.50,4,philips 32 inch led hd tv - hitam (model 32pha...,2069000,4,1577
48771,160022809,-,5.0,19.50,2,philips 32 inch led hd tv - hitam (model 32pha...,2069000,4,1577
54795,160022809,-,5.0,19.50,4,philips 32 inch led hd tv - hitam (model 32pha...,2069000,4,1577
55548,160022809,-,5.0,19.50,4,philips 32 inch led hd tv - hitam (model 32pha...,2069000,4,1577
...,...,...,...,...,...,...,...,...,...
20443,156289469,ラオー ピ.,5.0,26.07,1,lenovo ideapad ip 130 amd a4 -9125 / windows 1...,3699000,4,113
20227,156289469,ラオー ピ.,5.0,26.07,0,lenovo ideapad ip 130 amd a4 -9125 / windows 1...,3699000,4,113
20281,156289469,ラオー ピ.,5.0,26.07,1,lenovo ideapad ip 130 amd a4 -9125 / windows 1...,3699000,4,113
21307,156289469,ラオー ピ.,5.0,26.07,4,lenovo ideapad ip 130 amd a4 -9125 / windows 1...,3699000,4,113


## Build Recommender System

In [31]:
df_combined.describe()

Unnamed: 0,item_id,rating,relevance_score,item_category,price,average_rating,total_reviews
count,315388.0,315388.0,315388.0,315388.0,315388.0,315388.0,315388.0
mean,310621200.0,4.552491,31.70183,1.910681,2281880.0,4.002486,1386.708543
std,155473200.0,1.055154,9.51431,1.483071,2256200.0,0.366018,2220.031582
min,19946.0,1.0,11.28,0.0,1000.0,1.0,1.0
25%,160029100.0,5.0,25.5,0.0,929990.0,4.0,57.0
50%,377474300.0,5.0,30.51,2.0,1899000.0,4.0,285.0
75%,414742100.0,5.0,36.51,3.0,3299000.0,4.0,1577.0
max,724217000.0,5.0,76.5,4.0,37650000.0,5.0,9631.0


In [32]:
ratings = pd.DataFrame(df_combined.groupby('item_name')['rating'].mean())
ratings['total_reviews'] = df_combined.groupby('item_name')['rating'].count()
ratings

Unnamed: 0_level_0,rating,total_reviews
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1
(isi 5pcs) otg mini micro usb,4.500000,32
+ aci-vakind 2.5 + aciaig-usb eksternal hard drive disk carry case cover pouch bag untuk pc (merah) + aci--intl,4.000000,1
1 pcs aluminium blueendless bs-u35wf nirkabel perangkat penyimpanan hitam-intl,1.000000,1
1 pcs kabel otg duzon micro - bisa cod,5.000000,4
10in 1 gb ram + 16 gb rom dual kamera pc 4g/bluetooth/wifi pc ipad untuk android (us plug) (mawar emas 100-240 v),1.000000,25
...,...,...
【free pengiriman + super deal + terbatas offer】new mini 64 gb usb logam putar 2.0 gantungan kunci memori flash tongkat ski jempol pena,3.117647,136
【free pengiriman + super deal + terbatas offer】swivel 32 gb 32g usb 2.0 flash stik memori drive penyimpanan u disk untuk otg telepon pc abu-abu,1.000000,4
﻿seagate harddisk external backup plus slim 1tb,5.000000,1
﻿seagate harddisk internal pc 320gb sata,3.000000,5


In [33]:
item_matrix = df_combined.pivot_table(index='user_name', columns='item_name', values='rating').fillna(0)
item_matrix.head()

item_name,(isi 5pcs) otg mini micro usb,+ aci-vakind 2.5 + aciaig-usb eksternal hard drive disk carry case cover pouch bag untuk pc (merah) + aci--intl,1 pcs aluminium blueendless bs-u35wf nirkabel perangkat penyimpanan hitam-intl,1 pcs kabel otg duzon micro - bisa cod,10in 1 gb ram + 16 gb rom dual kamera pc 4g/bluetooth/wifi pc ipad untuk android (us plug) (mawar emas 100-240 v),10in 1gb ram + 16gb rom dual kamera pc 4g/bluetooth/wifi pc ipad untuk android (uk plug) (hitam 100-240 v),128 gb logam speicherstick usb stick + 2 adaptor type c otg bulu berguna pc usb 2.0,128 gb usb 2.0 flash drive dengan mikro usb konektor untuk ponsel android perangkat u disk disket pulpen logam-intl,128 gb usb 3.0 flash disk flashdisk kotak hadiah foto memori video stick u disk,128 gb usb 3.0 memori stik usb otg flashdisk untuk telepon seluler android pc,...,【free pengiriman + super deal + terbatas offer】bestrunner 64 mb usb 2.0 memori drive flash stik kontrol jempol pena u disk penyimpanan hadiah biru,【free pengiriman + super deal + terbatas offer】cartoon piano mini 64 gb usb 2.0 flash pena stik memori diska lepas disk u hadiah super speed flash disk flash drive,【free pengiriman + super deal + terbatas offer】cute kartun unicorn kuda model 32 gb usb 2.0 memori stik usb hadiah,【free pengiriman + super deal + terbatas offer】new 64 gb usb 2.0 mobil kunci flash drive memori tongkat ski jempol cakram pen- kami,【free pengiriman + super deal + terbatas offer】new 64 gb usb 2.0 mobil kunci flash drive memori tongkat ski jempol cakram pen- kami-internasional,【free pengiriman + super deal + terbatas offer】new mini 64 gb usb logam putar 2.0 gantungan kunci memori flash tongkat ski jempol pena,【free pengiriman + super deal + terbatas offer】swivel 32 gb 32g usb 2.0 flash stik memori drive penyimpanan u disk untuk otg telepon pc abu-abu,﻿seagate harddisk external backup plus slim 1tb,﻿seagate harddisk internal pc 320gb sata,﻿toshiba usb flash memory 8gb - putih
user_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Aaiiu R.,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.....,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
......,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Versi 1.0 - Without Using Nearest Neighbors

In [None]:
# from IPython.display import clear_output

# def search_item():
#     while True:
#         clear_output(wait=True)
#         tmp = input('Item yang dicari:\n>> ')
#         string = tmp.split()
#         get_result = item_matrix.columns[item_matrix.columns.str.contains(' '.join(string))]
#         if len(get_result) == 0:
#             clear_output(wait=True)
#             print('Item tidak tersedia, mohon input ulang.\n')
#             continue
#         else:
#             for i in range(len(get_result)):
#                 print(str(i) + ': ' + get_result[i])
#         clear_output(wait=True)
#         usr_inp = int(input('\nMasukkan nomor item (0-'+ str(len(get_result) - 1) +'):\n>> '))

#         if usr_inp > len(get_result):                  ## ini masih belum bisa. jadi kalo inputnya > len(get_result) output error
#             print('Maaf, nomor item melebihi batas.')
#             break
#         else:
#             return item_matrix.columns[item_matrix.columns.str.contains(' '.join(string))][usr_inp]

# def get_recommendation(item, nitem):
#         user_input = item_matrix[item]
#         similar_to_input = item_matrix.corrwith(user_input)
#         corr_list = pd.DataFrame(similar_to_input, columns=['correlation'])
#         corr_list.dropna(inplace=True)
#         corr_list = corr_list.join(ratings['total_reviews'])
#         get_list = corr_list[corr_list['total_reviews'] > 2000].sort_values(by='correlation', ascending=False).head(int(nitem))
#         result = get_list.index.tolist()
#         if len(result) == 0:
#             print('Rekomendasi untuk item "' + item + '" tidak tersedia.')
#         else:
#             print('Nama item yang dipilih: ' + item)
#             print('\n\nHai, ini adalah top-'+str(nitem)+' item yang direkomendasikan untuk kamu!\n=============================================================')  
#             for i in range(len(result)):
#                 print(result[i])

# def main():
#         clear_output(wait=True)
#         get_search = search_item()
#         clear_output(wait=True)
#         inp2 = int(input('\nMasukkan jumlah item yang ingin direkomendasikan.\n>> '))
#         clear_output(wait=True)
#         get_recommendation(get_search, inp2)

In [None]:
# main()

### Unsupervised Nearest Neighbors

In [47]:
def get_name():
    clear_output(wait=True)
    inp = input('\nMasukkan nama item:\n>> ')
    return inp

def validate(string):
    while True:
        get_result = [key for key in item_matrix.columns if string in key.lower()]
        if len(get_result) == 0:
            clear_output(wait=True)
            print('>> Item tidak tersedia, mohon input ulang.\n')
            continue
        else:
            match_tuple = []
            # get match
            for name in get_result:
                ratio = fuzz.ratio(name.lower(), string.lower())
                if ratio >= 60:
                    match_tuple.append((name, ratio))
                elif len(match_tuple) < 15:
                    x = re.search(string.lower(), name.lower())
                    if x:
                        match_tuple.append((name, ratio))
            
            for i in range(len(match_tuple)):
                print(str(i) + ': ' + match_tuple[i][0])
        clear_output(wait=True)
    
        usr_inp = int(input('\nMasukkan nomor item (0-'+ str(len(match_tuple) - 1) +'):\n>> '))
        clear_output(wait=True)
        if usr_inp  > len(get_result) - 1:
            print('>> Maaf, nomor item melebihi batas, mohon input ulang.\n')
            continue
        else:
            return usr_inp, get_result[usr_inp]


def get_nrec():
    clear_output(wait=True)
    inp = int(input('\nMasukkan jumlah item yang ingin direkomendasikan:\n>> '))
    return inp

def nn(n_nei):
    t_itemmatrix = item_matrix.T
    cosine_nn = NearestNeighbors(n_neighbors=n_nei, algorithm='brute', metric='cosine')
    item_fit = cosine_nn.fit(t_itemmatrix)
    dist, idx = item_fit.kneighbors(t_itemmatrix.values)
    return dist, idx

def get_dict(idx):
    items = {}
    for i in range(len(item_matrix.T.index)):
        item_idx = idx[i]
        col_names = item_matrix.T.index[item_idx].tolist()
        items[item_matrix.T.index[i]] = col_names
    return items

def recommend(idx, name, items_dic, dist):
    tmp = dict(zip(items_dic[name], dist[idx]))
    result = dict(sorted(tmp.items(), key=lambda x: x[1], reverse=True))
    clear_output(wait=True)
    print('Nama item yang dipilih: ' + name)
    print('\n\nHai, ini adalah item yang direkomendasikan untuk kamu!\n======================================================')
    for k, v in result.items():
        print(k + ' | Correlation: ' + str(v))

def main():
    global dist
    item_name_input = get_name()
    search_idx, item_name = validate(item_name_input)
    item_num = get_nrec()
    dist, idx = nn(item_num)
    itemdic = get_dict(idx)
    recommend(search_idx, item_name, itemdic, dist)

In [50]:
main()

Nama item yang dipilih: flashdisk sandisk cruzer blade 32gb - original


Hai, ini adalah item yang direkomendasikan untuk kamu!
adata uv128 16gb flashdisk usb3.1 - biru | Correlation: 1.0
lg 28tl430 tv led khusus jabodetabek (untuk keluar kota wajib di packing kayu) | Correlation: 1.0
adata flashdrives uv150 - flashdisk usb 3.1 super speed 32 black | Correlation: 1.0
adata uv150 32gb flashdisk usb3.1 - merah | Correlation: 1.0
adata uv128 16gb flashdisk usb3.1 - kuning | Correlation: 0.9795918022603369
promo -ready !!!! flashdisk samsung otg 32 gb otg drive di raihan style | Correlation: 0.9461006440183807
usb flashdisk bootable windows 16gb | Correlation: 0.9378416216342499
lenovo ideapad 130-14ast-9125-4gb-500gb-win10 black (amd a4-9125/4gb/500gb/dvd/amd r3/14"/win10) | Correlation: 0.9090064773961188
laptop gaming terbaik asus rog gl 552x kbl dm409t plus back pack asus rog i7 kabylake 8gb ddr4 hdd 1tb vga nvidia geforce gtx 950m 4gb windows 10 | Correlation: 0.8703730098344761
flash

### Evaluate The Result

In [51]:
item_distances = 1 - dist
predictions = item_distances.T.dot(item_matrix.T.values) / np.array([np.abs(item_distances.T).sum(axis=1)]).T
penalize = item_matrix.T.values[item_distances.argsort()[0]]

In [52]:
def rmse(prediction, ground_truth):
        prediction = prediction[ground_truth.nonzero()].flatten()
        ground_truth = ground_truth[ground_truth.nonzero()].flatten()
        return math.sqrt(mean_squared_error(prediction, ground_truth))

In [53]:
error_rate = rmse(predictions, penalize)
print('Nearest Neighbors Evaluation\n')
print('Accuracy: {}'.format(100 - error_rate))
print('RMSE: {}'.format(error_rate))

Nearest Neighbors Evaluation

Accuracy: 95.48569649544375
RMSE: 4.5143035045562465


## Create GUI

In [54]:
import tkinter as tk
from tkinter import *
from tkinter.ttk import *
import os

In [55]:
def get_name():
    global str_input
    str_input = str(entry1.get())
    
    
def validate():
    get_name()
    
    str_notexist = ''
    str_res = ''
    
    get_result = [key for key in item_matrix.columns if str_input in key.lower()]
    
    if len(get_result) == 0:
        clear_output(wait=True)
        str_notexist = 'Item tidak tersedia, mohon input ulang.'
        listbox_widget.delete(0,END)
        listbox_widget.insert(END, str_notexist)
    else:
        listbox_widget.delete(0,END)
        
        match_tuple = []
        for name in get_result:
            ratio = fuzz.ratio(name.lower(), str_input.lower())
            if ratio >= 50:
                match_tuple.append((name, ratio))
            elif len(match_tuple) < 15:
                x = re.search(str_input.lower(), name.lower())
                if x:
                    match_tuple.append((name, ratio))
        
        for i in range(len(match_tuple)):
            listbox_widget.insert(END, match_tuple[i][0])

def get_nrec():
    item_num = int(cbb1.get())
    return item_num

def get_item():
    item_name = str(listbox_widget.get(ACTIVE))
    return item_name

def nn(n):
    get_nrec()
    t_itemmatrix = item_matrix.T
    cosine_nn = NearestNeighbors(n_neighbors=n, algorithm='brute', metric='cosine')
    item_fit = cosine_nn.fit(t_itemmatrix)
    dist, idx = item_fit.kneighbors(t_itemmatrix.values)
    return dist, idx

def get_dict(idx):
    items = {}
    for i in range(len(item_matrix.T.index)):
        item_idx = idx[i]
        col_names = item_matrix.T.index[item_idx].tolist()
        items[item_matrix.T.index[i]] = col_names
    return items

def recommend(idx, name, items_dic, dist):
    str_res = []
    tmp = dict(zip(items_dic[name], dist[idx]))
    result = dict(sorted(tmp.items(), key=lambda x: x[1], reverse=True))
    for k, v in result.items():
        str_res.append(k)
    return str_res

def print_res(string):
    listbox_result.delete(0,END)
    for i in range(len(string)):
        listbox_result.insert(END, string[i])

def main_exe():
    n_item = get_nrec()
    item_name = get_item()    
    dist, idx = nn(n_item)
    items_dic = get_dict(idx)
    recommend(n_item, item_name, items_dic, dist)
    string_res = recommend(n_item, item_name, items_dic, dist)
    print_res(string_res)

In [56]:
root = tk.Tk()
root.title('Sistem Rekomendasi Item')

canvas = tk.Canvas(root, width=1280, height=720, bg='#3092c6')
canvas.pack()

frame1 = tk.Frame(root, bg='#516773')
frame1.place(relx=0.1, rely=0.1, relwidth=0.8, relheight=0.1)

label1 = tk.Label(
    frame1,
    text="Selamat Datang di Sistem Rekomendasi\nRecsys Bisa!",
    font=40,
    bg='#516773',
    fg='white'
)
# label.config(font=('Times New Roman', 32))
label1.place(relx=0.1, rely=0.1, relwidth=0.8, relheight=0.8)

frame2 = tk.Frame(root, bg='#516773')
frame2.place(relx=0.1, rely=0.25, relwidth=0.8, relheight=0.7)

label1 = tk.Label(
    frame2,
    text="Masukkan Nama Item",
    bg='#516773',
    fg='white', 
    font=40
)
# label.config(font=('Times New Roman', 32))
label1.place(relx=0, rely=0.05, relwidth=0.3, relheight=0.08)

entry1 = tk.Entry(
    frame2,
)
entry1.place(relx=0.098, rely=0.13, relwidth=0.3, relheight=0.08)

button1 = tk.Button(frame2, text='Cari!', font=40, command=lambda: validate())
button1.place(relx=0.268, rely=0.235, relwidth=0.13, relheight=0.08)

label2 = tk.Label(
    frame2,
    text="Silakan pilih item berikut.",
    bg='#516773',
    fg='white', 
    font=40
)
label2.place(relx=0.028, rely=0.35, relwidth=0.3, relheight=0.08)

listbox_widget = Listbox(frame2)
listbox_widget.place(relx=0.098, rely=0.425, relwidth=0.3, relheight=0.525)


label3 = tk.Label(
    frame2,
    text="Masukkan Jumlah Item",
    bg='#516773',
    fg='white', 
    font=40
)
label3.place(relx=0.443, rely=0.06, relwidth=0.13, relheight=0.08)

cbb1 = Combobox(frame2,
                    values=[
                    5,
                    10,
                    15,
                    20,
                    25,
                    30])
cbb1.set('...')
cbb1.place(relx=0.448, rely=0.128, relwidth=0.1, relheight=0.08)

button2 = tk.Button(frame2, text='Cari!', font=40, command=lambda: [main_exe()])
button2.place(relx=0.57, rely=0.128, relwidth=0.13, relheight=0.08)


label4 = tk.Label(
    frame2,
    text="Daftar Rekomendasi Item:",
    bg='#516773',
    fg='white', 
    font=40
)
label4.place(relx=0.445, rely=0.225, relwidth=0.15, relheight=0.08)

listbox_result = Listbox(frame2)
listbox_result.place(relx=0.45, rely=0.3, relwidth=0.45, relheight=0.65)

label_load = tk.Label(
    frame2,
    bg='#516773',
    fg='white', 
    font=40
)
label_load.place(relx=0.71, rely=0.128, relwidth=0.1, relheight=0.08)

root.mainloop()

## Log Recap

**UPDATE v1.0** <br>

<ul>
<li>Perbaikan pencarian item</li>
</ul>

<b>by Naufal Hilmiaji (17-12-2020)</b>
<br><br>

**UPDATE  v2.0** <br>

<ul>
<li>Menggunakan metode <b><i>Unsupervised Nearest Neighbors</i></b></li>
<li>Akurasi: 95.9%</li>
<li>RMSE: 4.1%</li>
</ul>


**by Naufal Hilmiaji (18-12-2020)**<br><br>

**UPDATE  v2.1** <br>

<ul>
<li>Metode pencarian item menggunakan Fuzzy.</li>
</ul>


**by Ghozy Ghulamul Afif (29-12-2020)**<br><br>


**UPDATE  v3.0** <br>

<ul>
<li>Pembuatan GUI untuk aplikasi.</li>
</ul>


**by Naufal Hilmiaji (30-12-2020)** <br><br>