# Topic detection based on certain given keywords

Here we doing this using static keyword's cosine similarity with sentences

In [1]:
from numpy import dot
from numpy.linalg import norm
from itertools import chain

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import string
from collections import Counter

# spaCy based imports
from spacy.lang.en.stop_words import STOP_WORDS
from spacy.lang.en import English

from sklearn.feature_extraction.text import CountVectorizer


## read df

In [2]:
reviews = pd.read_excel('../data/Bag_Reviews.xlsx')
reviews = reviews[['rating','comments']]
reviews = reviews.drop_duplicates()
reviews['sentiment'] = 'neutral'
reviews.loc[(reviews['rating']==1) | (reviews['rating']==2),'sentiment'] = 'negative'
reviews.loc[(reviews['rating']==4) | (reviews['rating']==5),'sentiment'] = 'positive'

reviews.head()

Unnamed: 0,rating,comments,sentiment
0,4,Give 4 stars because order at the price 37 but...,positive
1,5,Ordered at a discount of 10 baht per piece. It...,positive
2,5,"Small, cute, compact, good But the sash looks ...",positive
3,1,The size is not as large as it is down. The st...,negative
4,1,The product is compared to the price. Okay. Se...,negative


In [3]:
punctuations = string.punctuation
stopwords = list(STOP_WORDS)

# turn a doc into clean tokens
def clean_doc(doc):
    doc = doc.strip()
    # split into tokens by white space
    tokens = doc.split()
    # remove punctuation from each token
    table = str.maketrans('', '', punctuations)
    tokens = [w.translate(table) for w in tokens]
    # remove remaining tokens that are not alphabetic
    tokens = [word for word in tokens if word.isalpha()]
    # filter out stop words
    tokens = [w for w in tokens if not w in stopwords]
    # filter out short tokens
    tokens = [word.lower() for word in tokens if len(word) > 1]
    return tokens
    

In [4]:
vocab = sorted(set(token.lower() for token in chain(*list(map(clean_doc, reviews.comments)))))


In [5]:
quality = [1 if token in ['quality'] else 0 for token in vocab]
delivery = [1 if token in ['delivery','fast'] else 0 for token in vocab]
price = [1 if token in ['price','cheap','money','discount','offer'] else 0 for token in vocab]
beauty = [1 if token in ['beautiful','color','beauty','cute'] else 0 for token in vocab]

In [6]:
topics = {'quality': quality, 'delivery': delivery, 'price': price, 'beauty':beauty}

In [7]:
reviews['tokens'] = reviews.comments.apply(clean_doc)
reviews = reviews[reviews.tokens.apply(len)!=0]
reviews.index = range(len(reviews))
def vectorize(l):
    return [1 if token in l else 0  for token in vocab]

reviews['vectors'] = reviews.tokens.apply(vectorize)
reviews.head()

Unnamed: 0,rating,comments,sentiment,tokens,vectors
0,4,Give 4 stars because order at the price 37 but...,positive,"[give, stars, order, price, today, baht, sorry]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,5,Ordered at a discount of 10 baht per piece. It...,positive,"[ordered, discount, baht, piece, it, worth, no...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,5,"Small, cute, compact, good But the sash looks ...",positive,"[small, cute, compact, good, but, sash, looks,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,1,The size is not as large as it is down. The st...,negative,"[the, size, large, the, stitching, wrong, the,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,1,The product is compared to the price. Okay. Se...,negative,"[the, product, compared, price, okay, send, sl...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [8]:
cos_sim = lambda x, y: dot(x,y)/(norm(x)*norm(y))

for s_num, s_vec in enumerate(reviews.vectors):
    print(s_num)
    for name, topic_vec in topics.items():
        print('\t', name, cos_sim(s_vec, topic_vec))

0
	 quality 0.0
	 delivery 0.0
	 price 0.16903085094570328
	 beauty 0.0
1
	 quality 0.0
	 delivery 0.0
	 price 0.10846522890932808
	 beauty 0.0
2
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.12909944487358055
3
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4
	 quality 0.0
	 delivery 0.0
	 price 0.16903085094570328
	 beauty 0.0944911182523068
5
	 quality 0.0
	 delivery 0.0
	 price 0.14907119849998599
	 beauty 0.16666666666666666
6
	 quality 0.0
	 delivery 0.0
	 price 0.12403473458920847
	 beauty 0.0
7
	 quality 0.0
	 delivery 0.0
	 price 0.08944271909999159
	 beauty 0.1
8
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
9
	 quality 0.0
	 delivery 0.0
	 price 0.22360679774997896
	 beauty 0.25
10
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.2672612419124244
11
	 quality 0.0
	 delivery 0.1360827634879543
	 price 0.0
	 beauty 0.19245008972987526
12
	 quality 0.2182178902359924
	 delivery 0.0
	 price 0.09759000729485333
	 beauty 0.0
13
	 quality 0.0
	 delivery 0.0


	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
179
	 quality 0.0
	 delivery 0.3779644730092272
	 price 0.11952286093343936
	 beauty 0.1336306209562122
180
	 quality 0.2886751345948129
	 delivery 0.20412414523193148
	 price 0.2581988897471611
	 beauty 0.0
181
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
182
	 quality 0.30151134457776363
	 delivery 0.42640143271122083
	 price 0.0
	 beauty 0.0
183
	 quality 0.0
	 delivery 0.1889822365046136
	 price 0.0
	 beauty 0.0
184
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.12909944487358055
185
	 quality 0.2886751345948129
	 delivery 0.20412414523193148
	 price 0.2581988897471611
	 beauty 0.0
186
	 quality 0.2886751345948129
	 delivery 0.40824829046386296
	 price 0.2581988897471611
	 beauty 0.0
187
	 quality 0.0
	 delivery 0.0
	 price 0.31622776601683794
	 beauty 0.0
188
	 quality 0.0
	 delivery 0.17677669529663687
	 price 0.22360679774997896
	 beauty 0.0
189
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.353553390593273

	 delivery 0.0
	 price 0.0
	 beauty 0.0
373
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.2886751345948129
374
	 quality 0.24253562503633297
	 delivery 0.17149858514250882
	 price 0.10846522890932808
	 beauty 0.12126781251816648
375
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
376
	 quality 0.5
	 delivery 0.0
	 price 0.0
	 beauty 0.0
377
	 quality 0.0
	 delivery 0.35355339059327373
	 price 0.0
	 beauty 0.0
378
	 quality 0.21320071635561041
	 delivery 0.1507556722888818
	 price 0.09534625892455921
	 beauty 0.10660035817780521
379
	 quality 0.3779644730092272
	 delivery 0.0
	 price 0.16903085094570328
	 beauty 0.0
380
	 quality 0.0
	 delivery 0.14433756729740646
	 price 0.09128709291752768
	 beauty 0.0
381
	 quality 0.2886751345948129
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
382
	 quality 0.0
	 delivery 0.0
	 price 0.23904572186687872
	 beauty 0.0
383
	 quality 0.0
	 delivery 0.0
	 price 0.13483997249264842
	 beauty 0.15075567228888181
384
	 quality 0.0
	 de

	 beauty 0.0
565
	 quality 0.2773500981126146
	 delivery 0.0
	 price 0.12403473458920847
	 beauty 0.1386750490563073
566
	 quality 0.25
	 delivery 0.0
	 price 0.0
	 beauty 0.0
567
	 quality 0.2773500981126146
	 delivery 0.0
	 price 0.12403473458920847
	 beauty 0.1386750490563073
568
	 quality 0.0
	 delivery 0.20412414523193148
	 price 0.0
	 beauty 0.0
569
	 quality 0.2773500981126146
	 delivery 0.0
	 price 0.12403473458920847
	 beauty 0.1386750490563073
570
	 quality 0.0
	 delivery 0.0
	 price 0.31622776601683794
	 beauty 0.17677669529663687
571
	 quality 0.30151134457776363
	 delivery 0.0
	 price 0.0
	 beauty 0.15075567228888181
572
	 quality 0.0
	 delivery 0.36514837167011066
	 price 0.0
	 beauty 0.12909944487358055
573
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
574
	 quality 0.0
	 delivery 0.18257418583505533
	 price 0.11547005383792514
	 beauty 0.0
575
	 quality 0.4082482904638631
	 delivery 0.5773502691896258
	 price 0.0
	 beauty 0.0
576
	 quality 0.0
	 delivery 0.31622

	 price 0.0
	 beauty 0.0
735
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
736
	 quality 0.0
	 delivery 0.18257418583505533
	 price 0.11547005383792514
	 beauty 0.2581988897471611
737
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.15811388300841897
738
	 quality 0.0
	 delivery 0.40824829046386296
	 price 0.0
	 beauty 0.14433756729740646
739
	 quality 0.2773500981126146
	 delivery 0.39223227027636803
	 price 0.24806946917841693
	 beauty 0.0
740
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.10540925533894598
	 beauty 0.11785113019775793
741
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
742
	 quality 0.2886751345948129
	 delivery 0.40824829046386296
	 price 0.12909944487358055
	 beauty 0.0
743
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.35355339059327373
744
	 quality 0.0
	 delivery 0.35355339059327373
	 price 0.0
	 beauty 0.125
745
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.5773502691896258
746
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beau

	 quality 0.0
	 delivery 0.4714045207910316
	 price 0.0
	 beauty 0.16666666666666666
922
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
923
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.25
924
	 quality 0.0
	 delivery 0.0
	 price 0.2581988897471611
	 beauty 0.0
925
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.25
926
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
927
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
928
	 quality 0.4082482904638631
	 delivery 0.0
	 price 0.0
	 beauty 0.0
929
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.2886751345948129
930
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.16666666666666666
931
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.35355339059327373
932
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.35355339059327373
933
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
934
	 quality 0.0
	 delivery 0.0
	 price 0.08451542547285164
	 beauty 0.0
935
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 bea

	 delivery 0.0
	 price 0.0
	 beauty 0.0
1105
	 quality 0.0
	 delivery 0.0
	 price 0.16903085094570328
	 beauty 0.0
1106
	 quality 0.30151134457776363
	 delivery 0.42640143271122083
	 price 0.13483997249264842
	 beauty 0.0
1107
	 quality 0.2886751345948129
	 delivery 0.40824829046386296
	 price 0.0
	 beauty 0.0
1108
	 quality 0.2672612419124244
	 delivery 0.1889822365046136
	 price 0.0
	 beauty 0.0
1109
	 quality 0.21320071635561041
	 delivery 0.3015113445777636
	 price 0.09534625892455921
	 beauty 0.21320071635561041
1110
	 quality 0.0
	 delivery 0.5773502691896258
	 price 0.0
	 beauty 0.20412414523193154
1111
	 quality 0.20851441405707477
	 delivery 0.29488391230979427
	 price 0.0
	 beauty 0.10425720702853739
1112
	 quality 0.0
	 delivery 0.0
	 price 0.12403473458920847
	 beauty 0.0
1113
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.20412414523193154
1114
	 quality 0.23570226039551587
	 delivery 0.3333333333333333
	 price 0.0
	 beauty 0.0
1115
	 quality 0.0
	 delivery 0.33333333

	 quality 0.4472135954999579
	 delivery 0.0
	 price 0.19999999999999996
	 beauty 0.0
1204
	 quality 0.25
	 delivery 0.35355339059327373
	 price 0.22360679774997896
	 beauty 0.25
1205
	 quality 0.0
	 delivery 0.0
	 price 0.14142135623730948
	 beauty 0.0
1206
	 quality 0.2581988897471611
	 delivery 0.36514837167011066
	 price 0.0
	 beauty 0.0
1207
	 quality 0.0
	 delivery 0.19611613513818402
	 price 0.12403473458920847
	 beauty 0.1386750490563073
1208
	 quality 0.3333333333333333
	 delivery 0.4714045207910316
	 price 0.14907119849998599
	 beauty 0.0
1209
	 quality 0.0
	 delivery 0.0
	 price 0.24806946917841693
	 beauty 0.0
1210
	 quality 0.2581988897471611
	 delivery 0.36514837167011066
	 price 0.11547005383792514
	 beauty 0.12909944487358055
1211
	 quality 0.24253562503633297
	 delivery 0.34299717028501764
	 price 0.10846522890932808
	 beauty 0.0
1212
	 quality 0.2886751345948129
	 delivery 0.20412414523193148
	 price 0.0
	 beauty 0.0
1213
	 quality 0.25
	 delivery 0.35355339059327373
	

	 beauty 0.0
1390
	 quality 0.0
	 delivery 0.0
	 price 0.16903085094570328
	 beauty 0.1889822365046136
1391
	 quality 0.0
	 delivery 0.0
	 price 0.11547005383792514
	 beauty 0.0
1392
	 quality 0.3779644730092272
	 delivery 0.26726124191242434
	 price 0.0
	 beauty 0.0
1393
	 quality 0.4082482904638631
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1394
	 quality 0.24253562503633297
	 delivery 0.17149858514250882
	 price 0.10846522890932808
	 beauty 0.12126781251816648
1395
	 quality 0.22941573387056174
	 delivery 0.0
	 price 0.1025978352085154
	 beauty 0.11470786693528087
1396
	 quality 0.25
	 delivery 0.35355339059327373
	 price 0.22360679774997896
	 beauty 0.0
1397
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1398
	 quality 0.2672612419124244
	 delivery 0.0
	 price 0.23904572186687872
	 beauty 0.0
1399
	 quality 0.30151134457776363
	 delivery 0.21320071635561041
	 price 0.0
	 beauty 0.0
1400
	 quality 0.0
	 delivery 0.36514837167011066
	 price 0.11547005383792514
	 beauty 0.12909944

	 delivery 0.0
	 price 0.0
	 beauty 0.22360679774997896
1521
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.2886751345948129
1522
	 quality 0.2773500981126146
	 delivery 0.39223227027636803
	 price 0.24806946917841693
	 beauty 0.0
1523
	 quality 0.23570226039551587
	 delivery 0.3333333333333333
	 price 0.10540925533894598
	 beauty 0.11785113019775793
1524
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.20412414523193154
1525
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1526
	 quality 0.0
	 delivery 0.17677669529663687
	 price 0.0
	 beauty 0.125
1527
	 quality 0.0
	 delivery 0.18257418583505533
	 price 0.11547005383792514
	 beauty 0.0
1528
	 quality 0.0
	 delivery 0.40824829046386296
	 price 0.0
	 beauty 0.0
1529
	 quality 0.25
	 delivery 0.35355339059327373
	 price 0.0
	 beauty 0.0
1530
	 quality 0.2773500981126146
	 delivery 0.19611613513818402
	 price 0.0
	 beauty 0.0
1531
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1532
	 quality 0.23570226039551587
	 del

	 delivery 0.0
	 price 0.13483997249264842
	 beauty 0.15075567228888181
1680
	 quality 0.2886751345948129
	 delivery 0.40824829046386296
	 price 0.0
	 beauty 0.0
1681
	 quality 0.0
	 delivery 0.32444284226152503
	 price 0.1025978352085154
	 beauty 0.22941573387056174
1682
	 quality 0.0
	 delivery 0.39223227027636803
	 price 0.24806946917841693
	 beauty 0.1386750490563073
1683
	 quality 0.5
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1684
	 quality 0.0
	 delivery 0.4714045207910316
	 price 0.0
	 beauty 0.0
1685
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1686
	 quality 0.25
	 delivery 0.35355339059327373
	 price 0.0
	 beauty 0.125
1687
	 quality 0.0
	 delivery 0.35355339059327373
	 price 0.11180339887498948
	 beauty 0.0
1688
	 quality 0.0
	 delivery 0.0
	 price 0.23094010767585027
	 beauty 0.12909944487358055
1689
	 quality 0.4472135954999579
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1690
	 quality 0.0
	 delivery 0.0
	 price 0.12403473458920847
	 beauty 0.0
1691
	 quality 0.28867513

	 beauty 0.0
1839
	 quality 0.2182178902359924
	 delivery 0.3086066999241838
	 price 0.09759000729485333
	 beauty 0.0
1840
	 quality 0.0
	 delivery 0.39223227027636803
	 price 0.12403473458920847
	 beauty 0.0
1841
	 quality 0.2672612419124244
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1842
	 quality 0.0
	 delivery 0.35355339059327373
	 price 0.0
	 beauty 0.0
1843
	 quality 0.23570226039551587
	 delivery 0.16666666666666666
	 price 0.10540925533894598
	 beauty 0.0
1844
	 quality 0.2773500981126146
	 delivery 0.39223227027636803
	 price 0.0
	 beauty 0.0
1845
	 quality 0.0
	 delivery 0.3333333333333333
	 price 0.21081851067789195
	 beauty 0.11785113019775793
1846
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1847
	 quality 0.0
	 delivery 0.0
	 price 0.33806170189140655
	 beauty 0.0
1848
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1849
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.35355339059327373
1850
	 quality 0.0
	 delivery 0.0
	 price 0.13483997249264842
	 beauty 

	 price 0.0
	 beauty 0.0
2010
	 quality 0.0
	 delivery 0.0
	 price 0.22360679774997896
	 beauty 0.0
2011
	 quality 0.0
	 delivery 0.0
	 price 0.18257418583505536
	 beauty 0.0
2012
	 quality 0.0
	 delivery 0.0
	 price 0.22360679774997896
	 beauty 0.0
2013
	 quality 0.31622776601683794
	 delivery 0.0
	 price 0.42426406871192845
	 beauty 0.0
2014
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2015
	 quality 0.0
	 delivery 0.0
	 price 0.11952286093343936
	 beauty 0.0
2016
	 quality 0.2672612419124244
	 delivery 0.3779644730092272
	 price 0.11952286093343936
	 beauty 0.0
2017
	 quality 0.0
	 delivery 0.0
	 price 0.31622776601683794
	 beauty 0.0
2018
	 quality 0.2581988897471611
	 delivery 0.18257418583505533
	 price 0.0
	 beauty 0.0
2019
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2020
	 quality 0.0
	 delivery 0.39223227027636803
	 price 0.12403473458920847
	 beauty 0.1386750490563073
2021
	 quality 0.0
	 delivery 0.0
	 price 0.29814239699997197
	 beauty 0.0
2022
	 quality 

	 price 0.11547005383792514
	 beauty 0.12909944487358055
2179
	 quality 0.5
	 delivery 0.0
	 price 0.22360679774997896
	 beauty 0.0
2180
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2181
	 quality 0.2773500981126146
	 delivery 0.39223227027636803
	 price 0.12403473458920847
	 beauty 0.0
2182
	 quality 0.2581988897471611
	 delivery 0.36514837167011066
	 price 0.0
	 beauty 0.0
2183
	 quality 0.24253562503633297
	 delivery 0.34299717028501764
	 price 0.21693045781865616
	 beauty 0.0
2184
	 quality 0.0
	 delivery 0.39223227027636803
	 price 0.12403473458920847
	 beauty 0.0
2185
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2186
	 quality 0.0
	 delivery 0.39223227027636803
	 price 0.12403473458920847
	 beauty 0.0
2187
	 quality 0.23570226039551587
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2188
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2189
	 quality 0.0
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
2190
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0

	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.1889822365046136
2361
	 quality 0.25
	 delivery 0.17677669529663687
	 price 0.11180339887498948
	 beauty 0.0
2362
	 quality 0.2182178902359924
	 delivery 0.1543033499620919
	 price 0.09759000729485333
	 beauty 0.0
2363
	 quality 0.2672612419124244
	 delivery 0.3779644730092272
	 price 0.0
	 beauty 0.0
2364
	 quality 0.3779644730092272
	 delivery 0.26726124191242434
	 price 0.0
	 beauty 0.0
2365
	 quality 0.0
	 delivery 0.7071067811865475
	 price 0.0
	 beauty 0.0
2366
	 quality 0.22941573387056174
	 delivery 0.32444284226152503
	 price 0.1025978352085154
	 beauty 0.11470786693528087
2367
	 quality 0.0
	 delivery 0.19611613513818402
	 price 0.12403473458920847
	 beauty 0.0
2368
	 quality 0.2581988897471611
	 delivery 0.36514837167011066
	 price 0.0
	 beauty 0.0
2369
	 quality 0.0
	 delivery 0.4999999999999999
	 price 0.31622776601683794
	 beauty 0.0
2370
	 quality 0.24253562503633297
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2371
	 qualit

	 price 0.18257418583505536
	 beauty 0.0
2456
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.2886751345948129
2457
	 quality 0.4082482904638631
	 delivery 0.0
	 price 0.18257418583505536
	 beauty 0.20412414523193154
2458
	 quality 0.0
	 delivery 0.19611613513818402
	 price 0.24806946917841693
	 beauty 0.1386750490563073
2459
	 quality 0.30151134457776363
	 delivery 0.42640143271122083
	 price 0.26967994498529685
	 beauty 0.0
2460
	 quality 0.0
	 delivery 0.40824829046386296
	 price 0.12909944487358055
	 beauty 0.14433756729740646
2461
	 quality 0.0
	 delivery 0.42640143271122083
	 price 0.26967994498529685
	 beauty 0.0
2462
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2463
	 quality 0.35355339059327373
	 delivery 0.0
	 price 0.31622776601683794
	 beauty 0.0
2464
	 quality 0.35355339059327373
	 delivery 0.4999999999999999
	 price 0.0
	 beauty 0.0
2465
	 quality 0.2581988897471611
	 delivery 0.36514837167011066
	 price 0.23094010767585027
	 beauty 0.0
2466
	 quality 0.0
	 d

In [9]:
i=8
reviews.comments.iloc[i]

'Very nice, but a little crumbling.'

# Using W2V

In [10]:
from nltk.corpus import wordnet
import nltk
synonyms = []
antonyms = []

for syn in wordnet.synsets("price"):
    for l in syn.lemmas():
        synonyms.append(l.name())
        if l.antonyms():
             antonyms.append(l.antonyms()[0].name())

print(set(synonyms))
print(set(antonyms))

{'Leontyne_Price', 'monetary_value', 'terms', 'Price', 'Mary_Leontyne_Price', 'damage', 'price', 'toll', 'cost'}
set()


In [11]:
nltk.edit_distance("quality", "price")

6

In [12]:
from gensim.test.utils import common_texts, get_tmpfile
from gensim.models import Word2Vec
model = Word2Vec(list(reviews.tokens), size=100, window=5, min_count=1, workers=4)
#model.train([vocab], total_examples=1, epochs=10)
model.wv.similarity('price','cheap')

0.9806101

In [45]:
model.wv.most_similar('price',topn=15)

[('products', 0.991207480430603),
 ('pack', 0.9892581701278687),
 ('the', 0.9881712198257446),
 ('transportation', 0.9869532585144043),
 ('shipping', 0.9866578578948975),
 ('company', 0.9848147630691528),
 ('sensitive', 0.9815781116485596),
 ('money', 0.9812229871749878),
 ('fast', 0.9807530641555786),
 ('cheap', 0.9806100726127625),
 ('delivery', 0.9805467128753662),
 ('providing', 0.9786281585693359),
 ('shop', 0.9760223627090454),
 ('reasonable', 0.97339928150177),
 ('speed', 0.9709498882293701)]

In [46]:
model.wv.most_similar('beautiful',topn=15)

[('cheap', 0.9843772649765015),
 ('shop', 0.9840930700302124),
 ('sensitive', 0.9837311506271362),
 ('impressive', 0.983634352684021),
 ('transport', 0.983027994632721),
 ('fair', 0.9825484752655029),
 ('services', 0.9820783138275146),
 ('management', 0.9814094305038452),
 ('affordable', 0.9810247421264648),
 ('suitable', 0.9805623888969421),
 ('providing', 0.9801583886146545),
 ('pretty', 0.9790631532669067),
 ('stores', 0.9783762693405151),
 ('ðÿœˆ', 0.9782328605651855),
 ('pack', 0.9778835773468018)]

In [27]:
model.wv.most_similar('quality',topn=15)

[('product', 0.998526930809021),
 ('good', 0.994843065738678),
 ('speed', 0.9946261048316956),
 ('value', 0.9930998682975769),
 ('delivery', 0.9917939901351929),
 ('fast', 0.9910227656364441),
 ('service', 0.9910087585449219),
 ('provided', 0.988125741481781),
 ('very', 0.9878133535385132),
 ('the', 0.9804855585098267),
 ('money', 0.9763387441635132),
 ('transportation', 0.9621716737747192),
 ('company', 0.9614475965499878),
 ('price', 0.9588143229484558),
 ('shipping', 0.9446331262588501)]

In [28]:
model.wv.most_similar('delivery',topn=15)

[('fast', 0.9992154836654663),
 ('speed', 0.9974116086959839),
 ('service', 0.9972316026687622),
 ('product', 0.9960111379623413),
 ('the', 0.9943673610687256),
 ('provided', 0.992739737033844),
 ('quality', 0.9917939901351929),
 ('money', 0.9901745319366455),
 ('company', 0.9869616627693176),
 ('transportation', 0.9867085218429565),
 ('good', 0.9849915504455566),
 ('value', 0.9828110933303833),
 ('very', 0.9817630052566528),
 ('price', 0.9805468320846558),
 ('shipping', 0.9764149785041809)]

In [17]:
model.wv.similarity('beautiful','color')

0.9298147

In [19]:
def find_related(text,maxn):
    return [val[0] for val in model.wv.most_similar(text,topn=maxn)]

delivery_related = find_related('delivery',15)+['delivery']
price_related = find_related('price',15)+['price']
beauty_related = find_related('beautiful',15)+['beautiful']
quality_related = find_related('quality',15)+['quality']


In [20]:
def syno_anto_finder(text):
    synonyms = []
    antonyms = []

    for syn in wordnet.synsets(text):
        for l in syn.lemmas():
            synonyms.append(l.name())
            if l.antonyms():
                 antonyms.append(l.antonyms()[0].name())

    return list(synonyms),list(antonyms)

a = syno_anto_finder('quality')

In [21]:
quality = [1 if token in quality_related else 0 for token in vocab]
delivery = [1 if token in delivery_related else 0 for token in vocab]
price = [1 if token in price_related else 0 for token in vocab]
beauty = [1 if token in beauty_related else 0 for token in vocab]

In [22]:
topics = {'quality': quality, 'delivery': delivery, 'price': price, 'beauty':beauty}

In [23]:
#reviews['tokens'] = reviews.comments.apply(clean_doc)
def vectorize(l):
    return [1 if token in l else 0  for token in vocab]

reviews['vectors_new'] = reviews.tokens.apply(vectorize)
reviews.head()

Unnamed: 0,rating,comments,sentiment,tokens,vectors,vectors_new
0,4,Give 4 stars because order at the price 37 but...,positive,"[give, stars, order, price, today, baht, sorry]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,5,Ordered at a discount of 10 baht per piece. It...,positive,"[ordered, discount, baht, piece, it, worth, no...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,5,"Small, cute, compact, good But the sash looks ...",positive,"[small, cute, compact, good, but, sash, looks,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,1,The size is not as large as it is down. The st...,negative,"[the, size, large, the, stitching, wrong, the,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,1,The product is compared to the price. Okay. Se...,negative,"[the, product, compared, price, okay, send, sl...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [36]:
name

'beauty'

In [34]:
cos_sim(s_vec, topic_vec)

0.0

In [37]:
reviews['quality']  = 0.0
reviews['delivery'] = 0.0
reviews['price']    = 0.0
reviews['beauty']   = 0.0

In [38]:
cos_sim = lambda x, y: dot(x,y)/(norm(x)*norm(y))

for s_num, s_vec in enumerate(reviews.vectors_new):
    print(s_num)
    for name, topic_vec in topics.items():
        similarity = cos_sim(s_vec, topic_vec)
        reviews.loc[s_num,name] = similarity
        print('\t', name, similarity)

0
	 quality 0.0944911182523068
	 delivery 0.0944911182523068
	 price 0.0944911182523068
	 beauty 0.0
1
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.06063390625908324
2
	 quality 0.06454972243679027
	 delivery 0.06454972243679027
	 price 0.0
	 beauty 0.0
3
	 quality 0.07216878364870323
	 delivery 0.07216878364870323
	 price 0.07216878364870323
	 beauty 0.0
4
	 quality 0.1889822365046136
	 delivery 0.1889822365046136
	 price 0.1889822365046136
	 beauty 0.0472455591261534
5
	 quality 0.16666666666666666
	 delivery 0.16666666666666666
	 price 0.08333333333333333
	 beauty 0.16666666666666666
6
	 quality 0.06933752452815364
	 delivery 0.06933752452815364
	 price 0.06933752452815364
	 beauty 0.0
7
	 quality 0.15
	 delivery 0.15
	 price 0.15
	 beauty 0.0
8
	 quality 0.125
	 delivery 0.125
	 price 0.0
	 beauty 0.0
9
	 quality 0.0
	 delivery 0.0
	 price 0.125
	 beauty 0.25
10
	 quality 0.2004459314343183
	 delivery 0.2004459314343183
	 price 0.0668153104781061
	 beauty 0.1336306209562122


	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
117
	 quality 0.2581988897471611
	 delivery 0.2581988897471611
	 price 0.12909944487358055
	 beauty 0.0
118
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
119
	 quality 0.7938566201357354
	 delivery 0.7938566201357354
	 price 0.36084391824351614
	 beauty 0.0
120
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
121
	 quality 0.08333333333333333
	 delivery 0.08333333333333333
	 price 0.08333333333333333
	 beauty 0.0
122
	 quality 0.10206207261596577
	 delivery 0.10206207261596577
	 price 0.0
	 beauty 0.10206207261596577
123
	 quality 0.31622776601683794
	 delivery 0.31622776601683794
	 price 0.31622776601683794
	 beauty 0.07905694150420949
124
	 quality 0.25
	 delivery 0.25
	 price 0.0
	 beauty 0.125
125
	 quality 0.20412414523193154
	 delivery 0.20412414523193154
	 price 0.10206207261596577
	 beauty 0.10206207261596577
126
	 quality 0.44194173824159216
	 delivery 0.44194173824159216
	 price 0.35355339059327373
	 beauty 0.

	 price 0.1336306209562122
	 beauty 0.0668153104781061
240
	 quality 0.6030226891555273
	 delivery 0.6030226891555273
	 price 0.3768891807222045
	 beauty 0.07537783614444091
241
	 quality 0.19364916731037085
	 delivery 0.19364916731037085
	 price 0.06454972243679027
	 beauty 0.0
242
	 quality 0.35355339059327373
	 delivery 0.35355339059327373
	 price 0.2651650429449553
	 beauty 0.08838834764831843
243
	 quality 0.3779644730092272
	 delivery 0.3779644730092272
	 price 0.0944911182523068
	 beauty 0.1889822365046136
244
	 quality 0.375
	 delivery 0.375
	 price 0.3125
	 beauty 0.0625
245
	 quality 0.18190171877724973
	 delivery 0.18190171877724973
	 price 0.06063390625908324
	 beauty 0.0
246
	 quality 0.17677669529663687
	 delivery 0.17677669529663687
	 price 0.08838834764831843
	 beauty 0.0
247
	 quality 0.05892556509887897
	 delivery 0.05892556509887897
	 price 0.0
	 beauty 0.0
248
	 quality 0.08333333333333333
	 delivery 0.08333333333333333
	 price 0.0
	 beauty 0.08333333333333333
249
	

	 beauty 0.0
364
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.17677669529663687
365
	 quality 0.12909944487358055
	 delivery 0.12909944487358055
	 price 0.0
	 beauty 0.06454972243679027
366
	 quality 0.8320502943378437
	 delivery 0.8320502943378437
	 price 0.41602514716892186
	 beauty 0.06933752452815364
367
	 quality 0.3125
	 delivery 0.3125
	 price 0.125
	 beauty 0.0
368
	 quality 0.3779644730092272
	 delivery 0.3779644730092272
	 price 0.0944911182523068
	 beauty 0.0
369
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
370
	 quality 0.7115124735378853
	 delivery 0.7115124735378853
	 price 0.31622776601683794
	 beauty 0.07905694150420949
371
	 quality 0.0625
	 delivery 0.0625
	 price 0.125
	 beauty 0.125
372
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
373
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.14433756729740646
374
	 quality 0.4244373438135827
	 delivery 0.4244373438135827
	 price 0.18190171877724973
	 beauty 0.06063390625908324
375
	 quality 0.0
	 

	 beauty 0.11180339887498948
486
	 quality 0.15811388300841897
	 delivery 0.15811388300841897
	 price 0.07905694150420949
	 beauty 0.07905694150420949
487
	 quality 0.1386750490563073
	 delivery 0.1386750490563073
	 price 0.1386750490563073
	 beauty 0.06933752452815364
488
	 quality 0.1720618004029213
	 delivery 0.1720618004029213
	 price 0.11470786693528087
	 beauty 0.057353933467640436
489
	 quality 0.1336306209562122
	 delivery 0.1336306209562122
	 price 0.2004459314343183
	 beauty 0.0668153104781061
490
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
491
	 quality 0.0
	 delivery 0.0
	 price 0.10206207261596577
	 beauty 0.10206207261596577
492
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.25
493
	 quality 0.5163977794943222
	 delivery 0.5163977794943222
	 price 0.2581988897471611
	 beauty 0.19364916731037085
494
	 quality 0.25
	 delivery 0.25
	 price 0.25
	 beauty 0.125
495
	 quality 0.2886751345948129
	 delivery 0.2886751345948129
	 price 0.21650635094610968
	 beauty 0.

	 delivery 0.11180339887498948
	 price 0.33541019662496846
	 beauty 0.11180339887498948
605
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
606
	 quality 0.25
	 delivery 0.25
	 price 0.25
	 beauty 0.0
607
	 quality 0.23570226039551587
	 delivery 0.23570226039551587
	 price 0.1767766952966369
	 beauty 0.11785113019775793
608
	 quality 0.3872983346207417
	 delivery 0.3872983346207417
	 price 0.2581988897471611
	 beauty 0.06454972243679027
609
	 quality 0.14433756729740646
	 delivery 0.14433756729740646
	 price 0.0
	 beauty 0.14433756729740646
610
	 quality 0.25
	 delivery 0.25
	 price 0.125
	 beauty 0.0
611
	 quality 0.0668153104781061
	 delivery 0.0668153104781061
	 price 0.0
	 beauty 0.0668153104781061
612
	 quality 0.22360679774997896
	 delivery 0.22360679774997896
	 price 0.22360679774997896
	 beauty 0.11180339887498948
613
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
614
	 quality 0.0
	 delivery 0.0
	 price 0.0944911182523068
	 beauty 0.0944911182523068
615
	 quality 

	 price 0.0
	 beauty 0.0
724
	 quality 0.36380343755449945
	 delivery 0.36380343755449945
	 price 0.06063390625908324
	 beauty 0.06063390625908324
725
	 quality 0.5051814855409226
	 delivery 0.5051814855409226
	 price 0.21650635094610968
	 beauty 0.07216878364870323
726
	 quality 0.8391463916782737
	 delivery 0.8391463916782737
	 price 0.5163977794943222
	 beauty 0.12909944487358055
727
	 quality 0.11785113019775793
	 delivery 0.11785113019775793
	 price 0.05892556509887897
	 beauty 0.05892556509887897
728
	 quality 0.35355339059327373
	 delivery 0.35355339059327373
	 price 0.35355339059327373
	 beauty 0.0
729
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
730
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
731
	 quality 0.14433756729740646
	 delivery 0.14433756729740646
	 price 0.0
	 beauty 0.0
732
	 quality 0.08838834764831843
	 delivery 0.08838834764831843
	 price 0.08838834764831843
	 beauty 0.0
733
	 quality 0.43301270189221935
	 delivery 0.43301270189221935
	 price 0

	 price 0.17677669529663687
	 beauty 0.17677669529663687
848
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
849
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
850
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
851
	 quality 0.23717082451262844
	 delivery 0.23717082451262844
	 price 0.0
	 beauty 0.0
852
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
853
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
854
	 quality 0.10206207261596577
	 delivery 0.10206207261596577
	 price 0.20412414523193154
	 beauty 0.10206207261596577
855
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
856
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
857
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
858
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
859
	 quality 0.17677669529663687
	 delivery 0.17677669529663687
	 price 0.0
	 beauty 0.0
860
	 quality 0.17677669529663687
	 delivery 0.17677669529663687
	 price 0.17677669529663687
	 beauty 0.176776695296636

	 delivery 0.0
	 price 0.0
	 beauty 0.14433756729740646
971
	 quality 0.14433756729740646
	 delivery 0.14433756729740646
	 price 0.2886751345948129
	 beauty 0.0
972
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
973
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
974
	 quality 0.10206207261596577
	 delivery 0.10206207261596577
	 price 0.10206207261596577
	 beauty 0.0
975
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
976
	 quality 0.08333333333333333
	 delivery 0.08333333333333333
	 price 0.08333333333333333
	 beauty 0.0
977
	 quality 0.14433756729740646
	 delivery 0.14433756729740646
	 price 0.0
	 beauty 0.0
978
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
979
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
980
	 quality 0.1889822365046136
	 delivery 0.1889822365046136
	 price 0.0944911182523068
	 beauty 0.0
981
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
982
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
983
	 quality 0.0
	 delivery

	 price 0.06933752452815364
	 beauty 0.1386750490563073
1092
	 quality 0.31622776601683794
	 delivery 0.31622776601683794
	 price 0.07905694150420949
	 beauty 0.0
1093
	 quality 0.44194173824159216
	 delivery 0.44194173824159216
	 price 0.08838834764831843
	 beauty 0.0
1094
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1095
	 quality 0.2175970699446223
	 delivery 0.2175970699446223
	 price 0.13055824196677338
	 beauty 0.0
1096
	 quality 0.20412414523193154
	 delivery 0.20412414523193154
	 price 0.0
	 beauty 0.10206207261596577
1097
	 quality 0.10206207261596577
	 delivery 0.10206207261596577
	 price 0.051031036307982884
	 beauty 0.051031036307982884
1098
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.04490132550669373
1099
	 quality 0.25
	 delivery 0.25
	 price 0.0
	 beauty 0.0
1100
	 quality 0.5345224838248488
	 delivery 0.5345224838248488
	 price 0.2004459314343183
	 beauty 0.0668153104781061
1101
	 quality 0.14433756729740646
	 delivery 0.14433756729740646
	 price 0.0
	

	 beauty 0.125
1214
	 quality 0.8320502943378437
	 delivery 0.8320502943378437
	 price 0.41602514716892186
	 beauty 0.06933752452815364
1215
	 quality 0.25
	 delivery 0.25
	 price 0.125
	 beauty 0.0625
1216
	 quality 0.2672612419124244
	 delivery 0.2672612419124244
	 price 0.2004459314343183
	 beauty 0.1336306209562122
1217
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1218
	 quality 0.7216878364870323
	 delivery 0.7216878364870323
	 price 0.36084391824351614
	 beauty 0.14433756729740646
1219
	 quality 0.2867696673382022
	 delivery 0.2867696673382022
	 price 0.1720618004029213
	 beauty 0.0
1220
	 quality 0.5
	 delivery 0.5
	 price 0.125
	 beauty 0.0
1221
	 quality 0.45184805705753195
	 delivery 0.45184805705753195
	 price 0.3872983346207417
	 beauty 0.19364916731037085
1222
	 quality 0.3333333333333333
	 delivery 0.3333333333333333
	 price 0.25
	 beauty 0.08333333333333333
1223
	 quality 0.44194173824159216
	 delivery 0.44194173824159216
	 price 0.35355339059327373
	 beauty 0.0

	 delivery 0.36084391824351614
	 price 0.21650635094610968
	 beauty 0.0
1329
	 quality 0.16666666666666666
	 delivery 0.16666666666666666
	 price 0.16666666666666666
	 beauty 0.16666666666666666
1330
	 quality 0.1767766952966369
	 delivery 0.1767766952966369
	 price 0.1767766952966369
	 beauty 0.1767766952966369
1331
	 quality 0.3952847075210474
	 delivery 0.3952847075210474
	 price 0.15811388300841897
	 beauty 0.0
1332
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1333
	 quality 0.22360679774997896
	 delivery 0.22360679774997896
	 price 0.0
	 beauty 0.0
1334
	 quality 0.3333333333333333
	 delivery 0.3333333333333333
	 price 0.25
	 beauty 0.0
1335
	 quality 0.6030226891555273
	 delivery 0.6030226891555273
	 price 0.45226701686664544
	 beauty 0.07537783614444091
1336
	 quality 0.33407655239053047
	 delivery 0.33407655239053047
	 price 0.2672612419124244
	 beauty 0.2004459314343183
1337
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.25
1338
	 quality 0.5533985905294664
	 del

	 delivery 0.4743416490252569
	 price 0.23717082451262844
	 beauty 0.31622776601683794
1449
	 quality 0.46770717334674267
	 delivery 0.46770717334674267
	 price 0.2004459314343183
	 beauty 0.0668153104781061
1450
	 quality 0.236227795630767
	 delivery 0.236227795630767
	 price 0.1417366773784602
	 beauty 0.0944911182523068
1451
	 quality 0.1386750490563073
	 delivery 0.1386750490563073
	 price 0.1386750490563073
	 beauty 0.0
1452
	 quality 0.472455591261534
	 delivery 0.472455591261534
	 price 0.0944911182523068
	 beauty 0.1889822365046136
1453
	 quality 0.33407655239053047
	 delivery 0.33407655239053047
	 price 0.2004459314343183
	 beauty 0.0668153104781061
1454
	 quality 0.5
	 delivery 0.5
	 price 0.25
	 beauty 0.08333333333333333
1455
	 quality 0.45
	 delivery 0.45
	 price 0.25
	 beauty 0.15
1456
	 quality 0.3333333333333333
	 delivery 0.3333333333333333
	 price 0.16666666666666666
	 beauty 0.16666666666666666
1457
	 quality 0.2651650429449553
	 delivery 0.2651650429449553
	 price 0

	 quality 0.1875
	 delivery 0.1875
	 price 0.1875
	 beauty 0.0625
1567
	 quality 0.3333333333333333
	 delivery 0.3333333333333333
	 price 0.25
	 beauty 0.0
1568
	 quality 0.36380343755449945
	 delivery 0.36380343755449945
	 price 0.24253562503633297
	 beauty 0.06063390625908324
1569
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1570
	 quality 0.07216878364870323
	 delivery 0.07216878364870323
	 price 0.07216878364870323
	 beauty 0.07216878364870323
1571
	 quality 0.0
	 delivery 0.0
	 price 0.07537783614444091
	 beauty 0.15075567228888181
1572
	 quality 0.2886751345948129
	 delivery 0.2886751345948129
	 price 0.14433756729740646
	 beauty 0.14433756729740646
1573
	 quality 0.3768891807222045
	 delivery 0.3768891807222045
	 price 0.15075567228888181
	 beauty 0.0
1574
	 quality 0.44194173824159216
	 delivery 0.44194173824159216
	 price 0.17677669529663687
	 beauty 0.0
1575
	 quality 0.17677669529663687
	 delivery 0.17677669529663687
	 price 0.35355339059327373
	 beauty 0.0
1576
	 q

	 delivery 0.1767766952966369
	 price 0.05892556509887897
	 beauty 0.0
1668
	 quality 0.22360679774997896
	 delivery 0.22360679774997896
	 price 0.11180339887498948
	 beauty 0.11180339887498948
1669
	 quality 0.0625
	 delivery 0.0625
	 price 0.0
	 beauty 0.0625
1670
	 quality 0.40147753427348304
	 delivery 0.40147753427348304
	 price 0.1720618004029213
	 beauty 0.0
1671
	 quality 0.2886751345948129
	 delivery 0.2886751345948129
	 price 0.0
	 beauty 0.0
1672
	 quality 0.5163977794943222
	 delivery 0.5163977794943222
	 price 0.3227486121839514
	 beauty 0.3227486121839514
1673
	 quality 0.15
	 delivery 0.15
	 price 0.05
	 beauty 0.05
1674
	 quality 0.6708203932499369
	 delivery 0.6708203932499369
	 price 0.33541019662496846
	 beauty 0.22360679774997896
1675
	 quality 0.5547001962252291
	 delivery 0.5547001962252291
	 price 0.20801257358446093
	 beauty 0.0
1676
	 quality 0.057353933467640436
	 delivery 0.057353933467640436
	 price 0.0
	 beauty 0.0
1677
	 quality 0.22360679774997896
	 deliv

	 beauty 0.0
1772
	 quality 0.14433756729740646
	 delivery 0.14433756729740646
	 price 0.0
	 beauty 0.0
1773
	 quality 0.2886751345948129
	 delivery 0.2886751345948129
	 price 0.0
	 beauty 0.14433756729740646
1774
	 quality 0.33541019662496846
	 delivery 0.33541019662496846
	 price 0.22360679774997896
	 beauty 0.0
1775
	 quality 0.22360679774997896
	 delivery 0.22360679774997896
	 price 0.0
	 beauty 0.11180339887498948
1776
	 quality 0.5625
	 delivery 0.5625
	 price 0.375
	 beauty 0.0625
1777
	 quality 0.6933752452815365
	 delivery 0.6933752452815365
	 price 0.20801257358446093
	 beauty 0.0
1778
	 quality 0.7071067811865475
	 delivery 0.7071067811865475
	 price 0.2651650429449553
	 beauty 0.0
1779
	 quality 0.06454972243679027
	 delivery 0.06454972243679027
	 price 0.06454972243679027
	 beauty 0.12909944487358055
1780
	 quality 0.2886751345948129
	 delivery 0.2886751345948129
	 price 0.14433756729740646
	 beauty 0.0
1781
	 quality 0.6123724356957946
	 delivery 0.6123724356957946
	 pric

	 beauty 0.0
1878
	 quality 0.5276448530110863
	 delivery 0.5276448530110863
	 price 0.3768891807222045
	 beauty 0.15075567228888181
1879
	 quality 0.08333333333333333
	 delivery 0.08333333333333333
	 price 0.0
	 beauty 0.08333333333333333
1880
	 quality 0.11470786693528087
	 delivery 0.11470786693528087
	 price 0.1720618004029213
	 beauty 0.1720618004029213
1881
	 quality 0.2004459314343183
	 delivery 0.2004459314343183
	 price 0.1336306209562122
	 beauty 0.0
1882
	 quality 0.5103103630798288
	 delivery 0.5103103630798288
	 price 0.3061862178478973
	 beauty 0.10206207261596577
1883
	 quality 0.8320502943378437
	 delivery 0.8320502943378437
	 price 0.41602514716892186
	 beauty 0.06933752452815364
1884
	 quality 0.43301270189221935
	 delivery 0.43301270189221935
	 price 0.0
	 beauty 0.0
1885
	 quality 0.20412414523193154
	 delivery 0.20412414523193154
	 price 0.10206207261596577
	 beauty 0.0
1886
	 quality 0.17677669529663687
	 delivery 0.17677669529663687
	 price 0.0
	 beauty 0.1767766

	 quality 0.48536267169707553
	 delivery 0.48536267169707553
	 price 0.1386750490563073
	 beauty 0.2773500981126146
2001
	 quality 0.30151134457776363
	 delivery 0.30151134457776363
	 price 0.15075567228888181
	 beauty 0.0
2002
	 quality 0.49099025303098287
	 delivery 0.49099025303098287
	 price 0.3818813079129867
	 beauty 0.1636634176769943
2003
	 quality 0.2834733547569204
	 delivery 0.2834733547569204
	 price 0.0944911182523068
	 beauty 0.0
2004
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2005
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2006
	 quality 0.11470786693528087
	 delivery 0.11470786693528087
	 price 0.057353933467640436
	 beauty 0.0
2007
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2008
	 quality 0.4472135954999579
	 delivery 0.4472135954999579
	 price 0.22360679774997896
	 beauty 0.0
2009
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2010
	 quality 0.25
	 delivery 0.25
	 price 0.25
	 beauty 0.0
2011
	 quality 0.20412414523193154
	 deli

	 quality 0.16666666666666666
	 delivery 0.16666666666666666
	 price 0.25
	 beauty 0.08333333333333333
2124
	 quality 0.22097086912079608
	 delivery 0.22097086912079608
	 price 0.08838834764831843
	 beauty 0.044194173824159216
2125
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2126
	 quality 0.15075567228888181
	 delivery 0.15075567228888181
	 price 0.22613350843332272
	 beauty 0.07537783614444091
2127
	 quality 0.2651650429449553
	 delivery 0.2651650429449553
	 price 0.2651650429449553
	 beauty 0.0
2128
	 quality 0.4364357804719848
	 delivery 0.4364357804719848
	 price 0.2182178902359924
	 beauty 0.0545544725589981
2129
	 quality 0.1563858105428061
	 delivery 0.1563858105428061
	 price 0.20851441405707477
	 beauty 0.05212860351426869
2130
	 quality 0.4472135954999579
	 delivery 0.4472135954999579
	 price 0.33541019662496846
	 beauty 0.0
2131
	 quality 0.5276448530110863
	 delivery 0.5276448530110863
	 price 0.30151134457776363
	 beauty 0.0
2132
	 quality 0.0
	 delivery 0.0
	 p

	 price 0.2672612419124244
	 beauty 0.0668153104781061
2246
	 quality 0.125
	 delivery 0.125
	 price 0.0
	 beauty 0.0
2247
	 quality 0.14433756729740646
	 delivery 0.14433756729740646
	 price 0.0
	 beauty 0.0
2248
	 quality 0.0668153104781061
	 delivery 0.0668153104781061
	 price 0.0
	 beauty 0.0
2249
	 quality 0.3061862178478973
	 delivery 0.3061862178478973
	 price 0.20412414523193154
	 beauty 0.0
2250
	 quality 0.08333333333333333
	 delivery 0.08333333333333333
	 price 0.08333333333333333
	 beauty 0.0
2251
	 quality 0.5
	 delivery 0.5
	 price 0.3333333333333333
	 beauty 0.0
2252
	 quality 0.5669467095138409
	 delivery 0.5669467095138409
	 price 0.3779644730092272
	 beauty 0.0
2253
	 quality 0.1386750490563073
	 delivery 0.1386750490563073
	 price 0.1386750490563073
	 beauty 0.1386750490563073
2254
	 quality 0.17677669529663687
	 delivery 0.17677669529663687
	 price 0.35355339059327373
	 beauty 0.0
2255
	 quality 0.17677669529663687
	 delivery 0.17677669529663687
	 price 0.0
	 beauty

	 price 0.2867696673382022
	 beauty 0.057353933467640436
2367
	 quality 0.20801257358446093
	 delivery 0.20801257358446093
	 price 0.1386750490563073
	 beauty 0.0
2368
	 quality 0.3872983346207417
	 delivery 0.3872983346207417
	 price 0.19364916731037085
	 beauty 0.06454972243679027
2369
	 quality 0.44194173824159216
	 delivery 0.44194173824159216
	 price 0.35355339059327373
	 beauty 0.08838834764831843
2370
	 quality 0.24253562503633297
	 delivery 0.24253562503633297
	 price 0.12126781251816648
	 beauty 0.06063390625908324
2371
	 quality 0.5345224838248488
	 delivery 0.5345224838248488
	 price 0.33407655239053047
	 beauty 0.1336306209562122
2372
	 quality 0.47140452079103173
	 delivery 0.47140452079103173
	 price 0.1767766952966369
	 beauty 0.05892556509887897
2373
	 quality 0.36380343755449945
	 delivery 0.36380343755449945
	 price 0.24253562503633297
	 beauty 0.0
2374
	 quality 0.10206207261596577
	 delivery 0.10206207261596577
	 price 0.0
	 beauty 0.0
2375
	 quality 0.3227486121839

	 delivery 0.0
	 price 0.0
	 beauty 0.0
2490
	 quality 0.6123724356957946
	 delivery 0.6123724356957946
	 price 0.20412414523193154
	 beauty 0.0
2491
	 quality 0.19611613513818404
	 delivery 0.19611613513818404
	 price 0.14708710135363803
	 beauty 0.09805806756909202
2492
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.17677669529663687
2493
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2494
	 quality 0.5345224838248488
	 delivery 0.5345224838248488
	 price 0.33407655239053047
	 beauty 0.0668153104781061
2495
	 quality 0.6324555320336759
	 delivery 0.6324555320336759
	 price 0.15811388300841897
	 beauty 0.0
2496
	 quality 0.08333333333333333
	 delivery 0.08333333333333333
	 price 0.0
	 beauty 0.0
2497
	 quality 0.6666666666666666
	 delivery 0.6666666666666666
	 price 0.25
	 beauty 0.0
2498
	 quality 0.11180339887498948
	 delivery 0.11180339887498948
	 price 0.22360679774997896
	 beauty 0.11180339887498948
2499
	 quality 0.5669467095138409
	 delivery 0.5669467095138409
	 pri

In [40]:
reviews.head()

Unnamed: 0,rating,comments,sentiment,tokens,vectors,vectors_new,quality,delivery,price,beauty
0,4,Give 4 stars because order at the price 37 but...,positive,"[give, stars, order, price, today, baht, sorry]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.094491,0.094491,0.094491,0.0
1,5,Ordered at a discount of 10 baht per piece. It...,positive,"[ordered, discount, baht, piece, it, worth, no...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0,0.0,0.0,0.060634
2,5,"Small, cute, compact, good But the sash looks ...",positive,"[small, cute, compact, good, but, sash, looks,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.06455,0.06455,0.0,0.0
3,1,The size is not as large as it is down. The st...,negative,"[the, size, large, the, stitching, wrong, the,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.072169,0.072169,0.072169,0.0
4,1,The product is compared to the price. Okay. Se...,negative,"[the, product, compared, price, okay, send, sl...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.188982,0.188982,0.188982,0.047246


In [49]:
delivery_related

['fast',
 'speed',
 'service',
 'product',
 'the',
 'provided',
 'quality',
 'money',
 'company',
 'transportation',
 'good',
 'value',
 'very',
 'price',
 'shipping',
 'delivery']

## plotting

In [41]:
from bokeh.io import output_notebook,show, push_notebook
from bokeh.layouts import widgetbox,column,row
from bokeh.models.widgets import Dropdown,PreText, Select

from bokeh.models import HoverTool
from bokeh.plotting import curdoc,figure
from ipywidgets import interact

from bokeh.models import ColumnDataSource

output_notebook()

In [43]:

menu = ['quality','price','beauty','delivery']
menu = menu+['all']

def update(category):
    
    if category=='all':
        reviews_ms1 = reviews.iloc[0:10]
    else:
        reviews_ms1 = reviews.copy()
        reviews_ms1 = reviews_ms1.sort_values(by=category,ascending=False)
        reviews_ms1 = reviews_ms1.iloc[0:10]
    
    push_notebook()
    
    plot_all(reviews_ms1)

    
def plot_all(reviews_ms1):
    print('--------------------------------------------------------------------------------')
    print('------------------------------ reviews -----------------------------------------')
    print('--------------------------------------------------------------------------------')
    for i in range(len(reviews_ms1)):
        print(reviews_ms1.comments.iloc[i])
        
        print('--------------------------------------------------------------------------------')

In [47]:
interact(update,category = menu)

interactive(children=(Dropdown(description='category', options=('quality', 'price', 'beauty', 'delivery', 'all…

<function __main__.update(category)>