# Topic detection based on certain given keywords

Here we doing this using static keyword's cosine similarity with sentences

In [1]:
from numpy import dot
from numpy.linalg import norm
from itertools import chain

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import string
from collections import Counter

# spaCy based imports
from spacy.lang.en.stop_words import STOP_WORDS
from spacy.lang.en import English

from sklearn.feature_extraction.text import CountVectorizer

from nltk.stem import WordNetLemmatizer 

## read df

In [2]:
reviews = pd.read_excel('../data/Bag_Reviews.xlsx')
reviews = reviews[['rating','comments']]
reviews = reviews.drop_duplicates()
reviews.comments=reviews.comments.apply(lambda x: x.replace('👍','good '))
reviews['sentiment'] = 'neutral'
reviews.loc[(reviews['rating']==1) | (reviews['rating']==2),'sentiment'] = 'negative'
reviews.loc[(reviews['rating']==4) | (reviews['rating']==5),'sentiment'] = 'positive'

reviews.head()

Unnamed: 0,rating,comments,sentiment
0,4,Give 4 stars because order at the price 37 but...,positive
1,5,Ordered at a discount of 10 baht per piece. It...,positive
2,5,"Small, cute, compact, good But the sash looks ...",positive
3,1,The size is not as large as it is down. The st...,negative
4,1,The product is compared to the price. Okay. Se...,negative


## Sentencing the full reviews

In [3]:
full_comments = []
for i in tqdm(range(len(reviews))):
    c = reviews.comments.iloc[i]
    c = c.split('.')
    full_comments = full_comments+c

100%|██████████| 2677/2677 [00:00<00:00, 34692.91it/s]


In [4]:
full_comments = pd.Series(full_comments).drop_duplicates()
full_comments

0        Give 4 stars because order at the price 37 but...
1               Ordered at a discount of 10 baht per piece
2         It is worth noting that the leaves are very s...
3         Adjustable cable length Suitable for wearing ...
4                                                         
5        Small, cute, compact, good But the sash looks ...
6                   The size is not as large as it is down
7                                  The stitching was wrong
8                             The bag line is contemplated
9                 No, why are you like this? Lost 8 orders
10                    The product is compared to the price
11                                                    Okay
12                                          Send it slowly
13        But the bad thing is that the shop sent the w...
14        And the item was not fully received at the sh...
16       Beautiful work Sewing Good compact Suitable fo...
17                                     I like it very mu

In [5]:
reviews_old = reviews.copy()


In [6]:
def stripper(s): return s.strip()

reviews = pd.DataFrame()
reviews['comments'] = full_comments
reviews['comments'] = reviews['comments'].apply(stripper)
reviews = reviews[reviews.comments!='']
reviews.head()

Unnamed: 0,comments
0,Give 4 stars because order at the price 37 but...
1,Ordered at a discount of 10 baht per piece
2,It is worth noting that the leaves are very small
3,Adjustable cable length Suitable for wearing s...
5,"Small, cute, compact, good But the sash looks ..."


In [7]:
reviews.shape

(5224, 1)

In [8]:
punctuations = string.punctuation
stopwords = list(STOP_WORDS)
lemmatizer = WordNetLemmatizer() 

# turn a doc into clean tokens
def clean_doc(doc):
    doc = doc.strip()
    # split into tokens by white space
    tokens = doc.split()
    # remove punctuation from each token
    table = str.maketrans('', '', punctuations)
    tokens = [w.translate(table) for w in tokens]
    # remove remaining tokens that are not alphabetic
    tokens = [word for word in tokens if word.isalpha()]
    # filter out stop words
    tokens = [w for w in tokens if not w in stopwords]
    # filter out short tokens
    tokens = [lemmatizer.lemmatize(word.lower()) for word in tokens if len(word) > 1]
    
    return tokens
    

In [9]:
import sys


In [10]:
lemmatizer.lemmatize('broken',pos='n')

'broken'

In [11]:
vocab = sorted(set(token.lower() for token in chain(*list(map(clean_doc, reviews.comments)))))


In [12]:
#quality = [1 if token in ['quality','size','small','smaller','large','larger','stitching','service','suitable','good','bad','weak','strong','defective','comfortable','broken','damaged'] else 0 for token in vocab]
quality = [1 if token in ['quality','stitching','service','suitable','weak','strong','defective','comfortable','broken','damaged'] else 0 for token in vocab]
delivery = [1 if token in ['delivery','deliver','delivered','fast','slow','slowly','late','shipping','shipped','send','received','receive','ship','shop','arrived','order','ordered','store'] else 0 for token in vocab]
price = [1 if token in ['price','cheap','money','discount','offer','value','worth','purchase','refund','expensive','worthwhile','affordable'] else 0 for token in vocab]
beauty = [1 if token in ['beautiful','color','beauty','cute','shape','style','luxury','luxurious','looks','attractive','unattractive','impressive','picture'] else 0 for token in vocab]

In [13]:
topics = {'quality': quality, 'delivery': delivery, 'price': price, 'beauty':beauty}

In [14]:
reviews['tokens'] = reviews.comments.apply(clean_doc)
reviews = reviews[reviews.tokens.apply(len)!=0]
reviews.index = range(len(reviews))
def vectorize(l):
    return [1 if token in l else 0  for token in vocab]

reviews['vectors'] = reviews.tokens.apply(vectorize)
reviews.head()

Unnamed: 0,comments,tokens,vectors
0,Give 4 stars because order at the price 37 but...,"[give, star, order, price, today, baht, sorry]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,Ordered at a discount of 10 baht per piece,"[ordered, discount, baht, piece]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,It is worth noting that the leaves are very small,"[it, worth, noting, leaf, small]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,Adjustable cable length Suitable for wearing s...,"[adjustable, cable, length, suitable, wearing,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,"Small, cute, compact, good But the sash looks ...","[small, cute, compact, good, but, sash, look, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [15]:
threshold = 5
def junk_remover(a):
    a = a.split()
    max_val = max(pd.Series(a).value_counts())
    if max_val>threshold:
        return 1
    else:
        return 0
    
reviews['is_junk'] = 0
reviews['is_junk'] = reviews.comments.apply(junk_remover)
reviews = reviews[reviews.is_junk==0]
del reviews['is_junk']

In [16]:
reviews.comments.iloc[67]

'Worth it'

In [17]:
a = ['hi','do','hi','o']
max(pd.Series(a).value_counts())

2

In [18]:
reviews['quality']  = 0.0
reviews['delivery'] = 0.0
reviews['price']    = 0.0
reviews['beauty']   = 0.0

cos_sim = lambda x, y: dot(x,y)/(norm(x)*norm(y))

for s_num, s_vec in enumerate(reviews.vectors):
    print(s_num)
    for name, topic_vec in topics.items():
        similarity = cos_sim(s_vec, topic_vec)
        reviews.loc[s_num,name] = similarity
        print('\t', name, similarity)

0
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.1091089451179962
	 beauty 0.0
1
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.14433756729740646
	 beauty 0.0
2
	 quality 0.0
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
3
	 quality 0.11180339887498948
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0778498944161523
5
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
6
	 quality 0.18257418583505536
	 delivery 0.0
	 price 0.0
	 beauty 0.0
7
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
8
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
9
	 quality 0.0
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
10
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
11
	 quality 0.0
	 delivery 0.3333333333333333
	 price 0.0
	 beauty 0.0
12
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.11396057645963795
13
	 quality 0.0
	 delivery 0.17149858514250885
	 price 0.07001400

	 delivery 0.0
	 price 0.23570226039551587
	 beauty 0.0
129
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
130
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
131
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
132
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
133
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
134
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
135
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
136
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.10660035817780521
137
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
138
	 quality 0.0
	 delivery 0.0
	 price 0.056613851707229795
	 beauty 0.0
139
	 quality 0.0
	 delivery 0.0
	 price 0.40824829046386296
	 beauty 0.0
140
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
141
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
142
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
143
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
144
	 quality 0.0
	 deliv

	 quality 0.0
	 delivery 0.0
	 price 0.15430334996209194
	 beauty 0.24174688920761409
261
	 quality 0.11180339887498948
	 delivery 0.0
	 price 0.0
	 beauty 0.10660035817780521
262
	 quality 0.0
	 delivery 0.2721655269759087
	 price 0.0
	 beauty 0.0
263
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
264
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
265
	 quality 0.15811388300841897
	 delivery 0.0
	 price 0.0
	 beauty 0.0
266
	 quality 0.12909944487358058
	 delivery 0.0
	 price 0.0
	 beauty 0.12309149097933275
267
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
268
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.14433756729740646
	 beauty 0.0
269
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.0
270
	 quality 0.14142135623730948
	 delivery 0.21081851067789195
	 price 0.0
	 beauty 0.0
271
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.10660035817780521
272
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
273
	 qualit

	 price 0.16012815380508716
	 beauty 0.0
392
	 quality 0.0
	 delivery 0.09622504486493764
	 price 0.23570226039551587
	 beauty 0.0
393
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
394
	 quality 0.12909944487358058
	 delivery 0.0
	 price 0.23570226039551587
	 beauty 0.0
395
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.0
396
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
397
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
398
	 quality 0.22360679774997896
	 delivery 0.0
	 price 0.0
	 beauty 0.0
399
	 quality 0.0
	 delivery 0.09622504486493764
	 price 0.23570226039551587
	 beauty 0.0
400
	 quality 0.0
	 delivery 0.1781741612749496
	 price 0.0
	 beauty 0.0
401
	 quality 0.09534625892455924
	 delivery 0.0
	 price 0.0
	 beauty 0.0
402
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
403
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
404
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
405
	 quality 0.0
	 delivery 0

	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
523
	 quality 0.0
	 delivery 0.055555555555555566
	 price 0.0
	 beauty 0.07106690545187015
524
	 quality 0.0
	 delivery 0.15713484026367722
	 price 0.0
	 beauty 0.10050378152592121
525
	 quality 0.0
	 delivery 0.21081851067789195
	 price 0.12909944487358055
	 beauty 0.0
526
	 quality 0.0
	 delivery 0.23570226039551584
	 price 0.19245008972987526
	 beauty 0.0
527
	 quality 0.0
	 delivery 0.23570226039551584
	 price 0.09622504486493763
	 beauty 0.0
528
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
529
	 quality 0.0
	 delivery 0.14907119849998599
	 price 0.0
	 beauty 0.09534625892455924
530
	 quality 0.0
	 delivery 0.1781741612749496
	 price 0.0
	 beauty 0.11396057645963795
531
	 quality 0.15811388300841897
	 delivery 0.0
	 price 0.0
	 beauty 0.0
532
	 quality 0.0
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
533
	 quality 0.15811388300841897
	 delivery 0.0
	 price 0.0
	 beauty 0.0
534
	 quality 0.0
	 delivery 0.117

	 price 0.0
	 beauty 0.0
655
	 quality 0.14142135623730948
	 delivery 0.0
	 price 0.0
	 beauty 0.0
656
	 quality 0.0
	 delivery 0.0
	 price 0.33333333333333337
	 beauty 0.0
657
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
658
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
659
	 quality 0.11952286093343936
	 delivery 0.0
	 price 0.1091089451179962
	 beauty 0.11396057645963795
660
	 quality 0.09999999999999998
	 delivery 0.29814239699997197
	 price 0.09128709291752768
	 beauty 0.09534625892455924
661
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
662
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
663
	 quality 0.0
	 delivery 0.1924500897298753
	 price 0.0
	 beauty 0.0
664
	 quality 0.09128709291752768
	 delivery 0.20412414523193154
	 price 0.0
	 beauty 0.0
665
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
666
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.0
667
	 quality 0.0
	 delivery 0.08333333333333333
	 price 0

	 beauty 0.0
786
	 quality 0.0
	 delivery 0.21081851067789195
	 price 0.0
	 beauty 0.0
787
	 quality 0.0
	 delivery 0.23570226039551584
	 price 0.0
	 beauty 0.10050378152592121
788
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.0
789
	 quality 0.09534625892455924
	 delivery 0.0
	 price 0.08703882797784893
	 beauty 0.0
790
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
791
	 quality 0.15811388300841897
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
792
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
793
	 quality 0.0
	 delivery 0.0
	 price 0.33333333333333337
	 beauty 0.0
794
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
795
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
796
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
797
	 quality 0.0
	 delivery 0.21081851067789195
	 price 0.0
	 beauty 0.0
798
	 quality 0.0
	 delivery 0.21081851067789195
	 price 0.0
	 beauty 0.0
799
	 quality 0.0
	 delivery 0.1178511301

	 beauty 0.0
915
	 quality 0.0
	 delivery 0.09622504486493764
	 price 0.0
	 beauty 0.0
916
	 quality 0.12909944487358058
	 delivery 0.0
	 price 0.11785113019775793
	 beauty 0.0
917
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.12309149097933275
918
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
919
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
920
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
921
	 quality 0.18257418583505536
	 delivery 0.0
	 price 0.0
	 beauty 0.0
922
	 quality 0.25819888974716115
	 delivery 0.0
	 price 0.23570226039551587
	 beauty 0.0
923
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
924
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.12309149097933275
925
	 quality 0.0
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
926
	 quality 0.0
	 delivery 0.07453559924999299
	 price 0.0
	 beauty 0.0
927
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
928
	 quality 0.158113883

	 quality 0.0
	 delivery 0.06804138174397717
	 price 0.08333333333333334
	 beauty 0.0
1048
	 quality 0.0
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
1049
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.13483997249264842
1050
	 quality 0.18257418583505536
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1051
	 quality 0.0
	 delivery 0.23570226039551584
	 price 0.09622504486493763
	 beauty 0.0
1052
	 quality 0.0
	 delivery 0.08333333333333333
	 price 0.0
	 beauty 0.0
1053
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1054
	 quality 0.10540925533894598
	 delivery 0.07856742013183861
	 price 0.09622504486493763
	 beauty 0.10050378152592121
1055
	 quality 0.0
	 delivery 0.0
	 price 0.11785113019775793
	 beauty 0.12309149097933275
1056
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1057
	 quality 0.08770580193070293
	 delivery 0.0
	 price 0.16012815380508716
	 beauty 0.25087260300212727
1058
	 quality 0.10540925533894598
	 delivery 0.15713484026367722
	 

	 price 0.3849001794597505
	 beauty 0.0
1179
	 quality 0.10540925533894598
	 delivery 0.07856742013183861
	 price 0.0
	 beauty 0.10050378152592121
1180
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1181
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.12309149097933275
1182
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.28603877677367767
1183
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.0
1184
	 quality 0.0
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
1185
	 quality 0.0
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
1186
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.17407765595569785
1187
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1188
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.21320071635561041
1189
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.15075567228888181
1190
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1191
	 quality 0.0
	 delivery 0.0
	 price 0.0

	 quality 0.08770580193070293
	 delivery 0.06537204504606135
	 price 0.24019223070763074
	 beauty 0.0
1307
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1308
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.30151134457776363
1309
	 quality 0.0
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
1310
	 quality 0.15811388300841897
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1311
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.0
1312
	 quality 0.0
	 delivery 0.07856742013183861
	 price 0.0
	 beauty 0.10050378152592121
1313
	 quality 0.12909944487358058
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1314
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
1315
	 quality 0.18257418583505536
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1316
	 quality 0.0
	 delivery 0.07453559924999299
	 price 0.0
	 beauty 0.09534625892455924
1317
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1318
	 quality 0.0
	 delivery 0.3535533905932738
	 price 0.0
	 beauty 0.0

	 price 0.16666666666666669
	 beauty 0.0
1438
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.31980107453341566
1439
	 quality 0.0
	 delivery 0.09622504486493764
	 price 0.0
	 beauty 0.12309149097933275
1440
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
1441
	 quality 0.0
	 delivery 0.15713484026367722
	 price 0.0
	 beauty 0.10050378152592121
1442
	 quality 0.09534625892455924
	 delivery 0.1421338109037403
	 price 0.17407765595569785
	 beauty 0.0
1443
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1444
	 quality 0.0
	 delivery 0.07856742013183861
	 price 0.0
	 beauty 0.10050378152592121
1445
	 quality 0.10540925533894598
	 delivery 0.07856742013183861
	 price 0.19245008972987526
	 beauty 0.0
1446
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1447
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1448
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1449
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1450
	 quality 0.19999999999

	 delivery 0.0
	 price 0.0
	 beauty 0.0
1571
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
1572
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1573
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.13483997249264842
1574
	 quality 0.0
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
1575
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1576
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.13483997249264842
1577
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
1578
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1579
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1580
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.21320071635561041
1581
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
1582
	 quality 0.11952286093343936
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1583
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.13483997249264842
1584
	 quality 0.

	 beauty 0.0
1705
	 quality 0.0
	 delivery 0.23570226039551587
	 price 0.0
	 beauty 0.0
1706
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1707
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1708
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1709
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1710
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.15075567228888181
1711
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.21320071635561041
1712
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
1713
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1714
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1715
	 quality 0.14142135623730948
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
1716
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.0
1717
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
1718
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
17

	 quality 0.0
	 delivery 0.0
	 price 0.15430334996209194
	 beauty 0.16116459280507606
1839
	 quality 0.15811388300841897
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1840
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1841
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1842
	 quality 0.0
	 delivery 0.0
	 price 0.09622504486493763
	 beauty 0.10050378152592121
1843
	 quality 0.14142135623730948
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
1844
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
1845
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1846
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1847
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1848
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.17407765595569785
1849
	 quality 0.18257418583505536
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1850
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1851
	 quality 0.0
	 delivery 0.0
	 price 0.09128709291752768
	 beauty 0.09534

	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1973
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1974
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1975
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1976
	 quality 0.0
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
1977
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.0
1978
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1979
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
1980
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
1981
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
1982
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
1983
	 quality 0.15811388300841897
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1984
	 quality 0.15811388300841897
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
1985
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
1986
	 quality 0.0


	 price 0.0
	 beauty 0.0
2097
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2098
	 quality 0.09534625892455924
	 delivery 0.0
	 price 0.08703882797784893
	 beauty 0.0
2099
	 quality 0.0
	 delivery 0.1781741612749496
	 price 0.0
	 beauty 0.0
2100
	 quality 0.09534625892455924
	 delivery 0.0
	 price 0.08703882797784893
	 beauty 0.0
2101
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.12909944487358055
	 beauty 0.0
2102
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2103
	 quality 0.11952286093343936
	 delivery 0.1781741612749496
	 price 0.0
	 beauty 0.0
2104
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2105
	 quality 0.28284271247461895
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2106
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2107
	 quality 0.11180339887498948
	 delivery 0.16666666666666666
	 price 0.20412414523193148
	 beauty 0.0
2108
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2109
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.2

	 price 0.12909944487358055
	 beauty 0.0
2226
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2227
	 quality 0.11952286093343936
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2228
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2229
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.0
2230
	 quality 0.08770580193070293
	 delivery 0.1307440900921227
	 price 0.16012815380508716
	 beauty 0.0
2231
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.1091089451179962
	 beauty 0.0
2232
	 quality 0.0
	 delivery 0.0
	 price 0.2182178902359924
	 beauty 0.11396057645963795
2233
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2234
	 quality 0.14142135623730948
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2235
	 quality 0.0
	 delivery 0.0
	 price 0.33333333333333337
	 beauty 0.0
2236
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2237
	 quality 0.0
	 delivery 0.3333333333333333
	 price 0.0
	 beauty 0.0
2238
	 quality 0.11180339887498948
	 delivery 0.0
	 price 0.1020

	 delivery 0.0
	 price 0.0
	 beauty 0.0
2357
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2358
	 quality 0.08451542547285165
	 delivery 0.18898223650461363
	 price 0.15430334996209194
	 beauty 0.16116459280507606
2359
	 quality 0.15811388300841897
	 delivery 0.0
	 price 0.2886751345948129
	 beauty 0.0
2360
	 quality 0.08451542547285165
	 delivery 0.18898223650461363
	 price 0.23145502494313788
	 beauty 0.0
2361
	 quality 0.0
	 delivery 0.08333333333333333
	 price 0.10206207261596574
	 beauty 0.0
2362
	 quality 0.0
	 delivery 0.0
	 price 0.23570226039551587
	 beauty 0.0
2363
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2364
	 quality 0.0
	 delivery 0.09622504486493764
	 price 0.0
	 beauty 0.0
2365
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.0
2366
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
2367
	 quality 0.08770580193070293
	 delivery 0.1307440900921227
	 price 0.08006407690254358
	 beauty 0.0
2368
	 quality 0.09534625

	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2489
	 quality 0.19999999999999996
	 delivery 0.14907119849998599
	 price 0.09128709291752768
	 beauty 0.0
2490
	 quality 0.0
	 delivery 0.1924500897298753
	 price 0.23570226039551587
	 beauty 0.0
2491
	 quality 0.19069251784911848
	 delivery 0.1421338109037403
	 price 0.17407765595569785
	 beauty 0.0
2492
	 quality 0.0
	 delivery 0.23570226039551587
	 price 0.0
	 beauty 0.0
2493
	 quality 0.15811388300841897
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2494
	 quality 0.0
	 delivery 0.0
	 price 0.23570226039551587
	 beauty 0.0
2495
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.20412414523193148
	 beauty 0.10660035817780521
2496
	 quality 0.0
	 delivery 0.09622504486493764
	 price 0.0
	 beauty 0.0
2497
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.13483997249264842
2498
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
2499
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty

	 delivery 0.0
	 price 0.0
	 beauty 0.0
2621
	 quality 0.0
	 delivery 0.0
	 price 0.23570226039551587
	 beauty 0.0
2622
	 quality 0.11952286093343936
	 delivery 0.0
	 price 0.2182178902359924
	 beauty 0.0
2623
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.0
2624
	 quality 0.08451542547285165
	 delivery 0.06299407883487121
	 price 0.23145502494313788
	 beauty 0.08058229640253803
2625
	 quality 0.11952286093343936
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2626
	 quality 0.09999999999999998
	 delivery 0.223606797749979
	 price 0.09128709291752768
	 beauty 0.0
2627
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
2628
	 quality 0.09999999999999998
	 delivery 0.0
	 price 0.09128709291752768
	 beauty 0.0
2629
	 quality 0.0
	 delivery 0.0
	 price 0.10206207261596574
	 beauty 0.0
2630
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.08058229640253803
2631
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2632
	 quality 0.0
	 delivery 0.0
	 price 0

	 beauty 0.0
2751
	 quality 0.11952286093343936
	 delivery 0.0
	 price 0.0
	 beauty 0.2279211529192759
2752
	 quality 0.11952286093343936
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.11396057645963795
2753
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2754
	 quality 0.0
	 delivery 0.2721655269759087
	 price 0.0
	 beauty 0.0
2755
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.15075567228888181
2756
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2757
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2758
	 quality 0.15811388300841897
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
2759
	 quality 0.0
	 delivery 0.2886751345948129
	 price 0.0
	 beauty 0.0
2760
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
2761
	 quality 0.0
	 delivery 0.23570226039551587
	 price 0.0
	 beauty 0.0
2762
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.0
2763
	 quality 0.0
	 delivery 0.08333333333333333
	 price 0.0
	 beauty 0.0
2

	 price 0.14907119849998599
	 beauty 0.0
2882
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
2883
	 quality 0.11180339887498948
	 delivery 0.0
	 price 0.0
	 beauty 0.10660035817780521
2884
	 quality 0.0
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
2885
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
2886
	 quality 0.0
	 delivery 0.23570226039551587
	 price 0.0
	 beauty 0.0
2887
	 quality 0.11952286093343936
	 delivery 0.0890870806374748
	 price 0.1091089451179962
	 beauty 0.0
2888
	 quality 0.14142135623730948
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.0
2889
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2890
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2891
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2892
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2893
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2894
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2895
	 quality 0.0

	 delivery 0.0
	 price 0.0
	 beauty 0.0
3012
	 quality 0.19069251784911848
	 delivery 0.0
	 price 0.08703882797784893
	 beauty 0.09090909090909091
3013
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3014
	 quality 0.0
	 delivery 0.0
	 price 0.11785113019775793
	 beauty 0.12309149097933275
3015
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3016
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
3017
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3018
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.17407765595569785
3019
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3020
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.17407765595569785
3021
	 quality 0.09128709291752768
	 delivery 0.06804138174397717
	 price 0.33333333333333337
	 beauty 0.0
3022
	 quality 0.0
	 delivery 0.09622504486493764
	 price 0.0
	 beauty 0.0
3023
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.26967994498529685
3024
	 quality 0.0
	 delivery 0.0
	 price 0.

	 price 0.08703882797784893
	 beauty 0.0
3140
	 quality 0.0
	 delivery 0.2721655269759087
	 price 0.0
	 beauty 0.0
3141
	 quality 0.11180339887498948
	 delivery 0.25
	 price 0.10206207261596574
	 beauty 0.0
3142
	 quality 0.0
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
3143
	 quality 0.0
	 delivery 0.1781741612749496
	 price 0.0
	 beauty 0.0
3144
	 quality 0.0
	 delivery 0.0
	 price 0.33333333333333337
	 beauty 0.0
3145
	 quality 0.14142135623730948
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3146
	 quality 0.0
	 delivery 0.0
	 price 0.16012815380508716
	 beauty 0.0
3147
	 quality 0.0
	 delivery 0.0
	 price 0.2581988897471611
	 beauty 0.13483997249264842
3148
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
3149
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
3150
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3151
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
3152
	 quality 0.0
	 delivery 0.1178511301977

	 beauty 0.0
3270
	 quality 0.0
	 delivery 0.0
	 price 0.2886751345948129
	 beauty 0.0
3271
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
3272
	 quality 0.0
	 delivery 0.25
	 price 0.0
	 beauty 0.0
3273
	 quality 0.08770580193070293
	 delivery 0.06537204504606135
	 price 0.16012815380508716
	 beauty 0.0
3274
	 quality 0.0
	 delivery 0.0
	 price 0.2182178902359924
	 beauty 0.0
3275
	 quality 0.10540925533894598
	 delivery 0.15713484026367722
	 price 0.0
	 beauty 0.10050378152592121
3276
	 quality 0.09534625892455924
	 delivery 0.21320071635561047
	 price 0.3481553119113957
	 beauty 0.0
3277
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
3278
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3279
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3280
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3281
	 quality 0.0
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
3282
	 quality 0.0
	 delivery 0.0
	 price 0.1666666666666

	 price 0.0
	 beauty 0.17407765595569785
3390
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.0
3391
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.15075567228888181
3392
	 quality 0.14142135623730948
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3393
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.13483997249264842
3394
	 quality 0.0
	 delivery 0.14907119849998599
	 price 0.09128709291752768
	 beauty 0.0
3395
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3396
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3397
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3398
	 quality 0.0
	 delivery 0.0
	 price 0.2886751345948129
	 beauty 0.0
3399
	 quality 0.15811388300841897
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
3400
	 quality 0.17541160386140586
	 delivery 0.19611613513818407
	 price 0.08006407690254358
	 beauty 0.0
3401
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3402
	 quality 0.0
	 delivery 0.117851130197

	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3520
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.0
3521
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3522
	 quality 0.0
	 delivery 0.0
	 price 0.08703882797784893
	 beauty 0.09090909090909091
3523
	 quality 0.09999999999999998
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3524
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.16666666666666669
	 beauty 0.0
3525
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
3526
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.0
3527
	 quality 0.10540925533894598
	 delivery 0.0
	 price 0.0
	 beauty 0.30151134457776363
3528
	 quality 0.0
	 delivery 0.0
	 price 0.2886751345948129
	 beauty 0.0
3529
	 quality 0.09999999999999998
	 delivery 0.0
	 price 0.09128709291752768
	 beauty 0.0
3530
	 quality 0.08451542547285165
	 delivery 0.06299407883487121
	 price 0.07715167498104597
	 beauty 0.16116459280507606
3531
	 quality 0.0
	 delive

	 beauty 0.13483997249264842
3650
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3651
	 quality 0.0
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.15075567228888181
3652
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3653
	 quality 0.18257418583505536
	 delivery 0.2721655269759087
	 price 0.08333333333333334
	 beauty 0.0
3654
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3655
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
3656
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3657
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3658
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3659
	 quality 0.12909944487358058
	 delivery 0.0
	 price 0.11785113019775793
	 beauty 0.12309149097933275
3660
	 quality 0.14142135623730948
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.0
3661
	 quality 0.18257418583505536
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
3662
	 quality 0.15811388300841897
	 delivery 0.23570226

	 price 0.0
	 beauty 0.0
3783
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3784
	 quality 0.11952286093343936
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3785
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3786
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3787
	 quality 0.07905694150420949
	 delivery 0.0
	 price 0.0
	 beauty 0.15075567228888181
3788
	 quality 0.07669649888473704
	 delivery 0.1143323900950059
	 price 0.2100420126042015
	 beauty 0.07312724241271307
3789
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3790
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3791
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.13483997249264842
3792
	 quality 0.0
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
3793
	 quality 0.14142135623730948
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.0
3794
	 quality 0.0
	 delivery 0.09622504486493764
	 price 0.0
	 beauty 0.0
3795
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3796
	 quality 0.3

	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
3916
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3917
	 quality 0.0
	 delivery 0.31426968052735443
	 price 0.0
	 beauty 0.10050378152592121
3918
	 quality 0.11180339887498948
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3919
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3920
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
3921
	 quality 0.0
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
3922
	 quality 0.15811388300841897
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
3923
	 quality 0.0
	 delivery 0.1781741612749496
	 price 0.0
	 beauty 0.11396057645963795
3924
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3925
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.21320071635561041
3926
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3927
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3928
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	

	 delivery 0.0
	 price 0.1091089451179962
	 beauty 0.2279211529192759
4048
	 quality 0.0
	 delivery 0.1781741612749496
	 price 0.0
	 beauty 0.0
4049
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
4050
	 quality 0.19069251784911848
	 delivery 0.1421338109037403
	 price 0.08703882797784893
	 beauty 0.18181818181818182
4051
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
4052
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
4053
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.30151134457776363
4054
	 quality 0.14142135623730948
	 delivery 0.21081851067789195
	 price 0.12909944487358055
	 beauty 0.0
4055
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
4056
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.15075567228888181
4057
	 quality 0.28603877677367767
	 delivery 0.1421338109037403
	 price 0.17407765595569785
	 beauty 0.0
4058
	 quality 0.0
	 delivery 0.0
	 price 0.23570226039

	 price 0.0
	 beauty 0.0
4179
	 quality 0.0
	 delivery 0.12171612389003691
	 price 0.14907119849998599
	 beauty 0.0
4180
	 quality 0.22360679774997896
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4181
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4182
	 quality 0.0
	 delivery 0.18898223650461363
	 price 0.0
	 beauty 0.0
4183
	 quality 0.15339299776947407
	 delivery 0.1143323900950059
	 price 0.140028008402801
	 beauty 0.07312724241271307
4184
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4185
	 quality 0.0
	 delivery 0.08333333333333333
	 price 0.20412414523193148
	 beauty 0.10660035817780521
4186
	 quality 0.14142135623730948
	 delivery 0.0
	 price 0.2581988897471611
	 beauty 0.0
4187
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.0
4188
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.0
4189
	 quality 0.0
	 delivery 0.0
	 price 0.10206207261596574
	 beauty 0.0
4190
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4191
	 quality 

	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4307
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4308
	 quality 0.0
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.15075567228888181
4309
	 quality 0.0
	 delivery 0.0
	 price 0.08703882797784893
	 beauty 0.18181818181818182
4310
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
4311
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.20100756305184242
4312
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
4313
	 quality 0.0
	 delivery 0.2672612419124244
	 price 0.1091089451179962
	 beauty 0.0
4314
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4315
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4316
	 quality 0.0
	 delivery 0.0
	 price 0.2581988897471611
	 beauty 0.0
4317
	 quality 0.0
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
4318
	 quality 0.0
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
4319
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 b

	 beauty 0.0
4436
	 quality 0.14142135623730948
	 delivery 0.0
	 price 0.2581988897471611
	 beauty 0.0
4437
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
4438
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4439
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
4440
	 quality 0.09128709291752768
	 delivery 0.06804138174397717
	 price 0.16666666666666669
	 beauty 0.0
4441
	 quality 0.11952286093343936
	 delivery 0.0
	 price 0.2182178902359924
	 beauty 0.0
4442
	 quality 0.0
	 delivery 0.05270462766947299
	 price 0.06454972243679027
	 beauty 0.20225995873897262
4443
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4444
	 quality 0.0
	 delivery 0.0
	 price 0.33333333333333337
	 beauty 0.0
4445
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4446
	 quality 0.0
	 delivery 0.14907119849998599
	 price 0.0
	 beauty 0.0
4447
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
4448
	 quality 0.0
	 delivery 0.166666666

	 price 0.20412414523193148
	 beauty 0.0
4568
	 quality 0.0
	 delivery 0.0
	 price 0.23570226039551587
	 beauty 0.0
4569
	 quality 0.08770580193070293
	 delivery 0.1307440900921227
	 price 0.08006407690254358
	 beauty 0.0
4570
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.11396057645963795
4571
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
4572
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
4573
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4574
	 quality 0.0
	 delivery 0.0
	 price 0.10206207261596574
	 beauty 0.0
4575
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.09090909090909091
4576
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4577
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4578
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.2182178902359924
	 beauty 0.0
4579
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.14433756729740646
	 beauty 0.0
4580
	 quality 0.12909944487358

	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
4682
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
4683
	 quality 0.14142135623730948
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
4684
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.2886751345948129
	 beauty 0.0
4685
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4686
	 quality 0.11952286093343936
	 delivery 0.1781741612749496
	 price 0.0
	 beauty 0.0
4687
	 quality 0.14142135623730948
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4688
	 quality 0.15811388300841897
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4689
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4690
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4691
	 quality 0.14142135623730948
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4692
	 quality 0.11952286093343936
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4693
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4694
	 quality 0.0
	 delivery 0.1924500897298753
	 price 0.0
	

In [19]:
def max_topic_finder(quality1,delivery1,price1,beauty1):
    max_val = max(quality1,delivery1,price1,beauty1)
    if max_val == quality1:
        return 'quality'
    elif max_val == delivery1:
        return 'delivery'
    elif max_val == price1:
        return 'price'
    elif max_val == beauty1:
        return 'beauty'
    
reviews['major_topic'] = reviews.apply(lambda row:max_topic_finder(row['quality'],
                                                                   row['delivery'],row['price'],row['beauty']),axis=1)

In [20]:
i=163
reviews.comments.iloc[i]

'Products that are not shipped are notified to customers'

In [21]:
reviews.tokens.iloc[i]

['product', 'shipped', 'notified', 'customer']

In [22]:
reviews[(reviews.quality==0) & (reviews.delivery==0) & (reviews.price==0) & (reviews.beauty==0)].shape,reviews.shape

((1640, 8), (4990, 8))

In [23]:
unmatched = reviews[(reviews.quality==0) & (reviews.delivery==0) & (reviews.price==0) & (reviews.beauty==0)]
for c in unmatched.comments:
    print('--------------------------------------------------------------------------------------')
    print(c)

--------------------------------------------------------------------------------------
The size is not as large as it is down
--------------------------------------------------------------------------------------
The bag line is contemplated
--------------------------------------------------------------------------------------
Okay
--------------------------------------------------------------------------------------
I like it very much
--------------------------------------------------------------------------------------
This bag is like a shoulder bag
--------------------------------------------------------------------------------------
Very nice, but a little crumbling
--------------------------------------------------------------------------------------
But the leaves are a little small If compared to green, the wallet that is put in the black leaf again can be seen that the bag size is quite small
------------------------------------------------------------------------------------

Fast delivery, good product packaging is good, product quality is sufficient, suitable for affordable price
--------------------------------------------------------------------------------------
Yes, but the baby is a little small at this price
--------------------------------------------------------------------------------------
Good speed, good packaging
--------------------------------------------------------------------------------------
Not ok
--------------------------------------------------------------------------------------
Can not complete the item, contact the shop, there is no response to any
--------------------------------------------------------------------------------------
Very good product, good fabric, affordable price
--------------------------------------------------------------------------------------
Quality based on price
--------------------------------------------------------------------------------------
Beautiful color, can be obtained at the price of 8 bah

# LDA for seperate category

In [24]:
# Usual imports
import numpy as np
import pandas as pd
from tqdm import tqdm,tqdm_notebook
import string
import matplotlib.pyplot as plt
from sklearn.decomposition import NMF, LatentDirichletAllocation, TruncatedSVD
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.manifold import TSNE
import concurrent.futures
import time
import pyLDAvis.sklearn
from pylab import bone, pcolor, colorbar, plot, show, rcParams, savefig
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline
import os
print(os.listdir("../data"))

# Plotly based imports for visualization
from plotly import tools
import chart_studio.plotly as py
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.figure_factory as ff

# spaCy based imports
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from spacy.lang.en import English
import bz2
import re

#!python -m spacy download en_core_web_lg

['.ipynb_checkpoints', 'Bag_Reviews.xlsx', 'for_w2v', 'models', 'productReviewShopee_1.csv']


### loading spacy

In [25]:
# Creating a spaCy object
nlp = spacy.load('en_core_web_lg')

punctuations = string.punctuation
stopwords = list(STOP_WORDS)

# Parser for reviews
parser = English()
def spacy_tokenizer(sentence):
    mytokens = parser(sentence)
    mytokens = [ word.lemma_.lower().strip() if word.lemma_ != "-PRON-" else word.lower_ for word in mytokens ]
    mytokens = [ word for word in mytokens if word not in stopwords and word not in punctuations ]
    mytokens = " ".join([i for i in mytokens])
    return mytokens

# Functions for printing keywords for each topic
def selected_topics(model, vectorizer, top_n=10):
    topics = []
    for idx, topic in enumerate(model.components_):
        #print("Topic %d:" % (idx))
        topic_keys = [(vectorizer.get_feature_names()[i], topic[i]) for i in topic.argsort()[:-top_n - 0:-1]]
        topics.append((idx,topic_keys))
        #print([(vectorizer.get_feature_names()[i], topic[i]) for i in topic.argsort()[:-top_n - 0:-1]]) 
    return topics

##################################################################################################

def lda_finder(reviews_ms,NUM_TOPICS):
    
    # reviews_ms=reviews_ms.drop_duplicates()

    reviews_ms.comments=reviews_ms.comments.astype(str)
    reviews_ms['len_review']=reviews_ms.comments.apply(len)
    reviews_ms.comments=reviews_ms.comments.apply(lambda x: x.replace('👍','good '))

    s_limit=50
    max_limit=1300
    reviews=reviews_ms.loc[(reviews_ms.len_review>=s_limit) & (reviews_ms.len_review<max_limit),:]


    ## lemmetization, stopword remove, punctuation remove etc
    tqdm.pandas()
    reviews["processed_description"] = reviews["comments"].progress_apply(spacy_tokenizer)
    #reviews["processed_description"] = reviews["comments"].apply(spacy_tokenizer)

    # Creating a vectorizer
    vectorizer = CountVectorizer(min_df=0.005, max_df=0.85, stop_words='english', lowercase=True, token_pattern='[a-zA-Z\-][a-zA-Z\-]{2,}')
    data_vectorized = vectorizer.fit_transform(reviews["processed_description"])

    #NUM_TOPICS = 4
    
    SOME_FIXED_SEED = 46

    # before training/inference:
    np.random.seed(SOME_FIXED_SEED)
    
    # Latent Dirichlet Allocation Model
    #lda = LatentDirichletAllocation(n_components=NUM_TOPICS, max_iter=50, learning_method='online',verbose=True)
    lda = LatentDirichletAllocation(n_components=NUM_TOPICS, max_iter=50, learning_method='batch',verbose=False)#,random_state=1)
    data_lda = lda.fit_transform(data_vectorized)

    # Keywords for topics clustered by Latent Dirichlet Allocation
    #print("LDA Model:")
    topics_lda = selected_topics(lda, vectorizer)

    ## topics df with its words - distribution df
    topics_lda_df  = pd.DataFrame()
    i1 = [ t[0] for t in topics_lda]
    i2 = []
    for t in topics_lda:
        for t1 in t[1]:
            i2.append(t1[0])

    topics_lda_df['topic'] = i1
    for i in i2:
        topics_lda_df[i] = 0.0

    for i,t in enumerate(topics_lda):
        for t1 in t[1]:
            topics_lda_df.loc[topics_lda_df.topic==i,t1[0]]=t1[1]

    ## topic precentage in all reviews
    reviews_test_lda = reviews_ms.copy()#[(reviews_ms.len_review>=max_limit) | (reviews_ms.len_review<s_limit)]
    reviews_test_lda['index1'] = range(len(reviews_test_lda))

    dominent_topic_list = []
    topic_detail = pd.DataFrame()
    topic_detail['index1'] = reviews_test_lda.index1
    for i in range(0,NUM_TOPICS):
        topic_detail['topic_'+str(i)+'_perc'] = 0.0


    for i in tqdm(range(len(reviews_test_lda))):
        text = reviews_test_lda.comments.iloc[i]
        x = lda.transform(vectorizer.transform([text]))[0]
        y = pd.Series(x)
        for k in range(len(y)):
            topic_detail.loc[topic_detail.index1==i,'topic_'+str(k)+'_perc'] = y[k]
        y1 = y[y==max(y)].index[0]
        dominent_topic_list.append(y1)

    reviews_test_lda['dominent_topic'] = dominent_topic_list
    reviews_test_lda = reviews_test_lda.merge(topic_detail,on='index1',how='left')
    del reviews_test_lda['index1']

    return lda, topics_lda, topics_lda_df, reviews_test_lda, data_vectorized, vectorizer

In [26]:
category = 'quality'
temp = reviews.sort_values(by=category,ascending=False)
temp = temp[temp[category]!=0]
temp = reviews[reviews['major_topic']==category]
lda, topics_lda, topics_lda_df, reviews_test_lda, data_vectorized, vectorizer = lda_finder(temp,NUM_TOPICS=4)

100%|██████████| 914/914 [00:00<00:00, 5533.64it/s]
100%|██████████| 2344/2344 [00:13<00:00, 178.75it/s]


In [27]:
topics_lda

[(0,
  [('bag', 128.22484287936481),
   ('color', 103.21601532965674),
   ('beautiful', 81.15784810319288),
   ('small', 63.242477534603324),
   ('cute', 56.373709115061274),
   ('little', 50.22960478897968),
   ('okay', 46.222630344068214),
   ('suitable', 37.57740845340906),
   ('look', 35.22284413251581)]),
 (1,
  [('price', 292.8284807139705),
   ('product', 134.1292857230933),
   ('quality', 97.87685622869749),
   ('cheap', 80.36496531503383),
   ('buy', 71.22563598463317),
   ('baht', 68.23079341561339),
   ('worth', 53.224493660102134),
   ('like', 41.46116219782839),
   ('free', 36.228226580472814)]),
 (2,
  [('order', 158.2262418496034),
   ('product', 90.07712208383339),
   ('send', 55.2270075779995),
   ('shop', 40.89193931823177),
   ('time', 39.22762720350515),
   ('reed', 38.23911173844514),
   ('item', 34.227946525944176),
   ('receive', 31.231053800523693),
   ('received', 31.22682710437803)]),
 (3,
  [('good', 819.8709631795583),
   ('delivery', 301.2100375323301),
   

## Sentimental anlysis using TextBlob

In [29]:
reviews = reviews.dropna()

In [30]:
import re 
from textblob import TextBlob 

def clean_sentence(sentence): 
    ''' 
    Utility function to clean tweet text by removing links, special characters 
    using simple regex statements. 
    '''
    return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t]) |(\w+:\/\/\S+)", " ", sentence).split()) 

def get_sentence_sentiment(sentence): 
    ''' 
    Utility function to classify sentiment of passed tweet 
    using textblob's sentiment method 
    '''
    # create TextBlob object of passed tweet text 
    analysis = TextBlob(clean_sentence(sentence)) 
    # set sentiment 
    if analysis.sentiment.polarity > 0: 
        return 'positive'
    elif analysis.sentiment.polarity == 0: 
        return 'neutral'
    else: 
        return 'negative'
    
senti = []
for i in tqdm_notebook(range(len(reviews))):
    senti.append(get_sentence_sentiment(reviews.comments.iloc[i]))

reviews['sentiment_pred'] = senti

HBox(children=(IntProgress(value=0, max=4765), HTML(value='')))




## plotting

In [31]:
from tqdm import tqdm
from bokeh.io import output_notebook,show, push_notebook
from bokeh.layouts import widgetbox,column,row
from bokeh.models.widgets import Dropdown,PreText, Select

from bokeh.models import HoverTool
from bokeh.plotting import curdoc,figure
from ipywidgets import interact

from bokeh.models import ColumnDataSource

from matplotlib import pyplot as plt
from wordcloud import WordCloud, STOPWORDS
import matplotlib.colors as mcolors


output_notebook()

In [32]:

def word_plot1(topics):
    cols = [color for name, color in mcolors.TABLEAU_COLORS.items()]  # more colors: 'mcolors.XKCD_COLORS'

    cloud = WordCloud(stopwords=STOP_WORDS,
                      background_color='white',
                      width=2500,
                      height=1800,
                      max_words=10,
                      colormap='tab10',
                      color_func=lambda *args, **kwargs: cols[i],
                      prefer_horizontal=1.0)

    #topics = lda.show_topics(formatted=False)

    fig, axes = plt.subplots(1, 4, figsize=(14,14), sharex=True, sharey=True)

    for i, ax in enumerate(axes.flatten()):
        fig.add_subplot(ax)
        topic_words = dict(topics[i][1])
        cloud.generate_from_frequencies(topic_words, max_font_size=300)
        plt.gca().imshow(cloud)
        plt.gca().set_title('Topic ' + str(i), fontdict=dict(size=16))
        plt.gca().axis('off')


    plt.subplots_adjust(wspace=0, hspace=0)
    plt.axis('off')
    plt.margins(x=0, y=0)
    plt.tight_layout()
    plt.show(fig)
    


In [33]:

menu = ['quality','price','beauty','delivery']
menu = menu+['all']

sentiment = ['all','negative','positive','neutral']
topic = ['all','0','1','2','3']

def update(category,sentiment):
    
    if category=='all':
        reviews_ms1 = reviews
    else:
        reviews_ms1 = reviews[reviews['major_topic']==category]
        
    if sentiment != 'all':
        reviews_ms1 = reviews_ms1[reviews_ms1.sentiment_pred==sentiment]
    
    lda1,topics_lda1,topics_lda_df1,reviews_test_lda1,data_vectorized1,vectorizer1 = lda_finder(reviews_ms1,NUM_TOPICS=4)
    
    topic_distribution = pd.DataFrame(reviews_test_lda1.dominent_topic.value_counts()).reset_index()
    topic_distribution.columns = ['index','count']
    
    reviews_test_lda1 = reviews_test_lda1.sort_values(by=category,ascending=False)
    reviews_test_lda1.to_csv('temp.csv',index=False)
    
    push_notebook()
    
    plot_all(category,topic_distribution,topics_lda1)
    #plot_rows(reviews_test_lda1,category)

def update2(topic):
    temp = pd.read_csv('temp.csv')
    if topic!='all':
        temp = temp[temp.dominent_topic==int(topic)]
    
    plot_rows(temp,category)    


def plot_all(category,topic_distribution,topics_lda1):

    p1 = figure(width=500, height=350, x_axis_type="linear",title="Topic distribution of "+category)#, y_range=[0, max_price+10])

    r_aapl = p1.vbar('index',.5 ,'count', source=topic_distribution, color='navy',  alpha=0.5)
    
    z = row([p1])
    show(z)
    word_plot1(topics_lda1)
    
def plot_rows(reviews_ms1,category):
    print('--------------------------------------------------------------------------------')
    print('------------------------------ reviews -----------------------------------------')
    print('--------------------------------------------------------------------------------')
    #reviews_ms1 = reviews_ms1.sort_values(by=category,ascending=False)
    for i in range(10):
        print(reviews_ms1.comments.iloc[i])
        
        print('--------------------------------------------------------------------------------')

In [34]:
from bokeh.plotting import curdoc,figure
mm = curdoc()
mm.remove_on_change()

In [35]:
interact(update,category = menu,sentiment=sentiment)
interact(update2,topic = topic)

interactive(children=(Dropdown(description='category', options=('quality', 'price', 'beauty', 'delivery', 'all…

interactive(children=(Dropdown(description='topic', options=('all', '0', '1', '2', '3'), value='all'), Output(…

<function __main__.update2(topic)>

# Using W2V

In [77]:
from nltk.corpus import wordnet
import nltk
synonyms = []
antonyms = []

for syn in wordnet.synsets("price"):
    for l in syn.lemmas():
        synonyms.append(l.name())
        if l.antonyms():
             antonyms.append(l.antonyms()[0].name())

print(set(synonyms))
print(set(antonyms))

{'cost', 'damage', 'Leontyne_Price', 'price', 'Mary_Leontyne_Price', 'Price', 'monetary_value', 'toll', 'terms'}
set()


In [101]:
words = []
for i,w in enumerate(wordnet.words()):
    words.append(w)
len(words)

147306

In [100]:
words = pd.Series(words)
words[words=='price']

107779    price
dtype: object

In [78]:
nltk.edit_distance("quality", "price")

6

In [282]:
from gensim.test.utils import common_texts, get_tmpfile
from gensim.models import Word2Vec
# model = Word2Vec([words], size=100, window=5, min_count=1, workers=4)
model.train(list(reviews.tokens), total_examples=1, epochs=10)
model.wv.similarity('quality','price')

0.9999867

In [283]:
model.wv.most_similar('price',topn=15)

[('product', 0.9999924898147583),
 ('good', 0.9999887943267822),
 ('quality', 0.9999867081642151),
 ('very', 0.9999840259552002),
 ('beautiful', 0.9999840259552002),
 ('color', 0.9999831318855286),
 ('delivery', 0.9999828338623047),
 ('value', 0.9999825954437256),
 ('bag', 0.9999812841415405),
 ('but', 0.9999799728393555),
 ('order', 0.9999773502349854),
 ('cheap', 0.9999753832817078),
 ('cute', 0.9999744296073914),
 ('shop', 0.9999696016311646),
 ('service', 0.9999694228172302)]

In [284]:
model.wv.most_similar('beautiful',topn=15)

[('bag', 0.9999854564666748),
 ('price', 0.9999839663505554),
 ('color', 0.9999830722808838),
 ('good', 0.9999821186065674),
 ('cute', 0.9999798536300659),
 ('product', 0.9999792575836182),
 ('delivery', 0.9999788999557495),
 ('but', 0.9999773502349854),
 ('quality', 0.9999747276306152),
 ('cheap', 0.9999735355377197),
 ('order', 0.9999735355377197),
 ('very', 0.9999725818634033),
 ('value', 0.9999707937240601),
 ('shop', 0.9999672174453735),
 ('ordered', 0.9999661445617676)]

In [285]:
model.wv.most_similar('quality',topn=15)

[('good', 0.9999903440475464),
 ('product', 0.999988317489624),
 ('price', 0.9999867081642151),
 ('value', 0.9999830722808838),
 ('very', 0.9999808073043823),
 ('delivery', 0.9999769926071167),
 ('but', 0.999976634979248),
 ('cheap', 0.9999760389328003),
 ('beautiful', 0.9999747276306152),
 ('color', 0.9999745488166809),
 ('bag', 0.9999722242355347),
 ('ordered', 0.9999699592590332),
 ('shop', 0.9999688863754272),
 ('order', 0.9999688267707825),
 ('cute', 0.9999676942825317)]

In [286]:
model.wv.most_similar('delivery',topn=15)

[('good', 0.9999875426292419),
 ('product', 0.999984860420227),
 ('price', 0.9999827742576599),
 ('color', 0.9999796152114868),
 ('but', 0.9999791383743286),
 ('beautiful', 0.99997878074646),
 ('bag', 0.999978244304657),
 ('very', 0.9999774098396301),
 ('quality', 0.9999768733978271),
 ('value', 0.9999756217002869),
 ('cheap', 0.9999735355377197),
 ('fast', 0.999971330165863),
 ('order', 0.9999712705612183),
 ('ordered', 0.9999712705612183),
 ('cute', 0.9999691247940063)]

In [287]:
model.wv.similarity('beautiful','color')

0.999983

In [19]:
def find_related(text,maxn):
    return [val[0] for val in model.wv.most_similar(text,topn=maxn)]

delivery_related = find_related('delivery',15)+['delivery']
price_related = find_related('price',15)+['price']
beauty_related = find_related('beautiful',15)+['beautiful']
quality_related = find_related('quality',15)+['quality']


In [20]:
def syno_anto_finder(text):
    synonyms = []
    antonyms = []

    for syn in wordnet.synsets(text):
        for l in syn.lemmas():
            synonyms.append(l.name())
            if l.antonyms():
                 antonyms.append(l.antonyms()[0].name())

    return list(synonyms),list(antonyms)

a = syno_anto_finder('quality')

In [21]:
quality = [1 if token in quality_related else 0 for token in vocab]
delivery = [1 if token in delivery_related else 0 for token in vocab]
price = [1 if token in price_related else 0 for token in vocab]
beauty = [1 if token in beauty_related else 0 for token in vocab]

In [22]:
topics = {'quality': quality, 'delivery': delivery, 'price': price, 'beauty':beauty}

In [23]:
#reviews['tokens'] = reviews.comments.apply(clean_doc)
def vectorize(l):
    return [1 if token in l else 0  for token in vocab]

reviews['vectors_new'] = reviews.tokens.apply(vectorize)
reviews.head()

Unnamed: 0,rating,comments,sentiment,tokens,vectors,vectors_new
0,4,Give 4 stars because order at the price 37 but...,positive,"[give, stars, order, price, today, baht, sorry]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,5,Ordered at a discount of 10 baht per piece. It...,positive,"[ordered, discount, baht, piece, it, worth, no...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,5,"Small, cute, compact, good But the sash looks ...",positive,"[small, cute, compact, good, but, sash, looks,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,1,The size is not as large as it is down. The st...,negative,"[the, size, large, the, stitching, wrong, the,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,1,The product is compared to the price. Okay. Se...,negative,"[the, product, compared, price, okay, send, sl...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [36]:
name

'beauty'

In [34]:
cos_sim(s_vec, topic_vec)

0.0

In [37]:
reviews['quality']  = 0.0
reviews['delivery'] = 0.0
reviews['price']    = 0.0
reviews['beauty']   = 0.0

cos_sim = lambda x, y: dot(x,y)/(norm(x)*norm(y))

for s_num, s_vec in enumerate(reviews.vectors_new):
    print(s_num)
    for name, topic_vec in topics.items():
        similarity = cos_sim(s_vec, topic_vec)
        reviews.loc[s_num,name] = similarity
        print('\t', name, similarity)

In [40]:
reviews.head()

Unnamed: 0,rating,comments,sentiment,tokens,vectors,vectors_new,quality,delivery,price,beauty
0,4,Give 4 stars because order at the price 37 but...,positive,"[give, stars, order, price, today, baht, sorry]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.094491,0.094491,0.094491,0.0
1,5,Ordered at a discount of 10 baht per piece. It...,positive,"[ordered, discount, baht, piece, it, worth, no...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0,0.0,0.0,0.060634
2,5,"Small, cute, compact, good But the sash looks ...",positive,"[small, cute, compact, good, but, sash, looks,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.06455,0.06455,0.0,0.0
3,1,The size is not as large as it is down. The st...,negative,"[the, size, large, the, stitching, wrong, the,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.072169,0.072169,0.072169,0.0
4,1,The product is compared to the price. Okay. Se...,negative,"[the, product, compared, price, okay, send, sl...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.188982,0.188982,0.188982,0.047246


In [49]:
delivery_related

['fast',
 'speed',
 'service',
 'product',
 'the',
 'provided',
 'quality',
 'money',
 'company',
 'transportation',
 'good',
 'value',
 'very',
 'price',
 'shipping',
 'delivery']

## plotting

In [41]:
from bokeh.io import output_notebook,show, push_notebook
from bokeh.layouts import widgetbox,column,row
from bokeh.models.widgets import Dropdown,PreText, Select

from bokeh.models import HoverTool
from bokeh.plotting import curdoc,figure
from ipywidgets import interact

from bokeh.models import ColumnDataSource

output_notebook()

In [43]:

menu = ['quality','price','beauty','delivery']
menu = menu+['all']

def update(category):
    
    if category=='all':
        reviews_ms1 = reviews.iloc[0:10]
    else:
        reviews_ms1 = reviews.copy()
        reviews_ms1 = reviews_ms1.sort_values(by=category,ascending=False)
        reviews_ms1 = reviews_ms1.iloc[0:10]
    
    push_notebook()
    
    plot_all(reviews_ms1)

    
def plot_all(reviews_ms1):
    print('--------------------------------------------------------------------------------')
    print('------------------------------ reviews -----------------------------------------')
    print('--------------------------------------------------------------------------------')
    for i in range(len(reviews_ms1)):
        print(reviews_ms1.comments.iloc[i])
        
        print('--------------------------------------------------------------------------------')

In [47]:
interact(update,category = menu)

interactive(children=(Dropdown(description='category', options=('quality', 'price', 'beauty', 'delivery', 'all…

<function __main__.update(category)>

In [99]:
p = figure(x_range=(0,10), y_range=(0,10))
p.image_url(url=['../CRM_bokeh_app/temp.png'],x=0,y=1,w=30,h=10)
show(p)

In [97]:
from bokeh.models.widgets import Div
footer_text = """
<div >
<img src="../CRM_bokeh_app/temp.jpg" />
</div>
"""
div_footer = Div(text=footer_text,width=900,height=200)
show(div_footer)