# Topic detection based on certain given keywords

Here we doing this using static keyword's cosine similarity with sentences

In [205]:
from numpy import dot
from numpy.linalg import norm
from itertools import chain

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import string
from collections import Counter

# spaCy based imports
from spacy.lang.en.stop_words import STOP_WORDS
from spacy.lang.en import English

from sklearn.feature_extraction.text import CountVectorizer

from nltk.stem import WordNetLemmatizer 

## read df

In [289]:
reviews = pd.read_excel('../data/Bag_Reviews.xlsx')
reviews = reviews[['rating','comments']]
reviews = reviews.drop_duplicates()
reviews.comments=reviews.comments.apply(lambda x: x.replace('👍','good '))
reviews['sentiment'] = 'neutral'
reviews.loc[(reviews['rating']==1) | (reviews['rating']==2),'sentiment'] = 'negative'
reviews.loc[(reviews['rating']==4) | (reviews['rating']==5),'sentiment'] = 'positive'

reviews.head()

Unnamed: 0,rating,comments,sentiment
0,4,Give 4 stars because order at the price 37 but...,positive
1,5,Ordered at a discount of 10 baht per piece. It...,positive
2,5,"Small, cute, compact, good But the sash looks ...",positive
3,1,The size is not as large as it is down. The st...,negative
4,1,The product is compared to the price. Okay. Se...,negative


## Sentencing the full reviews

In [290]:
full_comments = []
for i in tqdm(range(len(reviews))):
    c = reviews.comments.iloc[i]
    c = c.split('.')
    full_comments = full_comments+c

100%|██████████| 2677/2677 [00:00<00:00, 32527.27it/s]


In [291]:
full_comments = pd.Series(full_comments).drop_duplicates()
full_comments

0        Give 4 stars because order at the price 37 but...
1               Ordered at a discount of 10 baht per piece
2         It is worth noting that the leaves are very s...
3         Adjustable cable length Suitable for wearing ...
4                                                         
5        Small, cute, compact, good But the sash looks ...
6                   The size is not as large as it is down
7                                  The stitching was wrong
8                             The bag line is contemplated
9                 No, why are you like this? Lost 8 orders
10                    The product is compared to the price
11                                                    Okay
12                                          Send it slowly
13        But the bad thing is that the shop sent the w...
14        And the item was not fully received at the sh...
16       Beautiful work Sewing Good compact Suitable fo...
17                                     I like it very mu

In [292]:
reviews_old = reviews.copy()


In [293]:
def stripper(s): return s.strip()

reviews = pd.DataFrame()
reviews['comments'] = full_comments
reviews['comments'] = reviews['comments'].apply(stripper)
reviews = reviews[reviews.comments!='']
reviews.head()

Unnamed: 0,comments
0,Give 4 stars because order at the price 37 but...
1,Ordered at a discount of 10 baht per piece
2,It is worth noting that the leaves are very small
3,Adjustable cable length Suitable for wearing s...
5,"Small, cute, compact, good But the sash looks ..."


In [294]:
reviews.shape

(5224, 1)

In [295]:
punctuations = string.punctuation
stopwords = list(STOP_WORDS)
lemmatizer = WordNetLemmatizer() 

# turn a doc into clean tokens
def clean_doc(doc):
    doc = doc.strip()
    # split into tokens by white space
    tokens = doc.split()
    # remove punctuation from each token
    table = str.maketrans('', '', punctuations)
    tokens = [w.translate(table) for w in tokens]
    # remove remaining tokens that are not alphabetic
    tokens = [word for word in tokens if word.isalpha()]
    # filter out stop words
    tokens = [w for w in tokens if not w in stopwords]
    # filter out short tokens
    tokens = [lemmatizer.lemmatize(word.lower()) for word in tokens if len(word) > 1]
    
    return tokens
    

In [354]:
import sys


In [296]:
lemmatizer.lemmatize('broken',pos='n')

'broken'

In [297]:
vocab = sorted(set(token.lower() for token in chain(*list(map(clean_doc, reviews.comments)))))


In [298]:
quality = [1 if token in ['quality','size','small','smaller','large','larger','stitching','service','suitable','good','bad','weak','strong','defective','comfortable','broken','damaged'] else 0 for token in vocab]
delivery = [1 if token in ['delivery','deliver','delivered','fast','slow','slowly','late','shipping','shipped','send','received','receive','ship','shop','arrived','order','ordered','store'] else 0 for token in vocab]
price = [1 if token in ['price','cheap','money','discount','offer','value','worth','purchase','refund','expensive','worthwhile','affordable'] else 0 for token in vocab]
beauty = [1 if token in ['beautiful','color','beauty','cute','shape','style','love','luxury','luxurious','looks','attractive','unattractive','impressive','picture'] else 0 for token in vocab]

In [299]:
topics = {'quality': quality, 'delivery': delivery, 'price': price, 'beauty':beauty}

In [300]:
reviews['tokens'] = reviews.comments.apply(clean_doc)
reviews = reviews[reviews.tokens.apply(len)!=0]
reviews.index = range(len(reviews))
def vectorize(l):
    return [1 if token in l else 0  for token in vocab]

reviews['vectors'] = reviews.tokens.apply(vectorize)
reviews.head()

Unnamed: 0,comments,tokens,vectors
0,Give 4 stars because order at the price 37 but...,"[give, star, order, price, today, baht, sorry]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,Ordered at a discount of 10 baht per piece,"[ordered, discount, baht, piece]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,It is worth noting that the leaves are very small,"[it, worth, noting, leaf, small]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,Adjustable cable length Suitable for wearing s...,"[adjustable, cable, length, suitable, wearing,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,"Small, cute, compact, good But the sash looks ...","[small, cute, compact, good, but, sash, look, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [301]:
reviews['quality']  = 0.0
reviews['delivery'] = 0.0
reviews['price']    = 0.0
reviews['beauty']   = 0.0

cos_sim = lambda x, y: dot(x,y)/(norm(x)*norm(y))

for s_num, s_vec in enumerate(reviews.vectors):
    print(s_num)
    for name, topic_vec in topics.items():
        similarity = cos_sim(s_vec, topic_vec)
        reviews.loc[s_num,name] = similarity
        print('\t', name, similarity)

0
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.1091089451179962
	 beauty 0.0
1
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.14433756729740646
	 beauty 0.0
2
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
3
	 quality 0.08574929257125441
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4
	 quality 0.12524485821702988
	 delivery 0.0
	 price 0.0
	 beauty 0.07453559924999299
5
	 quality 0.280056016805602
	 delivery 0.0
	 price 0.0
	 beauty 0.0
6
	 quality 0.140028008402801
	 delivery 0.0
	 price 0.0
	 beauty 0.0
7
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
8
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
9
	 quality 0.0
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
10
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
11
	 quality 0.0
	 delivery 0.3333333333333333
	 price 0.0
	 beauty 0.0
12
	 quality 0.09166984970282112
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.1091089451179962
13
	 q

	 beauty 0.0
106
	 quality 0.10846522890932808
	 delivery 0.21081851067789195
	 price 0.0
	 beauty 0.0
107
	 quality 0.0
	 delivery 0.09622504486493764
	 price 0.0
	 beauty 0.0
108
	 quality 0.10846522890932808
	 delivery 0.21081851067789195
	 price 0.0
	 beauty 0.0
109
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
110
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
111
	 quality 0.20180183819889375
	 delivery 0.1307440900921227
	 price 0.08006407690254358
	 beauty 0.0
112
	 quality 0.06482037235521644
	 delivery 0.18898223650461363
	 price 0.23145502494313788
	 beauty 0.07715167498104597
113
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.0
	 beauty 0.0
114
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
115
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
116
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
117
	 quality 0.2970442628930023
	 delivery 0.0
	 price 0.0
	 beauty 0.23570226039551587
118
	 quality 0.2970442628930023
	 de

	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.20412414523193148
	 beauty 0.0
225
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
226
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
227
	 quality 0.0
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
228
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.1091089451179962
	 beauty 0.0
229
	 quality 0.0
	 delivery 0.0
	 price 0.18257418583505536
	 beauty 0.0
230
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.2182178902359924
231
	 quality 0.0
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.12909944487358055
232
	 quality 0.0
	 delivery 0.3333333333333333
	 price 0.0
	 beauty 0.0
233
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
234
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
235
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
236
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.0
	 beauty 0.0
237
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 

	 delivery 0.1421338109037403
	 price 0.08703882797784893
	 beauty 0.0
343
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
344
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
345
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
346
	 quality 0.24253562503633297
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
347
	 quality 0.14625448482542613
	 delivery 0.21320071635561047
	 price 0.08703882797784893
	 beauty 0.0
348
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.2886751345948129
349
	 quality 0.0
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
350
	 quality 0.0
	 delivery 0.08333333333333333
	 price 0.0
	 beauty 0.10206207261596574
351
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
352
	 quality 0.25724787771376323
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.10206207261596574
353
	 quality 0.18333969940564224
	 delivery 0.1781741612749496
	 price 0.1091089451179962
	 beauty 0.0
354
	 

	 quality 0.0700140042014005
	 delivery 0.06804138174397717
	 price 0.0
	 beauty 0.0
454
	 quality 0.0700140042014005
	 delivery 0.06804138174397717
	 price 0.0
	 beauty 0.0
455
	 quality 0.25048971643405976
	 delivery 0.0
	 price 0.07453559924999299
	 beauty 0.07453559924999299
456
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.12909944487358055
	 beauty 0.0
457
	 quality 0.19802950859533489
	 delivery 0.09622504486493764
	 price 0.23570226039551587
	 beauty 0.0
458
	 quality 0.140028008402801
	 delivery 0.13608276348795434
	 price 0.25000000000000006
	 beauty 0.0
459
	 quality 0.0
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
460
	 quality 0.09166984970282112
	 delivery 0.0
	 price 0.2182178902359924
	 beauty 0.0
461
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
462
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.20412414523193148
463
	 quality 0.09166984970282112
	 delivery 0.1781741612749496
	 price 0.0
	 beauty 0.0
464
	 quality 0.0
	 delivery 0.07106

	 quality 0.09166984970282112
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.0
575
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
576
	 quality 0.09901475429766744
	 delivery 0.0
	 price 0.0
	 beauty 0.11785113019775793
577
	 quality 0.0
	 delivery 0.1924500897298753
	 price 0.0
	 beauty 0.0
578
	 quality 0.0
	 delivery 0.1781741612749496
	 price 0.1091089451179962
	 beauty 0.0
579
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.0
	 beauty 0.0
580
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
581
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
582
	 quality 0.0
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
583
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
584
	 quality 0.0
	 delivery 0.1924500897298753
	 price 0.11785113019775793
	 beauty 0.0
585
	 quality 0.0
	 delivery 0.23570226039551587
	 price 0.0
	 beauty 0.0
586
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
587
	 quality 0

	 price 0.09622504486493763
	 beauty 0.09622504486493763
691
	 quality 0.24253562503633297
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
692
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
693
	 quality 0.12126781251816648
	 delivery 0.0
	 price 0.0
	 beauty 0.0
694
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.16666666666666669
695
	 quality 0.0
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.2581988897471611
696
	 quality 0.280056016805602
	 delivery 0.0
	 price 0.0
	 beauty 0.0
697
	 quality 0.0
	 delivery 0.2721655269759087
	 price 0.0
	 beauty 0.0
698
	 quality 0.18333969940564224
	 delivery 0.0
	 price 0.1091089451179962
	 beauty 0.0
699
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
700
	 quality 0.24253562503633297
	 delivery 0.0
	 price 0.0
	 beauty 0.0
701
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
702
	 quality 0.21693045781865616
	 delivery 0.0
	 price 0.0
	 beauty 0.0
703
	 quality 0.0
	 delivery 0.0
	 price 0.33333333333333337

	 beauty 0.0
827
	 quality 0.0
	 delivery 0.15713484026367722
	 price 0.19245008972987526
	 beauty 0.19245008972987526
828
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
829
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
830
	 quality 0.0
	 delivery 0.23570226039551587
	 price 0.0
	 beauty 0.0
831
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
832
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
833
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
834
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.33333333333333337
835
	 quality 0.18333969940564224
	 delivery 0.0
	 price 0.2182178902359924
	 beauty 0.1091089451179962
836
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
837
	 quality 0.0
	 delivery 0.2721655269759087
	 price 0.0
	 beauty 0.0
838
	 quality 0.0
	 delivery 0.23570226039551587
	 price 0.0
	 beauty 0.0
839
	 quality 0.0
	 delivery 0.21081851067789195
	 price 0.0
	 beauty 0.0
840
	 quality 0.0
	 delivery

	 quality 0.0
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
941
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
942
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
943
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.16666666666666669
944
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.0
	 beauty 0.0
945
	 quality 0.12126781251816648
	 delivery 0.0
	 price 0.0
	 beauty 0.14433756729740646
946
	 quality 0.25724787771376323
	 delivery 0.16666666666666666
	 price 0.10206207261596574
	 beauty 0.0
947
	 quality 0.140028008402801
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
948
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
949
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.2182178902359924
	 beauty 0.0
950
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
951
	 quality 0.09901475429766744
	 delivery 0.09622504486493764
	 price 0.0
	 beauty 0.0
952
	 quality 0.0
	 delivery 0.0
	 price 0.2886751345948

	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1067
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1068
	 quality 0.21693045781865616
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.0
1069
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
1070
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
1071
	 quality 0.140028008402801
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.08333333333333334
1072
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.0
1073
	 quality 0.140028008402801
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1074
	 quality 0.24253562503633297
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1075
	 quality 0.140028008402801
	 delivery 0.2721655269759087
	 price 0.0
	 beauty 0.0
1076
	 quality 0.24253562503633297
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1077
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.12909944487358055
1078
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.27386127

	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.20412414523193148
1184
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1185
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.12909944487358055
1186
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.20412414523193148
1187
	 quality 0.0
	 delivery 0.07453559924999299
	 price 0.09128709291752768
	 beauty 0.09128709291752768
1188
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1189
	 quality 0.18786728732554484
	 delivery 0.3042903097250923
	 price 0.07453559924999299
	 beauty 0.0
1190
	 quality 0.0
	 delivery 0.0
	 price 0.09622504486493763
	 beauty 0.0
1191
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1192
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
1193
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
1194
	 quality 0.07312724241271307
	 delivery 0.0
	 price 0.08703882797784893
	 beauty 0.08703882797784893
1195
	 qua

	 quality 0.08084520834544433
	 delivery 0.0
	 price 0.19245008972987526
	 beauty 0.0
1304
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1305
	 quality 0.12126781251816648
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1306
	 quality 0.15339299776947407
	 delivery 0.0
	 price 0.09128709291752768
	 beauty 0.09128709291752768
1307
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.16666666666666669
1308
	 quality 0.12126781251816648
	 delivery 0.0
	 price 0.0
	 beauty 0.14433756729740646
1309
	 quality 0.19802950859533489
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1310
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1311
	 quality 0.140028008402801
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1312
	 quality 0.0
	 delivery 0.0
	 price 0.2581988897471611
	 beauty 0.12909944487358055
1313
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
1314
	 quality 0.18333969940564224
	 delivery 0.0890870806374748
	 price 0.1091089451179962
	 beauty 0.0
1315
	 quality 0.0
	 delivery 

	 price 0.0
	 beauty 0.0
1418
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.20412414523193148
1419
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.2886751345948129
1420
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1421
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.20412414523193148
1422
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.2581988897471611
1423
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.16666666666666669
1424
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1425
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1426
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
1427
	 quality 0.08084520834544433
	 delivery 0.0
	 price 0.09622504486493763
	 beauty 0.0
1428
	 quality 0.280056016805602
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
1429
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
1430
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.20412414523193148
1431

	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
1539
	 quality 0.14625448482542613
	 delivery 0.0
	 price 0.17407765595569785
	 beauty 0.0
1540
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1541
	 quality 0.140028008402801
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1542
	 quality 0.0
	 delivery 0.0
	 price 0.2886751345948129
	 beauty 0.0
1543
	 quality 0.09166984970282112
	 delivery 0.0890870806374748
	 price 0.2182178902359924
	 beauty 0.1091089451179962
1544
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.10206207261596574
	 beauty 0.0
1545
	 quality 0.0
	 delivery 0.1924500897298753
	 price 0.0
	 beauty 0.0
1546
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1547
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
1548
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1549
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1550
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1551
	 quality 0.24253562503633297


	 quality 0.18333969940564224
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1655
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
1656
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1657
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.0
	 beauty 0.12909944487358055
1658
	 quality 0.0
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
1659
	 quality 0.140028008402801
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1660
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.12909944487358055
1661
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
1662
	 quality 0.140028008402801
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1663
	 quality 0.24253562503633297
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1664
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.0
	 beauty 0.20412414523193148
1665
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
1666
	 quality 0.18333969940564224
	 delivery 0.0
	 price 0.0
	 beauty 0.0
16

	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1778
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.16666666666666669
1779
	 quality 0.08084520834544433
	 delivery 0.0
	 price 0.0
	 beauty 0.09622504486493763
1780
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.20412414523193148
1781
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1782
	 quality 0.1143323900950059
	 delivery 0.11111111111111113
	 price 0.13608276348795434
	 beauty 0.0
1783
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1784
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1785
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.20412414523193148
1786
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
1787
	 quality 0.19802950859533489
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1788
	 quality 0.0700140042014005
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
1789
	 quality 0.0
	 delivery 0.23570226039551587
	 price 0.0
	 beauty 0.0


	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1894
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1895
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1896
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1897
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.2581988897471611
1898
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
1899
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.14433756729740646
1900
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.16666666666666669
1901
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1902
	 quality 0.20180183819889375
	 delivery 0.19611613513818407
	 price 0.08006407690254358
	 beauty 0.0
1903
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1904
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
1905
	 quality 0.04950737714883372
	 delivery 0.0
	 price 0.0
	 beauty 0.05892556509887897
1906
	 quality 0.0
	 delivery 0

	 quality 0.15339299776947407
	 delivery 0.0
	 price 0.18257418583505536
	 beauty 0.0
2012
	 quality 0.09166984970282112
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.1091089451179962
2013
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2014
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
2015
	 quality 0.0
	 delivery 0.23570226039551587
	 price 0.0
	 beauty 0.14433756729740646
2016
	 quality 0.0
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
2017
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2018
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2019
	 quality 0.12126781251816648
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2020
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.09622504486493763
2021
	 quality 0.0700140042014005
	 delivery 0.06804138174397717
	 price 0.25000000000000006
	 beauty 0.0
2022
	 quality 0.0
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
2023
	 quality 0.19802950859533489
	 deliver

	 beauty 0.09622504486493763
2129
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2130
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2131
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2132
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2133
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2134
	 quality 0.08084520834544433
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2135
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2136
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2137
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2138
	 quality 0.12126781251816648
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2139
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2140
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
2141
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2142
	 quality 0.21693045781865616
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.0
2143
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2144
	

	 price 0.33333333333333337
	 beauty 0.0
2245
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
2246
	 quality 0.140028008402801
	 delivery 0.2721655269759087
	 price 0.0
	 beauty 0.0
2247
	 quality 0.24253562503633297
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
2248
	 quality 0.280056016805602
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2249
	 quality 0.24253562503633297
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2250
	 quality 0.0
	 delivery 0.23570226039551587
	 price 0.0
	 beauty 0.0
2251
	 quality 0.09166984970282112
	 delivery 0.0
	 price 0.1091089451179962
	 beauty 0.0
2252
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
2253
	 quality 0.24253562503633297
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2254
	 quality 0.21693045781865616
	 delivery 0.0
	 price 0.2581988897471611
	 beauty 0.0
2255
	 quality 0.0
	 delivery 0.1924500897298753
	 price 0.0
	 beauty 0.0
2256
	 quality 0.16169041669088866
	 delivery 0.15713484026367

	 delivery 0.12598815766974242
	 price 0.15430334996209194
	 beauty 0.0
2367
	 quality 0.0
	 delivery 0.3535533905932738
	 price 0.0
	 beauty 0.0
2368
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
2369
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
2370
	 quality 0.07312724241271307
	 delivery 0.1421338109037403
	 price 0.0
	 beauty 0.08703882797784893
2371
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2372
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.16666666666666669
2373
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2374
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
2375
	 quality 0.24253562503633297
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
2376
	 quality 0.0
	 delivery 0.1307440900921227
	 price 0.0
	 beauty 0.08006407690254358
2377
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2378
	 quality 0.0
	 deliv

	 quality 0.07669649888473704
	 delivery 0.14907119849998599
	 price 0.09128709291752768
	 beauty 0.0
2489
	 quality 0.18333969940564224
	 delivery 0.1781741612749496
	 price 0.1091089451179962
	 beauty 0.0
2490
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.16666666666666669
2491
	 quality 0.21693045781865616
	 delivery 0.21081851067789195
	 price 0.0
	 beauty 0.0
2492
	 quality 0.24253562503633297
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2493
	 quality 0.0
	 delivery 0.0
	 price 0.43301270189221935
	 beauty 0.0
2494
	 quality 0.21938172723813917
	 delivery 0.1421338109037403
	 price 0.08703882797784893
	 beauty 0.0
2495
	 quality 0.19802950859533489
	 delivery 0.0
	 price 0.0
	 beauty 0.11785113019775793
2496
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.0
2497
	 quality 0.0
	 delivery 0.2721655269759087
	 price 0.0
	 beauty 0.0
2498
	 quality 0.18333969940564224
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2499
	 quality 0.21693045781865616
	 del

	 price 0.0
	 beauty 0.0
2612
	 quality 0.18333969940564224
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.1091089451179962
2613
	 quality 0.18333969940564224
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.1091089451179962
2614
	 quality 0.14625448482542613
	 delivery 0.1421338109037403
	 price 0.08703882797784893
	 beauty 0.0
2615
	 quality 0.12126781251816648
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
2616
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
2617
	 quality 0.07312724241271307
	 delivery 0.0
	 price 0.17407765595569785
	 beauty 0.17407765595569785
2618
	 quality 0.09166984970282112
	 delivery 0.0890870806374748
	 price 0.3273268353539886
	 beauty 0.0
2619
	 quality 0.09901475429766744
	 delivery 0.1924500897298753
	 price 0.11785113019775793
	 beauty 0.0
2620
	 quality 0.13453455879926252
	 delivery 0.1307440900921227
	 price 0.24019223070763074
	 beauty 0.0
2621
	 quality 0.18333969940564224
	 delivery 0.0
	 p

	 delivery 0.0
	 price 0.0
	 beauty 0.2886751345948129
2732
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
2733
	 quality 0.24253562503633297
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
2734
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.09622504486493763
2735
	 quality 0.24253562503633297
	 delivery 0.0
	 price 0.2886751345948129
	 beauty 0.0
2736
	 quality 0.09166984970282112
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2737
	 quality 0.21693045781865616
	 delivery 0.21081851067789195
	 price 0.0
	 beauty 0.0
2738
	 quality 0.0
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
2739
	 quality 0.0
	 delivery 0.0890870806374748
	 price 0.2182178902359924
	 beauty 0.0
2740
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2741
	 quality 0.12964074471043288
	 delivery 0.0
	 price 0.23145502494313788
	 beauty 0.0
2742
	 quality 0.24253562503633297
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2743
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0

	 price 0.0
	 beauty 0.0
2848
	 quality 0.30678599553894814
	 delivery 0.14907119849998599
	 price 0.09128709291752768
	 beauty 0.0
2849
	 quality 0.29250896965085227
	 delivery 0.21320071635561047
	 price 0.0
	 beauty 0.0
2850
	 quality 0.09901475429766744
	 delivery 0.09622504486493764
	 price 0.0
	 beauty 0.0
2851
	 quality 0.0700140042014005
	 delivery 0.06804138174397717
	 price 0.08333333333333334
	 beauty 0.0
2852
	 quality 0.05564148840746571
	 delivery 0.10814761408717503
	 price 0.13245323570650439
	 beauty 0.13245323570650439
2853
	 quality 0.07312724241271307
	 delivery 0.1421338109037403
	 price 0.0
	 beauty 0.0
2854
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.11785113019775793
2855
	 quality 0.2100420126042015
	 delivery 0.13608276348795434
	 price 0.16666666666666669
	 beauty 0.08333333333333334
2856
	 quality 0.2300894966542111
	 delivery 0.14907119849998599
	 price 0.09128709291752768
	 beauty 0.0
2857
	 quality 0.19802950859533489
	 delivery 0.0
	 price 0.2357

	 delivery 0.0
	 price 0.0
	 beauty 0.16666666666666669
2962
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2963
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2964
	 quality 0.0
	 delivery 0.07106690545187015
	 price 0.08703882797784893
	 beauty 0.0
2965
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2966
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2967
	 quality 0.18333969940564224
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.0
2968
	 quality 0.24253562503633297
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
2969
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
2970
	 quality 0.0
	 delivery 0.07856742013183861
	 price 0.09622504486493763
	 beauty 0.0
2971
	 quality 0.08574929257125441
	 delivery 0.0
	 price 0.0
	 beauty 0.0
2972
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.10206207261596574
	 beauty 0.0
2973
	 quality 0.09166984970282112
	 delivery 0.0890870806374748
	 price 0.3273268353539886
	 beauty 0.0

	 price 0.0
	 beauty 0.0
3085
	 quality 0.16169041669088866
	 delivery 0.07856742013183861
	 price 0.19245008972987526
	 beauty 0.0
3086
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.16666666666666669
	 beauty 0.0
3087
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3088
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.09128709291752768
3089
	 quality 0.0
	 delivery 0.0
	 price 0.11785113019775793
	 beauty 0.0
3090
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.12909944487358055
	 beauty 0.0
3091
	 quality 0.0
	 delivery 0.0
	 price 0.2581988897471611
	 beauty 0.0
3092
	 quality 0.0
	 delivery 0.0
	 price 0.1091089451179962
	 beauty 0.0
3093
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3094
	 quality 0.0
	 delivery 0.1924500897298753
	 price 0.0
	 beauty 0.0
3095
	 quality 0.08084520834544433
	 delivery 0.23570226039551584
	 price 0.09622504486493763
	 beauty 0.0
3096
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.16666666666666669
3097
	 quality

	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3209
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3210
	 quality 0.140028008402801
	 delivery 0.0
	 price 0.0
	 beauty 0.25000000000000006
3211
	 quality 0.0
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
3212
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
3213
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
3214
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3215
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3216
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3217
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3218
	 quality 0.07669649888473704
	 delivery 0.0
	 price 0.18257418583505536
	 beauty 0.0
3219
	 quality 0.19802950859533489
	 delivery 0.1924500897298753
	 price 0.0
	 beauty 0.0
3220
	 quality 0.18333969940564224
	 delivery 0.0890870806374748
	 price 0.1091089451179962
	 beauty 0.0
3221
	 quality 0.06063390625908324


	 delivery 0.0
	 price 0.0
	 beauty 0.0
3329
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.12909944487358055
3330
	 quality 0.19802950859533489
	 delivery 0.1924500897298753
	 price 0.23570226039551587
	 beauty 0.0
3331
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3332
	 quality 0.12126781251816648
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3333
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3334
	 quality 0.16692446522239712
	 delivery 0.10814761408717503
	 price 0.13245323570650439
	 beauty 0.19867985355975656
3335
	 quality 0.13453455879926252
	 delivery 0.1307440900921227
	 price 0.24019223070763074
	 beauty 0.08006407690254358
3336
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
3337
	 quality 0.24253562503633297
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3338
	 quality 0.0
	 delivery 0.3333333333333333
	 price 0.0
	 beauty 0.0
3339
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3340
	 qua

	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
3446
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.16666666666666669
3447
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3448
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
3449
	 quality 0.140028008402801
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
3450
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3451
	 quality 0.24253562503633297
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3452
	 quality 0.12126781251816648
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3453
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3454
	 quality 0.0
	 delivery 0.23570226039551587
	 price 0.0
	 beauty 0.0
3455
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3456
	 quality 0.21938172723813917
	 delivery 0.1421338109037403
	 price 0.08703882797784893
	 beauty 0.0
3457
	 quality 0.21693045781865616
	 delivery 0.0
	 price 0.2581988897471611
	 beauty 0.0
3458
	 quality 0

	 quality 0.12126781251816648
	 delivery 0.0
	 price 0.0
	 beauty 0.14433756729740646
3564
	 quality 0.21693045781865616
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3565
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.0
	 beauty 0.12909944487358055
3566
	 quality 0.0
	 delivery 0.14907119849998599
	 price 0.09128709291752768
	 beauty 0.0
3567
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3568
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3569
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3570
	 quality 0.24253562503633297
	 delivery 0.0
	 price 0.2886751345948129
	 beauty 0.0
3571
	 quality 0.24253562503633297
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
3572
	 quality 0.20180183819889375
	 delivery 0.19611613513818407
	 price 0.08006407690254358
	 beauty 0.0
3573
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3574
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
3575
	 quality 0.24253562503633297
	 deliv

	 price 0.11785113019775793
	 beauty 0.0
3685
	 quality 0.140028008402801
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3686
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3687
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3688
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
3689
	 quality 0.14625448482542613
	 delivery 0.21320071635561047
	 price 0.26111648393354675
	 beauty 0.0
3690
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3691
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3692
	 quality 0.12524485821702988
	 delivery 0.0
	 price 0.07453559924999299
	 beauty 0.0
3693
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3694
	 quality 0.09166984970282112
	 delivery 0.0890870806374748
	 price 0.0
	 beauty 0.0
3695
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3696
	 quality 0.0
	 delivery 0.0
	 price 0.08703882797784893
	 beauty 0.08703882797784893
3697
	 quality 0.07669649888473704
	 delivery 0.0
	 price 0.0
	 beauty 0.0
369

	 price 0.0
	 beauty 0.0
3804
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3805
	 quality 0.08084520834544433
	 delivery 0.15713484026367722
	 price 0.09622504486493763
	 beauty 0.0
3806
	 quality 0.08084520834544433
	 delivery 0.15713484026367722
	 price 0.09622504486493763
	 beauty 0.0
3807
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3808
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3809
	 quality 0.140028008402801
	 delivery 0.0
	 price 0.0
	 beauty 0.33333333333333337
3810
	 quality 0.0
	 delivery 0.0
	 price 0.33333333333333337
	 beauty 0.0
3811
	 quality 0.16169041669088866
	 delivery 0.15713484026367722
	 price 0.09622504486493763
	 beauty 0.0
3812
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3813
	 quality 0.0
	 delivery 0.0
	 price 0.11785113019775793
	 beauty 0.11785113019775793
3814
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3815
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3816
	 quality 0.0
	 delivery 0.0
	 price 0.1

	 delivery 0.0
	 price 0.0
	 beauty 0.0
3927
	 quality 0.0
	 delivery 0.0
	 price 0.09128709291752768
	 beauty 0.0
3928
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.0
	 beauty 0.12909944487358055
3929
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3930
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3931
	 quality 0.0
	 delivery 0.0
	 price 0.19245008972987526
	 beauty 0.0
3932
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3933
	 quality 0.10846522890932808
	 delivery 0.21081851067789195
	 price 0.0
	 beauty 0.0
3934
	 quality 0.0
	 delivery 0.0
	 price 0.1091089451179962
	 beauty 0.1091089451179962
3935
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3936
	 quality 0.21693045781865616
	 delivery 0.0
	 price 0.2581988897471611
	 beauty 0.0
3937
	 quality 0.17149858514250882
	 delivery 0.08333333333333333
	 price 0.10206207261596574
	 beauty 0.0
3938
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
3939
	 quality 0.0
	 delivery 0.166666666666666

	 price 0.0
	 beauty 0.0
4048
	 quality 0.0
	 delivery 0.31622776601683794
	 price 0.0
	 beauty 0.0
4049
	 quality 0.21693045781865616
	 delivery 0.0
	 price 0.0
	 beauty 0.12909944487358055
4050
	 quality 0.18333969940564224
	 delivery 0.1781741612749496
	 price 0.0
	 beauty 0.1091089451179962
4051
	 quality 0.18333969940564224
	 delivery 0.1781741612749496
	 price 0.0
	 beauty 0.0
4052
	 quality 0.0
	 delivery 0.10540925533894598
	 price 0.06454972243679027
	 beauty 0.0
4053
	 quality 0.09166984970282112
	 delivery 0.0
	 price 0.0
	 beauty 0.1091089451179962
4054
	 quality 0.12126781251816648
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4055
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4056
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.20412414523193148
4057
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.1091089451179962
4058
	 quality 0.09901475429766744
	 delivery 0.0
	 price 0.23570226039551587
	 beauty 0.0
4059
	 quality 0.0
	 delivery 0.19245008972

	 beauty 0.0
4172
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4173
	 quality 0.18333969940564224
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4174
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.10206207261596574
4175
	 quality 0.0
	 delivery 0.0
	 price 0.1091089451179962
	 beauty 0.0
4176
	 quality 0.0
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
4177
	 quality 0.24253562503633297
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
4178
	 quality 0.0
	 delivery 0.23570226039551587
	 price 0.0
	 beauty 0.0
4179
	 quality 0.140028008402801
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
4180
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
4181
	 quality 0.08574929257125441
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
4182
	 quality 0.140028008402801
	 delivery 0.13608276348795434
	 price 0.08333333333333334
	 beauty 0.0
4183
	 quality 0.13453455879926252
	 delivery 0.1307440900921227
	 price 0.08006407690254358
	 beauty 0

	 delivery 0.0
	 price 0.0
	 beauty 0.2886751345948129
4295
	 quality 0.0
	 delivery 0.0
	 price 0.1091089451179962
	 beauty 0.1091089451179962
4296
	 quality 0.140028008402801
	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
4297
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4298
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4299
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.20412414523193148
4300
	 quality 0.0
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
4301
	 quality 0.0
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
4302
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4303
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4304
	 quality 0.140028008402801
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4305
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4306
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
4307
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4308
	 quality 0.0
	 del

	 price 0.0
	 beauty 0.0
4413
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4414
	 quality 0.140028008402801
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
4415
	 quality 0.0
	 delivery 0.0
	 price 0.20412414523193148
	 beauty 0.0
4416
	 quality 0.18333969940564224
	 delivery 0.1781741612749496
	 price 0.0
	 beauty 0.0
4417
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4418
	 quality 0.12126781251816648
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4419
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
4420
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4421
	 quality 0.0
	 delivery 0.0
	 price 0.40824829046386296
	 beauty 0.0
4422
	 quality 0.12126781251816648
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
4423
	 quality 0.09901475429766744
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4424
	 quality 0.24253562503633297
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4425
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
442

	 price 0.11785113019775793
	 beauty 0.0
4537
	 quality 0.12126781251816648
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4538
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4539
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4540
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4541
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
4542
	 quality 0.09901475429766744
	 delivery 0.1924500897298753
	 price 0.0
	 beauty 0.0
4543
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.12909944487358055
4544
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4545
	 quality 0.0
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.0
4546
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
4547
	 quality 0.0
	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
4548
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4549
	 quality 0.24253562503633297
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4

	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
4661
	 quality 0.17149858514250882
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4662
	 quality 0.140028008402801
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
4663
	 quality 0.140028008402801
	 delivery 0.06804138174397717
	 price 0.16666666666666669
	 beauty 0.0
4664
	 quality 0.18333969940564224
	 delivery 0.0
	 price 0.2182178902359924
	 beauty 0.0
4665
	 quality 0.05423261445466404
	 delivery 0.05270462766947299
	 price 0.06454972243679027
	 beauty 0.19364916731037085
4666
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4667
	 quality 0.0
	 delivery 0.0
	 price 0.33333333333333337
	 beauty 0.0
4668
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4669
	 quality 0.07669649888473704
	 delivery 0.14907119849998599
	 price 0.0
	 beauty 0.0
4670
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
4671
	 quality 0.17149858514250882
	 delivery 0.16666666666666666
	 price 0.0
	 beauty 0.

	 delivery 0.13608276348795434
	 price 0.0
	 beauty 0.0
4781
	 quality 0.140028008402801
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4782
	 quality 0.18333969940564224
	 delivery 0.0
	 price 0.2182178902359924
	 beauty 0.1091089451179962
4783
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4784
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4785
	 quality 0.10846522890932808
	 delivery 0.21081851067789195
	 price 0.0
	 beauty 0.0
4786
	 quality 0.12126781251816648
	 delivery 0.0
	 price 0.14433756729740646
	 beauty 0.0
4787
	 quality 0.0
	 delivery 0.31622776601683794
	 price 0.0
	 beauty 0.0
4788
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4789
	 quality 0.24253562503633297
	 delivery 0.0
	 price 0.0
	 beauty 0.14433756729740646
4790
	 quality 0.14625448482542613
	 delivery 0.0
	 price 0.0
	 beauty 0.08703882797784893
4791
	 quality 0.13453455879926252
	 delivery 0.1307440900921227
	 price 0.24019223070763074
	 beauty 0.0
4792
	 quality 0.0
	 delivery 0.0
	 price 0.

	 delivery 0.11785113019775793
	 price 0.0
	 beauty 0.0
4901
	 quality 0.0
	 delivery 0.0
	 price 0.16666666666666669
	 beauty 0.0
4902
	 quality 0.0
	 delivery 0.0
	 price 0.5000000000000001
	 beauty 0.0
4903
	 quality 0.10846522890932808
	 delivery 0.0
	 price 0.12909944487358055
	 beauty 0.0
4904
	 quality 0.06726727939963126
	 delivery 0.19611613513818407
	 price 0.16012815380508716
	 beauty 0.0
4905
	 quality 0.18333969940564224
	 delivery 0.1781741612749496
	 price 0.3273268353539886
	 beauty 0.0
4906
	 quality 0.09901475429766744
	 delivery 0.09622504486493764
	 price 0.0
	 beauty 0.0
4907
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.16666666666666669
4908
	 quality 0.09901475429766744
	 delivery 0.09622504486493764
	 price 0.0
	 beauty 0.0
4909
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4910
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.0
4911
	 quality 0.0
	 delivery 0.0
	 price 0.0
	 beauty 0.20412414523193148
4912
	 quality 0.24253562503633297
	 deliv

In [324]:
def max_topic_finder(quality1,delivery1,price1,beauty1):
    max_val = max(quality1,delivery1,price1,beauty1)
    if max_val == quality1:
        return 'quality'
    elif max_val == delivery1:
        return 'delivery'
    elif max_val == price1:
        return 'price'
    elif max_val == beauty1:
        return 'beauty'
    
reviews['major_topic'] = reviews.apply(lambda row:max_topic_finder(row['quality'],
                                                                   row['delivery'],row['price'],row['beauty']),axis=1)

In [302]:
i=163
reviews.comments.iloc[i]

'Thank you very much and will be subsidized again'

In [303]:
reviews.tokens.iloc[i]

['thank', 'subsidized']

In [304]:
reviews[(reviews.quality==0) & (reviews.delivery==0) & (reviews.price==0) & (reviews.beauty==0)].shape,reviews.shape

((1119, 7), (5012, 7))

In [305]:
unmatched = reviews[(reviews.quality==0) & (reviews.delivery==0) & (reviews.price==0) & (reviews.beauty==0)]
for c in unmatched.comments:
    print('--------------------------------------------------------------------------------------')
    print(c)

--------------------------------------------------------------------------------------
The bag line is contemplated
--------------------------------------------------------------------------------------
Okay
--------------------------------------------------------------------------------------
I like it very much
--------------------------------------------------------------------------------------
This bag is like a shoulder bag
--------------------------------------------------------------------------------------
Very nice, but a little crumbling
--------------------------------------------------------------------------------------
Haven't tried using it, how long? But overall okay
--------------------------------------------------------------------------------------
The product is not fully contactable
--------------------------------------------------------------------------------------
I recommend
------------------------------------------------------------------------------------

# LDA for seperate category

In [309]:
# Usual imports
import numpy as np
import pandas as pd
from tqdm import tqdm,tqdm_notebook
import string
import matplotlib.pyplot as plt
from sklearn.decomposition import NMF, LatentDirichletAllocation, TruncatedSVD
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.manifold import TSNE
import concurrent.futures
import time
import pyLDAvis.sklearn
from pylab import bone, pcolor, colorbar, plot, show, rcParams, savefig
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline
import os
print(os.listdir("../data"))

# Plotly based imports for visualization
from plotly import tools
import chart_studio.plotly as py
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.figure_factory as ff

# spaCy based imports
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from spacy.lang.en import English
import bz2
import re

#!python -m spacy download en_core_web_lg

['.ipynb_checkpoints', 'Bag_Reviews.xlsx', 'for_w2v', 'models', 'productReviewShopee_1.csv']


### loading spacy

In [313]:
# Creating a spaCy object
nlp = spacy.load('en_core_web_lg')

punctuations = string.punctuation
stopwords = list(STOP_WORDS)

# Parser for reviews
parser = English()
def spacy_tokenizer(sentence):
    mytokens = parser(sentence)
    mytokens = [ word.lemma_.lower().strip() if word.lemma_ != "-PRON-" else word.lower_ for word in mytokens ]
    mytokens = [ word for word in mytokens if word not in stopwords and word not in punctuations ]
    mytokens = " ".join([i for i in mytokens])
    return mytokens

# Functions for printing keywords for each topic
def selected_topics(model, vectorizer, top_n=10):
    topics = []
    for idx, topic in enumerate(model.components_):
        #print("Topic %d:" % (idx))
        topic_keys = [(vectorizer.get_feature_names()[i], topic[i]) for i in topic.argsort()[:-top_n - 0:-1]]
        topics.append((idx,topic_keys))
        #print([(vectorizer.get_feature_names()[i], topic[i]) for i in topic.argsort()[:-top_n - 0:-1]]) 
    return topics

##################################################################################################

def lda_finder(reviews_ms,NUM_TOPICS):
    
    # reviews_ms=reviews_ms.drop_duplicates()

    reviews_ms.comments=reviews_ms.comments.astype(str)
    reviews_ms['len_review']=reviews_ms.comments.apply(len)
    reviews_ms.comments=reviews_ms.comments.apply(lambda x: x.replace('👍','good '))

    s_limit=50
    max_limit=1300
    reviews=reviews_ms.loc[(reviews_ms.len_review>=s_limit) & (reviews_ms.len_review<max_limit),:]


    ## lemmetization, stopword remove, punctuation remove etc
    tqdm.pandas()
    reviews["processed_description"] = reviews["comments"].progress_apply(spacy_tokenizer)
    #reviews["processed_description"] = reviews["comments"].apply(spacy_tokenizer)

    # Creating a vectorizer
    vectorizer = CountVectorizer(min_df=0.005, max_df=0.85, stop_words='english', lowercase=True, token_pattern='[a-zA-Z\-][a-zA-Z\-]{2,}')
    data_vectorized = vectorizer.fit_transform(reviews["processed_description"])

    #NUM_TOPICS = 4
    
    SOME_FIXED_SEED = 46

    # before training/inference:
    np.random.seed(SOME_FIXED_SEED)
    
    # Latent Dirichlet Allocation Model
    #lda = LatentDirichletAllocation(n_components=NUM_TOPICS, max_iter=50, learning_method='online',verbose=True)
    lda = LatentDirichletAllocation(n_components=NUM_TOPICS, max_iter=50, learning_method='batch',verbose=False)#,random_state=1)
    data_lda = lda.fit_transform(data_vectorized)

    # Keywords for topics clustered by Latent Dirichlet Allocation
    #print("LDA Model:")
    topics_lda = selected_topics(lda, vectorizer)

    ## topics df with its words - distribution df
    topics_lda_df  = pd.DataFrame()
    i1 = [ t[0] for t in topics_lda]
    i2 = []
    for t in topics_lda:
        for t1 in t[1]:
            i2.append(t1[0])

    topics_lda_df['topic'] = i1
    for i in i2:
        topics_lda_df[i] = 0.0

    for i,t in enumerate(topics_lda):
        for t1 in t[1]:
            topics_lda_df.loc[topics_lda_df.topic==i,t1[0]]=t1[1]

    ## topic precentage in all reviews
    reviews_test_lda = reviews_ms.copy()#[(reviews_ms.len_review>=max_limit) | (reviews_ms.len_review<s_limit)]
    reviews_test_lda['index1'] = range(len(reviews_test_lda))

    dominent_topic_list = []
    topic_detail = pd.DataFrame()
    topic_detail['index1'] = reviews_test_lda.index1
    for i in range(0,NUM_TOPICS):
        topic_detail['topic_'+str(i)+'_perc'] = 0.0


    for i in tqdm(range(len(reviews_test_lda))):
        text = reviews_test_lda.comments.iloc[i]
        x = lda.transform(vectorizer.transform([text]))[0]
        y = pd.Series(x)
        for k in range(len(y)):
            topic_detail.loc[topic_detail.index1==i,'topic_'+str(k)+'_perc'] = y[k]
        y1 = y[y==max(y)].index[0]
        dominent_topic_list.append(y1)

    reviews_test_lda['dominent_topic'] = dominent_topic_list
    reviews_test_lda = reviews_test_lda.merge(topic_detail,on='index1',how='left')
    del reviews_test_lda['index1']

    return lda, topics_lda, topics_lda_df, reviews_test_lda, data_vectorized, vectorizer

In [326]:
category = 'quality'
temp = reviews.sort_values(by=category,ascending=False)
temp = temp[temp[category]!=0]
temp = reviews[reviews['major_topic']==category]
lda, topics_lda, topics_lda_df, reviews_test_lda, data_vectorized, vectorizer = lda_finder(temp,NUM_TOPICS=4)

100%|██████████| 852/852 [00:00<00:00, 10533.48it/s]
100%|██████████| 2398/2398 [00:14<00:00, 168.54it/s]


In [327]:
topics_lda

[(0,
  [('price', 133.28409924782628),
   ('suitable', 114.23563131584257),
   ('bag', 106.23761763062812),
   ('small', 101.24244953446672),
   ('cute', 45.22969918841527),
   ('product', 44.19752781347387),
   ('little', 39.23683180934843),
   ('size', 38.238281658032115),
   ('line', 38.23567687200392)]),
 (1,
  [('like', 198.23983245503987),
   ('lot', 124.24371823626306),
   ('okay', 103.23997782595596),
   ('thing', 38.23462982435082),
   ('bad', 34.23819926196775),
   ('item', 33.22184641816521),
   ('send', 27.23330901372194),
   ('order', 21.249484376200936),
   ('right', 17.241956504194977)]),
 (2,
  [('good', 1814.2213745551194),
   ('product', 598.2793138428609),
   ('quality', 467.33764520790584),
   ('delivery', 235.2199568876551),
   ('service', 233.23700690399693),
   ('value', 159.23884308875483),
   ('fast', 142.22500690005884),
   ('shop', 80.22511834495936),
   ('beautiful', 63.51037761871636)]),
 (3,
  [('reed', 159.24934583177395),
   ('time', 29.24046439853631),


## Sentimental anlysis using TextBlob

In [347]:
import re 
from textblob import TextBlob 

def clean_sentence(sentence): 
    ''' 
    Utility function to clean tweet text by removing links, special characters 
    using simple regex statements. 
    '''
    return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t]) |(\w+:\/\/\S+)", " ", sentence).split()) 

def get_sentence_sentiment(sentence): 
    ''' 
    Utility function to classify sentiment of passed tweet 
    using textblob's sentiment method 
    '''
    # create TextBlob object of passed tweet text 
    analysis = TextBlob(clean_sentence(sentence)) 
    # set sentiment 
    if analysis.sentiment.polarity > 0: 
        return 'positive'
    elif analysis.sentiment.polarity == 0: 
        return 'neutral'
    else: 
        return 'negative'
    
senti = []
for i in tqdm_notebook(range(len(reviews))):
    senti.append(get_sentence_sentiment(reviews.comments.iloc[i]))

reviews['sentiment_pred'] = senti

HBox(children=(IntProgress(value=0, max=5012), HTML(value='')))




## plotting

In [328]:
from bokeh.io import output_notebook,show, push_notebook
from bokeh.layouts import widgetbox,column,row
from bokeh.models.widgets import Dropdown,PreText, Select

from bokeh.models import HoverTool
from bokeh.plotting import curdoc,figure
from ipywidgets import interact

from bokeh.models import ColumnDataSource

from matplotlib import pyplot as plt
from wordcloud import WordCloud, STOPWORDS
import matplotlib.colors as mcolors


output_notebook()

In [329]:

def word_plot1(topics):
    cols = [color for name, color in mcolors.TABLEAU_COLORS.items()]  # more colors: 'mcolors.XKCD_COLORS'

    cloud = WordCloud(stopwords=STOP_WORDS,
                      background_color='white',
                      width=2500,
                      height=1800,
                      max_words=10,
                      colormap='tab10',
                      color_func=lambda *args, **kwargs: cols[i],
                      prefer_horizontal=1.0)

    #topics = lda.show_topics(formatted=False)

    fig, axes = plt.subplots(1, 4, figsize=(14,14), sharex=True, sharey=True)

    for i, ax in enumerate(axes.flatten()):
        fig.add_subplot(ax)
        topic_words = dict(topics[i][1])
        cloud.generate_from_frequencies(topic_words, max_font_size=300)
        plt.gca().imshow(cloud)
        plt.gca().set_title('Topic ' + str(i), fontdict=dict(size=16))
        plt.gca().axis('off')


    plt.subplots_adjust(wspace=0, hspace=0)
    plt.axis('off')
    plt.margins(x=0, y=0)
    plt.tight_layout()
    plt.show()
    


In [357]:

menu = ['quality','price','beauty','delivery']
menu = menu+['all']

sentiment = ['all','negative','positive','neutral']
topic = ['all','0','1','2','3']

def update(category,sentiment):
    
    if category=='all':
        reviews_ms1 = reviews
    else:
        reviews_ms1 = reviews[reviews['major_topic']==category]
        
    if sentiment != 'all':
        reviews_ms1 = reviews_ms1[reviews_ms1.sentiment_pred==sentiment]
    
    lda1,topics_lda1,topics_lda_df1,reviews_test_lda1,data_vectorized1,vectorizer1 = lda_finder(reviews_ms1,NUM_TOPICS=4)
    
    topic_distribution = pd.DataFrame(reviews_test_lda1.dominent_topic.value_counts()).reset_index()
    topic_distribution.columns = ['index','count']
    
    reviews_test_lda1 = reviews_test_lda1.sort_values(by=category,ascending=False)
    reviews_test_lda1.to_csv('temp.csv',index=False)
    
    push_notebook()
    
    plot_all(category,topic_distribution,topics_lda1)
    #plot_rows(reviews_test_lda1,category)

def update2(topic):
    temp = pd.read_csv('temp.csv')
    if topic!='all':
        temp = temp[temp.dominent_topic==int(topic)]
    
    plot_rows(temp,category)    


def plot_all(category,topic_distribution,topics_lda1):

    p1 = figure(width=500, height=350, x_axis_type="linear",title="Topic distribution of "+category)#, y_range=[0, max_price+10])

    r_aapl = p1.vbar('index',.5 ,'count', source=topic_distribution, color='navy',  alpha=0.5)
    
    z = row([p1])
    show(z)
    word_plot1(topics_lda1)
    
def plot_rows(reviews_ms1,category):
    print('--------------------------------------------------------------------------------')
    print('------------------------------ reviews -----------------------------------------')
    print('--------------------------------------------------------------------------------')
    #reviews_ms1 = reviews_ms1.sort_values(by=category,ascending=False)
    for i in range(10):
        print(reviews_ms1.comments.iloc[i])
        
        print('--------------------------------------------------------------------------------')

In [359]:
interact(update,category = menu,sentiment=sentiment,topic=topic)
interact(update,category = menu,sentiment=sentiment,topic=topic)

interactive(children=(Dropdown(description='category', options=('quality', 'price', 'beauty', 'delivery', 'all…

interactive(children=(Dropdown(description='category', options=('quality', 'price', 'beauty', 'delivery', 'all…

<function __main__.update(category, sentiment, topic)>

# Using W2V

In [77]:
from nltk.corpus import wordnet
import nltk
synonyms = []
antonyms = []

for syn in wordnet.synsets("price"):
    for l in syn.lemmas():
        synonyms.append(l.name())
        if l.antonyms():
             antonyms.append(l.antonyms()[0].name())

print(set(synonyms))
print(set(antonyms))

{'cost', 'damage', 'Leontyne_Price', 'price', 'Mary_Leontyne_Price', 'Price', 'monetary_value', 'toll', 'terms'}
set()


In [101]:
words = []
for i,w in enumerate(wordnet.words()):
    words.append(w)
len(words)

147306

In [100]:
words = pd.Series(words)
words[words=='price']

107779    price
dtype: object

In [78]:
nltk.edit_distance("quality", "price")

6

In [282]:
from gensim.test.utils import common_texts, get_tmpfile
from gensim.models import Word2Vec
# model = Word2Vec([words], size=100, window=5, min_count=1, workers=4)
model.train(list(reviews.tokens), total_examples=1, epochs=10)
model.wv.similarity('quality','price')

0.9999867

In [283]:
model.wv.most_similar('price',topn=15)

[('product', 0.9999924898147583),
 ('good', 0.9999887943267822),
 ('quality', 0.9999867081642151),
 ('very', 0.9999840259552002),
 ('beautiful', 0.9999840259552002),
 ('color', 0.9999831318855286),
 ('delivery', 0.9999828338623047),
 ('value', 0.9999825954437256),
 ('bag', 0.9999812841415405),
 ('but', 0.9999799728393555),
 ('order', 0.9999773502349854),
 ('cheap', 0.9999753832817078),
 ('cute', 0.9999744296073914),
 ('shop', 0.9999696016311646),
 ('service', 0.9999694228172302)]

In [284]:
model.wv.most_similar('beautiful',topn=15)

[('bag', 0.9999854564666748),
 ('price', 0.9999839663505554),
 ('color', 0.9999830722808838),
 ('good', 0.9999821186065674),
 ('cute', 0.9999798536300659),
 ('product', 0.9999792575836182),
 ('delivery', 0.9999788999557495),
 ('but', 0.9999773502349854),
 ('quality', 0.9999747276306152),
 ('cheap', 0.9999735355377197),
 ('order', 0.9999735355377197),
 ('very', 0.9999725818634033),
 ('value', 0.9999707937240601),
 ('shop', 0.9999672174453735),
 ('ordered', 0.9999661445617676)]

In [285]:
model.wv.most_similar('quality',topn=15)

[('good', 0.9999903440475464),
 ('product', 0.999988317489624),
 ('price', 0.9999867081642151),
 ('value', 0.9999830722808838),
 ('very', 0.9999808073043823),
 ('delivery', 0.9999769926071167),
 ('but', 0.999976634979248),
 ('cheap', 0.9999760389328003),
 ('beautiful', 0.9999747276306152),
 ('color', 0.9999745488166809),
 ('bag', 0.9999722242355347),
 ('ordered', 0.9999699592590332),
 ('shop', 0.9999688863754272),
 ('order', 0.9999688267707825),
 ('cute', 0.9999676942825317)]

In [286]:
model.wv.most_similar('delivery',topn=15)

[('good', 0.9999875426292419),
 ('product', 0.999984860420227),
 ('price', 0.9999827742576599),
 ('color', 0.9999796152114868),
 ('but', 0.9999791383743286),
 ('beautiful', 0.99997878074646),
 ('bag', 0.999978244304657),
 ('very', 0.9999774098396301),
 ('quality', 0.9999768733978271),
 ('value', 0.9999756217002869),
 ('cheap', 0.9999735355377197),
 ('fast', 0.999971330165863),
 ('order', 0.9999712705612183),
 ('ordered', 0.9999712705612183),
 ('cute', 0.9999691247940063)]

In [287]:
model.wv.similarity('beautiful','color')

0.999983

In [19]:
def find_related(text,maxn):
    return [val[0] for val in model.wv.most_similar(text,topn=maxn)]

delivery_related = find_related('delivery',15)+['delivery']
price_related = find_related('price',15)+['price']
beauty_related = find_related('beautiful',15)+['beautiful']
quality_related = find_related('quality',15)+['quality']


In [20]:
def syno_anto_finder(text):
    synonyms = []
    antonyms = []

    for syn in wordnet.synsets(text):
        for l in syn.lemmas():
            synonyms.append(l.name())
            if l.antonyms():
                 antonyms.append(l.antonyms()[0].name())

    return list(synonyms),list(antonyms)

a = syno_anto_finder('quality')

In [21]:
quality = [1 if token in quality_related else 0 for token in vocab]
delivery = [1 if token in delivery_related else 0 for token in vocab]
price = [1 if token in price_related else 0 for token in vocab]
beauty = [1 if token in beauty_related else 0 for token in vocab]

In [22]:
topics = {'quality': quality, 'delivery': delivery, 'price': price, 'beauty':beauty}

In [23]:
#reviews['tokens'] = reviews.comments.apply(clean_doc)
def vectorize(l):
    return [1 if token in l else 0  for token in vocab]

reviews['vectors_new'] = reviews.tokens.apply(vectorize)
reviews.head()

Unnamed: 0,rating,comments,sentiment,tokens,vectors,vectors_new
0,4,Give 4 stars because order at the price 37 but...,positive,"[give, stars, order, price, today, baht, sorry]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,5,Ordered at a discount of 10 baht per piece. It...,positive,"[ordered, discount, baht, piece, it, worth, no...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,5,"Small, cute, compact, good But the sash looks ...",positive,"[small, cute, compact, good, but, sash, looks,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,1,The size is not as large as it is down. The st...,negative,"[the, size, large, the, stitching, wrong, the,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,1,The product is compared to the price. Okay. Se...,negative,"[the, product, compared, price, okay, send, sl...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [36]:
name

'beauty'

In [34]:
cos_sim(s_vec, topic_vec)

0.0

In [37]:
reviews['quality']  = 0.0
reviews['delivery'] = 0.0
reviews['price']    = 0.0
reviews['beauty']   = 0.0

cos_sim = lambda x, y: dot(x,y)/(norm(x)*norm(y))

for s_num, s_vec in enumerate(reviews.vectors_new):
    print(s_num)
    for name, topic_vec in topics.items():
        similarity = cos_sim(s_vec, topic_vec)
        reviews.loc[s_num,name] = similarity
        print('\t', name, similarity)

In [40]:
reviews.head()

Unnamed: 0,rating,comments,sentiment,tokens,vectors,vectors_new,quality,delivery,price,beauty
0,4,Give 4 stars because order at the price 37 but...,positive,"[give, stars, order, price, today, baht, sorry]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.094491,0.094491,0.094491,0.0
1,5,Ordered at a discount of 10 baht per piece. It...,positive,"[ordered, discount, baht, piece, it, worth, no...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0,0.0,0.0,0.060634
2,5,"Small, cute, compact, good But the sash looks ...",positive,"[small, cute, compact, good, but, sash, looks,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.06455,0.06455,0.0,0.0
3,1,The size is not as large as it is down. The st...,negative,"[the, size, large, the, stitching, wrong, the,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.072169,0.072169,0.072169,0.0
4,1,The product is compared to the price. Okay. Se...,negative,"[the, product, compared, price, okay, send, sl...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.188982,0.188982,0.188982,0.047246


In [49]:
delivery_related

['fast',
 'speed',
 'service',
 'product',
 'the',
 'provided',
 'quality',
 'money',
 'company',
 'transportation',
 'good',
 'value',
 'very',
 'price',
 'shipping',
 'delivery']

## plotting

In [41]:
from bokeh.io import output_notebook,show, push_notebook
from bokeh.layouts import widgetbox,column,row
from bokeh.models.widgets import Dropdown,PreText, Select

from bokeh.models import HoverTool
from bokeh.plotting import curdoc,figure
from ipywidgets import interact

from bokeh.models import ColumnDataSource

output_notebook()

In [43]:

menu = ['quality','price','beauty','delivery']
menu = menu+['all']

def update(category):
    
    if category=='all':
        reviews_ms1 = reviews.iloc[0:10]
    else:
        reviews_ms1 = reviews.copy()
        reviews_ms1 = reviews_ms1.sort_values(by=category,ascending=False)
        reviews_ms1 = reviews_ms1.iloc[0:10]
    
    push_notebook()
    
    plot_all(reviews_ms1)

    
def plot_all(reviews_ms1):
    print('--------------------------------------------------------------------------------')
    print('------------------------------ reviews -----------------------------------------')
    print('--------------------------------------------------------------------------------')
    for i in range(len(reviews_ms1)):
        print(reviews_ms1.comments.iloc[i])
        
        print('--------------------------------------------------------------------------------')

In [47]:
interact(update,category = menu)

interactive(children=(Dropdown(description='category', options=('quality', 'price', 'beauty', 'delivery', 'all…

<function __main__.update(category)>