In [198]:
import numpy as np
import pandas as pd
import nltk,string
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
import math
import os
from nltk import FreqDist
from nltk.corpus import stopwords
import random

import en_core_web_sm
from nltk.util import ngrams

nlp = en_core_web_sm.load()

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/guangxushen/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/guangxushen/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [199]:
data_path = 'data/reviewSelected100.json'
os.path.exists(data_path)

True

In [200]:
random.seed(69)

In [201]:
review_df = pd.read_json(data_path, lines=True,encoding = "ISO-8859-1")

In [202]:
#Randomly generate 1 index
review_random_idx = np.array(np.random.rand(1)*len(review_df), dtype=np.int32)
#print out the business id selected
print(review_df.iloc[review_random_idx]['business_id'].iloc[0])

tD7_aIUTlfzyMoMkXXwiZA


In [203]:
#set the selected business id as the id to be investigated
selected_id = review_df.iloc[review_random_idx]['business_id'].iloc[0]

In [204]:
selected_id_str = str(selected_id)
print(selected_id_str)

tD7_aIUTlfzyMoMkXXwiZA


In [205]:
#Extract out the sentences related to the random id
review_random_df = review_df.iloc[review_random_idx]
review_random_df = review_random_df.reset_index()

In [206]:
#Tokenize the text
review_random_df['tokenize'] = review_random_df['text'].apply(nltk.word_tokenize)
#Tag those individual tokens respectively
review_random_df['pos_tag'] = review_random_df['tokenize'].apply(nltk.pos_tag)

In [207]:
review_random_df.to_json(r'output/specific_business_review_3_final.json', orient='records', lines=True)

In [208]:
tokenized_path = 'output/specific_business_review_3_final.json'
reviews_adjectives_df = pd.read_json(tokenized_path, lines=True,encoding = "ISO-8859-1")

In [209]:
print(len(reviews_adjectives_df))

1


In [210]:
print(len(review_df))

15300


# Find all reviews that are related to a certain store

In [211]:
def extract_reviews(store_id):
  if type(store_id) == str:
    selected_id = store_id
  else:
    selected_id = str(store_id)
  selected_store_reviews = []
  for index, element in review_df.iterrows():
    if element['business_id'] == selected_id:
        selected_store_reviews.append(element['text'])
  return selected_store_reviews

In [212]:
extracted_review = extract_reviews(selected_id_str)
print(extracted_review)

["Three of us ordered a large pizza and two orders of wings for delivery and wow, really great food. The pizza is the best I've had in this area and the wings get in a three way tie with The Portly Piper and Magwyers. Also we had plenty of leftovers for the next day. Bonus!", "Was blown away by service - which isn't a small feat when you were just picking up food! \nDeep dish, NY style pizza just loaded with cheese.\nPizza is delicious, ingredients are fresh and it really is heads and shoulders above your every day delivery.\n\nThe only downside for me, and it was a complete personal taste thing, is that I don't like that much cheese on my pizza and prefer slightly thinner crust. \n\nYes, expecting your total outrage at the above comment!! :P", 'I ordered a meat lovers pizza and it was really good. The pizza has a generous amount of tasty toppings and cheese with the right amount of sauce. The dough is light,soft and crispy around the edges. I will definitely come back for more pizza.'

# Use spacy nlp to tokenize and pos tag each text

In [213]:
def spacy_process(input_list: list):
    final_result = []
    doc = nlp(str(input_list))
    for token in doc:
        result = []
        result.append(token.text)
        result.append(token.pos_)
        final_result.append(result)
    return final_result,doc

In [214]:
processed_review,result_doc = spacy_process(extracted_review)

In [215]:
print(processed_review)

[['[', 'PUNCT'], ['"', 'PUNCT'], ['Three', 'NUM'], ['of', 'ADP'], ['us', 'PRON'], ['ordered', 'VERB'], ['a', 'DET'], ['large', 'ADJ'], ['pizza', 'NOUN'], ['and', 'CCONJ'], ['two', 'NUM'], ['orders', 'NOUN'], ['of', 'ADP'], ['wings', 'NOUN'], ['for', 'ADP'], ['delivery', 'NOUN'], ['and', 'CCONJ'], ['wow', 'INTJ'], [',', 'PUNCT'], ['really', 'ADV'], ['great', 'ADJ'], ['food', 'NOUN'], ['.', 'PUNCT'], ['The', 'DET'], ['pizza', 'NOUN'], ['is', 'AUX'], ['the', 'DET'], ['best', 'ADJ'], ['I', 'PRON'], ["'ve", 'AUX'], ['had', 'VERB'], ['in', 'ADP'], ['this', 'DET'], ['area', 'NOUN'], ['and', 'CCONJ'], ['the', 'DET'], ['wings', 'NOUN'], ['get', 'VERB'], ['in', 'ADP'], ['a', 'DET'], ['three', 'NUM'], ['way', 'NOUN'], ['tie', 'NOUN'], ['with', 'ADP'], ['The', 'DET'], ['Portly', 'PROPN'], ['Piper', 'PROPN'], ['and', 'CCONJ'], ['Magwyers', 'PROPN'], ['.', 'PUNCT'], ['Also', 'ADV'], ['we', 'PRON'], ['had', 'VERB'], ['plenty', 'NOUN'], ['of', 'ADP'], ['leftovers', 'NOUN'], ['for', 'ADP'], ['the', 'DE

In [216]:
print(result_doc)

["Three of us ordered a large pizza and two orders of wings for delivery and wow, really great food. The pizza is the best I've had in this area and the wings get in a three way tie with The Portly Piper and Magwyers. Also we had plenty of leftovers for the next day. Bonus!", "Was blown away by service - which isn't a small feat when you were just picking up food! \nDeep dish, NY style pizza just loaded with cheese.\nPizza is delicious, ingredients are fresh and it really is heads and shoulders above your every day delivery.\n\nThe only downside for me, and it was a complete personal taste thing, is that I don't like that much cheese on my pizza and prefer slightly thinner crust. \n\nYes, expecting your total outrage at the above comment!! :P", 'I ordered a meat lovers pizza and it was really good. The pizza has a generous amount of tasty toppings and cheese with the right amount of sauce. The dough is light,soft and crispy around the edges. I will definitely come back for more pizza.'

# Find out the relevant pairs

In [217]:
def adjective_pair_extraction_1(doc_2):
    #finding all the adjective phrase combinations of the selected store
    selected_combination_1 = []
    count = 0
    for index in range(len(doc_2)-1):
        temp_list = []
        element_before = doc_2[index-1]
        element = doc_2[index]
        element_after = doc_2[index+1]
        if((element_before.pos_) == 'ADV'and (element.pos_ == 'ADJ')):
            count += 1
            temp_list.append(element_before.text)
            temp_list.append(element.text)
        temp_list_tuple = tuple(temp_list)
        selected_combination_1.append(temp_list_tuple)
        selected_combination_1 = list(filter(lambda x: x, selected_combination_1))
    return selected_combination_1,count

In [218]:
def adjective_pair_extraction_2(doc_2):
    #finding all the adjective phrase combinations of the selected store
    selected_combination_2 = []
    count = 0
    for index in range(len(doc_2)-1):
        temp_list = []
        element_before = doc_2[index-1]
        element = doc_2[index]
        element_after = doc_2[index+1]
        if((element_before.pos_) == 'ADJ'and (element.pos_ == 'ADV')):
            count += 1
            temp_list.append(element_before.text)
            temp_list.append(element.text)
        temp_list_tuple = tuple(temp_list)
        selected_combination_2.append(temp_list_tuple)
        selected_combination_2 = list(filter(lambda x: x, selected_combination_2))
    return selected_combination_2,count

In [219]:
def adjective_pair_extraction_3(doc_2):
    #finding all the adjective phrase combinations of the selected store
    selected_combination_3 = []
    count = 0
    for index in range(len(doc_2)-1):
        temp_list = []
        element_before = doc_2[index-1]
        element = doc_2[index]
        element_after = doc_2[index+1]
        if((element_before.pos_) == 'ADJ'and (element.pos_ == 'ADJ')):
            count += 1
            temp_list.append(element_before.text)
            temp_list.append(element.text)
        temp_list_tuple = tuple(temp_list)
        selected_combination_3.append(temp_list_tuple)
        selected_combination_3 = list(filter(lambda x: x, selected_combination_3))
    return selected_combination_3,count

In [220]:
def adjective_pair_extraction_4(doc_2):
    #finding all the adjective phrase combinations of the selected store
    selected_combination_4 = []
    count = 0
    for index in range(len(doc_2)-1):
        temp_list = []
        element_before = doc_2[index-1]
        element = doc_2[index]
        element_after = doc_2[index+1]
        if((element_before.pos_) == 'ADV'and (element.pos_ == 'ADV') and (element_after.pos_ == 'ADJ')):
            count += 1
            temp_list.append(element_before.text)
            temp_list.append(element.text)
            temp_list.append(element_after.text)
        temp_list_tuple = tuple(temp_list)
        selected_combination_4.append(temp_list_tuple)
        selected_combination_4 = list(filter(lambda x: x, selected_combination_4))
    return selected_combination_4,count

In [221]:
def adjective_pair_extraction_5(doc_2):
    #finding all the adjective phrase combinations of the selected store
    selected_combination_5 = []
    count = 0
    for index in range(len(doc_2)-1):
        temp_list = []
        element_before = doc_2[index-1]
        element = doc_2[index]
        element_after = doc_2[index+1]
        if((element.pos_) == 'ADJ' and (element.pos_ == 'ADP') and (element_after.pos_ == 'NOUN')):
            count += 1
            temp_list.append(element_before.text)
            temp_list.append(element.text)
            temp_list.append(element_after.text)
        temp_list_tuple = tuple(temp_list)
        selected_combination_5.append(temp_list_tuple)
        selected_combination_5 = list(filter(lambda x: x, selected_combination_5))
    return selected_combination_5,count

In [222]:
def adjective_pair_extraction_6(doc_2):
    #finding all the adjective phrase combinations of the selected store
    selected_combination_6 = []
    count = 0
    for index in range(len(doc_2)-2):
        temp_list = []
        element_1 = doc_2[index-1]
        element_2 = doc_2[index]
        element_3 = doc_2[index+1]
        element_4 = doc_2[index+2]
        if((element_1.pos_) == 'ADJ' and (element_2.pos_ == 'ADP') and (element_3.pos_ == 'ADP') and element_4.pos_ == 'NOUN'):
            count += 1
            temp_list.append(element_1.text)
            temp_list.append(element_2.text)
            temp_list.append(element_3.text)
            temp_list.append(element_4.text)
        temp_list_tuple = tuple(temp_list)
        selected_combination_6.append(temp_list_tuple)
        selected_combination_6 = list(filter(lambda x: x, selected_combination_6))
    return selected_combination_6,count

In [223]:
adjective_phrase_result_1,count_1 = adjective_pair_extraction_1(result_doc)

In [224]:
print(count_1)
print(adjective_phrase_result_1)

164
[('really', 'great'), ('slightly', 'thinner'), ('really', 'good'), ('So', 'good'), ('rather', 'disappointing'), ('too', 'much'), ('pretty', 'prompt'), ('very', 'good'), ('just', 'great'), ('really', 'good'), ('pretty', 'deep'), ('too', 'much'), ('very', 'courteous'), ('likely', 'understaffed'), ('too', 'late'), ('how', 'cheesy'), ('pretty', 'standard'), ('very', 'good'), ('so', 'cheap'), ('pleasantly', 'surprised'), ('Very', 'generous'), ('fairly', 'busy'), ('very', 'busy'), ('so', 'disappointed'), ('now', 'cold'), ('very', 'cold'), ('really', 'unacceptable'), ('too', 'much'), ('so', 'bad'), ('very', 'busy'), ('too', 'bad'), ('really', 'nice'), ('very', 'posh'), ('very', 'rich'), ('overall', 'amazing'), ('so', 'unassuming'), ('of', 'hidden'), ('too', 'much'), ('too', 'thick'), ('really', 'amazing'), ('ridiculously', 'big'), ('pleasantly', 'surprised'), ('very', 'yummy'), ('really', 'good'), ('very', 'edible'), ('very', 'basic'), ('really', 'good'), ('very', 'generous'), ('as', 'goo

In [225]:
adjective_phrase_result_2,count_2 = adjective_pair_extraction_2(result_doc)

In [226]:
print(count_2)
print(adjective_phrase_result_2)

20
[('delivery.\\n\\nThe', 'only'), ('disappointing', 'as'), ('excited', 'when'), ('special', 'enough'), ('good', 'overall'), ('friendly', 'enough'), ('unassuming', 'when'), ('amazing', 'however'), ('sparse', 'even'), ('good', 'too'), ('little', 'further'), ('plentiful', 'also'), ('cozy', 'inside'), ('aware', 'enough'), ('great', 'too'), ('amazing', 'even'), ('large', 'easily'), ('pricey', 'though'), ('condescending', 'when'), ('better', 'then')]


In [227]:
adjective_phrase_result_3,count_3 = adjective_pair_extraction_3(result_doc)

In [228]:
print(count_3)
print(adjective_phrase_result_3)

36
[('complete', 'personal'), ('cold', 'Canadian'), ('little', 'hangry'), ('thick', 'hardy'), ('horrendous', 'wait'), ('good', 'old'), ('reliable', 'decent'), ('decent', 'sized'), ('new', 'favourite'), ('Great', 'old'), ('great', 'soft'), ('efficient', 'young'), ('enough', 'heavy'), ('other', 'alcoholic'), ('nice', 'thick'), ('great', 'little'), ('good', 'North'), ('North', 'American'), ('typical', 'fast'), ('Amazing', 'Amazing'), ('Amazing', 'Amazing'), ('worth', 'it.\\n\\nThe'), ('many', 'other'), ('extra', 'large'), ('few', 'leftover'), ('flavorful', 'thick'), ('thinner', 'crust.\\n\\nThe'), ('Delicious', 'deep'), ('extra', 'hungry'), ('extra', 'little'), ('non', '-'), ('great', 'little'), ('typical', 'fast'), ('hard', 'flacky'), ('good', 'many'), ('extra', 'large')]


In [229]:
adjective_phrase_result_4,count_4 = adjective_pair_extraction_4(result_doc)

In [230]:
print(count_4)
print(adjective_phrase_result_4)

21
[('Just', 'too', 'much'), ('also', 'very', 'good'), ('just', 'really', 'good'), ('just', 'too', 'much'), ('very', 'likely', 'understaffed'), ('very', 'pleasantly', 'surprised'), ('enough', 'very', 'busy'), ('just', 'overall', 'amazing'), ('kind', 'of', 'hidden'), ('Just', 'as', 'good'), ('already', 'pretty', 'full'), ('also', 'very', 'good'), ('also', 'very', 'good'), ('just', 'too', 'heavy.\\n\\nI'), ('significantly', 'more', 'expensive'), ('Far', 'too', 'much'), ('way', 'too', 'thick'), ('even', 'just', 'plain'), ('enough', 'how', 'tasty'), ('just', 'really', 'upset'), ('even', 'more', 'shady')]


In [231]:
adjective_phrase_result_5,count_5 = adjective_pair_extraction_5(result_doc)

In [232]:
print(count_5)
print(adjective_phrase_result_5)

0
[]


In [233]:
adjective_phrase_result_6,count_6 = adjective_pair_extraction_6(result_doc)

In [234]:
print(count_6)
print(adjective_phrase_result_6)

0
[]


# Adjective numbers extraction

In [235]:
#how many reviews are related to this one store
total_frequency = len(extracted_review)
def evaluative_indicativeness (adjective_phrase_list: list):
    bigram=[]
    bigram.extend(adjective_phrase_list)  
    freq_bi = nltk.FreqDist(bigram)
    result_frequency = freq_bi.most_common(5)
    result_frequency_dict = {}
    for element in freq_bi.most_common(5):
      individual_frequency = element[1]/total_frequency
      result_frequency_dict[element[0]] = individual_frequency
    return result_frequency_dict

In [236]:
#how many reviews are related to this one store
total_frequency = len(extracted_review)
def evaluative_indicativeness_bigram (adjective_phrase_list: list):
    bigram=[]
    bigram.extend(adjective_phrase_list)  
    freq_bi = nltk.FreqDist(bigram)
    result_frequency = freq_bi.most_common(5)
    result_frequency_dict = {}
    for element in freq_bi.most_common(5):
      individual_frequency = element[1]/total_frequency
      result_frequency_dict[element[0]] = individual_frequency
    return result_frequency_dict

In [237]:
bigram_list = adjective_phrase_result_1 + adjective_phrase_result_2 + adjective_phrase_result_3
trigram_list = adjective_phrase_result_4

In [238]:
#how many reviews are related to this one store
total_frequency = len(extracted_review)
def evaluative_indicativeness_bigram (adjective_phrase_list: list):
    bigram=[]
    bigram.extend(adjective_phrase_list)  
    freq_bi = nltk.FreqDist(bigram)
    result_frequency = freq_bi.most_common(5)
    result_frequency_dict = {}
    for element in freq_bi.most_common(5):
      individual_frequency = element[1]/total_frequency
      result_frequency_dict[element[0]] = individual_frequency
    return result_frequency_dict

In [239]:
#how many reviews are related to this one store
total_frequency = len(extracted_review)
def evaluative_indicativeness_trigram (adjective_phrase_list: list):
    trigram=[]
    trigram.extend(adjective_phrase_list)  
    freq_tri = nltk.FreqDist(trigram)
    result_frequency = freq_tri.most_common(5)
    result_frequency_dict = {}
    for element in freq_tri.most_common(5):
      individual_frequency = element[1]/total_frequency
      result_frequency_dict[element[0]] = individual_frequency
    return result_frequency_dict

In [240]:
frequency_result_bi = evaluative_indicativeness_bigram(bigram_list)
frequency_result_tri = evaluative_indicativeness_trigram(trigram_list)

In [241]:
print(frequency_result_bi)

{('too', 'much'): 0.06, ('very', 'good'): 0.06, ('really', 'good'): 0.05, ('so', 'much'): 0.05, ('pleasantly', 'surprised'): 0.03}


In [242]:
print(frequency_result_tri)

{('also', 'very', 'good'): 0.03, ('Just', 'too', 'much'): 0.01, ('just', 'really', 'good'): 0.01, ('just', 'too', 'much'): 0.01, ('very', 'likely', 'understaffed'): 0.01}


In [243]:
def merge(dict1, dict2):
    res = {**dict1, **dict2}
    return res

In [244]:
final_dict = merge(frequency_result_bi,frequency_result_tri)

In [245]:
print(final_dict)

{('too', 'much'): 0.06, ('very', 'good'): 0.06, ('really', 'good'): 0.05, ('so', 'much'): 0.05, ('pleasantly', 'surprised'): 0.03, ('also', 'very', 'good'): 0.03, ('Just', 'too', 'much'): 0.01, ('just', 'really', 'good'): 0.01, ('just', 'too', 'much'): 0.01, ('very', 'likely', 'understaffed'): 0.01}


In [246]:
from operator import itemgetter

# Initialize N 
N = 5
# Using sorted() + itemgetter() + items()
frequency_result_top_5 = dict(sorted(final_dict.items(), key = itemgetter(1), reverse = True)[:N])

In [247]:
print(frequency_result_top_5)

{('too', 'much'): 0.06, ('very', 'good'): 0.06, ('really', 'good'): 0.05, ('so', 'much'): 0.05, ('pleasantly', 'surprised'): 0.03}


In [248]:
index = 1
index_result = {}

for key in frequency_result_top_5:
    phrase_id = "phrase_{}".format(index)
    prob_id = "prob_{}".format(index)
    index_result[phrase_id] = frequency_result_top_5[key]
    index += 1

In [249]:
print(index_result)

{'phrase_1': 0.06, 'phrase_2': 0.06, 'phrase_3': 0.05, 'phrase_4': 0.05, 'phrase_5': 0.03}


# Trying out on the entire dataset

In [250]:
review_length = len(review_df)
print(review_length)

15300


In [251]:
#finding all the business ids
business_ids = set()
for i in range(0, review_length):
    business = review_df.iloc[i]
    business_ids.add(business['business_id'])

In [252]:
print(len(business_ids))

153


In [253]:
selected = {}
for business in business_ids:
    #extract reviews for each individual id
    individual_reviews = extract_reviews(business)
    selected[business] = individual_reviews

In [254]:
#use spacy to tokenize and process each review
#and extract relevant pairs
for key in selected:
    processed_review_selected,result_doc_selected = spacy_process(selected[key])
    final_adjective_phrase_result_1,final_count_1 = adjective_pair_extraction_1(result_doc_selected)
    final_adjective_phrase_result_2,final_count_2 = adjective_pair_extraction_2(result_doc_selected)
    final_adjective_phrase_result_3,final_count_3 = adjective_pair_extraction_3(result_doc_selected)
    final_adjective_phrase_result_4,final_count_4 = adjective_pair_extraction_4(result_doc_selected)
    #adding more considerations
    final_adjective_phrase_result_5,final_count_5 = adjective_pair_extraction_5(result_doc_selected)
    final_adjective_phrase_result_6,final_count_6 = adjective_pair_extraction_6(result_doc_selected)


    
    final_bigram_list = final_adjective_phrase_result_1 + final_adjective_phrase_result_2 + final_adjective_phrase_result_3
    #final_trigram_list = final_adjective_phrase_result_4 
    final_trigram_list = final_adjective_phrase_result_4 + final_adjective_phrase_result_5 + final_adjective_phrase_result_6



    #final_frequency_result = evaluative_indicativeness(final_adjective_phrase_result)
    final_frequency_result_bi = evaluative_indicativeness_bigram(final_bigram_list)
    final_frequency_result_tri = evaluative_indicativeness_bigram(final_trigram_list)
    
    final_frequency_result = merge(final_frequency_result_bi,final_frequency_result_tri)

    selected[key] = final_frequency_result

In [262]:
#use spacy to tokenize and process each review
for key in selected:
    print(key)
    print(selected[key])

iK_3WktnHa_YlbWdA0Axkw
{('authentic', 'Korean'): 0.08, ('best', 'Korean'): 0.05, ('really', 'good'): 0.04, ('at', 'least'): 0.04, ('pretty', 'good'): 0.04, ('super', 'super', 'delicious'): 0.01, ('also', 'quite', 'authentic'): 0.01, ('also', 'very', 'happy'): 0.01, ('so', 'very', 'Korean'): 0.01, ('actually', 'pretty', 'good'): 0.01}
4tjKsIaBXCDUP9PL82Vu_A
{('very', 'friendly'): 0.03, ('super', 'nice'): 0.03, ('very', 'attentive'): 0.03, ('really', 'nice'): 0.02, ('completely', 'worth'): 0.02, ('just', 'as', 'pricey'): 0.01, ('kind', 'of', 'uncomfortable'): 0.01, ('way', 'too', 'pricey'): 0.01, ('twice', 'as', 'much'): 0.01, ('far', 'too', 'much'): 0.01}
IUMyUYOIR9UQ7XGIEQKOuA
{('very', 'good'): 0.06, ('much', 'better'): 0.04, ('pretty', 'sure'): 0.03, ('as', 'good'): 0.03, ('how', 'good'): 0.02, ('just', 'so', 'ghetto'): 0.01, ('no', 'longer', 'available'): 0.01, ('kind', 'of', 'iffy'): 0.01, ('much', 'more', 'reasonable'): 0.01, ('very', 'very', 'salty'): 0.01}
i-2OzvZUDtvKCMq1vcRSZg

# Analysis

In [263]:
frequency_result_top_5_key_list = list(frequency_result_top_5)
frequency_result_top_5_value_list = list(frequency_result_top_5.values())


In [275]:
print("For business ",selected_id)
phrase_1 = frequency_result_top_5_key_list[0]
prob_1 = frequency_result_top_5_value_list[0]
print("The first adjective phrase is:",phrase_1)
print()
print("Its comparison with the rest of the stores is as follows: \n")
print("============== Other stores ==============")

for key in selected:
    values = selected[key]
    for value in values:
        if(value == phrase_1):
            print("Store: "+ key + " Probability: ", selected[key][value])
            if(float(prob_1) - selected[key][value]) >= 0.03:
                print("******* There is a big difference *******")

For business  tD7_aIUTlfzyMoMkXXwiZA
The first adjective phrase is: ('too', 'much')

Its comparison with the rest of the stores is as follows: 

Store: NnxwkiBvSSyKeFiez0DDcQ Probability:  0.04
Store: cPzIic1AqH1ApTVVaww5hw Probability:  0.07
Store: Jol7cXrDmKKmu_V6qp4wMA Probability:  0.05
Store: 0kPm1zEpeXFRg8D2phqgCQ Probability:  0.03
******* There is a big difference *******
Store: Mx8jCUdhaNbMgx_04DWHdA Probability:  0.05
Store: 793LPDn8axywoOANbDeAZw Probability:  0.03
******* There is a big difference *******
Store: WO3L0pmtAO8ozspmaVdHIg Probability:  0.03
******* There is a big difference *******
Store: zPEYgVqJ2QNKi45FJi2jvg Probability:  0.03
******* There is a big difference *******
Store: -7XWJYkutqhIxLen7Grg1g Probability:  0.04
Store: OjnRf8yDGEBCoUDdchSViw Probability:  0.06
Store: tD7_aIUTlfzyMoMkXXwiZA Probability:  0.06
Store: QqGMtc24VdCzYAajw1g4bA Probability:  0.04
Store: UDiMCb4LKowlozI5mhtO_A Probability:  0.03
******* There is a big difference *******
Store: a

In [276]:
print("For business ",selected_id)
phrase_2 = frequency_result_top_5_key_list[1]
prob_2 = frequency_result_top_5_value_list[1]
print("The first adjective phrase is:",phrase_2)
print()
print("Its comparison with the rest of the stores is as follows: \n")
print("============== Other stores ==============")

for key in selected:
    values = selected[key]
    for value in values:
        if(value == phrase_2):
            print("Store: "+ key + " Probability: ", selected[key][value])
            if(float(prob_2) - selected[key][value]) >= 0.03:
                print("******* There is a big difference *******")

For business  tD7_aIUTlfzyMoMkXXwiZA
The first adjective phrase is: ('very', 'good')

Its comparison with the rest of the stores is as follows: 

Store: IUMyUYOIR9UQ7XGIEQKOuA Probability:  0.06
Store: i-2OzvZUDtvKCMq1vcRSZg Probability:  0.03
******* There is a big difference *******
Store: WA7sC64kCRstywm2EgZXEw Probability:  0.11
Store: Jol7cXrDmKKmu_V6qp4wMA Probability:  0.07
Store: GUsYtGG557XKlHaMH87erg Probability:  0.06
Store: JqBtQ1bSynPHE9gbyuSSvA Probability:  0.11
Store: yEZn1XpLsEC9uBa-X4xAZw Probability:  0.05
Store: 6RbCJLiwNYwS6ab9vzD_zg Probability:  0.05
Store: R4R7ttLXfKKWM0VEMoaW4w Probability:  0.04
Store: vxuyl9IVum8zngXgvbT4Jg Probability:  0.03
******* There is a big difference *******
Store: eUVH2Vs_sHApAKSV4iT2FQ Probability:  0.09
Store: 4no7JcP4CYasqxdxmrTGRA Probability:  0.04
Store: mF2EW3twSrFPmT_RVV1-Qg Probability:  0.08
Store: hXzoNgpkC86K_Jfg_zMHvA Probability:  0.05
Store: xVpE01l6ZXdEtVf5PkRpDg Probability:  0.1
Store: IBYExgJ6jURNJOk_5uSrKg Probab

In [277]:
print("For business ",selected_id)
phrase_3 = frequency_result_top_5_key_list[2]
prob_3 = frequency_result_top_5_value_list[2]
print("The first adjective phrase is:",phrase_3)
print()
print("Its comparison with the rest of the stores is as follows: \n")
print("============== Other stores ==============")

for key in selected:
    values = selected[key]
    for value in values:
        if(value == phrase_3):
            print("Store: "+ key + " Probability: ", selected[key][value])
            if(float(prob_3) - selected[key][value]) >= 0.03:
                print("******* There is a big difference *******")

For business  tD7_aIUTlfzyMoMkXXwiZA
The first adjective phrase is: ('really', 'good')

Its comparison with the rest of the stores is as follows: 

Store: iK_3WktnHa_YlbWdA0Axkw Probability:  0.04
Store: j7HO1YeMQGYo3KibMXZ5vg Probability:  0.07
Store: cPzIic1AqH1ApTVVaww5hw Probability:  0.03
Store: WA7sC64kCRstywm2EgZXEw Probability:  0.08
Store: GUsYtGG557XKlHaMH87erg Probability:  0.04
Store: JqBtQ1bSynPHE9gbyuSSvA Probability:  0.06
Store: XA_m9daZl2VFDA6alnkBvg Probability:  0.03
Store: Mx8jCUdhaNbMgx_04DWHdA Probability:  0.04
Store: 793LPDn8axywoOANbDeAZw Probability:  0.05
Store: vxuyl9IVum8zngXgvbT4Jg Probability:  0.07
Store: WO3L0pmtAO8ozspmaVdHIg Probability:  0.03
Store: eUVH2Vs_sHApAKSV4iT2FQ Probability:  0.07
Store: 4no7JcP4CYasqxdxmrTGRA Probability:  0.06
Store: hXzoNgpkC86K_Jfg_zMHvA Probability:  0.06
Store: xVpE01l6ZXdEtVf5PkRpDg Probability:  0.09
Store: IBYExgJ6jURNJOk_5uSrKg Probability:  0.06
Store: p6FPcgLymnpk_gAyQuW_Mw Probability:  0.05
Store: d8Kz48UDfZ0M

In [270]:
print("For business ",selected_id)
phrase_4 = frequency_result_top_5_key_list[3]
prob_4 = frequency_result_top_5_value_list[3]
print("The first adjective phrase is:",phrase_4)
print()
print("Its comparison with the rest of the stores is as follows: \n")
print("============== Other stores ==============")

for key in selected:
    values = selected[key]
    for value in values:
        if(value == phrase_4):
            print("Store: "+ key + " Probability: ", selected[key][value])
            if(float(prob_4) - selected[key][value]) >= 0.03:
                print("******* There is a big difference *******")

For business  tD7_aIUTlfzyMoMkXXwiZA
The first adjective phrase is: ('so', 'much')

Its comparison with the rest of the stores is as follows: 

Store: NnxwkiBvSSyKeFiez0DDcQ Probability:  0.03
Store: Ai-GviVpPlti74NtM8y9QQ Probability:  0.02
******* There is a big difference *******
Store: KY2vtER_NgmonFNY_XFDpg Probability:  0.04
Store: z8Em-bhZI3Mmspml7tj6tg Probability:  0.03
Store: tD7_aIUTlfzyMoMkXXwiZA Probability:  0.05
Store: IeK0nzbMPbWnUBIPB83v6A Probability:  0.04
Store: QaFO4S6HFUu2NIbeu4OwCg Probability:  0.02
******* There is a big difference *******
Store: a6mYyhGgxWhnhrGDHzOTPA Probability:  0.03
Store: AEx2SYEUJmTxVVB18LlCwA Probability:  0.04
Store: e-YnECeZNt8ngm0tu4X9mQ Probability:  0.06


In [269]:
print("For business ",selected_id)
phrase_5 = frequency_result_top_5_key_list[4]
prob_5 = frequency_result_top_5_value_list[4]
print("The first adjective phrase is:",phrase_5)
print()
print("Its comparison with the rest of the stores is as follows: \n")
print("============== Other stores ==============")

for key in selected:
    values = selected[key]
    for value in values:
        if(value == phrase_5):
            print("Store: "+ key + " Probability: ", selected[key][value])
            if(float(prob_5) - selected[key][value]) >= 0.03:
                print("******* There is a big difference *******")

For business  tD7_aIUTlfzyMoMkXXwiZA
The first adjective phrase is: ('pleasantly', 'surprised')

Its comparison with the rest of the stores is as follows: 

Store: tD7_aIUTlfzyMoMkXXwiZA Probability:  0.03
Store: KjjNv4YFzBFnsBh2ncH1Iw Probability:  0.04
Store: pJnN5PxqFPGZW2pJWkQtzA Probability:  0.04
