# LDA Model Results

In [1]:
# Inject a CSS rule so the notebook's `.container` element uses 95% of the window width.
# `display`/`HTML` come from the public `IPython.display` module; importing them
# from `IPython.core.display` has been deprecated since IPython 7.14.
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

In [2]:
from gensim.models.ldamulticore import LdaMulticore
import itertools
from collections import Counter

## Load Model Results

In [3]:
# Load the saved LDA models from ../models.
# The digits at the end of each variable name concatenate the settings encoded
# in the file name, in the order <topics><terms><passes>
# (e.g. 101050 -> 10 topics, 10 terms, 50 passes).
ldam_fast_food_101050      = LdaMulticore.load('../models/ldam_fast_food_10_topics_10_terms_50_passes.model')
ldam_fast_food_501050      = LdaMulticore.load('../models/ldam_fast_food_50_topics_10_terms_50_passes.model')
ldam_non_fast_food_201550  = LdaMulticore.load('../models/ldam_non_fast_food_20_topics_15_terms_50_passes.model')
ldam_all_noun_151550       = LdaMulticore.load('../models/ldam_noun_15_topics_15_terms_50_passes.model')
ldam_all_noun_201550       = LdaMulticore.load('../models/ldam_noun_20_topics_15_terms_50_passes.model')
ldam_all_noun_501030       = LdaMulticore.load('../models/ldam_noun_50_topics_10_terms_30_passes.model')
ldam_all_noun_verbs_301550 = LdaMulticore.load('../models/ldam_noun_verb_30_topics_15_terms_50_passes.model')

## Pretty Printer Function

In [4]:
def _quoted_terms(topic_repr):
    """Return the double-quoted terms of a formatted topic string, in order."""
    # A formatted topic looks like '0.047*"dirty" + 0.045*"clean"'; after
    # splitting on '"' the terms sit at the odd positions.
    return topic_repr.split('"')[1::2]


def print_topic_terms(model, num_topics=-1, num_words=10, unique=False):
    """Print one line per topic: the topic id, a tab, and the list of its terms.

    Parameters
    ----------
    model
        Object exposing ``print_topics(num_topics=..., num_words=...)`` that
        returns ``(topic_id, formatted_string)`` pairs in which each term is
        wrapped in double quotes.
    num_topics : int
        Forwarded unchanged to ``model.print_topics``.
    num_words : int
        Forwarded unchanged to ``model.print_topics``.
    unique : bool
        When False, every term of every topic is printed.  When True, any term
        that occurs more than once across the returned topics is dropped, so
        each topic shows only the terms that distinguish it from the others.

    Returns
    -------
    None
        The listing is written to stdout, preceded by a banner line.
    """
    results = model.print_topics(num_topics=num_topics, num_words=num_words)
    topic_terms = [(topic, _quoted_terms(topic_repr)) for topic, topic_repr in results]

    if unique:
        term_counts = Counter(
            itertools.chain.from_iterable(terms for _, terms in topic_terms)
        )
        # A set gives constant-time membership tests in the filter below.
        shared_terms = {term for term, count in term_counts.items() if count > 1}
        topic_terms = [
            (topic, [term for term in terms if term not in shared_terms])
            for topic, terms in topic_terms
        ]
        print('============================ Unique Terms Per Topic ===========================')
    else:
        print('=============================== Terms Per Topic ===============================')

    for topic, terms in topic_terms:
        print('{}\t{}'.format(topic, terms))

In [5]:
# 10-topic fast-food model: list each topic with the terms shared between topics removed.
print_topic_terms(ldam_fast_food_101050, unique=True)

0	['go', 'best', 'menu', 'style']
1	['pizza', 'drink', 'ice', 'topping', 'free', 'tea', 'pie', 'crust']
2	['great', 'friendly', 'fresh', 'delicious']
3	['rice', 'wei', 'pei', 'panda', 'dish']
4	['dog', 'hot', 'got', 'sauce', 'wing']
5	['sandwich', 'sub', 'cheese', 'bread', 'meat']
6	['would', 'table', 'star', 'eat', 'people', 'bad']
7	['taco', 'burrito', 'chipotle', 'bell', 'bean', 'salsa', 'chip', 'mexican']
8	['always', 'location', 'fast']
9	['minute', 'customer', 'asked', 'said']


## Fast Food Model - Best Choice

In [6]:
# 50-topic fast-food model: full term list of every topic (num_words defaults to 10).
print_topic_terms(ldam_fast_food_501050)

0	['chipotle', 'bowl', 'burrito', 'rice', 'bean', 'meat', 'chicken', 'steak', 'guacamole', 'get']
1	['taco', 'bell', 'burrito', 'mexican', 'salsa', 'carne', 'asada', 'good', 'food', 'tortilla']
2	['counter', 'one', 'behind', 'people', 'employee', 'order', 'line', 'guy', 'working', 'front']
3	['cold', 'fry', 'food', 'old', 'never', 'hot', 'warm', 'ever', 'even', 'fresh']
4	['lunch', 'quick', 'rush', 'dinner', 'work', 'stopped', 'get', 'today', 'busy', 'grab']
5	['drive', 'thru', 'window', 'car', 'minute', 'get', 'order', 'food', 'inside', 'wait']
6	['panda', 'express', 'job', 'team', 'professional', 'thanks', 'thank', 'polite', 'guy', 'done']
7	['asked', 'said', 'told', 'back', 'ordered', 'wanted', 'got', 'went', 'would', 'could']
8	['box', 'jack', 'server', 'surprised', 'waitress', 'rocket', 'johnny', 'mall', 'pleasantly', 'la']
9	['chicken', 'wei', 'pei', 'rice', 'food', 'dish', 'roll', 'orange', 'chinese', 'teriyaki']
10	['place', 'try', 'back', 'best', 'great', 'good', 'go', 'time',

In [7]:
# Same 50-topic fast-food model, keeping only the terms that occur in a single topic.
print_topic_terms(ldam_fast_food_501050, unique=True)

0	['chipotle', 'bowl', 'bean', 'steak', 'guacamole']
1	['taco', 'bell', 'mexican', 'salsa', 'carne', 'asada', 'tortilla']
2	['counter', 'behind', 'working', 'front']
3	['cold', 'old', 'warm', 'even']
4	['lunch', 'quick', 'rush', 'dinner', 'work', 'stopped', 'today', 'busy', 'grab']
5	['drive', 'thru', 'window']
6	['panda', 'express', 'job', 'team', 'professional', 'thanks', 'thank', 'polite', 'done']
7	['asked', 'said', 'told', 'wanted']
8	['box', 'jack', 'server', 'surprised', 'waitress', 'rocket', 'johnny', 'mall', 'pleasantly', 'la']
9	['wei', 'pei', 'dish', 'roll', 'orange', 'chinese', 'teriyaki']
10	['try', 'owner', 'definitely']
11	['new', 'visit', 'particular', 'need']
12	['dirty', 'dining', 'kid', 'room', 'bathroom', 'floor']
13	['fish', 'chip', 'bar', 'priced', 'outstanding', 'beer', 'game', 'reasonably', 'selection']
14	['lot', 'parking', 'across', 'street', 'wall', 'hole', 'space', 'sooo', 'spot', 'basic']
15	['called', 'call', 'phone', 'number']
16	['better', 'much', 'littl

___

In [8]:
# 20-topic non-fast-food model. Only 10 words per topic are printed because
# num_words is left at its default, although the model file name mentions 15 terms.
print_topic_terms(ldam_non_fast_food_201550)

0	['place', 'back', 'try', 'got', 'time', 'good', 'really', 'went', 'first', 'ordered']
1	['great', 'bar', 'highly', 'patio', 'beer', 'atmosphere', 'place', 'nice', 'outside', 'area']
2	['salad', 'dessert', 'dish', 'bread', 'wine', 'pasta', 'meal', 'sauce', 'delicious', 'cheese']
3	['like', 'food', 'place', 'would', 'could', 'one', 'taste', 'bad', 'even', 'really']
4	['steak', 'shrimp', 'fish', 'fried', 'cooked', 'seafood', 'ordered', 'crab', 'potato', 'good']
5	['place', 'always', 'love', 'best', 'food', 'get', 'go', 'great', 'time', 'favorite']
6	['chicken', 'rice', 'dish', 'thai', 'soup', 'chinese', 'food', 'noodle', 'pho', 'sauce']
7	['breakfast', 'egg', 'coffee', 'pancake', 'bacon', 'morning', 'toast', 'waffle', 'potato', 'brunch']
8	['great', 'food', 'service', 'friendly', 'good', 'staff', 'place', 'back', 'delicious', 'definitely']
9	['bbq', 'pork', 'rib', 'meat', 'sauce', 'side', 'brisket', 'mac', 'cheese', 'pulled']
10	['burger', 'sandwich', 'fry', 'cheese', 'onion', 'bread', 

In [9]:
# 15-topic noun model, top 10 words per topic (num_words default).
print_topic_terms(ldam_all_noun_151550)

0	['location', 'place', 'phoenix', 'restaurant', 'area', 'parking', 'lot', 'year', 'make', 'day']
1	['steak', 'salad', 'dessert', 'dish', 'meal', 'restaurant', 'menu', 'dinner', 'bread', 'pasta']
2	['pho', 'tea', 'dog', 'ice', 'cream', 'spring', 'beef', 'broth', 'lemonade', 'drink']
3	['shrimp', 'fish', 'seafood', 'crab', 'soda', 'cup', 'sauce', 'lobster', 'groupon', 'drink']
4	['table', 'food', 'drink', 'server', 'bar', 'service', 'time', 'night', 'hour', 'restaurant']
5	['burger', 'fry', 'onion', 'place', 'cheese', 'time', 'order', 'shake', 'ring', 'bacon']
6	['pizza', 'wing', 'place', 'cheese', 'sauce', 'time', 'slice', 'pie', 'love', 'try']
7	['beer', 'wall', 'place', 'game', 'selection', 'music', 'bathroom', 'tv', 'smell', 'hole']
8	['food', 'place', 'service', 'love', 'time', 'restaurant', 'price', 'staff', 'family', 'try']
9	['sushi', 'rice', 'place', 'chicken', 'soup', 'dish', 'spicy', 'food', 'noodle', 'curry']
10	['sandwich', 'salad', 'lunch', 'chicken', 'bread', 'place', 'pi

In [10]:
# 20-topic noun model, top 10 words per topic (num_words default).
print_topic_terms(ldam_all_noun_201550)

0	['sushi', 'fish', 'shrimp', 'crab', 'place', 'seafood', 'tuna', 'chef', 'salmon', 'time']
1	['breakfast', 'egg', 'coffee', 'bacon', 'waffle', 'morning', 'place', 'toast', 'potato', 'gravy']
2	['dessert', 'cream', 'dish', 'menu', 'meal', 'dinner', 'restaurant', 'cake', 'appetizer', 'chocolate']
3	['chicken', 'sauce', 'meat', 'bbq', 'pork', 'rib', 'side', 'flavor', 'potato', 'tender']
4	['taco', 'street', 'fish', 'bell', 'tuesday', 'shrimp', 'shop', 'nacho', 'pastor', 'shell']
5	['love', 'place', 'food', 'family', 'time', 'service', 'kid', 'staff', 'year', 'owner']
6	['location', 'restaurant', 'phoenix', 'place', 'area', 'parking', 'lot', 'wall', 'spot', 'menu']
7	['steak', 'pita', 'gyro', 'chicken', 'salad', 'hummus', 'meat', 'wrap', 'plate', 'lamb']
8	['table', 'food', 'server', 'service', 'restaurant', 'time', 'drink', 'minute', 'waiter', 'meal']
9	['salsa', 'chip', 'bean', 'food', 'cheese', 'rice', 'tortilla', 'margarita', 'sauce', 'chicken']
10	['place', 'review', 'food', 'star', 

In [11]:
# 50-topic noun model (the 30-pass file), top 10 words per topic.
print_topic_terms(ldam_all_noun_501030)

0	['food', 'service', 'price', 'place', 'quality', 'restaurant', 'portion', 'time', 'atmosphere', 'size']
1	['kid', 'coupon', 'tuesday', 'deal', 'dad', 'adult', 'regret', 'meal', 'def', 'b']
2	['burger', 'fry', 'onion', 'place', 'cheese', 'shake', 'ring', 'bacon', 'well', 'time']
3	['star', 'review', 'yelp', 'reason', 'read', 'rating', 'rate', 'update', 'living', 'state']
4	['lunch', 'day', 'today', 'time', 'work', 'week', 'dinner', 'spot', 'service', 'yesterday']
5	['spicy', 'curry', 'food', 'spice', 'dish', 'chicken', 'pad', 'rice', 'flavor', 'place']
6	['bowl', 'chicken', 'rice', 'noodle', 'egg', 'food', 'soup', 'ramen', 'sauce', 'place']
7	['chicken', 'strip', 'mall', 'waffle', 'piece', 'breast', 'tender', 'crispy', 'shawarma', 'meal']
8	['dish', 'restaurant', 'menu', 'course', 'meal', 'chef', 'well', 'cuisine', 'flavor', 'try']
9	['sauce', 'onion', 'pepper', 'tomato', 'flavor', 'mushroom', 'oil', 'cheese', 'taste', 'lemon']
10	['box', 'bomb', 'chop', 'stew', 'claim', 'alot', 'vaca

In [12]:
# 30-topic noun+verb model, top 10 words per topic (num_words default).
print_topic_terms(ldam_all_noun_verbs_301550)

0	['shrimp', 'fish', 'crab', 'seafood', 'lobster', 'fried', 'leg', 'sauce', 'clam', 'oyster']
1	['parking', 'door', 'lot', 'closed', 'drive', 'car', 'sign', 'street', 'find', 'mile']
2	['food', 'eat', 'place', 'could', 'got', 'tasted', 'would', 'go', 'eating', 'looked']
3	['u', 'server', 'food', 'drink', 'service', 'came', 'time', 'waiter', 'table', 'ordered']
4	['flavor', 'sauce', 'dish', 'spicy', 'taste', 'bland', 'spice', 'bit', 'ordered', 'would']
5	['ice', 'cream', 'bowl', 'soda', 'cup', 'drink', 'water', 'coke', 'machine', 'fountain']
6	['taco', 'mexican', 'salsa', 'burrito', 'chip', 'bean', 'food', 'cheese', 'tortilla', 'margarita']
7	['lunch', 'salad', 'tea', 'curry', 'today', 'pad', 'soup', 'dinner', 'lettuce', 'went']
8	['phoenix', 'year', 'arizona', 'know', 'lived', 'find', 'life', 'city', 'moved', 'mouth']
9	['star', 'place', 'review', 'give', 'would', 'try', 'say', 'yelp', 'think', 'go']
10	['customer', 'service', 'food', 'employee', 'owner', 'people', 'make', 'need', 'guy

# Assigning Topic to Reviews

In [13]:
import pandas as pd

In [14]:
# Load the cleaned restaurant reviews, using the CSV's first column as the index.
# NOTE(review): the saved output of this cell contains the tail of an IPython
# warning (`interactivity=interactivity, ...`) -- presumably a pandas
# DtypeWarning about mixed-type columns; consider passing explicit `dtype=`
# to read_csv -- TODO confirm which columns are affected.
reviews = pd.read_csv('../clean_data/az_restaurant_reviews.csv', index_col=0)
reviews.head(3)

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,business_id,cool,date,funny,review_id,stars,text,useful,user_id,is_fast_food,review_len
0,JlNeaOymdVbE6_bubqjohg,0,2014-08-09,0.0,BF0ANB54sc_f-3_howQBCg,1.0,we always go to the chevo's in chandler which ...,3.0,ssuXFjkH4neiBgwv-oN4IA,0.0,422.0
1,0Rni7ocMC_Lg2UH0lDeKMQ,0,2014-08-09,0.0,DbLUpPT61ykLTakknCF9CQ,1.0,this place is always so dirty and grimy been t...,6.0,ssuXFjkH4neiBgwv-oN4IA,0.0,111.0
2,S-oLPRdhlyL5HAknBKTUcQ,0,2017-11-30,0.0,z_mVLygzPn8uHp63SSCErw,4.0,holy portion sizes! you get a lot of bang for ...,0.0,MzEnYCyZlRYQRISNMXTWIg,0.0,130.0


In [16]:
# Read the cleaned business table (first CSV column as index) and keep an
# independent copy of its first nine columns only.
biz = (
    pd.read_csv('../clean_data/az_restaurant_business_clean.csv', index_col=0)
    .iloc[:, :9]
    .copy()
)
biz.head(3)

Unnamed: 0,address,business_id,categories,city,is_open,name,postal_code,review_count,stars
0,"777 E Thunderbird Rd, Ste 107",1WBkAuQg81kokZIPMpn9Zg,"['burgers', 'restaurants']",Phoenix,1,Charr An American Burger Bar,85022.0,232,3.0
1,10720 E Southern Ave,kKx8iCJkomVQBdWHnmmOiA,"['restaurants', 'pizza']",Mesa,1,Little Caesars Pizza,85209.0,4,2.5
2,"8140 N Hayden Rd, Ste H115",VdlPZg2NAu8t8GkdbPLecg,"['restaurants', 'gluten-free', 'indian', 'seaf...",Scottsdale,1,Tandoori Times Indian Bistro,85258.0,263,3.5


In [17]:
# Attach the business name to every review; the left join keeps all review rows.
review_df = pd.merge(
    reviews,
    biz[['name', 'business_id']],
    on='business_id',
    how='left',
)

In [18]:
# Preview the merged frame; `name` is the column added by the merge.
review_df.head(5)

Unnamed: 0,business_id,cool,date,funny,review_id,stars,text,useful,user_id,is_fast_food,review_len,name
0,JlNeaOymdVbE6_bubqjohg,0,2014-08-09,0.0,BF0ANB54sc_f-3_howQBCg,1.0,we always go to the chevo's in chandler which ...,3.0,ssuXFjkH4neiBgwv-oN4IA,0.0,422.0,Papa Chevo's Taco Shop
1,0Rni7ocMC_Lg2UH0lDeKMQ,0,2014-08-09,0.0,DbLUpPT61ykLTakknCF9CQ,1.0,this place is always so dirty and grimy been t...,6.0,ssuXFjkH4neiBgwv-oN4IA,0.0,111.0,Barro's Pizza
2,S-oLPRdhlyL5HAknBKTUcQ,0,2017-11-30,0.0,z_mVLygzPn8uHp63SSCErw,4.0,holy portion sizes! you get a lot of bang for ...,0.0,MzEnYCyZlRYQRISNMXTWIg,0.0,130.0,Harumi Sushi
3,iIjVO7cLD1UEmIO7G05Ujw,0,2016-06-11,0.0,xatycgntu_F_Ioyny3iflw,4.0,flavor was actually pretty good. not used to e...,0.0,vaXJ7-xLrnD6FAEhUqYKwQ,1.0,309.0,Rosita's Place
4,1JF9TbJ2d5hH8xsQvvklHg,0,2016-06-18,0.0,Z7U7MMef6Tbj_ZbSFzLRUw,5.0,this is place very great flavor. server was on...,1.0,vaXJ7-xLrnD6FAEhUqYKwQ,0.0,476.0,Irma's Kitchen


In [31]:
# Independent copy of the reviews whose is_fast_food flag equals 0.
non_fast_food = reviews.loc[reviews['is_fast_food'] == 0].copy()
non_fast_food.head(3)

Unnamed: 0,business_id,cool,date,funny,review_id,stars,text,useful,user_id,is_fast_food,review_len
0,JlNeaOymdVbE6_bubqjohg,0,2014-08-09,0.0,BF0ANB54sc_f-3_howQBCg,1.0,we always go to the chevo's in chandler which ...,3.0,ssuXFjkH4neiBgwv-oN4IA,0.0,422.0
1,0Rni7ocMC_Lg2UH0lDeKMQ,0,2014-08-09,0.0,DbLUpPT61ykLTakknCF9CQ,1.0,this place is always so dirty and grimy been t...,6.0,ssuXFjkH4neiBgwv-oN4IA,0.0,111.0
2,S-oLPRdhlyL5HAknBKTUcQ,0,2017-11-30,0.0,z_mVLygzPn8uHp63SSCErw,4.0,holy portion sizes! you get a lot of bang for ...,0.0,MzEnYCyZlRYQRISNMXTWIg,0.0,130.0


In [19]:
# business_id values of every business named exactly 'Chipotle Mexican Grill'.
chipotle_ids = biz.loc[biz['name'] == 'Chipotle Mexican Grill', 'business_id'].values

In [20]:
# Independent copy of the reviews written for any of the Chipotle business ids.
chipotle_reviews = reviews.loc[reviews['business_id'].isin(chipotle_ids)].copy()
chipotle_reviews.head(3)

Unnamed: 0,business_id,cool,date,funny,review_id,stars,text,useful,user_id,is_fast_food,review_len
140,uovqgCcWIqTwUeH_A54t2A,0,2014-05-15,0.0,VSVXCLsGO_MduBX4nfE6dw,2.0,this location is not as stellar as others. the...,1.0,Jt4u7qnfrk35buainfOuGA,1.0,213.0
177,uovqgCcWIqTwUeH_A54t2A,1,2008-05-10,1.0,-niC6oq1n697C-dIDWaOnA,4.0,i have always been a fan of chipotle's food bu...,2.0,qYxGJKlYrqNgodzMWHaaGw,1.0,1177.0
213,wAXYLmHuysYTz8i4VPKmaQ,0,2011-02-02,0.0,5MAbf8n0niuIRU1P4rdhZw,4.0,everyone already knows how good chipotle is bu...,0.0,4I_woZLXCO9jaVZvDi18CA,1.0,266.0


In [25]:
def print_top_n_review_topics(model, review, n_topics=5, valid_topics=None):
    """Print a review followed by the topics its words point to most often.

    Every space-separated token of ``review`` is passed to
    ``model.get_term_topics``; each topic id returned for a token counts as one
    vote for that topic.  The ``n_topics`` topics with the most votes are then
    printed, most-voted first, using ``model.print_topic``.

    Parameters
    ----------
    model
        Object exposing ``get_term_topics(word_id=...)`` (returning a sequence
        whose items start with a topic id) and ``print_topic(topic_id)``.
    review : str
        Raw review text.
    n_topics : int
        How many of the most-voted topics to consider.
    valid_topics : sequence of int, optional
        When non-empty, only topics contained in it are printed.  ``None`` or
        an empty sequence (the previous default) disables the filter.

    Returns
    -------
    None
        Everything is written to stdout.
    """
    topic_votes = Counter()
    # NOTE(review): tokens are split on single spaces only, so a word with
    # attached punctuation or a newline (e.g. 'horrible!!!') is looked up verbatim.
    for word in review.split(' '):
        try:
            term_topics = model.get_term_topics(word_id=word)
            topic_votes.update(entry[0] for entry in term_topics)
        except Exception:
            # Best effort: skip tokens the model cannot resolve.  Catching
            # Exception instead of using a bare `except:` lets
            # KeyboardInterrupt/SystemExit through, so a long run can be stopped.
            continue

    print(review)
    print()

    top_n_topics = [topic_id for topic_id, _ in topic_votes.most_common(n_topics)]

    if valid_topics is not None and len(valid_topics) > 0:
        # prune to only topics we care about
        topics = [topic for topic in top_n_topics if topic in valid_topics]
    else:
        topics = top_n_topics

    for n in topics:
        print('Topic {:d}'.format(n))
        print('\t{}\n'.format(model.print_topic(n)))

In [None]:
# Sample Bad Review Topic Assignment

In [42]:

# Text of one hand-picked review, selected by its review_id.
sample_review = reviews.loc[reviews['review_id'] == 'DbLUpPT61ykLTakknCF9CQ', 'text'].iloc[0]

# Report its most frequent topics, restricted to a hand-picked list of topic ids.
print_top_n_review_topics(
    ldam_fast_food_501050,
    sample_review,
    n_topics=5,
    valid_topics=[12, 24, 27, 35, 43, 48],
)

this place is always so dirty and grimy been there twice and will not be back.  customer service is horrible!!!

Topic 12
	0.047*"dirty" + 0.045*"clean" + 0.039*"place" + 0.030*"food" + 0.030*"dining" + 0.030*"area" + 0.030*"kid" + 0.026*"room" + 0.025*"bathroom" + 0.024*"floor"

Topic 48
	0.089*"great" + 0.055*"food" + 0.053*"friendly" + 0.044*"service" + 0.042*"staff" + 0.040*"place" + 0.025*"delicious" + 0.021*"fresh" + 0.020*"clean" + 0.020*"awesome"

Topic 35
	0.113*"customer" + 0.112*"service" + 0.035*"employee" + 0.030*"rude" + 0.021*"manager" + 0.017*"ever" + 0.016*"worst" + 0.015*"food" + 0.015*"bad" + 0.015*"attitude"



In [None]:
# review_id of a 4-star review present in the reviews table. Stored as a string:
# as a bare identifier this cell raised NameError whenever it was executed.
sample_good_review_id = 'z_mVLygzPn8uHp63SSCErw'

In [30]:
# Topic report for the first ten Chipotle reviews, each followed by an 80-character rule.
for rev in chipotle_reviews['text'].head(10):
    print_top_n_review_topics(
        ldam_fast_food_501050,
        rev,
        n_topics=5,
        valid_topics=[12, 24, 27, 35, 43, 48],
    )
    print('=' * 80)

this location is not as stellar as others. they usually have a bit of a wait and the regular employees do not seem thrilled to be there. the food is fine but the customer service alone makes it a chore to stop by.

Topic 35
	0.113*"customer" + 0.112*"service" + 0.035*"employee" + 0.030*"rude" + 0.021*"manager" + 0.017*"ever" + 0.016*"worst" + 0.015*"food" + 0.015*"bad" + 0.015*"attitude"

Topic 43
	0.086*"order" + 0.076*"minute" + 0.061*"wait" + 0.036*"food" + 0.031*"time" + 0.027*"line" + 0.023*"ready" + 0.023*"long" + 0.023*"waiting" + 0.022*"waited"

i have always been a fan of chipotle's food but haven't really been motivated to sit my ample butt down and give them a review.  sure the tasty burritos the size of mom's meatloaf should be worth a few kind words and their delicious guacamole is some of the best at a fast food style mexican place that i have had.

what changed my mind and inspired me to write this was a recent article i read in newsweek about the effort they are making 