# LDA Model Results

In [1]:
# Widen the notebook container so the long per-topic term lists fit on one line.
# `IPython.core.display` is a deprecated location for these names; `IPython.display` is the public API.
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

In [2]:
from gensim.models.ldamulticore import LdaMulticore
import itertools
from collections import Counter

## Load Model Results

In [3]:
# Load the saved LdaMulticore models from ../models.
# The numeric suffix of each variable mirrors its file name: <topics><terms><passes>,
# e.g. 101050 = 10 topics, 10 terms, 50 passes.
ldam_fast_food_101050      = LdaMulticore.load('../models/ldam_fast_food_10_topics_10_terms_50_passes.model')
ldam_fast_food_501050      = LdaMulticore.load('../models/ldam_fast_food_50_topics_10_terms_50_passes.model')
ldam_non_fast_food_201550  = LdaMulticore.load('../models/ldam_non_fast_food_20_topics_15_terms_50_passes.model')
ldam_all_noun_151550       = LdaMulticore.load('../models/ldam_noun_15_topics_15_terms_50_passes.model')
ldam_all_noun_201550       = LdaMulticore.load('../models/ldam_noun_20_topics_15_terms_50_passes.model')
ldam_all_noun_501030       = LdaMulticore.load('../models/ldam_noun_50_topics_10_terms_30_passes.model')

## Pretty Printer Function

In [4]:
def print_topic_terms(model, num_topics=-1, num_words=10, unique=False):
    """Print the top terms of every topic of a trained topic model.

    Parameters
    ----------
    model
        Object exposing ``print_topics(num_topics=..., num_words=...)`` that
        returns ``(topic_id, formatted_topic)`` pairs in which every term is
        wrapped in double quotes (as the models loaded in this notebook do).
    num_topics : int
        Forwarded to ``model.print_topics``.
    num_words : int
        Forwarded to ``model.print_topics``; number of terms per topic.
    unique : bool
        When True, terms that occur more than once across the returned topics
        are removed, so each topic only shows the terms that are specific to it.

    Returns
    -------
    None
        One header line and one ``<topic>\\t<terms>`` line per topic are
        written to stdout.
    """
    results = model.print_topics(num_topics=num_topics, num_words=num_words)

    # Terms are the double-quoted pieces of the formatted topic string, i.e.
    # every second element after splitting on the quote character.
    topic_terms = [(topic, formatted.split('"')[1::2]) for topic, formatted in results]

    if unique:
        term_counts = Counter(
            itertools.chain.from_iterable(terms for _, terms in topic_terms)
        )
        # A set keeps the per-term membership test O(1).
        shared_terms = {term for term, count in term_counts.items() if count > 1}
        topic_terms = [
            (topic, [term for term in terms if term not in shared_terms])
            for topic, terms in topic_terms
        ]
        print('============================ Unique Terms Per Topic ===========================')
    else:
        print('=============================== Terms Per Topic ===============================')

    for topic, terms in topic_terms:
        print('{}\t{}'.format(topic, terms))

In [31]:
# Fast-food model, 10 topics: show only the terms that are not shared between topics.
print_topic_terms(ldam_fast_food_101050, unique=True)

0	['go', 'best', 'menu', 'style']
1	['pizza', 'drink', 'ice', 'topping', 'free', 'tea', 'pie', 'crust']
2	['great', 'friendly', 'fresh', 'delicious']
3	['rice', 'wei', 'pei', 'panda', 'dish']
4	['dog', 'hot', 'got', 'sauce', 'wing']
5	['sandwich', 'sub', 'cheese', 'bread', 'meat']
6	['would', 'table', 'star', 'eat', 'people', 'bad']
7	['taco', 'burrito', 'chipotle', 'bell', 'bean', 'salsa', 'chip', 'mexican']
8	['always', 'location', 'fast']
9	['minute', 'customer', 'asked', 'said']


In [29]:
# Fast-food model, 50 topics: top 10 terms per topic (function defaults).
print_topic_terms(ldam_fast_food_501050)

0	['chipotle', 'bowl', 'burrito', 'rice', 'bean', 'meat', 'chicken', 'steak', 'guacamole', 'get']
1	['taco', 'bell', 'burrito', 'mexican', 'salsa', 'carne', 'asada', 'good', 'food', 'tortilla']
2	['counter', 'one', 'behind', 'people', 'employee', 'order', 'line', 'guy', 'working', 'front']
3	['cold', 'fry', 'food', 'old', 'never', 'hot', 'warm', 'ever', 'even', 'fresh']
4	['lunch', 'quick', 'rush', 'dinner', 'work', 'stopped', 'get', 'today', 'busy', 'grab']
5	['drive', 'thru', 'window', 'car', 'minute', 'get', 'order', 'food', 'inside', 'wait']
6	['panda', 'express', 'job', 'team', 'professional', 'thanks', 'thank', 'polite', 'guy', 'done']
7	['asked', 'said', 'told', 'back', 'ordered', 'wanted', 'got', 'went', 'would', 'could']
8	['box', 'jack', 'server', 'surprised', 'waitress', 'rocket', 'johnny', 'mall', 'pleasantly', 'la']
9	['chicken', 'wei', 'pei', 'rice', 'food', 'dish', 'roll', 'orange', 'chinese', 'teriyaki']
10	['place', 'try', 'back', 'best', 'great', 'good', 'go', 'time',

In [23]:
# Non-fast-food model, 20 topics.
# NOTE(review): the model name says 15 terms, but num_words defaults to 10 here — confirm this is intended.
print_topic_terms(ldam_non_fast_food_201550)

0	['place', 'back', 'try', 'got', 'time', 'good', 'really', 'went', 'first', 'ordered']
1	['great', 'bar', 'highly', 'patio', 'beer', 'atmosphere', 'place', 'nice', 'outside', 'area']
2	['salad', 'dessert', 'dish', 'bread', 'wine', 'pasta', 'meal', 'sauce', 'delicious', 'cheese']
3	['like', 'food', 'place', 'would', 'could', 'one', 'taste', 'bad', 'even', 'really']
4	['steak', 'shrimp', 'fish', 'fried', 'cooked', 'seafood', 'ordered', 'crab', 'potato', 'good']
5	['place', 'always', 'love', 'best', 'food', 'get', 'go', 'great', 'time', 'favorite']
6	['chicken', 'rice', 'dish', 'thai', 'soup', 'chinese', 'food', 'noodle', 'pho', 'sauce']
7	['breakfast', 'egg', 'coffee', 'pancake', 'bacon', 'morning', 'toast', 'waffle', 'potato', 'brunch']
8	['great', 'food', 'service', 'friendly', 'good', 'staff', 'place', 'back', 'delicious', 'definitely']
9	['bbq', 'pork', 'rib', 'meat', 'sauce', 'side', 'brisket', 'mac', 'cheese', 'pulled']
10	['burger', 'sandwich', 'fry', 'cheese', 'onion', 'bread', 

In [24]:
# Noun model, 15 topics.
# NOTE(review): the model name says 15 terms, but num_words defaults to 10 here — confirm this is intended.
print_topic_terms(ldam_all_noun_151550)

0	['location', 'place', 'phoenix', 'restaurant', 'area', 'parking', 'lot', 'year', 'make', 'day']
1	['steak', 'salad', 'dessert', 'dish', 'meal', 'restaurant', 'menu', 'dinner', 'bread', 'pasta']
2	['pho', 'tea', 'dog', 'ice', 'cream', 'spring', 'beef', 'broth', 'lemonade', 'drink']
3	['shrimp', 'fish', 'seafood', 'crab', 'soda', 'cup', 'sauce', 'lobster', 'groupon', 'drink']
4	['table', 'food', 'drink', 'server', 'bar', 'service', 'time', 'night', 'hour', 'restaurant']
5	['burger', 'fry', 'onion', 'place', 'cheese', 'time', 'order', 'shake', 'ring', 'bacon']
6	['pizza', 'wing', 'place', 'cheese', 'sauce', 'time', 'slice', 'pie', 'love', 'try']
7	['beer', 'wall', 'place', 'game', 'selection', 'music', 'bathroom', 'tv', 'smell', 'hole']
8	['food', 'place', 'service', 'love', 'time', 'restaurant', 'price', 'staff', 'family', 'try']
9	['sushi', 'rice', 'place', 'chicken', 'soup', 'dish', 'spicy', 'food', 'noodle', 'curry']
10	['sandwich', 'salad', 'lunch', 'chicken', 'bread', 'place', 'pi

In [25]:
# Noun model, 20 topics.
# NOTE(review): the model name says 15 terms, but num_words defaults to 10 here — confirm this is intended.
print_topic_terms(ldam_all_noun_201550)

0	['sushi', 'fish', 'shrimp', 'crab', 'place', 'seafood', 'tuna', 'chef', 'salmon', 'time']
1	['breakfast', 'egg', 'coffee', 'bacon', 'waffle', 'morning', 'place', 'toast', 'potato', 'gravy']
2	['dessert', 'cream', 'dish', 'menu', 'meal', 'dinner', 'restaurant', 'cake', 'appetizer', 'chocolate']
3	['chicken', 'sauce', 'meat', 'bbq', 'pork', 'rib', 'side', 'flavor', 'potato', 'tender']
4	['taco', 'street', 'fish', 'bell', 'tuesday', 'shrimp', 'shop', 'nacho', 'pastor', 'shell']
5	['love', 'place', 'food', 'family', 'time', 'service', 'kid', 'staff', 'year', 'owner']
6	['location', 'restaurant', 'phoenix', 'place', 'area', 'parking', 'lot', 'wall', 'spot', 'menu']
7	['steak', 'pita', 'gyro', 'chicken', 'salad', 'hummus', 'meat', 'wrap', 'plate', 'lamb']
8	['table', 'food', 'server', 'service', 'restaurant', 'time', 'drink', 'minute', 'waiter', 'meal']
9	['salsa', 'chip', 'bean', 'food', 'cheese', 'rice', 'tortilla', 'margarita', 'sauce', 'chicken']
10	['place', 'review', 'food', 'star', 

In [26]:
# Noun model, 50 topics: top 10 terms per topic (function defaults).
print_topic_terms(ldam_all_noun_501030)

0	['food', 'service', 'price', 'place', 'quality', 'restaurant', 'portion', 'time', 'atmosphere', 'size']
1	['kid', 'coupon', 'tuesday', 'deal', 'dad', 'adult', 'regret', 'meal', 'def', 'b']
2	['burger', 'fry', 'onion', 'place', 'cheese', 'shake', 'ring', 'bacon', 'well', 'time']
3	['star', 'review', 'yelp', 'reason', 'read', 'rating', 'rate', 'update', 'living', 'state']
4	['lunch', 'day', 'today', 'time', 'work', 'week', 'dinner', 'spot', 'service', 'yesterday']
5	['spicy', 'curry', 'food', 'spice', 'dish', 'chicken', 'pad', 'rice', 'flavor', 'place']
6	['bowl', 'chicken', 'rice', 'noodle', 'egg', 'food', 'soup', 'ramen', 'sauce', 'place']
7	['chicken', 'strip', 'mall', 'waffle', 'piece', 'breast', 'tender', 'crispy', 'shawarma', 'meal']
8	['dish', 'restaurant', 'menu', 'course', 'meal', 'chef', 'well', 'cuisine', 'flavor', 'try']
9	['sauce', 'onion', 'pepper', 'tomato', 'flavor', 'mushroom', 'oil', 'cheese', 'taste', 'lemon']
10	['box', 'bomb', 'chop', 'stew', 'claim', 'alot', 'vaca

# Assigning Topics to Reviews

In [13]:
import pandas as pd

In [14]:
# Load the cleaned restaurant reviews; the first CSV column is used as the index.
reviews = pd.read_csv('../clean_data/az_restaurant_reviews.csv', index_col=0)
reviews.head(3)

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,business_id,cool,date,funny,review_id,stars,text,useful,user_id
0,JlNeaOymdVbE6_bubqjohg,0,2014-08-09,0.0,BF0ANB54sc_f-3_howQBCg,1.0,We always go to the chevo's in chandler which ...,3.0,ssuXFjkH4neiBgwv-oN4IA
1,0Rni7ocMC_Lg2UH0lDeKMQ,0,2014-08-09,0.0,DbLUpPT61ykLTakknCF9CQ,1.0,This place is always so dirty and grimy been t...,6.0,ssuXFjkH4neiBgwv-oN4IA
2,S-oLPRdhlyL5HAknBKTUcQ,0,2017-11-30,0.0,z_mVLygzPn8uHp63SSCErw,4.0,Holy portion sizes! You get a lot of bang for ...,0.0,MzEnYCyZlRYQRISNMXTWIg


In [15]:
# Load the cleaned business table (first CSV column as index) and keep an
# independent copy of its first nine columns only.
biz = pd.read_csv('../clean_data/business_clean.csv', index_col=0).iloc[:, :9].copy()
biz.head(3)

Unnamed: 0,business_id,address,categories,city,is_open,name,postal_code,review_count,stars
0,rDMptJYWtnMhpQu_rRXHng,719 E Thunderbird Rd,"['fast_food', 'burgers', 'restaurants']",Phoenix,1,McDonald's,85022.0,10,1.0
1,1WBkAuQg81kokZIPMpn9Zg,"777 E Thunderbird Rd, Ste 107","['burgers', 'restaurants']",Phoenix,1,Charr An American Burger Bar,85022.0,232,3.0
2,iPa__LOhse-hobC2Xmp-Kw,1635 E Camelback Rd,"['restaurants', 'burgers', 'fast_food']",Phoenix,1,McDonald's,85016.0,34,3.0


In [16]:
# Attach the business name to every review; the left join keeps reviews whose
# business_id has no match in `biz`.
review_df = reviews.merge(biz[['name', 'business_id']], on='business_id', how='left')

In [17]:
# Quick look at the merged frame, which now carries the `name` column from `biz`.
review_df.head(5)

Unnamed: 0,business_id,cool,date,funny,review_id,stars,text,useful,user_id,name
0,JlNeaOymdVbE6_bubqjohg,0,2014-08-09,0.0,BF0ANB54sc_f-3_howQBCg,1.0,We always go to the chevo's in chandler which ...,3.0,ssuXFjkH4neiBgwv-oN4IA,Papa Chevo's Taco Shop
1,0Rni7ocMC_Lg2UH0lDeKMQ,0,2014-08-09,0.0,DbLUpPT61ykLTakknCF9CQ,1.0,This place is always so dirty and grimy been t...,6.0,ssuXFjkH4neiBgwv-oN4IA,Barro's Pizza
2,S-oLPRdhlyL5HAknBKTUcQ,0,2017-11-30,0.0,z_mVLygzPn8uHp63SSCErw,4.0,Holy portion sizes! You get a lot of bang for ...,0.0,MzEnYCyZlRYQRISNMXTWIg,Harumi Sushi
3,iIjVO7cLD1UEmIO7G05Ujw,0,2016-06-11,0.0,xatycgntu_F_Ioyny3iflw,4.0,Flavor was actually pretty good. Not used to e...,0.0,vaXJ7-xLrnD6FAEhUqYKwQ,Rosita's Place
4,1JF9TbJ2d5hH8xsQvvklHg,0,2016-06-18,0.0,Z7U7MMef6Tbj_ZbSFzLRUw,5.0,This is place very great flavor. Server was on...,1.0,vaXJ7-xLrnD6FAEhUqYKwQ,Irma's Kitchen


In [20]:
# Number of business rows per restaurant name, most frequent names first.
tmp = (
    biz.groupby('name')['business_id']
    .count()
    .to_frame()
    .sort_values('business_id', ascending=False)
)
tmp.head(50)

Unnamed: 0_level_0,business_id
name,Unnamed: 1_level_1
Subway,246
McDonald's,158
Taco Bell,109
Burger King,91
Pizza Hut,68
Panda Express,66
Wendy's,61
Filiberto's Mexican Food,58
Chipotle Mexican Grill,56
Sonic Drive-In,50


In [None]:
# Business ids of every row in `biz` whose name matches the filter below.
# NOTE(review): the variable is named chipotle_ids but the filter selects 'Ha Ha China' —
# confirm which restaurant is intended and align the name or the filter.
chipotle_ids = biz[biz.name == 'Ha Ha China'].business_id.values

In [None]:
# Reviews of the selected businesses, taken from `reviews` (not the merged `review_df`,
# so the `name` column is not included); .copy() detaches the result from `reviews`.
chipotle_reviews = reviews[reviews.business_id.isin(chipotle_ids)].copy()
chipotle_reviews.head(3)

In [None]:
def print_top_n_review_topics(model, review, n_topics=5):
    """Print a review followed by the topics its words hit most often.

    Every whitespace-separated token of ``review`` is looked up with
    ``model.get_term_topics``; each topic returned for a token counts as one
    vote. The ``n_topics`` topics with the most votes are printed together
    with ``model.print_topic`` for that topic.

    Parameters
    ----------
    model
        Object exposing ``get_term_topics(word_id=...)`` (returning an iterable
        whose items have the topic id at index 0) and ``print_topic(topic_id)``.
    review : str
        Raw review text. Tokens are used as-is (no lower-casing or punctuation
        stripping), so only tokens the model can look up verbatim contribute.
    n_topics : int
        Maximum number of topics to print.

    Returns
    -------
    None
        Output is written to stdout.
    """
    topic_votes = Counter()

    # split() without an argument also breaks on newlines/tabs and never yields
    # empty tokens, unlike split(' ') which glues words across line breaks.
    for word in review.split():
        try:
            term_topics = model.get_term_topics(word_id=word)
            topic_votes.update(hit[0] for hit in term_topics)
        except Exception:
            # Best effort: tokens the model cannot look up are skipped.
            # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
            continue

    print(review)
    print()

    for topic_id, _ in topic_votes.most_common(n_topics):
        print('Topic {:d}'.format(topic_id))
        print('\t{}\n'.format(model.print_topic(topic_id)))

In [None]:
# sample_review = reviews.text[10330]
# model = ldam_all_noun_201550

# print_top_n_review_topics(model, sample_review, n_topics=5)


In [None]:
# For each of the first 10 selected reviews, print the review and its 3 most-hit topics
# from the 20-topic noun model, followed by an 80-character separator line.
for rev in chipotle_reviews.text[:10]:
    print_top_n_review_topics(ldam_all_noun_201550, rev, n_topics=3)
    print('='*80)