# Restaurant Data EDA

In [1]:
%matplotlib inline
import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
import datetime
from glob import glob

import seaborn as sns
sns.set()
# sns.set_style('whitegrid')
# sns.set_context("poster")

In [2]:
def time_marker(text=''):
    """Print `text` prefixed with the current local time of day in square brackets."""
    stamp = datetime.datetime.now().time()
    message = '[{}] {}'.format(stamp, text)
    print(message)

In [3]:
# Notebook display and chart configuration.
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
# Inject CSS so the notebook container uses 95% of the page width.
display(HTML("<style>.container { width:95% !important; }</style>"))

import matplotlib
# Global matplotlib default font size.
font = {'size' : 50}
matplotlib.rc('font', **font)

# Font sizes for chart titles, axis labels and ticks.
TITLE_FONT_SIZE = 25
LABEL_FONT_SIZE = 15
TICK_FONT_SIZE  = 15

# Figure sizes (width, height) passed to matplotlib as `figsize`.
FIG_SIZE = (15,6)
FIG_SIZE_SQUARE = (15,15)
# When True the plotting helpers save charts to disk instead of showing them.
DO_WRITE_CHARTS = True

# Load Restaurant Data

In [4]:
# Load the cleaned Arizona restaurant table (fast food and non fast food alike);
# the first CSV column is read as the index and then discarded.
time_marker(text='Loading Restaurant Data...')
restaurants = pd.read_csv('../clean_data/az_restaurant_business_clean.csv', index_col=0)
# Replace the stored index with a fresh 0..n-1 index.
restaurants = restaurants.reset_index(drop=True)
time_marker(text='Complete!')

[12:34:31.154817] Loading Non Fast Food Data...
[12:34:31.216084] Complete!


In [5]:
def _flag_label(value):
    """Map a 0/1 flag, or an already converted label, to 'True' or 'False'."""
    # Labels produced by an earlier run of this cell are kept as they are, so
    # re-running the cell no longer turns every row into 'False'.
    if isinstance(value, str):
        return 'True' if value == 'True' else 'False'
    return 'True' if value == 1 else 'False'

# Recode the numeric flag columns as the string labels 'True' / 'False',
# which the later filters and plots compare against.
for flag_column in ('is_open', 'is_fast_food'):
    restaurants[flag_column] = restaurants[flag_column].apply(_flag_label)

In [6]:
# Preview the first rows; `is_open` and `is_fast_food` now hold 'True'/'False' labels.
restaurants.head()

Unnamed: 0,address,business_id,categories,city,is_open,name,postal_code,review_count,stars,is_fast_food
0,"777 E Thunderbird Rd, Ste 107",1WBkAuQg81kokZIPMpn9Zg,"['burgers', 'restaurants']",Phoenix,True,Charr An American Burger Bar,85022.0,232,3.0,False
1,10720 E Southern Ave,kKx8iCJkomVQBdWHnmmOiA,"['restaurants', 'pizza']",Mesa,True,Little Caesars Pizza,85209.0,4,2.5,False
2,"8140 N Hayden Rd, Ste H115",VdlPZg2NAu8t8GkdbPLecg,"['restaurants', 'gluten-free', 'indian', 'seaf...",Scottsdale,True,Tandoori Times Indian Bistro,85258.0,263,3.5,False
3,2810 North 75th Ave,QkG3KUXwqZBW18A9k1xqCA,"['american_(traditional)', 'restaurants', 'sea...",Phoenix,True,Red Lobster,85035.0,37,2.5,False
4,1455 W Elliot Rd,nigYwB_m1TQ1WosjSWi-Hw,"['burgers', 'restaurants']",Gilbert,False,Simply Burgers,85233.0,5,3.0,False


## Split into `Fast Food` and `Non Fast Food` DataFrames

In [7]:
# Partition the table on the string-valued `is_fast_food` label; each subset is
# an independent copy with its own 0..n-1 index.
time_marker(text='Loading Non Fast Food Data...')
nff_restaurants = (
    restaurants.loc[restaurants['is_fast_food'] == 'False']
    .copy()
    .reset_index(drop=True)
)

time_marker(text='Loading Fast Food Data...')
ff_restaurants = (
    restaurants.loc[restaurants['is_fast_food'] == 'True']
    .copy()
    .reset_index(drop=True)
)

time_marker(text='Complete!')

[12:34:31.302678] Loading Non Fast Food Data...
[12:34:31.312078] Loading Fast Food Data...
[12:34:31.315222] Complete!


In [8]:
# Show the first three rows transposed, so each column name becomes a row.
restaurants.head(3).transpose()

Unnamed: 0,0,1,2
address,"777 E Thunderbird Rd, Ste 107",10720 E Southern Ave,"8140 N Hayden Rd, Ste H115"
business_id,1WBkAuQg81kokZIPMpn9Zg,kKx8iCJkomVQBdWHnmmOiA,VdlPZg2NAu8t8GkdbPLecg
categories,"['burgers', 'restaurants']","['restaurants', 'pizza']","['restaurants', 'gluten-free', 'indian', 'seaf..."
city,Phoenix,Mesa,Scottsdale
is_open,True,True,True
name,Charr An American Burger Bar,Little Caesars Pizza,Tandoori Times Indian Bistro
postal_code,85022,85209,85258
review_count,232,4,263
stars,3,2.5,3.5
is_fast_food,False,False,False


# Exploratory Data Analysis

In [9]:
def plot_ratings(df, title, cp):
    """Draw a count plot of the `stars` column of `df`.

    Parameters
    ----------
    df : DataFrame containing a `stars` column.
    title : chart title; also used (lower-cased, spaces replaced by
        underscores) to build the output file name.
    cp : palette name; it is title-cased before being handed to seaborn.

    The chart is written to ../charts/<title>_bar.png when DO_WRITE_CHARTS is
    true, otherwise it is shown. The figure is closed in both cases.
    """
    plt.figure(figsize=FIG_SIZE)
    palette_name = cp.title()
    axes = sns.countplot(x="stars", data=df, palette=palette_name)

    axes.set_xlabel('Star Rating', size=LABEL_FONT_SIZE)
    axes.set_ylabel('Number of Restaurants', size=LABEL_FONT_SIZE)
    axes.set_title(title, size=TITLE_FONT_SIZE)

    if not DO_WRITE_CHARTS:
        plt.show()
    else:
        file_stem = title.lower().replace(' ', '_')
        plt.savefig('../charts/{}_bar.png'.format(file_stem))
    plt.close()

In [10]:
# Preview the full restaurant table before plotting its star distribution.
restaurants.head()

Unnamed: 0,address,business_id,categories,city,is_open,name,postal_code,review_count,stars,is_fast_food
0,"777 E Thunderbird Rd, Ste 107",1WBkAuQg81kokZIPMpn9Zg,"['burgers', 'restaurants']",Phoenix,True,Charr An American Burger Bar,85022.0,232,3.0,False
1,10720 E Southern Ave,kKx8iCJkomVQBdWHnmmOiA,"['restaurants', 'pizza']",Mesa,True,Little Caesars Pizza,85209.0,4,2.5,False
2,"8140 N Hayden Rd, Ste H115",VdlPZg2NAu8t8GkdbPLecg,"['restaurants', 'gluten-free', 'indian', 'seaf...",Scottsdale,True,Tandoori Times Indian Bistro,85258.0,263,3.5,False
3,2810 North 75th Ave,QkG3KUXwqZBW18A9k1xqCA,"['american_(traditional)', 'restaurants', 'sea...",Phoenix,True,Red Lobster,85035.0,37,2.5,False
4,1455 W Elliot Rd,nigYwB_m1TQ1WosjSWi-Hw,"['burgers', 'restaurants']",Gilbert,False,Simply Burgers,85233.0,5,3.0,False


In [11]:
# Star distribution over every restaurant in the table.
title = 'Arizona Restaurant Star Distribution'
plot_ratings(df=restaurants, title=title, cp='Reds')

In [12]:
# Preview the fast food subset.
ff_restaurants.head()

Unnamed: 0,address,business_id,categories,city,is_open,name,postal_code,review_count,stars,is_fast_food
0,719 E Thunderbird Rd,rDMptJYWtnMhpQu_rRXHng,"['fast_food', 'burgers', 'restaurants']",Phoenix,True,McDonald's,85022.0,10,1.0,True
1,1635 E Camelback Rd,iPa__LOhse-hobC2Xmp-Kw,"['restaurants', 'burgers', 'fast_food']",Phoenix,True,McDonald's,85016.0,34,3.0,True
2,3441 W Northern Ave,2v-8QQfMLX2PCz-0S6gISQ,"['pizza', 'restaurants', 'fast_food']",Phoenix,True,Papa John's Pizza,85051.0,7,2.5,True
3,1483 N Dysart Rd,mfQlXkeNhW1vz_1iq63mNQ,"['restaurants', 'fast_food', 'mexican']",Avondale,True,Del Taco,85323.0,23,2.5,True
4,17032 N 99th Ave,6CpAWkTcqqNtGAfOsPeFUw,"['american_(traditional)', 'restaurants', 'san...",Sun City,True,Arby's,85373.0,9,3.5,True


In [13]:
# Star distribution over the fast food subset only.
title = 'Arizona Fast Food Restaurant Star Distribution'
plot_ratings(df=ff_restaurants, title=title, cp='Greens')

In [14]:
# Preview the non fast food subset.
nff_restaurants.head()

Unnamed: 0,address,business_id,categories,city,is_open,name,postal_code,review_count,stars,is_fast_food
0,"777 E Thunderbird Rd, Ste 107",1WBkAuQg81kokZIPMpn9Zg,"['burgers', 'restaurants']",Phoenix,True,Charr An American Burger Bar,85022.0,232,3.0,False
1,10720 E Southern Ave,kKx8iCJkomVQBdWHnmmOiA,"['restaurants', 'pizza']",Mesa,True,Little Caesars Pizza,85209.0,4,2.5,False
2,"8140 N Hayden Rd, Ste H115",VdlPZg2NAu8t8GkdbPLecg,"['restaurants', 'gluten-free', 'indian', 'seaf...",Scottsdale,True,Tandoori Times Indian Bistro,85258.0,263,3.5,False
3,2810 North 75th Ave,QkG3KUXwqZBW18A9k1xqCA,"['american_(traditional)', 'restaurants', 'sea...",Phoenix,True,Red Lobster,85035.0,37,2.5,False
4,1455 W Elliot Rd,nigYwB_m1TQ1WosjSWi-Hw,"['burgers', 'restaurants']",Gilbert,False,Simply Burgers,85233.0,5,3.0,False


In [15]:
# Star distribution over the non fast food subset only.
title = 'Arizona Non Fast Food Restaurant Star Distribution'
plot_ratings(df=nff_restaurants, title=title, cp='Blues')

# Inspect Restaurants by Category

In [16]:
# Build the sorted list of distinct category labels.
# Each `categories` cell is a string such as "['burgers', 'restaurants']":
# split it on the "', '" separator, then trim the brackets and quotes.
unique_categories = set()
for raw_categories in restaurants['categories'].values:
    for token in raw_categories.split("', '"):
        label = token.strip("[]").strip("'")
        # Skip empty or single-character leftovers.
        if len(label) > 1:
            unique_categories.add(label)

all_category_list = sorted(unique_categories)

In [17]:
def plot_category_closures(df, category, category_label):
    """Swarm-plot review count against star rating for one restaurant category.

    Rows of `df` whose `categories` string lists `category` are selected and
    drawn with one panel per `is_fast_food` value, colored by `is_open`.
    Nothing is drawn unless more than 100 rows match. The chart is saved under
    ../charts/categorical/ when DO_WRITE_CHARTS is true, otherwise it is shown.

    Parameters
    ----------
    df : DataFrame with `categories`, `stars`, `review_count`, `is_open` and
        `is_fast_food` columns.
    category : category name to select, e.g. 'burgers'.
    category_label : raw label used to build the chart title and file name.
    """
    # Match the quoted list entry rather than a bare substring, so that e.g.
    # 'bars' does not also select 'sushi_bars'.
    # NOTE(review): assumes `categories` holds a single-quoted list repr such
    # as "['burgers', 'restaurants']" -- confirm against the cleaned CSV.
    quoted_category = "'{}'".format(category)
    matches = df.categories.apply(lambda x: quoted_category in x)
    data = df[matches].sort_values(['is_open', 'is_fast_food'], ascending=False)

    # Too few rows for this chart; nothing to plot.
    if data.shape[0] <= 100:
        return

    # Prune a trailing 's' from the label ('burgers' -> 'burger').
    if category_label.endswith('s'):
        category_label = category_label[:-1]

    # The two umbrella categories get an "All ..." label instead.
    if category_label == 'fast_food':
        category_label = 'All Fast Food'
    if category_label == 'restaurant':
        category_label = 'All '

    category_label = category_label.replace('(', '').replace(')', '').replace('_', ' ')

    plot_kwargs = dict(x="stars", y="review_count", hue="is_open", col="is_fast_food",
                       data=data, kind="swarm", aspect=1,
                       palette=['#78C850', '#C03028'])
    # Later seaborn releases renamed `factorplot` to `catplot` and its `size`
    # argument to `height`; use whichever this installation provides.
    if hasattr(sns, 'catplot'):
        g = sns.catplot(height=6, **plot_kwargs)
    else:
        g = sns.factorplot(size=6, **plot_kwargs)

    # Strip the label so 'All ' does not leave a double space in the title.
    title_label = category_label.title().strip()
    g.fig.suptitle('{} Restaurants'.format(title_label), size=TITLE_FONT_SIZE)

    if DO_WRITE_CHARTS:
        # A '/' in the label (e.g. 'cajun/creole') would otherwise be read as a
        # directory separator in the output path.
        file_stem = category_label.lower().replace(' ', '_').replace('/', '_')
        plt.savefig('../charts/categorical/{}_swarmplot.png'.format(file_stem))
    else:
        plt.show()
    plt.close()

In [18]:
# Run the closure plot helper for every category, logging a timestamp before each call.
for category_name in all_category_list:
    progress_message = 'Plotting {} Locations...'.format(category_name)
    time_marker(progress_message)
    plot_category_closures(restaurants, category_name, category_name)

[12:34:32.447766] Plotting afghan Locations...
[12:34:32.458577] Plotting african Locations...
[12:34:32.467312] Plotting american_(new) Locations...
[12:34:33.298703] Plotting american_(traditional) Locations...
[12:34:34.473902] Plotting arabian Locations...
[12:34:34.479497] Plotting argentine Locations...
[12:34:34.484521] Plotting armenian Locations...
[12:34:34.489463] Plotting asian_fusion Locations...
[12:34:35.063395] Plotting austrian Locations...
[12:34:35.069374] Plotting barbeque Locations...
[12:34:35.496441] Plotting belgian Locations...
[12:34:35.502311] Plotting brazilian Locations...
[12:34:35.507877] Plotting breakfast_&_brunch Locations...
[12:34:36.329427] Plotting british Locations...
[12:34:36.336830] Plotting buffets Locations...
[12:34:36.838281] Plotting burgers Locations...
[12:34:37.983934] Plotting burmese Locations...
[12:34:37.992604] Plotting cajun/creole Locations...
[12:34:37.998685] Plotting cambodian Locations...
[12:34:38.003473] Plotting cantonese 