# Restaurant Data EDA
<p>Now that we have wrangled the business and review data, we can move on to inspecting it.</p>

## Step 0: Import packages and establish chart output standards

In [None]:
import numpy as np
import pandas as pd
import datetime
import json
import itertools
from collections import Counter

import seaborn as sns
sns.set()

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Shared font sizes used by the charts in this notebook.
TITLE_FONT_SIZE = 25
LABEL_FONT_SIZE = 15
TICK_FONT_SIZE  = 15

# Default figure size, handed to matplotlib as `figsize`.
FIG_SIZE = (15,6)

# When True, plot_ratings also saves its chart as a PNG under ../charts/.
DO_WRITE_CHARTS = False

# `display` and `HTML` are part of the public IPython.display module; importing
# them from IPython.core.display is deprecated.
from IPython.display import display, HTML
# Widen the notebook's page container to 95% of the browser window.
display(HTML("<style>.container { width:95% !important; }</style>"))

def time_marker(text=''):
    """Print ``text`` in lower case, prefixed with the current wall-clock time.

    The output has the form ``[HH:MM:SS.ffffff] message``.

    Args:
        text: Message to print; it is lower-cased before printing.
    """
    current_time = datetime.datetime.now().time()
    message = text.lower()
    print('[{stamp}] {body}'.format(stamp=current_time, body=message))

# Step 1: Load Cleaned Restaurant Data
<p>Load the cleaned Arizona restaurant data from a CSV file and extract <code>business_id</code> values. Load reviews and keep only those given to these Arizona restaurants.</p>

In [None]:
# Read the cleaned Arizona restaurant table. It holds both fast-food and
# non-fast-food rows (they are separated later via `is_fast_food`), so the
# progress message names the whole data set.
time_marker(text='Loading Restaurant Data...')
restaurants = pd.read_csv('../clean_data/az_restaurant_business_clean.csv', index_col=0)
# The first CSV column is read as the index and then discarded in favour of a
# fresh 0..n-1 index.
restaurants.reset_index(inplace=True, drop=True)
time_marker(text='Complete!')

In [None]:
# Encode the `is_open` and `is_fast_food` flags as strings: a value equal to 1
# becomes 'True', anything else becomes 'False'.
for flag_column in ('is_open', 'is_fast_food'):
    restaurants[flag_column] = restaurants[flag_column].apply(
        lambda flag: 'True' if flag == 1 else 'False'
    )

In [None]:
# Preview the first rows to check that the flag columns now hold 'True'/'False' strings.
restaurants.head()

## Split into `Fast Food` and `Non Fast Food` DataFrames

In [None]:
# Partition the restaurants on the string-encoded `is_fast_food` flag. Each
# subset is an independent DataFrame with its own fresh 0..n-1 index.
time_marker(text='Loading Non Fast Food Data...')
nff_restaurants = restaurants.loc[restaurants['is_fast_food'] == 'False'].reset_index(drop=True)

time_marker(text='Loading Fast Food Data...')
ff_restaurants = restaurants.loc[restaurants['is_fast_food'] == 'True'].reset_index(drop=True)

time_marker(text='Complete!')

In [None]:
# Show the first three restaurants transposed, so each column appears as a row.
restaurants.head(3).transpose()

# Exploratory Data Analysis

In [None]:
def plot_ratings(df, title, cp):
    """Show a count plot of how many restaurants have each star rating.

    Args:
        df: DataFrame with a ``stars`` column.
        title: Chart title. When ``DO_WRITE_CHARTS`` is true it also gives the
            PNG file name (lower-cased, spaces replaced by underscores,
            suffixed with ``_bar.png``) written under ``../charts/``.
        cp: Palette name handed to seaborn after being passed through
            ``str.title()``.
    """
    plt.figure(figsize=FIG_SIZE)
    chart = sns.countplot(x="stars", data=df, palette=cp.title())

    chart.set_title(title, size=TITLE_FONT_SIZE)
    chart.set_xlabel('Star Rating', size=LABEL_FONT_SIZE)
    chart.set_ylabel('Number of Restaurants', size=LABEL_FONT_SIZE)

    if DO_WRITE_CHARTS:
        file_stem = title.lower().replace(' ', '_')
        plt.savefig('../charts/{}_bar.png'.format(file_stem))

    plt.show()
    plt.close()

In [None]:
# Preview the full restaurant table before plotting its star distribution.
restaurants.head()

In [None]:
# Star-rating distribution across all Arizona restaurants, drawn with the 'Reds' palette.
title = 'Arizona Restaurant Star Distribution'
plot_ratings(df=restaurants, title=title, cp='Reds')

In [None]:
# Preview the fast-food subset before plotting its star distribution.
ff_restaurants.head()

In [None]:
# Star-rating distribution for the fast-food subset, drawn with the 'Greens' palette.
title = 'Arizona Fast Food Restaurant Star Distribution'
plot_ratings(df=ff_restaurants, title=title, cp='Greens')

In [None]:
# Preview the non-fast-food subset before plotting its star distribution.
nff_restaurants.head()

In [None]:
# Star-rating distribution for the non-fast-food subset, drawn with the 'Blues' palette.
title = 'Arizona Non Fast Food Restaurant Star Distribution'
plot_ratings(df=nff_restaurants, title=title, cp='Blues')

# Inspect Restaurants by Category

In [None]:
# Collect every distinct category label found in the `categories` column.
# Each cell is split on the "', '" separator and the surrounding brackets and
# quotes are trimmed (presumably the cell is the string form of a Python list,
# e.g. "['A', 'B']" -- confirm against the cleaned CSV). Labels of one
# character or less are dropped.
unique_categories = set()
for raw_categories in restaurants['categories'].values:
    for piece in raw_categories.split("', '"):
        label = piece.strip("[]").strip("'")
        if len(label) > 1:
            unique_categories.add(label)

all_category_list = sorted(unique_categories)

In [None]:
def plot_category_closures(df, category, category_label):
    """Swarm-plot review count against star rating for one restaurant category.

    Rows of ``df`` whose ``categories`` value contains ``category`` are
    selected. This is a containment test (``category in value``), so with
    string values a category also matches any longer category name that
    includes it. Nothing is drawn unless more than 100 rows match.

    The figure has one panel per ``is_fast_food`` value and colours the points
    by ``is_open``.

    Args:
        df: DataFrame with ``categories``, ``stars``, ``review_count``,
            ``is_open`` and ``is_fast_food`` columns.
        category: Value searched for inside each ``categories`` entry.
        category_label: Name used to build the figure title. A trailing ``s``
            is removed, ``fast_food`` / ``restaurant`` are mapped to
            ``All Fast Food`` / ``All``, and parentheses and underscores are
            cleaned up before the label is title-cased.

    Returns:
        None. The chart is shown with ``plt.show()`` and then closed.
    """
    matches = df[df.categories.apply(lambda x: category in x)].copy()

    # Skip small categories: only those with more than 100 rows are charted.
    if matches.shape[0] <= 100:
        return

    # Descending sort puts 'True' rows first. Presumably this makes open
    # restaurants take the first (green) palette colour -- confirm against
    # seaborn's hue ordering.
    matches.sort_values(['is_open', 'is_fast_food'], inplace=True, ascending=False)

    # Prune the trailing 's' so the title reads "<Category> Restaurants".
    if category_label.endswith('s'):
        category_label = category_label[:-1]

    if category_label == 'fast_food':
        category_label = 'All Fast Food'
    if category_label == 'restaurant':
        # No trailing space here: the title template supplies the separator.
        category_label = 'All'

    category_label = category_label.replace('(', '').replace(')', '').replace('_', ' ')

    plot_kwargs = dict(
        x="stars",
        y="review_count",
        hue="is_open",
        col="is_fast_food",
        data=matches,
        kind="swarm",
        aspect=1,
        palette=['#78C850', '#C03028'],
    )
    # seaborn renamed factorplot to catplot and its `size` argument to
    # `height`; prefer the current API and fall back for older releases.
    if hasattr(sns, 'catplot'):
        g = sns.catplot(height=6, **plot_kwargs)
    else:
        g = sns.factorplot(size=6, **plot_kwargs)

    g.fig.suptitle('{} Restaurants'.format(category_label.title()), size=TITLE_FONT_SIZE)

    plt.show()
    plt.close()

In [None]:
# Draw the closure swarm plot for each category in turn. Categories with too
# few matching restaurants are skipped inside plot_category_closures.
for cat in all_category_list:
    progress_note = 'Plotting {} Locations...'.format(cat)
    time_marker(progress_note)
    plot_category_closures(df=restaurants, category=cat, category_label=cat)