## Dataset and prerequisites

In [102]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb

In [103]:
# Load Top250.csv into `top_rest` and preview its first five rows.
top_rest = pd.read_csv("Top250.csv")
top_rest.head(5)

Unnamed: 0,Rank,Restaurant,Content,Sales,YOY_Sales,Units,YOY_Units,Headquarters,Segment_Category
0,1,McDonald's,,40412,4.9%,13846,-0.5%,,Quick Service & Burger
1,2,Starbucks,,21380,8.6%,15049,3.0%,,Quick Service & Coffee Cafe
2,3,Chick-fil-A,While Popeyes got a lot of the chicken buzz in...,11320,13.0%,2470,5.0%,,Quick Service & Chicken
3,4,Taco Bell,,11293,9.0%,6766,2.7%,,Quick Service & Mexican
4,5,Burger King,,10204,2.7%,7346,0.2%,,Quick Service & Burger


In [104]:
# Load Independence100.csv into `inde_rest` and preview its first five rows.
inde_rest = pd.read_csv("Independence100.csv")
inde_rest.head(5)

Unnamed: 0,Rank,Restaurant,Sales,Average Check,City,State,Meals Served
0,1,Carmine's (Times Square),39080335.0,40,New York,N.Y.,469803.0
1,2,The Boathouse Orlando,35218364.0,43,Orlando,Fla.,820819.0
2,3,Old Ebbitt Grill,29104017.0,33,Washington,D.C.,892830.0
3,4,LAVO Italian Restaurant & Nightclub,26916180.0,90,New York,N.Y.,198500.0
4,5,Bryant Park Grill & Cafe,26900000.0,62,New York,N.Y.,403000.0


In [105]:
# Load Future50.csv into `fut_rest` and preview its first five rows.
fut_rest = pd.read_csv("Future50.csv")
fut_rest.head(5)

Unnamed: 0,Rank,Restaurant,Location,Sales,YOY_Sales,Units,YOY_Units,Unit_Volume,Franchising
0,1,Evergreens,"Seattle, Wash.",24,130.5%,26,116.7%,1150,No
1,2,Clean Juice,"Charlotte, N.C.",44,121.9%,105,94.4%,560,Yes
2,3,Slapfish,"Huntington Beach, Calif.",21,81.0%,21,90.9%,1370,Yes
3,4,Clean Eatz,"Wilmington, N.C.",25,79.7%,46,58.6%,685,Yes
4,5,Pokeworks,"Irvine, Calif.",49,77.1%,50,56.3%,1210,Yes


## Getting to know the data

### 1. Handling Missing Values

In [106]:
# Count the missing values in each column of `top_rest`.

top_rest.isna().sum()

Rank                  0
Restaurant            0
Content             217
Sales                 0
YOY_Sales             0
Units                 0
YOY_Units             0
Headquarters        198
Segment_Category      0
dtype: int64

In [107]:
# Drop the 'Content' and 'Headquarters' columns, which the null count above
# shows to be mostly empty (217 and 198 missing values respectively).
#
# The result is reassigned instead of using inplace=True, and errors='ignore'
# is passed, so that re-running this cell is a harmless no-op rather than a
# KeyError raised because the columns were already removed.
top_rest = top_rest.drop(columns=['Content', 'Headquarters'], errors='ignore')

### 2. Categorical Encoding

In [108]:
# List the distinct values found in the 'Segment_Category' column.

top_rest['Segment_Category'].unique()

array(['Quick Service & Burger', 'Quick Service & Coffee Cafe',
       'Quick Service & Chicken', 'Quick Service & Mexican',
       'Quick Service & Sandwich', 'Quick Service & Pizza',
       'Fast Casual & Bakery Cafe', 'Fast Casual & Mexican',
       'Casual Dining & Italian/Pizza', 'Casual Dining & Varied Menu',
       'Fast Casual & Asian/Noodle', 'Quick Service & Frozen Desserts',
       'Casual Dining & Sports Bar', 'Family Dining & Family Style',
       'Casual Dining & Steak', 'Casual Dining & Seafood',
       'Fast Casual & Sandwich', 'Fast Casual & Chicken',
       'Quick Service & Family Casual', 'Fast Casual & Burger',
       'Casual Dining & Asian', 'Quick Service & Snack', 'Steak',
       'Quick Service & Beverage', 'Sports Bar',
       'Quick Service & Seafood', 'Quick Service & Bakery Cafe',
       'Fast Casual & Pizza', 'Fine Dining & Steak', 'Mexican',
       'Varied Menu', 'Chicken', 'Italian/Pizza', 'Pizza', 'Seafood',
       'Frozen Desserts', 'Coffee Cafe', 'BBQ',

In [139]:
# Cuisine labels that should be folded into a shorter, shared menu category.
# Any cuisine not listed here is kept unchanged (e.g. 'Burger', 'Steak', 'BBQ').
MENU_CATEGORY_ALIASES = {
    'Italian/Pizza': 'Pizza',
    'Coffee Cafe': 'Cafe',
    'Bakery Cafe': 'Cafe',
    'Frozen Desserts': 'Dessert',
    'Family Casual': 'Family',
    'Family Style': 'Family',
    'Asian': 'Asia',
    'Asian/Noodle': 'Asia',
    'Beverage': 'Drinks',
}


def to_menu_category(segment_category):
    """Reduce a 'Segment_Category' value to its menu category.

    A value is either '<service segment> & <cuisine>' (for example
    'Quick Service & Burger') or a bare cuisine (for example 'Steak').
    The service-segment prefix is removed, then the cuisine is looked up in
    MENU_CATEGORY_ALIASES; cuisines without an alias are returned as-is.

    The previous implementation chained substring ``str.replace`` calls, whose
    result depended on call order: 'Fast Casual & Asian/Noodle' ended up as
    'Fast Casual & Asia' and 'Quick Service & Bakery Cafe' as
    'Quick Service & Cafe'. Exact matching on the cuisine avoids that.

    Parameters
    ----------
    segment_category : str
        One value from the 'Segment_Category' column.

    Returns
    -------
    str
        The menu category label.
    """
    # Split on the first ' & ' only: everything before it is the service segment.
    _, separator, cuisine = segment_category.partition(' & ')
    if not separator:
        # No service-segment prefix: the whole value is already the cuisine.
        cuisine = segment_category
    cuisine = cuisine.strip()
    return MENU_CATEGORY_ALIASES.get(cuisine, cuisine)


top_rest['menu_category'] = top_rest['Segment_Category'].apply(to_menu_category)

In [140]:
# Preview the first five rows, now including the derived 'menu_category' column.
top_rest.head(5)

Unnamed: 0,Rank,Restaurant,Sales,YOY_Sales,Units,YOY_Units,Segment_Category,menu_category
0,1,McDonald's,40412,4.9%,13846,-0.5%,Quick Service & Burger,Burger
1,2,Starbucks,21380,8.6%,15049,3.0%,Quick Service & Coffee Cafe,Cafe
2,3,Chick-fil-A,11320,13.0%,2470,5.0%,Quick Service & Chicken,Chicken
3,4,Taco Bell,11293,9.0%,6766,2.7%,Quick Service & Mexican,Mexican
4,5,Burger King,10204,2.7%,7346,0.2%,Quick Service & Burger,Burger
