### CONTENTS

1. [Intro](#intro)

2. [Univariate Analysis](#uva)

3. [Bivariate Analysis](#bva)

4. [Future 50](#ft)

#### The usual drill of fetching the data and looking at it before diving into EDA and more

In [None]:
import numpy as np
import pandas as pd
import os
import seaborn as sb
import matplotlib.pyplot as plt
import plotly.express as px


# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [None]:
# Read the three ranking tables (Top 250, Future 50, Independence 100) in one pass.
top250, ft50, i100 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/restaurant-business-rankings-2020/Top250.csv",
        "../input/restaurant-business-rankings-2020/Future50.csv",
        "../input/restaurant-business-rankings-2020/Independence100.csv",
    )
)

<a id='intro'> </a>

<h1 style="text-align:center;color:blue;">Top 250</h1>

In [None]:
# Preview the first five rows of the Top 250 table.
top250.head(n=5)

In [None]:
# Requires the pandas_profiling package (install with: pip install pandas_profiling).
# NOTE(review): newer releases of this package are published as ydata-profiling;
# confirm which one is installed in the target environment.
from pandas_profiling import ProfileReport
# Build an automated profiling report for the Top 250 table.
profile = ProfileReport(top250)
# Leave the report as the cell's last expression so the notebook renders it.
profile

#### In a sense, let's validate the above report. Starting with univariate analysis, let's look at the missing values in the Top 250 dataset

In [None]:
# Work on a copy so the raw Top 250 table is left untouched.
train = top250.copy()

# Share of missing entries per column (in percent), ascending;
# columns without any missing entries are filtered out.
null_mask = train.isnull()
percent = (
    null_mask.sum()
    .div(null_mask.count())
    .mul(100)
    .sort_values()
    .loc[lambda share: share > 0]
    .reset_index()
)
percent.columns = ['Variable', 'Percent']

fig = px.bar(
    percent[['Variable', 'Percent']],
    x="Variable",
    y="Percent",
    color='Variable',
    log_y=False,
    template='ggplot2',
)
fig.show()

<h3 style="background-color:DodgerBlue; color:white"> So the variables Headquarters and Content have a lot of missing values, but it should be OK to proceed for now.</h3>

<br/>
<br/>
        
        
<h3> <span class="label label-default">Converting % strings to numeric</span></h3>

In [None]:
# Convert the percentage strings (e.g. "12.5%") in the YOY columns to numbers.
#
# The conversion is skipped for a column that is already numeric, so re-running
# this cell is harmless. The .str accessor leaves missing entries as NaN instead
# of raising, which a per-element str.replace() call would do on a float NaN.
for col in ('YOY_Sales', 'YOY_Units'):
    if not pd.api.types.is_numeric_dtype(train[col]):
        # regex=False: '%' is removed as a literal character.
        train[col] = pd.to_numeric(train[col].str.replace('%', '', regex=False))

<a id='uva'> </a>


<h1 style="text-align:center;color:green;">Univariate Analysis</h1>

<h3 style="background-color:Green; color:white" >Which restaurant had the highest sales?</h3>

In [None]:
# The fifteen rows with the highest Sales, largest first.
top15_by_sales = train.sort_values(by='Sales', ascending=False).head(15)

fig = px.bar(
    top15_by_sales,
    x='Restaurant',
    y="Sales",
    color="Restaurant",
    hover_name="Restaurant",
    animation_group="Restaurant",
)
# update_layout returns the figure, so as the last expression it also renders the chart.
fig.update_layout(title='Restaurant Sales - Top 15')

<h3 style="background-color:Yellow; color:Black" >Which segment had the highest sales?</h3>

In [None]:
# Total the numeric columns per segment category and keep the 15 segments with
# the highest Sales.
#
# numeric_only=True restricts the aggregation to numeric columns explicitly,
# rather than relying on how pandas treats non-numeric columns in a blanket
# .sum(), which differs between pandas versions.
grouped = (
    train.groupby('Segment_Category')
    .sum(numeric_only=True)
    .reset_index()
    .sort_values('Sales', ascending=False)
    .reset_index(drop=True)  # fresh 0..n-1 index without keeping the old one as a column
    .head(15)
)

fig = px.pie(grouped, values="Sales", names="Segment_Category", template="seaborn")
fig.update_traces(rotation=90, pull=0.05, textinfo="percent+label")
fig.show()

<h3 style="background-color:Yellow; color:Black" >The Quick Service & Burger segment had the most sales </h3>

<h3 style="background-color:Purple; color:Yellow" >YOY Distribution of Units & Sales</h3>

In [None]:
import plotly.figure_factory as ff

# Legend label -> column name for the two YOY series drawn on the same distplot.
yoy_columns = {'YOY Sales': 'YOY_Sales', 'YOY Units': 'YOY_Units'}

fig = ff.create_distplot(
    [train[column] for column in yoy_columns.values()],
    list(yoy_columns),
    bin_size=0.2,
)
fig.show()

<h3 style="background-color:Purple; color:yellow" >The YOY change in both sales and units was not more than 40% </h3>

<h3 style="background-color:Orange; color:Black" >The Restaurant with the most branches</h3>

In [None]:
# The ten rows with the largest Units value, largest first.
most_units = train.sort_values(by='Units', ascending=False).iloc[:10]

fig = px.funnel(most_units, y='Restaurant', x='Units')
fig.show()

<a id='bva'> </a>


<h1 style="text-align:center;color:Blue;">Bivariate Analysis</h1>

<h3 style="background-color:Brown; color:White" >Are Sales Correlated with total Units/Branches?</h3>

In [None]:
import plotly.express as px

# Label each row by the sign of its YOY sales change; any value that is not
# strictly greater than zero is labelled 'Negative'.
train['Mood'] = ['Positive' if yoy > 0 else 'Negative' for yoy in train['YOY_Sales']]

fig = px.scatter(train, y="Units", x="Sales", color="Mood")
fig.show()

<h3 style="background-color:Brown; color:White" > Yes, it seems to be the case</h3>

<h3 style="background-color:#C4A297; color:White" >Segments & Restaurant with +/- YoY Sales</h3>

In [None]:
# Sunburst with Mood as the inner ring and Segment_Category as the outer ring.
# No matplotlib figure is created here: plt.figure(figsize=...) does not affect
# a Plotly chart and only adds an empty matplotlib figure to the cell output.
# To resize a Plotly figure, pass width=/height= (pixels) to px.sunburst.
fig = px.sunburst(
    train,
    path=['Mood', 'Segment_Category'],
    title='Segments with +/- YoY Sales',
)
fig.show()

In [None]:
# Sunburst with Mood as the inner ring and Restaurant as the outer ring.
# No matplotlib figure is created here: plt.figure(figsize=...) does not affect
# a Plotly chart and only adds an empty matplotlib figure to the cell output.
# To resize a Plotly figure, pass width=/height= (pixels) to px.sunburst.
fig = px.sunburst(
    train,
    path=['Mood', 'Restaurant'],
    title='Restaurants with +/- YoY Sales',
)
fig.show()

<a id='ft'> </a>


<h1 style="text-align:center;color:Brown;">Future 50</h1>

In [None]:
# Convert the percentage strings (e.g. "12.5%") in the YOY columns to numbers.
#
# The conversion is skipped for a column that is already numeric, so re-running
# this cell is harmless. The .str accessor leaves missing entries as NaN instead
# of raising, which a per-element str.replace() call would do on a float NaN.
for col in ('YOY_Sales', 'YOY_Units'):
    if not pd.api.types.is_numeric_dtype(ft50[col]):
        # regex=False: '%' is removed as a literal character.
        ft50[col] = pd.to_numeric(ft50[col].str.replace('%', '', regex=False))

# Label each row by the sign of its YOY sales change. Any value that is not
# strictly greater than zero (including NaN) is labelled 'Negative'.
ft50['Mood'] = ft50['YOY_Sales'].gt(0).map({True: 'Positive', False: 'Negative'})

<h3 style="background-color:#C4A297; color:Brown" >Does Franchise have anything to do with YoY Sales? </h3>

In [None]:
plt.rcParams['figure.figsize'] = (20, 8)
plt.style.use('fivethirtyeight')

import warnings
import seaborn as sns
warnings.filterwarnings('ignore')

# One KDE panel per franchising status, side by side:
# (value in the Franchising column, line colour, panel title).
panels = (
    ('Yes', 'green', 'With Franchise'),
    ('No', 'skyblue', 'No Franchise'),
)

kde_fig, kde_axes = plt.subplots(1, 2)
for ax, (flag, colour, heading) in zip(kde_axes, panels):
    sns.kdeplot(ft50.loc[ft50['Franchising'] == flag, 'YOY_Sales'], color=colour, ax=ax)
    ax.set_title(heading)

kde_fig.suptitle('YoY Sales comparison with & without Franchise', fontsize=20)
plt.show()

<h3 style="background-color:#C4A297; color:Brown" >Overall, it's similar, but there is indeed a dip in sales when there's no franchise. </h3>

In [None]:
!pip install pywaffle
from pywaffle import Waffle
df = ft50.iloc[:200,:].groupby('Franchising').size().reset_index(name='counts')
n_categories = df.shape[0]
colors = [plt.cm.inferno_r(i/float(n_categories)) for i in range(n_categories)]

fig = plt.figure(
    FigureClass=Waffle,
    values = df['counts'],
    labels = ['No','Yes'],
    legend = {'loc': 'upper left'},
    title = {'label': 'Franchising in Future 50 Restaurants', 'fontsize': 15},    
    rows=5,
    colors=['pink','green'],
    figsize = (15, 15)
)

<h3 style="text-align:center;font-size:200%;color:Purple">Progress on the Notebook</h3>
<div class="progress">
  <div class="progress-bar" role="progressbar" style="width: 40%;" aria-valuenow="40" aria-valuemin="0" aria-valuemax="100">40%</div>
</div>