In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Path of the nutrition table, relative to the notebook's working directory.
filename = "../input/fast-food-chains-nutrition-values/nutrition_values.csv"
# The file is read with ';' as the field delimiter.
food = pd.read_csv(filename, delimiter=";")
food.head()

This is a dataset of nutrition information for different types of food at Burger King and McDonald's.

In [None]:
# Columns that are left out of the analysis.
unused_columns = ['Serving Size (g)', 'Calories from fat', 'Saturated Fat (g)', 'Trans Fat (g)']
new_data = food.drop(columns=unused_columns)
new_data.head()

We would like to exclude 'Serving Size (g)', 'Calories from fat', 'Saturated Fat (g)', and 'Trans Fat (g)'.

In [None]:
# Discard the rows at positions 175 through 498.
# NOTE(review): presumably these are the non-Burger-King rows — confirm against the raw file.
rows_to_remove = new_data.index[175:499]
new_data = new_data.drop(index=rows_to_remove)
new_data.head()

Also, comparison will only be done for Burger King menus.

In [None]:
# Per-column flag: True if the column contains at least one missing value.
new_data.isna().any()

Check if any NA or null values exist. There are none.

In [None]:
# Summarise the columns, their non-null counts and dtypes.
new_data.info()

Here it shows that the Dtype for 'Total Fat (g)' is object. 
This would be problematic for data plotting using groupby function.

'DataError: No numeric types to aggregate' would show if Dtype is not changed.

In [None]:
# List the distinct raw values in the column to see why it is not numeric.
print(new_data["Total Fat (g)"].unique())

It turns out there are a couple of typos such as '4,5', where a comma appears in place of the decimal point (compare '2.5').
We will assume that the commas in between the numbers are decimal points.
Let's fix this problem.

In [None]:
# Work on a copy so the cleaning steps applied to new_data1 leave new_data untouched.
new_data1 = new_data.copy()

In [None]:
# Preview the first rows of the working copy.
new_data1.head()

In [None]:
# Cast the column to str and store the result. Previously the cast was computed
# and thrown away, so the column was left exactly as it was loaded.
# Having every row as a string keeps the later `.str.replace` from silently
# producing NaN for any non-string entry.
new_data1["Total Fat (g)"] = new_data1["Total Fat (g)"].astype(str)
# Show only a preview rather than the whole column.
new_data1[["Total Fat (g)"]].head()

In [None]:
# Swap every comma for a dot so values like '4,5' become '4.5'.
total_fat_text = new_data1["Total Fat (g)"]
new_data1["Total Fat (g)"] = total_fat_text.str.replace(',', '.')

In [None]:
# Re-list the distinct values to confirm no comma-separated numbers remain.
print(new_data1["Total Fat (g)"].unique())

The errors are fixed.

In [None]:
# Convert the cleaned text column to floating-point numbers.
new_data1["Total Fat (g)"] = new_data1["Total Fat (g)"].astype(float)

In [None]:
# Re-inspect the column dtypes of the working copy.
new_data1.info()

The Dtype for 'Total Fat (g)' has changed to float64 instead of object.

**Unique values in Type Column**

In [None]:
# Distinct menu categories present in the 'Type' column.
new_data1['Type'].unique()

We will be comparing nutrients for each 'Type'.

****

Below we display the number of items for each 'Type' using a countplot. We observe that 'Soft Drinks' has the largest number of items.

In [None]:
# Bar chart of how many menu items fall under each Type.
plt.style.use('default')
plt.figure(figsize=(7, 4), edgecolor='0.1', dpi=100)
a = sns.countplot(x='Type', facecolor='darkgreen', data=new_data1)

# Rotate the category labels so the long Type names do not overlap.
xticks = plt.xticks(rotation=55, family='serif')
yticks = plt.yticks(family='serif')

# Label the x axis with the column name. The previous code passed
# new_data1['Type'].all(), which is a reduction over the column's values,
# not a label. `family=` matches the keyword used for the ticks above.
plt.xlabel('Type', family='serif')
plt.ylabel('Number of Items', family='serif')

# Soften the two remaining axis lines; despine() removes the top and right ones.
a.spines['bottom'].set_color('gray')
a.spines['left'].set_color('gray')
sns.despine()

**Analyze the Number of Items per Type**
The heatmap function is another cool way to show the distribution of number of items for each type.

Here, if you place the cursor on the bar it shows the name of item.

Below we observe that 'Soft Drinks' has the largest number of unique items in the menu, followed by 'Breakfast'.

In [None]:
# Interactive heatmap of Item against Type.
px.density_heatmap(new_data1, x='Type', y='Item', width=900)

**Analysis of Nutrients**

**Average Calories distribution for each Type**

Below we observe the highest amount of calories for 'Whopper Sandwiches' followed by 'Flame Broiled Burgers'.


In [None]:
# Mean calories for each menu Type (one row per Type, single 'Calories' column).
calories = new_data1.groupby('Type')['Calories'].mean().to_frame()

# Highlight the Type with the highest mean. Deriving the colours from the data
# avoids the previous hard-coded list of 17 entries and fixed bar position,
# which would mis-colour the chart if the set of Types changed.
top_type = calories['Calories'].idxmax()
colors = ['#eb7a34' if menu_type == top_type else 'gray' for menu_type in calories.index]

fig = go.Figure(data=[go.Bar(
    x=calories.index,
    y=calories['Calories'],
    marker_color=colors
)])
fig.update_layout(width=700, height=500)
fig.update_xaxes(title='Type')
fig.update_yaxes(title='Avg Calories')
fig.show()

**Average Total Fat**

Below we observe the highest amount of Fat for 'Whopper Sandwiches' followed by 'Flame Broiled Burgers'.


In [None]:
# Mean total fat for each menu Type (one row per Type, single 'Total Fat (g)' column).
fat = new_data1.groupby('Type')['Total Fat (g)'].mean().to_frame()

# Highlight the Type with the highest mean. Deriving the colours from the data
# avoids the previous hard-coded list of 17 entries and fixed bar position,
# which would mis-colour the chart if the set of Types changed.
top_type = fat['Total Fat (g)'].idxmax()
colors = ['#C4451C' if menu_type == top_type else 'gray' for menu_type in fat.index]

fig = go.Figure(data=[go.Bar(
    x=fat.index,
    y=fat['Total Fat (g)'],
    marker_color=colors
)])
fig.update_xaxes(title='Type')
fig.update_yaxes(title='Avg Total Fat')
fig.update_layout(width=700, height=500)
fig.show()

**Average Total Cholesterol**

Below we observe the highest amount of Cholesterol for 'Breakfast' followed by 'Whopper Sandwiches'.

In [None]:
# Mean cholesterol for each menu Type (one row per Type, single 'Chol (mg)' column).
chol = new_data1.groupby('Type')['Chol (mg)'].mean().to_frame()

# Highlight the Type with the highest mean. Deriving the colours from the data
# avoids the previous hard-coded list of 17 entries and fixed bar position,
# which would mis-colour the chart if the set of Types changed.
top_type = chol['Chol (mg)'].idxmax()
colors = ['#85660D' if menu_type == top_type else 'gray' for menu_type in chol.index]

fig = go.Figure(data=[go.Bar(
    x=chol.index,
    y=chol['Chol (mg)'],
    marker_color=colors
)])
fig.update_xaxes(title='Type')
fig.update_yaxes(title='Avg Cholesterol')
fig.update_layout(width=700, height=500)
fig.show()

**Average Total Sodium**

Below we observe the highest amount of Sodium for 'Breakfast' followed by 'Whopper Sandwiches'.

In [None]:
# Mean sodium for each menu Type (one row per Type, single 'Sodium (mg)' column).
sod = new_data1.groupby('Type')['Sodium (mg)'].mean().to_frame()

# Highlight the Type with the highest mean. Deriving the colours from the data
# avoids the previous hard-coded list of 17 entries and fixed bar position,
# which would mis-colour the chart if the set of Types changed.
top_type = sod['Sodium (mg)'].idxmax()
colors = ['#34eb34' if menu_type == top_type else 'gray' for menu_type in sod.index]

fig = go.Figure(data=[go.Bar(
    x=sod.index,
    y=sod['Sodium (mg)'],
    marker_color=colors
)])
fig.update_xaxes(title='Type')
fig.update_yaxes(title='Avg Sodium')
fig.update_layout(width=700, height=500)
fig.show()

**Average Total Carbohydrates**

Below we observe the highest amount of Carbohydrates for 'Shakes/Smoothies' followed by 'Frappes'.

In [None]:
# Mean carbohydrates for each menu Type (one row per Type, single 'Total Carb (g)' column).
carbs = new_data1.groupby('Type')['Total Carb (g)'].mean().to_frame()

# Highlight the Type with the highest mean. Deriving the colours from the data
# avoids the previous hard-coded list of 17 entries and fixed bar position,
# which would mis-colour the chart if the set of Types changed.
top_type = carbs['Total Carb (g)'].idxmax()
colors = ['#B82E2E' if menu_type == top_type else 'gray' for menu_type in carbs.index]

fig = go.Figure(data=[go.Bar(
    x=carbs.index,
    y=carbs['Total Carb (g)'],
    marker_color=colors
)])
fig.update_xaxes(title='Type')
fig.update_yaxes(title='Avg Carbohydrates')
fig.update_layout(width=700, height=500)
fig.show()

**Average Total Dietary Fiber**

Below we observe the highest amount of Dietary Fiber for 'Salads & Sides' followed by 'Chicken & More'.

In [None]:
# Mean dietary fiber for each menu Type (one row per Type, single 'Dietary Fiber (g)' column).
fib = new_data1.groupby('Type')['Dietary Fiber (g)'].mean().to_frame()

# Highlight the Type with the highest mean. Deriving the colours from the data
# avoids the previous hard-coded list of 17 entries and fixed bar position,
# which would mis-colour the chart if the set of Types changed.
top_type = fib['Dietary Fiber (g)'].idxmax()
colors = ['#eb34cc' if menu_type == top_type else 'gray' for menu_type in fib.index]

fig = go.Figure(data=[go.Bar(
    x=fib.index,
    y=fib['Dietary Fiber (g)'],
    marker_color=colors
)])
fig.update_xaxes(title='Type')
# The bars are per-Type means, so the axis title says "Avg" like the other nutrient charts.
fig.update_yaxes(title='Avg Dietary Fiber (g)')
fig.update_layout(width=700, height=500)
fig.show()

**Average Total Sugar**

Below we observe the highest amount of Sugar for 'Shakes/Smoothies' followed by 'Soft Drinks'.

In [None]:
# Mean total sugar for each menu Type (one row per Type, single 'Total Sugar (g)' column).
sug = new_data1.groupby('Type')['Total Sugar (g)'].mean().to_frame()

# Highlight the Type with the highest mean. Deriving the colours from the data
# avoids the previous hard-coded list of 17 entries and fixed bar position,
# which would mis-colour the chart if the set of Types changed.
top_type = sug['Total Sugar (g)'].idxmax()
colors = ['#ebe534' if menu_type == top_type else 'gray' for menu_type in sug.index]

fig = go.Figure(data=[go.Bar(
    x=sug.index,
    y=sug['Total Sugar (g)'],
    marker_color=colors
)])
fig.update_xaxes(title='Type')
# The bars are per-Type means, so the axis title says "Avg" like the other nutrient charts.
fig.update_yaxes(title='Avg Total Sugar (g)')
fig.update_layout(width=700, height=500)
fig.show()

**Average Protein**

Below we observe the highest amount of Protein for 'Whopper Sandwiches' followed by 'Flame Broiled Burgers'.

In [None]:
# Mean protein for each menu Type (one row per Type, single 'Protein (g)' column).
pro = new_data1.groupby('Type')['Protein (g)'].mean().to_frame()

# Highlight the Type with the highest mean. Deriving the colours from the data
# avoids the previous hard-coded list of 17 entries and fixed bar position,
# which would mis-colour the chart if the set of Types changed.
top_type = pro['Protein (g)'].idxmax()
colors = ['#1C8356' if menu_type == top_type else 'gray' for menu_type in pro.index]

fig = go.Figure(data=[go.Bar(
    x=pro.index,
    y=pro['Protein (g)'],
    marker_color=colors
)])
fig.update_xaxes(title='Type')
# The bars are per-Type means, so the axis title says "Avg" like the other nutrient charts.
fig.update_yaxes(title='Avg Protein (g)')
fig.update_layout(width=700, height=500)
fig.show()

**Below we analyze how are Carbohydrates and Sugars related**

In [None]:
# Scatter of sugar against carbohydrates, one point per menu item.
plt.style.use('ggplot')
carb_fig, carb_ax = plt.subplots(figsize=(8, 5), dpi=80)
sns.scatterplot(
    data=new_data1,
    x='Total Carb (g)',
    y='Total Sugar (g)',
    alpha=0.8,
    s=60,
    ax=carb_ax,
)
# Centre the title and lift it slightly above the axes.
carb_ax.set_title('Carbohydrates vs Sugars', x=0.5, y=1.05)

We can see that there is a strong, positive, linear relationship between Carbohydrates and Sugars.

**For each Category below we analyze the items with highest amount of Nutrients in the Burger King's menu**

**Whopper Sandwiches**

In [None]:
# Whopper Sandwiches rows only; for each nutrient, print the item with the largest value.
ws = new_data1.loc[new_data1['Type'] == 'Whopper Sandwiches']
cols = ['Calories', 'Total Fat (g)', 'Chol (mg)', 'Sodium (mg)',
        'Total Carb (g)', 'Dietary Fiber (g)', 'Total Sugar (g)', 'Protein (g)']
for nutrient in cols:
    print(nutrient)
    per_item_max = ws.groupby('Item')[nutrient].max()
    leader = per_item_max.sort_values(ascending=False).head(1)
    print(leader)
    print('-' * 40)
    print('\n')

**Flame Broiled Burgers**

In [None]:
# Flame Broiled Burgers rows only; for each nutrient, print the item with the largest value.
fbb = new_data1.loc[new_data1['Type'] == 'Flame Broiled Burgers']
cols = ['Calories', 'Total Fat (g)', 'Chol (mg)', 'Sodium (mg)',
        'Total Carb (g)', 'Dietary Fiber (g)', 'Total Sugar (g)', 'Protein (g)']
for nutrient in cols:
    print(nutrient)
    per_item_max = fbb.groupby('Item')[nutrient].max()
    leader = per_item_max.sort_values(ascending=False).head(1)
    print(leader)
    print('-' * 40)
    print('\n')

**Chicken & More**

In [None]:
# Chicken & More rows only; for each nutrient, print the item with the largest value.
cm = new_data1.loc[new_data1['Type'] == 'Chicken & More']
cols = ['Calories', 'Total Fat (g)', 'Chol (mg)', 'Sodium (mg)',
        'Total Carb (g)', 'Dietary Fiber (g)', 'Total Sugar (g)', 'Protein (g)']
for nutrient in cols:
    print(nutrient)
    per_item_max = cm.groupby('Item')[nutrient].max()
    leader = per_item_max.sort_values(ascending=False).head(1)
    print(leader)
    print('-' * 40)
    print('\n')

**Salads & Sides**

In [None]:
# Salads & Sides rows only; for each nutrient, print the item with the largest value.
ss = new_data1.loc[new_data1['Type'] == 'Salads & Sides']
cols = ['Calories', 'Total Fat (g)', 'Chol (mg)', 'Sodium (mg)',
        'Total Carb (g)', 'Dietary Fiber (g)', 'Total Sugar (g)', 'Protein (g)']
for nutrient in cols:
    print(nutrient)
    per_item_max = ss.groupby('Item')[nutrient].max()
    leader = per_item_max.sort_values(ascending=False).head(1)
    print(leader)
    print('-' * 40)
    print('\n')

**King Jr Meals - Entrees**

In [None]:
# King Jr Meals - Entrees rows only; for each nutrient, print the item with the largest value.
kjme = new_data1.loc[new_data1['Type'] == 'King Jr Meals - Entrees']
cols = ['Calories', 'Total Fat (g)', 'Chol (mg)', 'Sodium (mg)',
        'Total Carb (g)', 'Dietary Fiber (g)', 'Total Sugar (g)', 'Protein (g)']
for nutrient in cols:
    print(nutrient)
    per_item_max = kjme.groupby('Item')[nutrient].max()
    leader = per_item_max.sort_values(ascending=False).head(1)
    print(leader)
    print('-' * 40)
    print('\n')

**King Jr Meals - Sides**

In [None]:
# King Jr Meals - Sides rows only; for each nutrient, print the item with the largest value.
kjms = new_data1.loc[new_data1['Type'] == 'King Jr Meals - Sides']
cols = ['Calories', 'Total Fat (g)', 'Chol (mg)', 'Sodium (mg)',
        'Total Carb (g)', 'Dietary Fiber (g)', 'Total Sugar (g)', 'Protein (g)']
for nutrient in cols:
    print(nutrient)
    per_item_max = kjms.groupby('Item')[nutrient].max()
    leader = per_item_max.sort_values(ascending=False).head(1)
    print(leader)
    print('-' * 40)
    print('\n')

**King Jr Meals - Beverages**

In [None]:
# King Jr Meals - Beverages rows only; for each nutrient, print the item with the largest value.
kjmb = new_data1.loc[new_data1['Type'] == 'King Jr Meals - Beverages']
cols = ['Calories', 'Total Fat (g)', 'Chol (mg)', 'Sodium (mg)',
        'Total Carb (g)', 'Dietary Fiber (g)', 'Total Sugar (g)', 'Protein (g)']
for nutrient in cols:
    print(nutrient)
    per_item_max = kjmb.groupby('Item')[nutrient].max()
    leader = per_item_max.sort_values(ascending=False).head(1)
    print(leader)
    print('-' * 40)
    print('\n')

**King Jr Meals - Desserts**

In [None]:
# King Jr Meals - Desserts rows only; for each nutrient, print the item with the largest value.
kjmd = new_data1.loc[new_data1['Type'] == 'King Jr Meals - Desserts']
cols = ['Calories', 'Total Fat (g)', 'Chol (mg)', 'Sodium (mg)',
        'Total Carb (g)', 'Dietary Fiber (g)', 'Total Sugar (g)', 'Protein (g)']
for nutrient in cols:
    print(nutrient)
    per_item_max = kjmd.groupby('Item')[nutrient].max()
    leader = per_item_max.sort_values(ascending=False).head(1)
    print(leader)
    print('-' * 40)
    print('\n')

**Breakfast**

In [None]:
# Breakfast rows only; for each nutrient, print the item with the largest value.
breakfast = new_data1.loc[new_data1['Type'] == 'Breakfast']
cols = ['Calories', 'Total Fat (g)', 'Chol (mg)', 'Sodium (mg)',
        'Total Carb (g)', 'Dietary Fiber (g)', 'Total Sugar (g)', 'Protein (g)']
for nutrient in cols:
    print(nutrient)
    per_item_max = breakfast.groupby('Item')[nutrient].max()
    leader = per_item_max.sort_values(ascending=False).head(1)
    print(leader)
    print('-' * 40)
    print('\n')

**Additional Options**

In [None]:
# Additional Options rows only; for each nutrient, print the item with the largest value.
ao = new_data1.loc[new_data1['Type'] == 'Additional Options']
cols = ['Calories', 'Total Fat (g)', 'Chol (mg)', 'Sodium (mg)',
        'Total Carb (g)', 'Dietary Fiber (g)', 'Total Sugar (g)', 'Protein (g)']
for nutrient in cols:
    print(nutrient)
    per_item_max = ao.groupby('Item')[nutrient].max()
    leader = per_item_max.sort_values(ascending=False).head(1)
    print(leader)
    print('-' * 40)
    print('\n')

**Shakes/Smoothies**

In [None]:
# Shakes/Smoothies rows only; for each nutrient, print the item with the largest value.
# NOTE(review): `ss` is also the name the Salads & Sides cell assigns, so running
# this cell overwrites that frame — consider a distinct name.
ss = new_data1.loc[new_data1['Type'] == 'Shakes/Smoothies']
cols = ['Calories', 'Total Fat (g)', 'Chol (mg)', 'Sodium (mg)',
        'Total Carb (g)', 'Dietary Fiber (g)', 'Total Sugar (g)', 'Protein (g)']
for nutrient in cols:
    print(nutrient)
    per_item_max = ss.groupby('Item')[nutrient].max()
    leader = per_item_max.sort_values(ascending=False).head(1)
    print(leader)
    print('-' * 40)
    print('\n')

**Soft Drinks**

In [None]:
# Soft Drinks rows only; for each nutrient, print the item with the largest value.
softd = new_data1.loc[new_data1['Type'] == 'Soft Drinks']
cols = ['Calories', 'Total Fat (g)', 'Chol (mg)', 'Sodium (mg)',
        'Total Carb (g)', 'Dietary Fiber (g)', 'Total Sugar (g)', 'Protein (g)']
for nutrient in cols:
    print(nutrient)
    per_item_max = softd.groupby('Item')[nutrient].max()
    leader = per_item_max.sort_values(ascending=False).head(1)
    print(leader)
    print('-' * 40)
    print('\n')

**Hot Coffees**

There's no nutrition information for Hot Coffees.

In [None]:
# Hot Coffees rows only; for each nutrient, print the item with the largest value.
hc = new_data1.loc[new_data1['Type'] == 'Hot Coffees']
cols = ['Calories', 'Total Fat (g)', 'Chol (mg)', 'Sodium (mg)',
        'Total Carb (g)', 'Dietary Fiber (g)', 'Total Sugar (g)', 'Protein (g)']
for nutrient in cols:
    print(nutrient)
    per_item_max = hc.groupby('Item')[nutrient].max()
    leader = per_item_max.sort_values(ascending=False).head(1)
    print(leader)
    print('-' * 40)
    print('\n')

**Iced Coffees**

In [None]:
# Iced Coffees rows only; for each nutrient, print the item with the largest value.
ic = new_data1.loc[new_data1['Type'] == 'Iced Coffees']
cols = ['Calories', 'Total Fat (g)', 'Chol (mg)', 'Sodium (mg)',
        'Total Carb (g)', 'Dietary Fiber (g)', 'Total Sugar (g)', 'Protein (g)']
for nutrient in cols:
    print(nutrient)
    per_item_max = ic.groupby('Item')[nutrient].max()
    leader = per_item_max.sort_values(ascending=False).head(1)
    print(leader)
    print('-' * 40)
    print('\n')

**Frappes**

In [None]:
# Frappes rows only; for each nutrient, print the item with the largest value.
fra = new_data1.loc[new_data1['Type'] == 'Frappes']
cols = ['Calories', 'Total Fat (g)', 'Chol (mg)', 'Sodium (mg)',
        'Total Carb (g)', 'Dietary Fiber (g)', 'Total Sugar (g)', 'Protein (g)']
for nutrient in cols:
    print(nutrient)
    per_item_max = fra.groupby('Item')[nutrient].max()
    leader = per_item_max.sort_values(ascending=False).head(1)
    print(leader)
    print('-' * 40)
    print('\n')

**Sandwiches**

In [None]:
# Sandwiches rows only; for each nutrient, print the item with the largest value.
sandw = new_data1.loc[new_data1['Type'] == 'Sandwiches']
cols = ['Calories', 'Total Fat (g)', 'Chol (mg)', 'Sodium (mg)',
        'Total Carb (g)', 'Dietary Fiber (g)', 'Total Sugar (g)', 'Protein (g)']
for nutrient in cols:
    print(nutrient)
    per_item_max = sandw.groupby('Item')[nutrient].max()
    leader = per_item_max.sort_values(ascending=False).head(1)
    print(leader)
    print('-' * 40)
    print('\n')