<a href="https://colab.research.google.com/github/junyanvv/junyanvv/blob/main/Pre_Fastfood_Nutrition_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [9]:
# Mount Google Drive at /content/drive so files stored on Drive can be read by
# path. The google.colab module is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
# Location of the dataset CSV on the mounted Drive.
data ='/content/drive/MyDrive/Python/FastFoodNutritionMenuV2.csv'
# Replace the path above with the location of your own copy of the file.
print(data)  # echoes the path string only; this cell does not read the CSV

/content/drive/MyDrive/Python/FastFoodNutritionMenuV2.csv


In [12]:
# Data cleaning: load the dataset and check for missing values.
# The file is read from the `data` path defined in the earlier cell, so the
# location only has to be changed in one place.
data_df = pd.read_csv(data)
# Number of missing (empty) entries per column.
print(data_df.isnull().sum())

# For example, when calculating average calories, we can drop rows with missing Calories values
# Similarly, when analyzing by Company, we can drop rows with missing Company values

Company                    0
Item                       0
Calories                   1
Calories from\nFat       506
Total Fat\n(g)            57
Saturated Fat\n(g)        57
Trans Fat\n(g)            57
Cholesterol\n(mg)          1
Sodium \n(mg)              1
Carbs\n(g)                57
Fiber\n(g)                57
Sugars\n(g)                1
Protein\n(g)              57
Weight Watchers\nPnts    261
dtype: int64


In [14]:
# Columns that are expected to hold numeric nutrient values
nutrient_cols = [
    "Calories",
    "Calories from\nFat",
    "Total Fat\n(g)",
    "Saturated Fat\n(g)",
    "Trans Fat\n(g)",
    "Cholesterol\n(mg)",
    "Sodium \n(mg)",
    "Carbs\n(g)",
    "Fiber\n(g)",
    "Sugars\n(g)",
    "Protein\n(g)",
    "Weight Watchers\nPnts",
]

# Coerce all of them in one pass; entries that cannot be parsed become NaN
data_df[nutrient_cols] = data_df[nutrient_cols].apply(pd.to_numeric, errors='coerce')

# Show the resulting dtypes to confirm the conversion
print(data_df.dtypes)

Company                   object
Item                      object
Calories                 float64
Calories from\nFat       float64
Total Fat\n(g)           float64
Saturated Fat\n(g)       float64
Trans Fat\n(g)           float64
Cholesterol\n(mg)        float64
Sodium \n(mg)            float64
Carbs\n(g)               float64
Fiber\n(g)               float64
Sugars\n(g)              float64
Protein\n(g)             float64
Weight Watchers\nPnts    float64
dtype: object


In [15]:
# Prepare the data for the average-calories-per-restaurant analysis.
# Keep only Company and Calories, then:
#   1. coerce Calories to numeric (unparseable entries become NaN),
#   2. drop every row that has a missing Company or Calories value.
# The coercion has to happen before dropna(); otherwise entries that only turn
# into NaN during coercion would remain in the result. Using .assign builds a
# new frame, so nothing is written into a selection taken from data_df.
data_calories = (
    data_df[["Company", "Calories"]]
    .assign(Calories=lambda frame: pd.to_numeric(frame["Calories"], errors='coerce'))
    .dropna()
)

print(data_calories)

         Company  Calories
0     McDonald’s     250.0
1     McDonald’s     300.0
2     McDonald’s     440.0
3     McDonald’s     390.0
4     McDonald’s     510.0
...          ...       ...
1143   Pizza Hut     230.0
1144   Pizza Hut     310.0
1145   Pizza Hut     120.0
1146   Pizza Hut     200.0
1147   Pizza Hut     260.0

[1133 rows x 2 columns]


Next, we group `data_calories` by company and compute the average calories within each company.

In [16]:
# Mean calories per company; as_index=False keeps Company as a regular column
avg_calories = data_calories.groupby("Company", as_index=False)["Calories"].mean()

# Show the per-restaurant averages
print(avg_calories)

       Company    Calories
0  Burger King  359.189944
1          KFC  215.229358
2   McDonald’s  284.618902
3    Pizza Hut  253.378378
4    Taco Bell  292.166667
5      Wendy’s  322.500000


In [17]:
# Bar chart with one bar per restaurant; bar height is its average calories
fig = px.bar(
    avg_calories,
    x="Company",
    y="Calories",
    title="Average Calories by Restaurant",
)

fig.show()

Next, we visualize the nutrient breakdown for each company: we first group the items by company, then sum each nutrient, and finally plot each nutrient's share of the total.

In [18]:
# Column names of the dataset as a plain Python list
columns = list(data_df.columns)
print(columns)

['Company', 'Item', 'Calories', 'Calories from\nFat', 'Total Fat\n(g)', 'Saturated Fat\n(g)', 'Trans Fat\n(g)', 'Cholesterol\n(mg)', 'Sodium \n(mg)', 'Carbs\n(g)', 'Fiber\n(g)', 'Sugars\n(g)', 'Protein\n(g)', 'Weight Watchers\nPnts']


In [19]:
# Nutrient columns drawn as pie slices, in slice order
nutrient_cols = [
    "Total Fat\n(g)",
    "Cholesterol\n(mg)",
    "Sodium \n(mg)",
    "Carbs\n(g)",
    "Fiber\n(g)",
    "Sugars\n(g)",
    "Protein\n(g)",
]
# Display names for the slices, matched to nutrient_cols by position
labels = ["Fat", "Cholesterol", "Sodium", "Carbs", "Fiber", "Sugars", "Protein"]

# NOTE(review): Cholesterol and Sodium are recorded in mg while the other
# columns are in g, so the slices below mix units; the mg columns appear 1000x
# larger than they would if expressed in grams.
for company in data_df["Company"].drop_duplicates():
    # All menu items that belong to this company
    is_company = data_df["Company"].eq(company)
    company_rows = data_df.loc[is_company]
    # Total of each nutrient over the company's items (unparseable entries count as missing)
    values = [
        pd.to_numeric(company_rows[col], errors='coerce').sum()
        for col in nutrient_cols
    ]
    # One donut chart per company
    pie = go.Pie(labels=labels, values=values, hole=0.3)
    fig = go.Figure(data=[pie])
    fig.update_layout(title=f"{company} distribution of nutrition elements")
    fig.show()

In [20]:
# Sodium takes a very large share of the charts above. Note that it is recorded
# in mg while most other nutrients are in g, which inflates its slice by a
# factor of 1000. Drop Sodium and replot so the remaining nutrients can be compared.
excluded_col = 'Sodium \n(mg)'

# Slice label for every remaining column. Labels are looked up by column name,
# so labels and values cannot drift out of step; a column in nutrient_cols that
# has no label here raises a KeyError instead of being silently mislabeled.
slice_labels = {
    "Total Fat\n(g)": "Fat",
    "Cholesterol\n(mg)": "Cholesterol",
    "Carbs\n(g)": "Carbs",
    "Fiber\n(g)": "Fiber",
    "Sugars\n(g)": "Sugars",
    "Protein\n(g)": "Protein",
}
MG_PER_G = 1000

plotted_cols = [col for col in nutrient_cols if col != excluded_col]
labels = [slice_labels[col] for col in plotted_cols]

for company in data_df["Company"].unique():
    # All menu items that belong to this company
    row = data_df[data_df["Company"] == company]
    values = []
    for col in plotted_cols:
        # Total of this nutrient over the company's items (unparseable entries count as missing)
        total = pd.to_numeric(row[col], errors='coerce').sum()
        # Cholesterol is recorded in mg; convert to g so every slice shares one unit
        if col.endswith("(mg)"):
            total = total / MG_PER_G
        values.append(total)
    # One donut chart per company
    fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=0.3)])
    fig.update_layout(title=f"{company} distribution of nutrition elements (excluding Sodium)")
    fig.show()