# MCDONALD'S MENU ANALYSIS

McDonald’s food items are often controversial because of their high fat and sodium content. Using this dataset from Kaggle, I will perform a nutrition analysis of every menu item, including salads, beverages, and desserts. 

How many calories does the average McDonald's value meal contain? How much do beverages, like soda or coffee, contribute to the overall caloric intake? Does ordered grilled chicken instead of crispy increase a sandwich's nutritional value? What about ordering egg whites instead of whole eggs? What is the least number of items could you order from the menu to meet one day's nutritional requirements?

The menu items and nutrition facts were scraped from the McDonald's website.

In [46]:
# Importing libraries
import pandas as pd
import numpy as np
import seaborn as sns
import re

In [47]:
# Reading McDonald's Menu
menu_df = pd.read_csv('menu.csv')
display(menu_df)

Unnamed: 0,Category,Item,Serving Size,Calories,Calories from Fat,Total Fat,Total Fat (% Daily Value),Saturated Fat,Saturated Fat (% Daily Value),Trans Fat,...,Carbohydrates,Carbohydrates (% Daily Value),Dietary Fiber,Dietary Fiber (% Daily Value),Sugars,Protein,Vitamin A (% Daily Value),Vitamin C (% Daily Value),Calcium (% Daily Value),Iron (% Daily Value)
0,Breakfast,Egg McMuffin,4.8 oz (136 g),300,120,13.0,20,5.0,25,0.0,...,31,10,4,17,3,17,10,0,25,15
1,Breakfast,Egg White Delight,4.8 oz (135 g),250,70,8.0,12,3.0,15,0.0,...,30,10,4,17,3,18,6,0,25,8
2,Breakfast,Sausage McMuffin,3.9 oz (111 g),370,200,23.0,35,8.0,42,0.0,...,29,10,4,17,2,14,8,0,25,10
3,Breakfast,Sausage McMuffin with Egg,5.7 oz (161 g),450,250,28.0,43,10.0,52,0.0,...,30,10,4,17,2,21,15,0,30,15
4,Breakfast,Sausage McMuffin with Egg Whites,5.7 oz (161 g),400,210,23.0,35,8.0,42,0.0,...,30,10,4,17,2,21,6,0,25,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
255,Smoothies & Shakes,McFlurry with Oreo Cookies (Small),10.1 oz (285 g),510,150,17.0,26,9.0,44,0.5,...,80,27,1,4,64,12,15,0,40,8
256,Smoothies & Shakes,McFlurry with Oreo Cookies (Medium),13.4 oz (381 g),690,200,23.0,35,12.0,58,1.0,...,106,35,1,5,85,15,20,0,50,10
257,Smoothies & Shakes,McFlurry with Oreo Cookies (Snack),6.7 oz (190 g),340,100,11.0,17,6.0,29,0.0,...,53,18,1,2,43,8,10,0,25,6
258,Smoothies & Shakes,McFlurry with Reese's Peanut Butter Cups (Medium),14.2 oz (403 g),810,290,32.0,50,15.0,76,1.0,...,114,38,2,9,103,21,20,0,60,6


In [48]:
menu_df.dtypes

Category                          object
Item                              object
Serving Size                      object
Calories                           int64
Calories from Fat                  int64
Total Fat                        float64
Total Fat (% Daily Value)          int64
Saturated Fat                    float64
Saturated Fat (% Daily Value)      int64
Trans Fat                        float64
Cholesterol                        int64
Cholesterol (% Daily Value)        int64
Sodium                             int64
Sodium (% Daily Value)             int64
Carbohydrates                      int64
Carbohydrates (% Daily Value)      int64
Dietary Fiber                      int64
Dietary Fiber (% Daily Value)      int64
Sugars                             int64
Protein                            int64
Vitamin A (% Daily Value)          int64
Vitamin C (% Daily Value)          int64
Calcium (% Daily Value)            int64
Iron (% Daily Value)               int64
dtype: object

In [49]:
menu_df['Category'].unique().tolist()

['Breakfast',
 'Beef & Pork',
 'Chicken & Fish',
 'Salads',
 'Snacks & Sides',
 'Desserts',
 'Beverages',
 'Coffee & Tea',
 'Smoothies & Shakes']

In [50]:
null_values = menu_df[menu_df['Serving Size'].isnull()]
null_values_lines = null_values.any(axis=1)
print(null_values_lines)

Series([], dtype: bool)


In [51]:
menu_df = menu_df.rename(columns={'Category': 'category', 
                                            'Item': 'item',
                                            'Serving Size': 'serving_size_grams',
                                            'Calories': 'calories',
                                            'Calories from Fat': 'calories_from_fat',
                                            'Total Fat': 'total_fat',
                                            'Total Fat (% Daily Value)': 'total_fat_daily_value%',
                                            'Saturated Fat': 'saturated_fat',
                                            'Saturated Fat (% Daily Value)': 'saturated_fat_daily_value%',
                                            'Trans Fat': 'trans_fat',
                                            'Cholesterol': 'cholesterol',
                                            'Cholesterol (% Daily Value)': 'cholesterol_daily_value%',
                                            'Sodium': 'sodium',
                                            'Sodium (% Daily Value)': 'sodium_daily_value%',
                                            'Carbohydrates': 'carbohydrates',
                                            'Carbohydrates (% Daily Value)': 'carbohydrates_daily_value%',
                                            'Dietary Fiber': 'dietary_fiber',
                                            'Dietary Fiber (% Daily Value)': 'dietary_fiber_daily_value%',
                                            'Sugars': 'sugars',
                                            'Protein': 'protein',
                                            'Vitamin A (% Daily Value)': 'vitaminA_daily_value%',
                                            'Vitamin C (% Daily Value)': 'vitaminC_daily_value%',
                                            'Calcium (% Daily Value)': 'calcium_daily_value%',
                                            'Iron (% Daily Value)': 'iron_daily_value%'
                                            })
display(menu_df)

Unnamed: 0,category,item,serving_size_grams,calories,calories_from_fat,total_fat,total_fat_daily_value%,saturated_fat,saturated_fat_daily_value%,trans_fat,...,carbohydrates,carbohydrates_daily_value%,dietary_fiber,dietary_fiber_daily_value%,sugars,protein,vitaminA_daily_value%,vitaminC_daily_value%,calcium_daily_value%,iron_daily_value%
0,Breakfast,Egg McMuffin,4.8 oz (136 g),300,120,13.0,20,5.0,25,0.0,...,31,10,4,17,3,17,10,0,25,15
1,Breakfast,Egg White Delight,4.8 oz (135 g),250,70,8.0,12,3.0,15,0.0,...,30,10,4,17,3,18,6,0,25,8
2,Breakfast,Sausage McMuffin,3.9 oz (111 g),370,200,23.0,35,8.0,42,0.0,...,29,10,4,17,2,14,8,0,25,10
3,Breakfast,Sausage McMuffin with Egg,5.7 oz (161 g),450,250,28.0,43,10.0,52,0.0,...,30,10,4,17,2,21,15,0,30,15
4,Breakfast,Sausage McMuffin with Egg Whites,5.7 oz (161 g),400,210,23.0,35,8.0,42,0.0,...,30,10,4,17,2,21,6,0,25,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
255,Smoothies & Shakes,McFlurry with Oreo Cookies (Small),10.1 oz (285 g),510,150,17.0,26,9.0,44,0.5,...,80,27,1,4,64,12,15,0,40,8
256,Smoothies & Shakes,McFlurry with Oreo Cookies (Medium),13.4 oz (381 g),690,200,23.0,35,12.0,58,1.0,...,106,35,1,5,85,15,20,0,50,10
257,Smoothies & Shakes,McFlurry with Oreo Cookies (Snack),6.7 oz (190 g),340,100,11.0,17,6.0,29,0.0,...,53,18,1,2,43,8,10,0,25,6
258,Smoothies & Shakes,McFlurry with Reese's Peanut Butter Cups (Medium),14.2 oz (403 g),810,290,32.0,50,15.0,76,1.0,...,114,38,2,9,103,21,20,0,60,6


In [52]:
drink_df = menu_df.loc[(menu_df['category'].isin(['Beverages', 'Smoothies & Shakes', 'Coffee & Tea']))]
drink_df = drink_df.reset_index()
display(drink_df)

Unnamed: 0,index,category,item,serving_size_grams,calories,calories_from_fat,total_fat,total_fat_daily_value%,saturated_fat,saturated_fat_daily_value%,...,carbohydrates,carbohydrates_daily_value%,dietary_fiber,dietary_fiber_daily_value%,sugars,protein,vitaminA_daily_value%,vitaminC_daily_value%,calcium_daily_value%,iron_daily_value%
0,110,Beverages,Coca-Cola Classic (Small),16 fl oz cup,140,0,0.0,0,0.0,0,...,39,13,0,0,39,0,0,0,0,0
1,111,Beverages,Coca-Cola Classic (Medium),21 fl oz cup,200,0,0.0,0,0.0,0,...,55,18,0,0,55,0,0,0,0,0
2,112,Beverages,Coca-Cola Classic (Large),30 fl oz cup,280,0,0.0,0,0.0,0,...,76,25,0,0,76,0,0,0,0,0
3,113,Beverages,Coca-Cola Classic (Child),12 fl oz cup,100,0,0.0,0,0.0,0,...,28,9,0,0,28,0,0,0,0,0
4,114,Beverages,Diet Coke (Small),16 fl oz cup,0,0,0.0,0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,255,Smoothies & Shakes,McFlurry with Oreo Cookies (Small),10.1 oz (285 g),510,150,17.0,26,9.0,44,...,80,27,1,4,64,12,15,0,40,8
146,256,Smoothies & Shakes,McFlurry with Oreo Cookies (Medium),13.4 oz (381 g),690,200,23.0,35,12.0,58,...,106,35,1,5,85,15,20,0,50,10
147,257,Smoothies & Shakes,McFlurry with Oreo Cookies (Snack),6.7 oz (190 g),340,100,11.0,17,6.0,29,...,53,18,1,2,43,8,10,0,25,6
148,258,Smoothies & Shakes,McFlurry with Reese's Peanut Butter Cups (Medium),14.2 oz (403 g),810,290,32.0,50,15.0,76,...,114,38,2,9,103,21,20,0,60,6


In [53]:
food_df = menu_df.loc[(~menu_df['category'].isin(drink_df['category']))]
food_df = food_df.reset_index()
display(food_df)

Unnamed: 0,index,category,item,serving_size_grams,calories,calories_from_fat,total_fat,total_fat_daily_value%,saturated_fat,saturated_fat_daily_value%,...,carbohydrates,carbohydrates_daily_value%,dietary_fiber,dietary_fiber_daily_value%,sugars,protein,vitaminA_daily_value%,vitaminC_daily_value%,calcium_daily_value%,iron_daily_value%
0,0,Breakfast,Egg McMuffin,4.8 oz (136 g),300,120,13.0,20,5.0,25,...,31,10,4,17,3,17,10,0,25,15
1,1,Breakfast,Egg White Delight,4.8 oz (135 g),250,70,8.0,12,3.0,15,...,30,10,4,17,3,18,6,0,25,8
2,2,Breakfast,Sausage McMuffin,3.9 oz (111 g),370,200,23.0,35,8.0,42,...,29,10,4,17,2,14,8,0,25,10
3,3,Breakfast,Sausage McMuffin with Egg,5.7 oz (161 g),450,250,28.0,43,10.0,52,...,30,10,4,17,2,21,15,0,30,15
4,4,Breakfast,Sausage McMuffin with Egg Whites,5.7 oz (161 g),400,210,23.0,35,8.0,42,...,30,10,4,17,2,21,6,0,25,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,105,Desserts,Oatmeal Raisin Cookie,1 cookie (33 g),150,50,6.0,9,2.5,13,...,22,7,1,3,13,2,2,0,2,6
106,106,Desserts,Kids Ice Cream Cone,1 oz (29 g),45,10,1.5,2,1.0,4,...,7,2,0,0,6,1,2,0,4,0
107,107,Desserts,Hot Fudge Sundae,6.3 oz (179 g),330,80,9.0,14,7.0,34,...,53,18,1,3,48,8,8,0,25,8
108,108,Desserts,Hot Caramel Sundae,6.4 oz (182 g),340,70,8.0,12,5.0,24,...,60,20,0,0,43,7,10,0,25,0


In [54]:
food_df['serving_size_grams'].unique().tolist()

['4.8 oz (136 g)',
 '4.8 oz (135 g)',
 '3.9 oz (111 g)',
 '5.7 oz (161 g)',
 '6.5 oz (185 g)',
 '5.3 oz (150 g)',
 '5.8 oz (164 g)',
 '5.4 oz (153 g)',
 '5.9 oz (167 g)',
 '4.1 oz (117 g)',
 '4.6 oz (131 g)',
 '5.7 oz (163 g)',
 '6.2 oz (177 g)',
 '6.4 oz (181 g)',
 '5 oz (143 g)',
 '5.5 oz (157 g)',
 '7.1 oz (201 g)',
 '6.1 oz (174 g)',
 '6.3 oz (178 g)',
 '5 oz (141 g)',
 '7.2 oz (205 g)',
 '6.9 oz (197 g)',
 '8.5 oz (241 g)',
 '9.5 oz (269 g)',
 '10 oz (283 g)',
 '9.6 oz (272 g)',
 '10.1 oz (286 g)',
 '14.8 oz (420 g)',
 '15.3 oz (434 g)',
 '14.9 oz (423 g)',
 '15.4 oz (437 g)',
 '5.3 oz (151 g)',
 '6.8 oz (192 g)',
 '2 oz (56 g)',
 '4 oz (114 g)',
 '9.6 oz (251 g)',
 '7.4 oz (211 g)',
 '7.1 oz (202 g)',
 '8 oz (227 g)',
 '8.3 oz (235 g)',
 '8.6 oz (244 g)',
 '3.5 oz (98 g)',
 '4 oz (113 g)',
 '9.5 oz (270 g)',
 '5.2 oz (147 g)',
 '6.7 oz (190 g)',
 '5.6 oz (159 g)',
 '7.3 oz (208 g)',
 '7.5 oz (213 g)',
 '7 oz (200 g)',
 '8.8 oz (249 g)',
 '8.1 oz (230 g)',
 '7.6 oz (217 g)',
 '10 

In [55]:
food_df['serving_size_grams'] = food_df['serving_size_grams'].str.split('(').str.get(1)
food_df['serving_size_grams'] = food_df['serving_size_grams'].str.replace('g)', '')
display(food_df)

Unnamed: 0,index,category,item,serving_size_grams,calories,calories_from_fat,total_fat,total_fat_daily_value%,saturated_fat,saturated_fat_daily_value%,...,carbohydrates,carbohydrates_daily_value%,dietary_fiber,dietary_fiber_daily_value%,sugars,protein,vitaminA_daily_value%,vitaminC_daily_value%,calcium_daily_value%,iron_daily_value%
0,0,Breakfast,Egg McMuffin,136,300,120,13.0,20,5.0,25,...,31,10,4,17,3,17,10,0,25,15
1,1,Breakfast,Egg White Delight,135,250,70,8.0,12,3.0,15,...,30,10,4,17,3,18,6,0,25,8
2,2,Breakfast,Sausage McMuffin,111,370,200,23.0,35,8.0,42,...,29,10,4,17,2,14,8,0,25,10
3,3,Breakfast,Sausage McMuffin with Egg,161,450,250,28.0,43,10.0,52,...,30,10,4,17,2,21,15,0,30,15
4,4,Breakfast,Sausage McMuffin with Egg Whites,161,400,210,23.0,35,8.0,42,...,30,10,4,17,2,21,6,0,25,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,105,Desserts,Oatmeal Raisin Cookie,33,150,50,6.0,9,2.5,13,...,22,7,1,3,13,2,2,0,2,6
106,106,Desserts,Kids Ice Cream Cone,29,45,10,1.5,2,1.0,4,...,7,2,0,0,6,1,2,0,4,0
107,107,Desserts,Hot Fudge Sundae,179,330,80,9.0,14,7.0,34,...,53,18,1,3,48,8,8,0,25,8
108,108,Desserts,Hot Caramel Sundae,182,340,70,8.0,12,5.0,24,...,60,20,0,0,43,7,10,0,25,0


In [56]:
null_values = food_df[food_df['serving_size_grams'].isnull()]
null_values_lines = null_values.any(axis=1)
print(null_values_lines)

Series([], dtype: bool)


In [57]:
drink_df = drink_df.rename(columns={'serving_size_grams': 'serving_size_ml'})
drink_df.head()

Unnamed: 0,index,category,item,serving_size_ml,calories,calories_from_fat,total_fat,total_fat_daily_value%,saturated_fat,saturated_fat_daily_value%,...,carbohydrates,carbohydrates_daily_value%,dietary_fiber,dietary_fiber_daily_value%,sugars,protein,vitaminA_daily_value%,vitaminC_daily_value%,calcium_daily_value%,iron_daily_value%
0,110,Beverages,Coca-Cola Classic (Small),16 fl oz cup,140,0,0.0,0,0.0,0,...,39,13,0,0,39,0,0,0,0,0
1,111,Beverages,Coca-Cola Classic (Medium),21 fl oz cup,200,0,0.0,0,0.0,0,...,55,18,0,0,55,0,0,0,0,0
2,112,Beverages,Coca-Cola Classic (Large),30 fl oz cup,280,0,0.0,0,0.0,0,...,76,25,0,0,76,0,0,0,0,0
3,113,Beverages,Coca-Cola Classic (Child),12 fl oz cup,100,0,0.0,0,0.0,0,...,28,9,0,0,28,0,0,0,0,0
4,114,Beverages,Diet Coke (Small),16 fl oz cup,0,0,0.0,0,0.0,0,...,0,0,0,0,0,0,0,0,0,0


In [58]:
drink_df['serving_size_ml'].unique().tolist()

['16 fl oz cup',
 '21 fl oz cup',
 '30 fl oz cup',
 '12 fl oz cup',
 '1 carton (236 ml)',
 '6 fl oz (177 ml)',
 '22 fl oz cup',
 '16.9 fl oz',
 '20 fl oz cup',
 '32 fl oz cup',
 '10.9 oz (310 g)',
 '16.2 oz (460 g)',
 '7.3 oz (207 g)',
 '10.1 oz (285 g)',
 '13.4 oz (381 g)',
 '6.7 oz (190 g)',
 '14.2 oz (403 g)',
 '7.1 oz (202 g)']

In [59]:
display(drink_df)

Unnamed: 0,index,category,item,serving_size_ml,calories,calories_from_fat,total_fat,total_fat_daily_value%,saturated_fat,saturated_fat_daily_value%,...,carbohydrates,carbohydrates_daily_value%,dietary_fiber,dietary_fiber_daily_value%,sugars,protein,vitaminA_daily_value%,vitaminC_daily_value%,calcium_daily_value%,iron_daily_value%
0,110,Beverages,Coca-Cola Classic (Small),16 fl oz cup,140,0,0.0,0,0.0,0,...,39,13,0,0,39,0,0,0,0,0
1,111,Beverages,Coca-Cola Classic (Medium),21 fl oz cup,200,0,0.0,0,0.0,0,...,55,18,0,0,55,0,0,0,0,0
2,112,Beverages,Coca-Cola Classic (Large),30 fl oz cup,280,0,0.0,0,0.0,0,...,76,25,0,0,76,0,0,0,0,0
3,113,Beverages,Coca-Cola Classic (Child),12 fl oz cup,100,0,0.0,0,0.0,0,...,28,9,0,0,28,0,0,0,0,0
4,114,Beverages,Diet Coke (Small),16 fl oz cup,0,0,0.0,0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,255,Smoothies & Shakes,McFlurry with Oreo Cookies (Small),10.1 oz (285 g),510,150,17.0,26,9.0,44,...,80,27,1,4,64,12,15,0,40,8
146,256,Smoothies & Shakes,McFlurry with Oreo Cookies (Medium),13.4 oz (381 g),690,200,23.0,35,12.0,58,...,106,35,1,5,85,15,20,0,50,10
147,257,Smoothies & Shakes,McFlurry with Oreo Cookies (Snack),6.7 oz (190 g),340,100,11.0,17,6.0,29,...,53,18,1,2,43,8,10,0,25,6
148,258,Smoothies & Shakes,McFlurry with Reese's Peanut Butter Cups (Medium),14.2 oz (403 g),810,290,32.0,50,15.0,76,...,114,38,2,9,103,21,20,0,60,6


In [60]:
drink_df['unit'] = ''
matches = []
for index, value in enumerate(drink_df['serving_size_ml']):
    match = re.findall(r"(\d+\.?\d*)\s*(fl oz|oz|ml)", value)
    matches.append(match)
    
matches_length = {}
for i, x in enumerate(matches):
    length = len(x)
    matches_length[i] = length
    
unique_values =  set(matches_length.values())
print(unique_values)

# Encontrar os valores únicos
for i, x in matches_length.items():
    if x == 2:
        print(i, x)

{1, 2}
22 2


In [61]:
for index, value in enumerate(matches):
    if index != 22:
        drink_df.at[index, 'serving_size_ml'] = value[0][0]
        drink_df.at[index, 'unit'] = value[0][1]
    else:
        drink_df.at[index, 'serving_size_ml'] = value[1][0]
        drink_df.at[index, 'unit'] = value[1][1]

print(drink_df.at[22, 'serving_size_ml'], drink_df.at[22, 'unit'])

177 ml


In [62]:
drink_df.drop(['index'], axis='columns', inplace=True)
food_df.drop(['index'], axis='columns', inplace=True)
display(drink_df, food_df)

Unnamed: 0,category,item,serving_size_ml,calories,calories_from_fat,total_fat,total_fat_daily_value%,saturated_fat,saturated_fat_daily_value%,trans_fat,...,carbohydrates_daily_value%,dietary_fiber,dietary_fiber_daily_value%,sugars,protein,vitaminA_daily_value%,vitaminC_daily_value%,calcium_daily_value%,iron_daily_value%,unit
0,Beverages,Coca-Cola Classic (Small),16,140,0,0.0,0,0.0,0,0.0,...,13,0,0,39,0,0,0,0,0,fl oz
1,Beverages,Coca-Cola Classic (Medium),21,200,0,0.0,0,0.0,0,0.0,...,18,0,0,55,0,0,0,0,0,fl oz
2,Beverages,Coca-Cola Classic (Large),30,280,0,0.0,0,0.0,0,0.0,...,25,0,0,76,0,0,0,0,0,fl oz
3,Beverages,Coca-Cola Classic (Child),12,100,0,0.0,0,0.0,0,0.0,...,9,0,0,28,0,0,0,0,0,fl oz
4,Beverages,Diet Coke (Small),16,0,0,0.0,0,0.0,0,0.0,...,0,0,0,0,0,0,0,0,0,fl oz
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,Smoothies & Shakes,McFlurry with Oreo Cookies (Small),10.1,510,150,17.0,26,9.0,44,0.5,...,27,1,4,64,12,15,0,40,8,oz
146,Smoothies & Shakes,McFlurry with Oreo Cookies (Medium),13.4,690,200,23.0,35,12.0,58,1.0,...,35,1,5,85,15,20,0,50,10,oz
147,Smoothies & Shakes,McFlurry with Oreo Cookies (Snack),6.7,340,100,11.0,17,6.0,29,0.0,...,18,1,2,43,8,10,0,25,6,oz
148,Smoothies & Shakes,McFlurry with Reese's Peanut Butter Cups (Medium),14.2,810,290,32.0,50,15.0,76,1.0,...,38,2,9,103,21,20,0,60,6,oz


Unnamed: 0,category,item,serving_size_grams,calories,calories_from_fat,total_fat,total_fat_daily_value%,saturated_fat,saturated_fat_daily_value%,trans_fat,...,carbohydrates,carbohydrates_daily_value%,dietary_fiber,dietary_fiber_daily_value%,sugars,protein,vitaminA_daily_value%,vitaminC_daily_value%,calcium_daily_value%,iron_daily_value%
0,Breakfast,Egg McMuffin,136,300,120,13.0,20,5.0,25,0.0,...,31,10,4,17,3,17,10,0,25,15
1,Breakfast,Egg White Delight,135,250,70,8.0,12,3.0,15,0.0,...,30,10,4,17,3,18,6,0,25,8
2,Breakfast,Sausage McMuffin,111,370,200,23.0,35,8.0,42,0.0,...,29,10,4,17,2,14,8,0,25,10
3,Breakfast,Sausage McMuffin with Egg,161,450,250,28.0,43,10.0,52,0.0,...,30,10,4,17,2,21,15,0,30,15
4,Breakfast,Sausage McMuffin with Egg Whites,161,400,210,23.0,35,8.0,42,0.0,...,30,10,4,17,2,21,6,0,25,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,Desserts,Oatmeal Raisin Cookie,33,150,50,6.0,9,2.5,13,0.0,...,22,7,1,3,13,2,2,0,2,6
106,Desserts,Kids Ice Cream Cone,29,45,10,1.5,2,1.0,4,0.0,...,7,2,0,0,6,1,2,0,4,0
107,Desserts,Hot Fudge Sundae,179,330,80,9.0,14,7.0,34,0.0,...,53,18,1,3,48,8,8,0,25,8
108,Desserts,Hot Caramel Sundae,182,340,70,8.0,12,5.0,24,0.0,...,60,20,0,0,43,7,10,0,25,0


In [110]:
matches = []
for value in drink_df['serving_size_ml']:
    lines = re.findall(r"(\d+\.?\d*)\s*(fl oz|oz|ml)", value)
    matches.append(lines)
print(len(matches), matches, f"\n{lines}")

# serving_size = []
# unit = []
# for i in matches:
#     for number, un in i:
#         serving_size.append(number)
#         unit.append(un)
#         print(serving_size, unit)
# print(len(serving_size))

150 [[('16', 'fl oz')], [('21', 'fl oz')], [('30', 'fl oz')], [('12', 'fl oz')], [('16', 'fl oz')], [('21', 'fl oz')], [('30', 'fl oz')], [('12', 'fl oz')], [('16', 'fl oz')], [('21', 'fl oz')], [('30', 'fl oz')], [('12', 'fl oz')], [('16', 'fl oz')], [('21', 'fl oz')], [('30', 'fl oz')], [('12', 'fl oz')], [('16', 'fl oz')], [('21', 'fl oz')], [('30', 'fl oz')], [('12', 'fl oz')], [('236', 'ml')], [('236', 'ml')], [('6', 'fl oz'), ('177', 'ml')], [('12', 'fl oz')], [('16', 'fl oz')], [('22', 'fl oz')], [('16.9', 'fl oz')], [('16', 'fl oz')], [('21', 'fl oz')], [('30', 'fl oz')], [('12', 'fl oz')], [('16', 'fl oz')], [('21', 'fl oz')], [('30', 'fl oz')], [('12', 'fl oz')], [('12', 'fl oz')], [('16', 'fl oz')], [('16', 'fl oz')], [('12', 'fl oz')], [('16', 'fl oz')], [('20', 'fl oz')], [('12', 'fl oz')], [('16', 'fl oz')], [('20', 'fl oz')], [('12', 'fl oz')], [('16', 'fl oz')], [('20', 'fl oz')], [('12', 'fl oz')], [('16', 'fl oz')], [('20', 'fl oz')], [('12', 'fl oz')], [('16', 'fl oz

In [59]:
serving_size = []
unit = []
for number, un in matches:
    serving_size.append(number)
    unit.append(un)
    print(serving_size, unit)
print(len(serving_size))

ValueError: not enough values to unpack (expected 2, got 1)

In [29]:
drink_df['serving_size_ml'] = serving_size
drink_df.head()

ValueError: Length of values (151) does not match length of index (150)

In [70]:
drink_df['serving_size_ml'] = drink_df['serving_size_ml'].replace('cup','')
drink_df['serving_size_ml'].unique().tolist() 

['16 fl oz cup',
 '21 fl oz cup',
 '30 fl oz cup',
 '12 fl oz cup',
 '1 carton (236 ml)',
 '6 fl oz (177 ml)',
 '22 fl oz cup',
 '16.9 fl oz',
 '20 fl oz cup',
 '32 fl oz cup',
 '10.9 oz (310 g)',
 '16.2 oz (460 g)',
 '7.3 oz (207 g)',
 '10.1 oz (285 g)',
 '13.4 oz (381 g)',
 '6.7 oz (190 g)',
 '14.2 oz (403 g)',
 '7.1 oz (202 g)']

In [37]:
drink_df[['serving_size_ml', '1', '2', '3', 'un']] = drink_df['serving_size_ml'].str.split(' ', expand=True)
drink_df.head()

Unnamed: 0,category,item,serving_size_ml,calories,calories_from_fat,total_fat,total_fat_daily_value%,saturated_fat,saturated_fat_daily_value%,trans_fat,...,sugars,protein,vitaminA_daily_value%,vitaminC_daily_value%,calcium_daily_value%,iron_daily_value%,1,2,3,un
110,Beverages,Coca-Cola Classic (Small),16,140,0,0.0,0,0.0,0,0.0,...,39,0,0,0,0,0,fl,oz,cup,
111,Beverages,Coca-Cola Classic (Medium),21,200,0,0.0,0,0.0,0,0.0,...,55,0,0,0,0,0,fl,oz,cup,
112,Beverages,Coca-Cola Classic (Large),30,280,0,0.0,0,0.0,0,0.0,...,76,0,0,0,0,0,fl,oz,cup,
113,Beverages,Coca-Cola Classic (Child),12,100,0,0.0,0,0.0,0,0.0,...,28,0,0,0,0,0,fl,oz,cup,
114,Beverages,Diet Coke (Small),16,0,0,0.0,0,0.0,0,0.0,...,0,0,0,0,0,0,fl,oz,cup,


In [41]:
display(drink_df)

Unnamed: 0,category,item,serving_size_ml,calories,calories_from_fat,total_fat,total_fat_daily_value%,saturated_fat,saturated_fat_daily_value%,trans_fat,...,sugars,protein,vitaminA_daily_value%,vitaminC_daily_value%,calcium_daily_value%,iron_daily_value%,1,2,3,un
110,Beverages,Coca-Cola Classic (Small),16,140,0,0.0,0,0.0,0,0.0,...,39,0,0,0,0,0,fl,oz,cup,
111,Beverages,Coca-Cola Classic (Medium),21,200,0,0.0,0,0.0,0,0.0,...,55,0,0,0,0,0,fl,oz,cup,
112,Beverages,Coca-Cola Classic (Large),30,280,0,0.0,0,0.0,0,0.0,...,76,0,0,0,0,0,fl,oz,cup,
113,Beverages,Coca-Cola Classic (Child),12,100,0,0.0,0,0.0,0,0.0,...,28,0,0,0,0,0,fl,oz,cup,
114,Beverages,Diet Coke (Small),16,0,0,0.0,0,0.0,0,0.0,...,0,0,0,0,0,0,fl,oz,cup,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
255,Smoothies & Shakes,McFlurry with Oreo Cookies (Small),10.1,510,150,17.0,26,9.0,44,0.5,...,64,12,15,0,40,8,oz,(285,g),
256,Smoothies & Shakes,McFlurry with Oreo Cookies (Medium),13.4,690,200,23.0,35,12.0,58,1.0,...,85,15,20,0,50,10,oz,(381,g),
257,Smoothies & Shakes,McFlurry with Oreo Cookies (Snack),6.7,340,100,11.0,17,6.0,29,0.0,...,43,8,10,0,25,6,oz,(190,g),
258,Smoothies & Shakes,McFlurry with Reese's Peanut Butter Cups (Medium),14.2,810,290,32.0,50,15.0,76,1.0,...,103,21,20,0,60,6,oz,(403,g),


In [45]:
unique_values_dict = {col: drink_df[col].unique().tolist() for col in ['1', '2', '3', 'un']}

In [46]:
print(unique_values_dict)

{'1': ['fl', 'carton', 'oz'], '2': ['oz', '(236', '(310', '(460', '(207', '(285', '(381', '(190', '(403', '(202'], '3': ['cup', 'ml)', '(177', None, 'g)'], 'un': [None, 'ml)']}


In [None]:
for x in drink_df['serving_size_ml']:
    if

In [63]:
serving_size_list = menu_df['Serving Size'].tolist()
print(serving_size_list)

[['4.8', 'oz', '(136', 'g)'], ['4.8', 'oz', '(135', 'g)'], ['3.9', 'oz', '(111', 'g)'], ['5.7', 'oz', '(161', 'g)'], ['5.7', 'oz', '(161', 'g)'], ['6.5', 'oz', '(185', 'g)'], ['5.3', 'oz', '(150', 'g)'], ['5.8', 'oz', '(164', 'g)'], ['5.4', 'oz', '(153', 'g)'], ['5.9', 'oz', '(167', 'g)'], ['4.1', 'oz', '(117', 'g)'], ['4.6', 'oz', '(131', 'g)'], ['5.7', 'oz', '(163', 'g)'], ['6.2', 'oz', '(177', 'g)'], ['5.9', 'oz', '(167', 'g)'], ['6.4', 'oz', '(181', 'g)'], ['5', 'oz', '(143', 'g)'], ['5.5', 'oz', '(157', 'g)'], ['7.1', 'oz', '(201', 'g)'], ['6.1', 'oz', '(174', 'g)'], ['6.3', 'oz', '(178', 'g)'], ['5', 'oz', '(141', 'g)'], ['7.1', 'oz', '(201', 'g)'], ['7.2', 'oz', '(205', 'g)'], ['6.9', 'oz', '(197', 'g)'], ['7.1', 'oz', '(201', 'g)'], ['8.5', 'oz', '(241', 'g)'], ['9.5', 'oz', '(269', 'g)'], ['10', 'oz', '(283', 'g)'], ['9.6', 'oz', '(272', 'g)'], ['10.1', 'oz', '(286', 'g)'], ['14.8', 'oz', '(420', 'g)'], ['15.3', 'oz', '(434', 'g)'], ['14.9', 'oz', '(423', 'g)'], ['15.4', 'oz',

In [49]:
menu_df = menu_df.rename(columns={'Category': 'category', 
                                            'Item': 'item',
                                            'Serving Size': 'serving_size_grams',
                                            'Calories': 'calories',
                                            'Calories from Fat': 'calories_from_fat',
                                            'Total Fat': 'total_fat',
                                            'Total Fat (% Daily Value)': 'total_fat_daily_value%',
                                            'Saturated Fat': 'saturated_fat',
                                            'Saturated Fat (% Daily Value)': 'saturated_fat_daily_value%',
                                            'Trans Fat': 'trans_fat',
                                            'Cholesterol': 'cholesterol',
                                            'Cholesterol (% Daily Value)': 'cholesterol_daily_value%',
                                            'Sodium': 'sodium',
                                            'Sodium (% Daily Value)': 'sodium_daily_value%',
                                            'Carbohydrates': 'carbohydrates',
                                            'Carbohydrates (% Daily Value)': 'carbohydrates_daily_value%',
                                            'Dietary Fiber': 'dietary_fiber',
                                            'Dietary Fiber (% Daily Value)': 'dietary_fiber_daily_value%',
                                            'Sugars': 'sugars',
                                            'Protein': 'protein',
                                            'Vitamin A (% Daily Value)': 'vitaminA_daily_value%',
                                            'Vitamin C (% Daily Value)': 'vitaminC_daily_value%',
                                            'Calcium (% Daily Value)': 'calcium_daily_value%',
                                            'Iron (% Daily Value)': 'iron_daily_value%'
                                            })

In [50]:
display(menu_df)

Unnamed: 0,category,item,serving_size_grams,calories,calories_from_fat,total_fat,total_fat_daily_value%,saturated_fat,saturated_fat_daily_value%,trans_fat,...,carbohydrates,carbohydrates_daily_value%,dietary_fiber,dietary_fiber_daily_value%,sugars,protein,vitaminA_daily_value%,vitaminC_daily_value%,calcium_daily_value%,iron_daily_value%
0,Breakfast,Egg McMuffin,136,300,120,13.0,20,5.0,25,0.0,...,31,10,4,17,3,17,10,0,25,15
1,Breakfast,Egg White Delight,135,250,70,8.0,12,3.0,15,0.0,...,30,10,4,17,3,18,6,0,25,8
2,Breakfast,Sausage McMuffin,111,370,200,23.0,35,8.0,42,0.0,...,29,10,4,17,2,14,8,0,25,10
3,Breakfast,Sausage McMuffin with Egg,161,450,250,28.0,43,10.0,52,0.0,...,30,10,4,17,2,21,15,0,30,15
4,Breakfast,Sausage McMuffin with Egg Whites,161,400,210,23.0,35,8.0,42,0.0,...,30,10,4,17,2,21,6,0,25,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
255,Smoothies & Shakes,McFlurry with Oreo Cookies (Small),285,510,150,17.0,26,9.0,44,0.5,...,80,27,1,4,64,12,15,0,40,8
256,Smoothies & Shakes,McFlurry with Oreo Cookies (Medium),381,690,200,23.0,35,12.0,58,1.0,...,106,35,1,5,85,15,20,0,50,10
257,Smoothies & Shakes,McFlurry with Oreo Cookies (Snack),190,340,100,11.0,17,6.0,29,0.0,...,53,18,1,2,43,8,10,0,25,6
258,Smoothies & Shakes,McFlurry with Reese's Peanut Butter Cups (Medium),403,810,290,32.0,50,15.0,76,1.0,...,114,38,2,9,103,21,20,0,60,6


In [14]:
menu_df[['category', 'item']] = menu_df[['category', 'item']].astype(str)

In [51]:
null_values = menu_df[menu_df['serving_size_grams'].isnull()]
null_values_lines = null_values.any(axis=1)
print(null_values_lines)

110    True
111    True
112    True
113    True
114    True
       ... 
247    True
248    True
249    True
250    True
251    True
Length: 139, dtype: bool
