In [None]:
import pandas as pd
from sqlalchemy import create_engine
import numpy as np

## Extract CSV Files

In [None]:
# Paths to the raw menu CSV files, relative to the notebook's working directory.
mcd_file = "Resources/mcd_menu.csv"
bk_mcd_file = "Resources/bk_mcd_menu.csv"
starbucks_food_file = "Resources/starbucks_food_menu.csv"
starbucks_drink_file = "Resources/starbucks_drink_menu.csv"
subway_file = "Resources/subway_menu.csv"

In [None]:
# Load each raw menu CSV into its own dataframe.
mcd_df = pd.read_csv(mcd_file)
# This file is read with ';' as the field separator; the others use the default ','.
bk_mcd_df = pd.read_csv(bk_mcd_file, delimiter=';')
starbucks_food_df = pd.read_csv(starbucks_food_file)
starbucks_drink_df = pd.read_csv(starbucks_drink_file)
subway_df = pd.read_csv(subway_file)

In [None]:
# Mike's code here

In [None]:
# Preview the first rows of the raw Subway menu dataframe.
subway_df.head()

In [None]:
# Nick's code here

In [None]:
# Preview the first rows of the raw McDonald's menu dataframe.
mcd_df.head()

In [None]:
# Source columns to carry over from the raw McDonald's menu.
mcd_cols = ["Category", "Item", "Saturated Fat", "Calories"]

# Select those columns into an independent copy and switch the headers to the
# snake_case names used for the transformed tables in this notebook.
mcd_transformed = (
    mcd_df[mcd_cols]
    .copy()
    .rename(
        columns={
            "Category": "category",
            "Item": "item",
            "Saturated Fat": "saturated_fat",
            "Calories": "calories",
        }
    )
)

mcd_transformed.head()

In [None]:
# List the distinct category values (the food_class assignment that follows is built from these by hand).
mcd_transformed['category'].unique()

In [None]:
# Manually assign each category to a numeric food_class:
#   3 -> meal-type categories, 2 -> drink categories, 1 -> desserts.
mcd_food_categories = ['Breakfast', 'Beef & Pork', 'Chicken & Fish', 'Salads', 'Snacks & Sides']
mcd_drink_categories = ['Beverages', 'Smoothies & Shakes', 'Coffee & Tea']
mcd_dessert_categories = ['Desserts']

# One boolean mask per class, in the same order as `values`.
conditions = [
    mcd_transformed['category'].isin(mcd_food_categories),
    mcd_transformed['category'].isin(mcd_drink_categories),
    mcd_transformed['category'].isin(mcd_dessert_categories),
]

values = [3, 2, 1]

# Any category not listed above gets food_class 0 (np.select's default, made explicit here).
mcd_transformed['food_class'] = np.select(conditions, values, default=0)

mcd_transformed.head()

In [None]:
# Preview the first rows of the raw combined Burger King / McDonald's menu dataframe.
bk_mcd_df.head()

In [None]:
# Source columns to carry over from the raw Burger King / McDonald's menu.
bk_mcd_cols = ["Chain", "Type", "Item", "Saturated Fat (g)", "Calories"]

# Select those columns into an independent copy and rename the headers to the
# snake_case names used for the transformed tables ("Chain" is left as-is).
bk_mcd_transformed = (
    bk_mcd_df[bk_mcd_cols]
    .copy()
    .rename(
        columns={
            "Type": "category",
            "Item": "item",
            "Saturated Fat (g)": "saturated_fat",
            "Calories": "calories",
        }
    )
)

bk_mcd_transformed.head()

In [None]:
# List the distinct category values (the food_class assignment that follows is built from these by hand).
bk_mcd_transformed['category'].unique()

In [None]:
# Manually assign each category to a numeric food_class:
#   3 -> meal-type categories, 2 -> drink categories, 1 -> desserts.
bk_mcd_food_categories = [
    'Whopper Sandwiches', 'Flame Broiled Burgers', 'Chicken & More', 'Salads & Sides',
    'King Jr Meals - Entrees', 'King Jr Meals - Sides', 'Breakfast', 'Additional Options',
    'Sandwiches', 'French Fries', 'Chicken & Sauce', 'Salads', 'Salad Dressings',
]
bk_mcd_drink_categories = [
    'Beverages', 'McCafe Coffees', 'King Jr Meals - Beverages', 'Shakes/Smoothies',
    'Soft Drinks', 'Hot Coffees', 'Iced Coffees', 'Frappes',
    'McCafe Coffees - Nonfat Milk', 'McCafe Coffees - Whole Milk',
    'McCafe Frappes', 'McCafe Smoothies',
]
# 'King Jr Meals - Desserts' only needs to be listed once.
bk_mcd_dessert_categories = ['Desserts', 'King Jr Meals - Desserts', 'Desserts/Shakes']

# One boolean mask per class, in the same order as `values`.
conditions = [
    bk_mcd_transformed['category'].isin(bk_mcd_food_categories),
    bk_mcd_transformed['category'].isin(bk_mcd_drink_categories),
    bk_mcd_transformed['category'].isin(bk_mcd_dessert_categories),
]

values = [3, 2, 1]

# Any category not listed above gets food_class 0 (np.select's default, made explicit here).
bk_mcd_transformed['food_class'] = np.select(conditions, values, default=0)

bk_mcd_transformed.head()

In [None]:
# Remove bad data: rows whose saturated_fat is the ' -   ' placeholder found in the original csv.
# .copy() makes the filtered result an independent frame, so assigning to its
# columns afterwards does not trigger pandas' SettingWithCopyWarning.
bk_mcd_transformed = bk_mcd_transformed[bk_mcd_transformed['saturated_fat'] != ' -   '].copy()

In [None]:
# Convert the ',' decimal separator to '.' and cast the column to float64.
# .assign builds a new frame instead of writing into the filtered frame in place,
# which avoids SettingWithCopyWarning; regex=False makes the replacement a
# plain literal substitution.
bk_mcd_transformed = bk_mcd_transformed.assign(
    saturated_fat=bk_mcd_transformed['saturated_fat']
    .str.replace(',', '.', regex=False)
    .astype('float64')
)

In [None]:
# Split the table into separate Burger King and McDonald's dataframes,
# selecting rows by the value of the 'Chain' column.
from_burger_king = bk_mcd_transformed['Chain'].eq('Burger King')
from_mcdonalds = bk_mcd_transformed['Chain'].eq('Mc Donalds')

bk_transformed = bk_mcd_transformed[from_burger_king]
mcd_2_join_transformed = bk_mcd_transformed[from_mcdonalds]
mcd_2_join_transformed.head()

In [None]:
# Combine the two McDonald's sources into one table.
# With no `on=` argument, merge joins on every column the two frames share.
# An outer join keeps the rows from both frames; a right join would keep only
# the rows of mcd_2_join_transformed and discard everything unique to
# mcd_transformed. Rows that come only from mcd_transformed get NaN in 'Chain'.
mcd_transformed_combined = mcd_transformed.merge(mcd_2_join_transformed, how="outer")

# Preview only the first rows rather than rendering the whole frame.
mcd_transformed_combined.head()

In [None]:
# Remove the 'Chain' column from the dataframes.
# errors='ignore' keeps this cell safe to re-run: if 'Chain' has already been
# dropped, the call is a no-op instead of raising KeyError.
mcd_transformed_combined = mcd_transformed_combined.drop(columns=['Chain'], errors='ignore')
bk_transformed = bk_transformed.drop(columns=['Chain'], errors='ignore')

In [None]:
# Noah's code here

In [None]:
# Preview the first rows of the raw Starbucks food menu dataframe.
starbucks_food_df.head()

In [None]:
# Preview the first rows of the raw Starbucks drink menu dataframe.
starbucks_drink_df.head()