## Load data into the Jupyter notebook

In [1]:
# Dependencies for accessing MySQL database
from sqlalchemy import create_engine
from config import password

# Dependencies for data analyses and dataframe building
import pandas as pd
import numpy as np
import re

In [2]:
# Query the ingredient-level nutrition rows, joined to the recipes table so
# that each row also carries the Occasion of its recipe.
# NOTE(review): LIMIT 1000 is applied without an ORDER BY, so which rows come
# back is up to the database — confirm the cap is intentional.
query = (
    "select I.Recipe_No, I.Dish, R.Occasion, I.Ingredient, I.Amount_gm, I.Carbohydrate_gm, "
    "I.Protein_gm, I.Fat_gm, I.Energy_kcal from ingredients as I inner join recipes as R on R.Recipe_No = I.Recipe_No "
    "limit 1000"
)

# Build the engine for the local MySQL database
engine = create_engine(f"mysql+pymysql://root:{password}@localhost/nutrition_db")

# Open the connection in a context manager so it is closed even when the
# query raises, instead of relying on a trailing close() call
with engine.connect() as db_conn:
    df = pd.read_sql(query, db_conn)

In [3]:
# Preview the first five rows of the loaded dataframe
df.head(n=5)

Unnamed: 0,Recipe_No,Dish,Occasion,Ingredient,Amount_gm,Carbohydrate_gm,Protein_gm,Fat_gm,Energy_kcal
0,86,Chicken korma,Lunch,Almond,10.0,1.05,2.08,5.89,65.5
1,13,Amaranth fry,Lunch,Amaranth,75.0,5.55,2.1,0.3,33.0
2,18,Chicken biryani,Lunch,Apricot,25.0,18.35,0.4,0.175,76.5
3,18,Chicken biryani,Dinner,Apricot,25.0,18.35,0.4,0.175,76.5
4,19,Mutton biryani,Lunch,Apricot,25.0,18.35,0.4,0.175,76.5


In [4]:
# Number of distinct recipe numbers (missing values, if any, count as one value)
df["Recipe_No"].nunique(dropna=False)

155

## Summary statistics

In [5]:
# Average nutrient values across the ingredient rows of each recipe.
# NOTE(review): this is a mean over ingredient rows, grouped by Recipe_No; it
# is not the recipe's total nutrient content — confirm a mean is what is wanted.
(
    df.groupby("Recipe_No")[["Carbohydrate_gm", "Protein_gm", "Fat_gm", "Energy_kcal"]]
    .agg("mean")
)

Unnamed: 0_level_0,Carbohydrate_gm,Protein_gm,Fat_gm,Energy_kcal
Recipe_No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2.635000,0.301667,3.351667,41.866667
2,5.650000,0.150000,10.025000,114.250000
3,3.390000,24.000000,7.515000,82.050000
4,3.390000,24.000000,3.515000,46.050000
5,6.566200,1.138600,1.272600,96.128000
...,...,...,...,...
160,5.600000,1.402000,2.236000,47.120000
161,11.610000,1.552500,0.137500,53.825000
162,1.490000,0.315000,0.005000,7.250000
163,1.571667,0.490000,3.534167,40.853333


In [6]:
# Average nutrient values across all ingredient rows recorded for each occasion
(
    df[["Occasion", "Carbohydrate_gm", "Protein_gm", "Fat_gm", "Energy_kcal"]]
    .groupby("Occasion")
    .mean()
)

Unnamed: 0_level_0,Carbohydrate_gm,Protein_gm,Fat_gm,Energy_kcal
Occasion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AM Snack,9.09819,4.919786,5.51306,91.870357
Breakfast,7.212825,4.578994,4.643865,88.649415
Dinner,4.633714,2.091984,4.718806,71.427083
Lunch,3.864017,2.425156,4.546872,63.185969
PM Snack,6.437451,5.329588,5.655662,82.402794


In [7]:
# Calculate serving portion (total grams of ingredients) per recipe.
# df repeats a recipe's ingredient rows once for every Occasion the recipe is
# listed under (e.g. recipe 18 / Apricot appears for both Lunch and Dinner), so
# summing Amount_gm over all rows would count those ingredients several times.
# Keep only the rows of one occasion per recipe before summing.
one_occasion_per_recipe = df.groupby("Recipe_No")["Occasion"].transform("first")

# Rows whose recipe has no occasion at all are kept as they are
is_counted_once = df["Occasion"].eq(one_occasion_per_recipe) | one_occasion_per_recipe.isna()

df[is_counted_once].groupby("Recipe_No")["Amount_gm"].sum()

Recipe_No
1       65.0
2       70.0
3      180.0
4       74.0
5      168.0
       ...  
160    450.0
161    165.0
162      5.0
163    210.0
164    200.0
Name: Amount_gm, Length: 155, dtype: float64

In [8]:
# Count the distinct dish names present in the dataframe and report the total
number_of_dishes = df["Dish"].nunique(dropna=False)
print(f"There were {number_of_dishes} dishes benchmarked in the study.")

There were 149 dishes benchmarked in the study.


In [9]:
# Count the distinct recipe numbers present in the dataframe and report the total
number_of_recipes = df["Recipe_No"].nunique(dropna=False)
print(f"There were {number_of_recipes} recipes documented in the study.")

There were 155 recipes documented in the study.
