In [1]:
from tqdm import tqdm

from User import User
from Survey import Survey
from Intake import Intake

from heifa_composition import FoodComposition, IngredientInRecipe, RecipeComposition

import pandas as pd

In [2]:
def rename_columns(old_name:str, new_name: str, df: pd.DataFrame) -> None:
    """Rename a single column of ``df`` in place.

    Args:
        old_name: Current column label.
        new_name: Label that replaces ``old_name``.
        df: DataFrame to modify; the rename is applied to this object
            directly (``inplace=True``), no copy is returned.

    Returns:
        None.
    """
    column_mapping = {old_name: new_name}
    df.rename(columns=column_mapping, inplace=True)

# Breakdown of Intake 24:

The file has many users.

Each user has many surveys.

Each survey has many meal intakes.

Each intake consists of many food components.

Every food component is marked with a "Nutrition ID code".

In [3]:
# Load the Intake24 survey file and rename the columns used by the
# rest of the notebook.

intake24_df = pd.read_csv('files/intake24_survey_file.csv')

# Original Intake24 column label -> label used in this notebook.
intake24_column_names = {
    'Energy, with dietary fibre': 'energy_with_fibre',
    'Meal name': 'meal_name',
    'Survey ID': 'survey_id',
    'Intake24 food code': 'food_code',
    'User ID': 'user_id',
    'Meal ID': 'meal_id',
    'Nutrient table code': 'heifa_nutrient_id',
    'Portion size (g/ml)': 'portion_size_consumed',
}

for original_label, notebook_label in intake24_column_names.items():
    rename_columns(original_label, notebook_label, intake24_df)

In [4]:
# Layout of the data built in the following cells:
# - each user ID maps to that user's meals and information
# - each meal number maps to its ingredients
# - each ingredient maps to its nutrient code

user_dict = dict()

In [5]:
def loop_ingredients(food_df: pd.DataFrame) -> "Intake":
    """Build the ``Intake`` object for one meal.

    Args:
        food_df: Rows of the Intake24 frame that belong to a single meal
            (the caller filters on ``meal_id``).

    Returns:
        A new ``Intake`` on which ``add_food_information(food_df)`` has
        been called.
    """
    meal_intake = Intake()
    meal_intake.add_food_information(food_df)

    return meal_intake

def loop_meals(meal_ids_list: list, survey_meals_df: pd.DataFrame) -> "Survey":
    """Collect every meal of one survey into a ``Survey`` object.

    Args:
        meal_ids_list: Meal IDs to process, in the order they should be
            added.
        survey_meals_df: Rows of a single survey; must contain a
            ``meal_id`` column.

    Returns:
        A new ``Survey`` with one ``add_meal(meal_id, intake)`` call per
        entry of ``meal_ids_list``, where ``intake`` is the result of
        ``loop_ingredients`` on that meal's rows.
    """
    survey_info_meals = Survey()

    for meal_id in meal_ids_list:

        # Rows of this survey that belong to the current meal.
        food_intake_df = survey_meals_df.query("meal_id == @meal_id")
        meal_ingredients = loop_ingredients(food_intake_df)

        survey_info_meals.add_meal(meal_id, meal_ingredients)

    return survey_info_meals

In [6]:
# Step 1: Split the Intake24 rows by survey ID.

# Distinct survey IDs found in the file.
survey_ids_list = intake24_df['survey_id'].unique().tolist()

# Process one survey at a time.
for survey_id in tqdm(survey_ids_list, ncols=50):

    survey_meals_df = intake24_df.query("survey_id == @survey_id")

    # The user ID is taken from the survey's first row. Reuse the User
    # object if this user was seen before; otherwise create and store one.
    user_id = survey_meals_df['user_id'].values[0]
    user = user_dict.get(user_id)
    if user is None:
        user = User(user_id)
        user_dict[user_id] = user

    # Step 2: Split the survey further by meal ID.
    meal_ids_list = survey_meals_df['meal_id'].unique().tolist()

    # Step 3: Build the Survey object holding every meal of this survey.
    survey_meals_info = loop_meals(meal_ids_list, survey_meals_df)

    # Attach the populated survey to its user.
    user.add_survey(survey_id, survey_meals_info)

100%|███████████| 405/405 [00:10<00:00, 40.23it/s]


In [7]:
# Print the stored information of every user.
for user_id, user_obj in user_dict.items():
    user_obj.print_information()

Printing for Survey ID 5d3e72da-a241-4e8e-837b-021accf9a37c

HEIFA code(s) for Breakfast: ['04B10082', '02B10604', '09A10204', '01B10301']

Details for 04B10082:
 Portion size of 15.5g/ml
 Energy (Dietary Fibre included): 357.9 kJ

Details for 02B10604:
 Portion size of 54.0g/ml
 Energy (Dietary Fibre included): 638.28 kJ

Details for 09A10204:
 Portion size of 16.42g/ml
 Energy (Dietary Fibre included): 32.5 kJ

Details for 01B10301:
 Portion size of 194.92g/ml
 Energy (Dietary Fibre included): 11.7 kJ

HEIFA code(s) for Lunch: ['08E30320', '09A10204', '01B10301']

Details for 08E30320:
 Portion size of 51.75g/ml
 Energy (Dietary Fibre included): 918.05 kJ

Details for 09A10204:
 Portion size of 16.42g/ml
 Energy (Dietary Fibre included): 32.5 kJ

Details for 01B10301:
 Portion size of 195.2g/ml
 Energy (Dietary Fibre included): 11.71 kJ

HEIFA code(s) for Afternoon snack or drink: ['04B10082', '02B10604', '09A10204', '01B10301']

Details for 04B10082:
 Portion size of 12.4g/ml
 Energ

# Breakdown of HEIFA (Food Composition)

Every row in the file is a unique ingredient.

Every ingredient:
- has its own attributes.
- can be mapped to an 8-digit code (for HEIFA Recipe)
- is used as a divisor for either energy (kilojoules) or grams (g)

In [8]:
# Load the HEIFA food composition table (one ingredient per row).
heifa_food_path = 'files/heifa_food_composition.csv'
heifa_food_df = pd.read_csv(heifa_food_path)

In [9]:
# Rename the HEIFA food composition columns used by the rest of the
# notebook.

# Original column label -> label used in this notebook.
heifa_food_column_names = {
    'Nutrient table code': 'heifa_code',
    '8 digit code': 'eight_digit_code',
    'HEIFA Food Groups': 'food_group',
    'Energy or grams per Serve \n(HEIFA food groups)': 'serving_size',
    'Serving size unit of measure': 'serving_measure',
}

for original_label, notebook_label in heifa_food_column_names.items():
    rename_columns(original_label, notebook_label, heifa_food_df)

In [10]:
# Data cleaning: missing serving size / serving measure entries are
# replaced with the placeholder string 'N/A'.

filled_values = dict.fromkeys(['serving_size', 'serving_measure'], 'N/A')

heifa_food_df.fillna(value=filled_values, inplace=True)

In [11]:
def create_food_comp_objects(heifa_food_df):
    """Build one ``FoodComposition`` per row of the HEIFA food table.

    Args:
        heifa_food_df: DataFrame with the columns ``heifa_code``,
            ``eight_digit_code``, ``food_group``, ``serving_size`` and
            ``serving_measure``.

    Returns:
        dict mapping each row's ``heifa_code`` to the ``FoodComposition``
        built from that row. When a code appears in several rows, the
        entry from the later row replaces the earlier one.
    """
    copied_fields = (
        'heifa_code',
        'eight_digit_code',
        'food_group',
        'serving_size',
        'serving_measure',
    )
    food_by_code = {}

    def register_row(row):
        # Copy only the fields FoodComposition is given, in a fixed order.
        details = {field: row[field] for field in copied_fields}
        food_by_code[row['heifa_code']] = FoodComposition(details)

    # apply is used purely to visit every row; its result is discarded.
    heifa_food_df.apply(register_row, axis=1)

    return food_by_code

# Build the lookup: HEIFA code -> FoodComposition object.
food_composition_dict = create_food_comp_objects(heifa_food_df=heifa_food_df)

In [12]:
# Print the full details of every food composition entry.
for key in food_composition_dict:
    food_comp_obj = food_composition_dict[key]
    food_comp_obj.print_full_details()

Heifa Code: 01B10298
8 Digit Code: 11101001
Is a recipe: False
Serving Size: N/A
Serving measure: N/A

Heifa Code: 01B10330
8 Digit Code: 11101002
Is a recipe: False
Serving Size: N/A
Serving measure: N/A

Heifa Code: 01B10390
8 Digit Code: 11101003
Is a recipe: False
Serving Size: N/A
Serving measure: N/A

Heifa Code: 01B10391
8 Digit Code: 11101004
Is a recipe: False
Serving Size: N/A
Serving measure: N/A

Heifa Code: 01B10404
8 Digit Code: 11101005
Is a recipe: False
Serving Size: N/A
Serving measure: N/A

Heifa Code: 01B10392
8 Digit Code: 11101006
Is a recipe: False
Serving Size: N/A
Serving measure: N/A

Heifa Code: 01B10482
8 Digit Code: 11101007
Is a recipe: False
Serving Size: N/A
Serving measure: N/A

Heifa Code: 01B10487
8 Digit Code: 11101008
Is a recipe: False
Serving Size: 600.0
Serving measure: kJ

Heifa Code: 01B10480
8 Digit Code: 11102001
Is a recipe: True
Serving Size: N/A
Serving measure: N/A

Heifa Code: 01B10488
8 Digit Code: 11102002
Is a recipe: True
Serving Siz

# Breakdown of HEIFA (Recipes)

- Every recipe has multiple ingredients
- Keys are repeated across rows (similar to Survey ID of Intake24)
- Every ingredient has its respective proportion in the recipe

In [13]:
# Load the HEIFA recipes table (several rows per recipe).
# NOTE(review): the saved output echoes this line, which suggests pandas
# emitted a warning when reading the file - confirm on a fresh run.
heifa_recipes_path = 'files/heifa_recipes.csv'
heifa_recipes_df = pd.read_csv(heifa_recipes_path)

  heifa_recipes_df = pd.read_csv('files/heifa_recipes.csv')


In [14]:
# Rename the HEIFA recipe columns used by the rest of the notebook.

# Original column label -> label used in this notebook.
heifa_recipe_column_names = {
    'Recipe AUSNUT 8-digit code': 'eight_digit_code',
    'Ingredient Nutrient table code': 'heifa_code',
    'Recipe Food Name': 'recipe_name',
    'Proportion of ingredients in the recipe': 'proportion_recipe',
    'Ingredient Food Name': 'ingredient_name',
    'Energy, with dietary fibre (kJ) per 100g': 'energy_with_fibre_100g',
}

for original_label, notebook_label in heifa_recipe_column_names.items():
    rename_columns(original_label, notebook_label, heifa_recipes_df)

In [15]:
def get_ingredients_proportion(recipe_df):
    """Build one ``IngredientInRecipe`` per row of a single recipe.

    Args:
        recipe_df: Rows of one recipe, with the columns ``heifa_code``,
            ``proportion_recipe``, ``ingredient_name`` and
            ``energy_with_fibre_100g``.

    Returns:
        dict mapping each row's ``heifa_code`` to the
        ``IngredientInRecipe`` built from that row. When a code appears
        in several rows, the entry from the later row replaces the
        earlier one.
    """
    ingredients_by_code = {}

    def register_ingredient(row):
        details = dict(
            proportion=row['proportion_recipe'],
            ingredient_name=row['ingredient_name'],
            energy_fibre_100g=row['energy_with_fibre_100g'],
        )
        ingredients_by_code[row['heifa_code']] = IngredientInRecipe(details)

    # apply is used purely to visit every row; its result is discarded.
    recipe_df.apply(register_ingredient, axis=1)

    return ingredients_by_code

In [16]:
# Distinct recipe IDs (8-digit codes) found in the recipes table.
recipes_id_list = heifa_recipes_df['eight_digit_code'].unique().tolist()

# Recipe ID -> RecipeComposition, for ease of mapping later on.
recipe_dict = {}

# Process one recipe at a time.
for recipe_id in tqdm(recipes_id_list, ncols=50):

    recipe_df = heifa_recipes_df.query("eight_digit_code == @recipe_id")

    # The recipe name is read from the recipe's first row.
    info_dict = dict(
        recipe_id=recipe_id,
        recipe_name=recipe_df['recipe_name'].values[0],
    )
    recipe_obj = RecipeComposition(info_dict)

    # Attach the breakdown of all ingredients to the recipe object.
    ingredient_pieces = get_ingredients_proportion(recipe_df)
    recipe_obj.add_pieces(ingredient_pieces)

    recipe_dict[recipe_id] = recipe_obj

100%|████████| 1404/1404 [00:08<00:00, 165.35it/s]


In [17]:
# Print the ingredient breakdown of every recipe.
# The loop variable is called recipe_id rather than id so the built-in
# id() stays usable in the notebook namespace.
for recipe_id, recipe_obj in recipe_dict.items():
    print(f"Printing for ID {recipe_id}\n")
    recipe_obj.print_ingredients_information()

Printing for ID 11102001

Print for Tea, regular, black, brewed from leaf or teabags, plain, without milk
Propotion: 0.88


Print for Milk, cow, fluid, unflavoured, not further defined
Propotion: 0.12


Printing for ID 11102002

Print for Chai latte, dry powder mix containing milk solids & sugar
Propotion: 0.08


Print for Milk, cow, fluid, regular fat (~3.5%)
Propotion: 0.90


Printing for ID 11102003

Print for Chai latte, dry powder mix containing milk solids & sugar
Propotion: 0.08


Print for Milk, cow, fluid, reduced fat (1-2%), not further defined
Propotion: 0.90


Printing for ID 11102004

Print for Chai latte, dry powder mix containing milk solids & sugar
Propotion: 0.08


Print for Milk, cow, fluid, skim (~0.15% fat), not further defined
Propotion: 0.90


Printing for ID 11102005

Print for Chai latte, dry powder mix containing milk solids & sugar
Propotion: 0.08


Print for Soy beverage, unflavoured, not further defined
Propotion: 0.90


Printing for ID 11202001

Print for C

## Mapping between Intake24 and HEIFA Ingredients

- For each user, extract the recorded nutrients and store them in an array.
- This is from ALL the survey data.
- We don't care about the order here.
- The array will contain a list of dictionaries/JSON.

In the array:

- Use the HEIFA ID (from user) to map to the HEIFA Ingredients' HEIFA ID.
- Check if a result is found or not.
- Check if it requires a recipe or not.

## Mapping between Intake24 and HEIFA Recipes

This is in case a recipe is found (The second step).

- For the given recipe, extract each ingredient's nutrient ID and proportion, and store them in an array.
- We don't care about the order here.
- The array will contain a list of dictionaries.

In the array:

- Use the HEIFA ID (from the recipes) to map the HEIFA Ingredients' HEIFA ID.
- Check the energy and serving size.

In [18]:
# User ID -> result of that user's get_meals_information().
user_meals = {}

# The loop variable is called user_id rather than id so the built-in
# id() stays usable in the notebook namespace.
for user_id, user_obj in user_dict.items():

    user_meals[user_id] = user_obj.get_meals_information()
    print(user_meals)
    # Only the first user is processed: the loop stops after one iteration.
    break

{1: [{'04B10082': <Food.Food object at 0x00000234E3C58850>, '02B10604': <Food.Food object at 0x00000234E3C58CA0>, '09A10204': <Food.Food object at 0x00000234E3C58820>, '01B10301': <Food.Food object at 0x00000234E3C58700>}, {'08E30320': <Food.Food object at 0x00000234E3C5E370>, '09A10204': <Food.Food object at 0x00000234E3C5E340>, '01B10301': <Food.Food object at 0x00000234E3C5E220>}, {'04B10082': <Food.Food object at 0x00000234E3C5E2E0>, '02B10604': <Food.Food object at 0x00000234E3C5E670>, '09A10204': <Food.Food object at 0x00000234E3C5E610>, '01B10301': <Food.Food object at 0x00000234E3C5E100>}, {'08F10852': <Food.Food object at 0x00000234E3C5E5E0>, '01B20413': <Food.Food object at 0x00000234E3C5E8B0>}, {'04B10082': <Food.Food object at 0x00000234E3C58760>, '02B20049': <Food.Food object at 0x00000234E3C58F10>, '09A10204': <Food.Food object at 0x00000234E3C58AC0>, '01B10301': <Food.Food object at 0x00000234E3C58AF0>}, {'09A10204': <Food.Food object at 0x00000234E3C5E7F0>, '01B10301': 

In [21]:
# Walk through the meals of one user (user ID 1 is hard-coded here) and
# print, for the foods visited, the consumed portion next to the HEIFA
# serving information.
food_list = user_meals[1]

# For no food groups, we skip the serving size calculation

for food_dict in food_list:

    # Get the ID and the food object
    for heifa_id, food_obj in food_dict.items():

        heifa_obj = food_composition_dict[heifa_id]

        print(f"HEIFA ID: {heifa_id}\n")
        print(f"Portion size (gram): {food_obj.portion_size}")
        print(f"Portion size (energy with fibre): {food_obj.energy_with_fibre}")
        print(f"Is it a recipe: {heifa_obj.is_recipe}\n")

        print(f"HEIFA Serving size: {heifa_obj.serving_size}")
        print(f"HEIFA Serving measure: {heifa_obj.serving_measure}\n")

        # Skip the ones that have no food group
        if not heifa_obj.required_portion_calculation:
            print("*" * 20)
            print("\n\n")
            continue

        if heifa_obj.is_recipe:

            # Get the recipe ID via 8 digit code
            size = food_obj.portion_size
            recipe_obj = recipe_dict[heifa_obj.eight_digit_code]
            recipe_pieces = recipe_obj.recipe_pieces

            # A separate name is used for the ingredient code so the outer
            # loop's heifa_id is not overwritten while the recipe is expanded.
            for ingredient_heifa_id, ingredient_obj in recipe_pieces.items():

                # Find the object
                pieced_heifa = food_composition_dict[ingredient_heifa_id]
                print(f"{ingredient_heifa_id} -> Recipe: {pieced_heifa.is_recipe}")

                # Break down the proportion first
                # (the attribute name `proroption` is spelled as the
                # ingredient object exposes it)
                pieced_portion = round(size * ingredient_obj.proroption, 1)

                # Energy conversion: the ingredient energy is given per 100g
                energy_amount = round((pieced_portion * ingredient_obj.energy_with_fibre) / 100, 2)

                print(f"Portion of {size} with proportion {ingredient_obj.proroption}: {pieced_portion}")
                print(f"Energy per 100g: {ingredient_obj.energy_with_fibre} (For {pieced_portion}g: {energy_amount})")

                # TODO: skip ingredients whose required_portion_calculation is
                # false, then compute and print
                # pieced_heifa.calculate_serving_size(energy_amount, pieced_portion).

                print("\n")

            print("*" * 20)
            print("\n\n")
            continue

        recommended_serving_size = heifa_obj.calculate_serving_size(food_obj.energy_with_fibre, food_obj.portion_size)
        print(f"Recommended serving size: {recommended_serving_size}\n")

        print("*" * 20)
        print("\n\n")
        # Stop with this meal after the first food whose serving size was
        # calculated and move on to the next meal.
        # NOTE(review): looks like a debugging limiter - confirm it is intended.
        break

HEIFA ID: 04B10082

Portion size (gram): 15.5
Portion size (energy with fibre): 357.9
Is it a recipe: False

HEIFA Serving size: 600.0
HEIFA Serving measure: kJ

Recommended serving size: 0.6

********************



HEIFA ID: 08E30320

Portion size (gram): 51.75
Portion size (energy with fibre): 918.05
Is it a recipe: False

HEIFA Serving size: 600.0
HEIFA Serving measure: kJ

Recommended serving size: 1.5

********************



HEIFA ID: 04B10082

Portion size (gram): 12.4
Portion size (energy with fibre): 286.32
Is it a recipe: False

HEIFA Serving size: 600.0
HEIFA Serving measure: kJ

Recommended serving size: 0.5

********************



HEIFA ID: 08F10852

Portion size (gram): 106.0
Portion size (energy with fibre): 678.4
Is it a recipe: True

HEIFA Serving size: N/A
HEIFA Serving measure: N/A

13A12032 -> Recipe: False
Portion of 106.0 with proportion 0.06: 6.4
Energy per 100g: 136 (For 6.4g: 8.7)


13A12553 -> Recipe: True
Portion of 106.0 with proportion 0.22: 23.3
Energy p