In [1]:
from tqdm import tqdm

from User import User
from Survey import Survey
from Intake import Intake

import pandas as pd

In [2]:
# Load the Intake24 survey export; the path is relative to the working directory.
intake24_df = pd.read_csv('files/intake24_survey_file.csv')
#print(intake24_df)

# Breakdown (From user to ingredient):

The file has many users.

Each user has many surveys.

Each survey has many meals.

Each meal consists of many food components.

Every food component is marked with a "Nutrition ID code" (the "Nutrient table code" column of the file, renamed to `heifa_nutrient_id` below).

In [3]:
def rename_columns(old_name:str, new_name: str, df: pd.DataFrame) -> None:
    """Rename one column of ``df`` in place.

    Args:
        old_name: Current column label.
        new_name: Label that replaces ``old_name``.
        df: DataFrame whose columns are modified; it is mutated, not copied.

    Returns:
        None. The change is visible through the caller's reference to ``df``.
    """
    column_mapping = {old_name: new_name}
    df.rename(columns=column_mapping, inplace=True)

# Map the raw Intake24 export headers onto the snake_case names used by the
# rest of the notebook; each rename is applied to `intake24_df` in place.
for original_header, notebook_name in (
    ('Survey ID', 'survey_id'),
    ('Intake24 food code', 'food_code'),
    ('User ID', 'user_id'),
    ('Meal ID', 'meal_id'),
    ('Nutrient table code', 'heifa_nutrient_id'),
    ('Portion size (g/ml)', 'portion_size_consumed'),
):
    rename_columns(original_header, notebook_name, intake24_df)

In [4]:
# Structure is like this:
# - Map user ID to their respective meal and information
# - Map the meal number to the respective ingredients
# - Map the ingredients to their respective nutrient code
#
# Concretely, the survey loop further down fills this as
# user_information[user_id][survey_id] = {meal_id: <meal info>, ..., 'meals': [meal ids]}

user_information: dict = {}

In [8]:
def loop_ingredients(ingredients_df: pd.DataFrame) -> dict:
    """Collect the ingredients of a single meal into a dictionary.

    Reads the ``food_code``, ``heifa_nutrient_id`` and
    ``portion_size_consumed`` columns of ``ingredients_df`` row by row.

    Returns:
        A dict with one entry per HEIFA nutrient ID, each holding
        ``{'food_code': ..., 'portion_size': ...}``, plus the key
        ``'heifa_ingredients_consumed'`` holding the list of HEIFA IDs in row
        order. Because entries are keyed by HEIFA ID, a later row with the
        same ID replaces the earlier one.

    Side effects:
        Builds an ``Intake`` object from the same data and prints its
        ``nutrients_info`` and ``heifa_list`` attributes; the object itself is
        not returned.
    """
    food_codes, heifa_ids, portion_sizes = (
        ingredients_df[column].values.tolist()
        for column in ('food_code', 'heifa_nutrient_id', 'portion_size_consumed')
    )

    # Dictionary approach: seed the result with the ordered list of HEIFA IDs.
    meal_info_dict = {'heifa_ingredients_consumed': heifa_ids}

    # OOP approach: mirror the same data into an Intake object.
    meal_intake = Intake()
    meal_intake.add_nutrient_heifa_list(heifa_ids)

    for heifa_id, food_code, portion_size in zip(heifa_ids, food_codes, portion_sizes):
        # One dict object per ingredient, shared by both representations.
        nutrient_info = dict(food_code=food_code, portion_size=portion_size)
        meal_info_dict[heifa_id] = nutrient_info
        meal_intake.add_nutrient(heifa_id, nutrient_info)

    print(meal_intake.nutrients_info)
    print(meal_intake.heifa_list)
    print("\n\n")

    return meal_info_dict

def loop_meals(meal_ids_list: list, survey_meals_df: pd.DataFrame) -> dict:
    """Build the per-meal ingredient dictionaries for one survey.

    Args:
        meal_ids_list: Meal IDs to extract from ``survey_meals_df``.
        survey_meals_df: Rows of a single survey; must contain a ``meal_id``
            column plus the columns ``loop_ingredients`` reads.

    Returns:
        A dict mapping each meal ID to the result of ``loop_ingredients`` for
        the rows of that meal. An empty ``meal_ids_list`` yields ``{}``.
    """
    # Select each meal's rows with a boolean mask and hand them to
    # loop_ingredients; insertion order follows meal_ids_list.
    return {
        meal_id: loop_ingredients(
            survey_meals_df[survey_meals_df['meal_id'] == meal_id]
        )
        for meal_id in meal_ids_list
    }

In [6]:
# Step 1: split the Intake24 data by survey ID.
survey_ids_list = intake24_df['survey_id'].unique().tolist()
#print(f'Total number of surveys carried out: {len(survey_ids_list)}')

for survey_id in tqdm(survey_ids_list, ncols=50):

    # Rows of this survey only, re-indexed so the first row carries label 0.
    survey_meals_df = intake24_df.query("survey_id == @survey_id").reset_index()

    # Register the user the first time one of their surveys is seen.
    user_id = survey_meals_df.loc[0, 'user_id']
    user_information.setdefault(user_id, {})

    # Step 2: the distinct meals recorded in this survey.
    meal_ids_list = survey_meals_df['meal_id'].unique().tolist()

    # Step 3: ingredient details per meal; the list of meal IDs is stored
    # alongside them under the 'meals' key.
    meal_info_dict = loop_meals(meal_ids_list, survey_meals_df)
    meal_info_dict['meals'] = meal_ids_list
    user_information[user_id][survey_id] = meal_info_dict

    # NOTE(review): only the first survey is processed because of this break -
    # presumably a development shortcut; confirm before relying on the results.
    break

  0%|                     | 0/405 [00:00<?, ?it/s]

<Intake.Intake object at 0x00000271D2E45AC0>
<Intake.Intake object at 0x00000271D2E458E0>
<Intake.Intake object at 0x00000271D2E45C40>
<Intake.Intake object at 0x00000271D2E459A0>





In [7]:
def print_ingredients_component(ingredients_dict, heifa_list):
    """Print one line per ingredient of a meal.

    Args:
        ingredients_dict: Mapping from HEIFA code to a dict holding the keys
            ``'food_code'`` and ``'portion_size'``.
        heifa_list: HEIFA codes to print, in output order; each must be a key
            of ``ingredients_dict``.
    """
    for heifa_code in heifa_list:
        ingredient = ingredients_dict[heifa_code]
        food_code, portion_size = ingredient['food_code'], ingredient['portion_size']
        print(f"Ingredient {heifa_code} (Food code: {food_code}) of portion size {portion_size}g/ml")

def print_ingredients_information(meals_dict, meals_ids_list):
    """Print the ingredient breakdown of every listed meal.

    Args:
        meals_dict: Mapping from meal ID to that meal's dictionary, which must
            hold the key ``'heifa_ingredients_consumed'`` and one entry per
            HEIFA code in that list.
        meals_ids_list: Meal IDs to print, in output order.
    """
    # The loop variable is `meal_id` rather than `id`, so the builtin `id()`
    # is not shadowed inside this function.
    for meal_id in meals_ids_list:
        meal_details = meals_dict[meal_id]
        heifa_ingredients_list = meal_details['heifa_ingredients_consumed']
        print(f"For Meal #{meal_id}")
        print_ingredients_component(meal_details, heifa_ingredients_list)
        print("\n\n")
    
def print_meal_information(survey_ids, user_dict):
    """Print every survey of one user together with its meals.

    Args:
        survey_ids: Survey IDs to print; each must be a key of ``user_dict``.
        user_dict: Mapping from survey ID to that survey's dictionary, whose
            ``'meals'`` entry lists the meal IDs of the survey.
    """
    for survey_id in survey_ids:
        survey_dict = user_dict[survey_id]
        meals_ids_list = survey_dict['meals']
        # Count this survey's own meals. Reading a notebook-level variable
        # here would report the same count for every survey.
        print(f"For Survey {survey_id}: {len(meals_ids_list)} meals populated\n")
        print_ingredients_information(survey_dict, meals_ids_list)
        

# Walk through every user collected so far and print their surveys.
for user_id, user_dict in user_information.items():

    print(f"For User ID {user_id}\n--------------\n")

    survey_ids = user_dict.keys()
    print_meal_information(survey_ids, user_dict)

    print("=" * 20 + "\n")

For User ID 1
--------------

For Survey 5d3e72da-a241-4e8e-837b-021accf9a37c: 4 meals populated

For Meal #1
Ingredient 04B10082 (Food code: MARG) of portion size 15.5g/ml
Ingredient 02B10604 (Food code: WTST) of portion size 54.0g/ml
Ingredient 09A10204 (Food code: SMLK) of portion size 16.42g/ml
Ingredient 01B10301 (Food code: FLCO) of portion size 194.92g/ml



For Meal #2
Ingredient 08E30320 (Food code: SLMI) of portion size 51.75g/ml
Ingredient 09A10204 (Food code: SMLK) of portion size 16.42g/ml
Ingredient 01B10301 (Food code: FLCO) of portion size 195.2g/ml



For Meal #3
Ingredient 04B10082 (Food code: MARG) of portion size 12.4g/ml
Ingredient 02B10604 (Food code: WTST) of portion size 54.0g/ml
Ingredient 09A10204 (Food code: SMLK) of portion size 16.42g/ml
Ingredient 01B10301 (Food code: FLCO) of portion size 198.9g/ml



For Meal #4
Ingredient 08F10852 (Food code: CCCB) of portion size 106.0g/ml
Ingredient 01B20413 (Food code: OSQB) of portion size 272.18g/ml




