In [1]:
from tqdm import tqdm

from User import User
from Survey import Survey
from Intake import Intake

import pandas as pd

In [2]:
# Load the raw Intake24 survey export into a DataFrame. The cells below
# rename its columns and then split it by survey and by meal.
intake24_df = pd.read_csv('files/intake24_survey_file.csv')
# Tip: inspect with intake24_df.head() rather than printing the whole frame.

In [3]:
def rename_columns(old_name:str, new_name: str, df: pd.DataFrame) -> None:
    """
    Rename one column of ``df`` in place.

    Parameters
    ----------
    old_name : str
        Current label of the column.
    new_name : str
        Label the column should carry afterwards.
    df : pd.DataFrame
        Frame to modify. It is mutated directly; no copy is returned.

    Returns
    -------
    None
        The change is visible through ``df`` itself. If ``old_name`` is not
        a column of ``df`` the frame is left untouched (pandas' default
        ``errors='ignore'`` behaviour for ``rename``).
    """
    column_mapping = {old_name: new_name}
    df.rename(column_mapping, axis="columns", inplace=True)

# Breakdown (From user to ingredient):

The file has many users.

Each user has many surveys.

Each survey has many meal intakes.

Each intake consists of many food components.

Every food component is marked with a "Nutrition ID code".

In [4]:
# Rename the columns (For Intake24)
# Keys are the headers found in the Intake24 export; values are the short
# snake_case names the rest of the notebook refers to.
intake24_column_names = {
    'Energy, with dietary fibre': 'energy_with_fibre',
    'Meal name': 'meal_name',
    'Survey ID': 'survey_id',
    'Intake24 food code': 'food_code',
    'User ID': 'user_id',
    'Meal ID': 'meal_id',
    'Nutrient table code': 'heifa_nutrient_id',
    'Portion size (g/ml)': 'portion_size_consumed',
}

# Apply the renames one at a time, in the order listed above.
for original_header, short_name in intake24_column_names.items():
    rename_columns(original_header, short_name, intake24_df)

In [5]:
# Registry of every user found in the Intake24 file, keyed by user ID.
# The survey loop further down stores one User object per ID and fills it
# level by level:
# - user   -> surveys (attached with User.add_survey)
# - survey -> meals (attached with Survey.add_meal)
# - meal   -> an Intake object that receives that meal's food rows
#
# NOTE(review): the mapping from each ingredient to its nutrient code
# presumably happens inside Intake.add_food_information — confirm in Intake.py.

user_dict = {}

In [6]:
def loop_ingredients(food_df: pd.DataFrame) -> "Intake":
    """
    Build the Intake object for a single meal.

    Parameters
    ----------
    food_df : pd.DataFrame
        Rows describing the food items of one meal. The caller in this
        notebook passes the slice of the survey frame for one ``meal_id``.

    Returns
    -------
    Intake
        A new ``Intake`` instance that has been handed ``food_df`` through
        ``add_food_information``. (The annotation previously said ``dict``,
        which did not match the value actually returned.)
    """
    meal_intake = Intake()
    meal_intake.add_food_information(food_df)

    return meal_intake

def loop_meals(meal_ids_list: list, survey_meals_df: pd.DataFrame) -> "Survey":
    """
    Build the Survey object holding every requested meal of one survey.

    Parameters
    ----------
    meal_ids_list : list
        Meal IDs to process, in the order they should be added.
    survey_meals_df : pd.DataFrame
        Rows of a single survey. Must contain a ``meal_id`` column, which is
        used to select the rows of each meal.

    Returns
    -------
    Survey
        A new ``Survey`` instance with one ``add_meal`` call per entry of
        ``meal_ids_list``. (The annotation previously said ``None``, which
        did not match the value actually returned.)
    """
    survey_info_meals = Survey()

    for meal_id in meal_ids_list:

        # Rows of this survey that belong to the current meal.
        food_intake_df = survey_meals_df.query("meal_id == @meal_id")
        meal_ingredients = loop_ingredients(food_intake_df)

        # Register the meal's Intake object under its meal ID.
        survey_info_meals.add_meal(meal_id, meal_ingredients)

    return survey_info_meals

In [7]:
# Step 1: Let's split by the survey ID.
#
# NOTE(review): this cell fills user_dict in place. Re-running it without
# first re-running the cell that resets user_dict calls add_survey again for
# the same survey IDs — check that User.add_survey tolerates that.

# Get the list of survey IDs (unique values, in order of first appearance).
survey_ids_list = intake24_df['survey_id'].unique().tolist()

# Loop one survey ID at a time and split the Intake24 dataframe accordingly.
# tqdm only adds the progress bar; ncols=50 caps its width.
for survey_id in tqdm(survey_ids_list, ncols=50):

    # Rows of the file that belong to the survey being processed.
    survey_meals_df = intake24_df.query("survey_id == @survey_id")

    # Extract the user ID from the first row of the survey.
    # NOTE(review): assumes every row of a survey carries the same user_id — confirm.
    user_id = survey_meals_df['user_id'].values[0]
    
    # Reuse the User object if this user was already seen;
    # otherwise create a new one and register it.
    if user_id not in user_dict:
        user_dict[user_id] = User(user_id)

    user = user_dict[user_id]
    
    # Step 2: Split further by the meal ID
    meal_ids_list = survey_meals_df['meal_id'].unique().tolist()

    # Step 3: Get information of the meals (loop_meals returns a Survey object)
    survey_meals_info = loop_meals(meal_ids_list, survey_meals_df)

    # Attach the populated survey to the user under its survey ID.
    user.add_survey(survey_id, survey_meals_info)

100%|███████████| 405/405 [00:09<00:00, 43.20it/s]


In [8]:
# Walk over every stored user. The printing is switched off, so this pass
# produces no output; un-comment the two lines below to inspect each user.
for user_id, user_obj in user_dict.items():
    #print(f"Printing for User {user_id}")
    #user_obj.print_information()
    pass

# HEIFA - Ingredients

Every row in the file is a unique ingredient.

Every ingredient:
- has its own attributes.
- can be mapped to an 8-digit code (for HEIFA Recipe)
- is used as a divisor for either energy (kilojoules, kJ) or grams (g)