In [1]:
from tqdm import tqdm

from User import User
from Survey import Survey
from Intake import Intake
from heifa_composition import FoodComposition

import pandas as pd

In [2]:
def rename_columns(old_name: str, new_name: str, df: pd.DataFrame) -> None:
    """Rename a single column of ``df`` in place.

    Args:
        old_name: Current column label.
        new_name: Label that replaces ``old_name``.
        df: DataFrame whose column is renamed; it is modified in place.

    Returns:
        None. A label that is not present in ``df`` is left untouched,
        because ``DataFrame.rename`` ignores missing keys by default.
    """
    column_mapping = {old_name: new_name}
    df.rename(mapper=column_mapping, axis="columns", inplace=True)

# Breakdown (From user to ingredient):

The file has many users.

Each user has many surveys.

Each survey has many meal intakes.

Each intake consists of many food components.

Every food component is marked with a "Nutrition ID code".

In [None]:
# Load the Intake24 survey export and give its columns short names.

intake24_df = pd.read_csv('files/intake24_survey_file.csv')

# All renames are applied in one in-place call: each key is a header from
# the CSV file, each value is the name used in the cells that follow.
intake24_df.rename(
    columns={
        'Energy, with dietary fibre': 'energy_with_fibre',
        'Meal name': 'meal_name',
        'Survey ID': 'survey_id',
        'Intake24 food code': 'food_code',
        'User ID': 'user_id',
        'Meal ID': 'meal_id',
        'Nutrient table code': 'heifa_nutrient_id',
        'Portion size (g/ml)': 'portion_size_consumed',
    },
    inplace=True,
)

In [None]:
# Registry of users, keyed by user ID; it starts empty.
# Planned nesting, from outermost to innermost:
# - user ID -> that user's meals and information
# - meal number -> the ingredients of that meal
# - ingredient -> its nutrient code

user_dict = dict()

In [None]:
def loop_ingredients(food_df: pd.DataFrame) -> "Intake":
    """Create an ``Intake`` and load the given food rows into it.

    Args:
        food_df: Rows handed to ``Intake.add_food_information``. The caller
            in this notebook passes the rows that share one meal ID.

    Returns:
        The newly created ``Intake`` after ``add_food_information(food_df)``
        has been called on it.
    """
    # The return annotation is a string so it is not evaluated when the
    # function is defined.
    meal_intake = Intake()
    meal_intake.add_food_information(food_df)

    return meal_intake

def loop_meals(meal_ids_list: list, survey_meals_df: pd.DataFrame) -> "Survey":
    """Build a ``Survey`` holding one intake per meal ID.

    Args:
        meal_ids_list: Meal IDs to process, in the order they are added.
        survey_meals_df: Rows to split by meal; must contain a ``meal_id``
            column.

    Returns:
        The ``Survey`` after ``add_meal(meal_id, intake)`` has been called
        once for every entry of ``meal_ids_list``.
    """
    survey_info_meals = Survey()

    for meal_id in meal_ids_list:

        # `@meal_id` inside the query string refers to the loop variable.
        food_intake_df = survey_meals_df.query("meal_id == @meal_id")
        meal_ingredients = loop_ingredients(food_intake_df)

        survey_info_meals.add_meal(meal_id, meal_ingredients)

    return survey_info_meals

In [None]:
# Step 1: work through the Intake24 data one survey at a time.

# Distinct survey IDs found in the file.
survey_ids_list = intake24_df['survey_id'].unique().tolist()

for survey_id in tqdm(survey_ids_list, ncols=50):

    # Rows that belong to the current survey.
    survey_meals_df = intake24_df.query("survey_id == @survey_id")

    # The user ID is taken from the first row of the survey.
    user_id = survey_meals_df['user_id'].values[0]

    # Reuse the User registered by an earlier survey, or register a new one.
    user = user_dict.get(user_id)
    if user is None:
        user = User(user_id)
        user_dict[user_id] = user

    # Step 2: distinct meal IDs within this survey.
    meal_ids_list = survey_meals_df['meal_id'].unique().tolist()

    # Step 3: collect the information of every meal of the survey.
    survey_meals_info = loop_meals(meal_ids_list, survey_meals_df)

    # Attach the populated survey to its user.
    user.add_survey(survey_id, survey_meals_info)

In [None]:
# Print the stored information of every registered user.
for user_id, user_obj in user_dict.items():
    user_obj.print_information()

# HEIFA - Ingredients

Every row in the file is a unique ingredient.

Every ingredient:
- has its own attributes.
- can be mapped to an 8-digit code (for HEIFA Recipe)
- is used as a divisor for either energy in kilojoules (kJ) or mass in grams (g)

In [3]:
# Load the HEIFA food composition table from its CSV file.
heifa_food_df = pd.read_csv(filepath_or_buffer='files/heifa_food_composition.csv')

In [4]:
# Give the HEIFA food composition columns short names.
# All renames are applied in one in-place call: each key is a header from
# the CSV file, each value is the name used in the cells that follow.
heifa_food_df.rename(
    columns={
        'Nutrient table code': 'heifa_code',
        '8 digit code': '8_digit_code',
        'HEIFA Food Groups': 'food_group',
        'Energy or grams per Serve \n(HEIFA food groups)': 'serving_size',
        'Serving size unit of measure': 'serving_measure',
    },
    inplace=True,
)






In [12]:
# Data cleaning: missing serving information is replaced by the text 'N/A'.

# Same fill value for both serving columns.
filled_values = dict.fromkeys(['serving_size', 'serving_measure'], 'N/A')

# Only the columns named in `filled_values` are filled; the frame is
# modified in place.
heifa_food_df.fillna(value=filled_values, inplace=True)

# Show the first 30 rows of the columns of interest.
print(heifa_food_df[['heifa_code', 'food_group', 'serving_size', 'serving_measure']].head(30))


   heifa_code                food_group serving_size serving_measure
0    01B10298             No food group          N/A             N/A
1    01B10330             No food group          N/A             N/A
2    01B10390             No food group          N/A             N/A
3    01B10391             No food group          N/A             N/A
4    01B10404             No food group          N/A             N/A
5    01B10392             No food group          N/A             N/A
6    01B10482             No food group          N/A             N/A
7    01B10487             Discretionary        600.0              kJ
8    01B10480  Recipe/Non-discretionary          N/A             N/A
9    01B10488      Recipe/Discretionary          N/A             N/A
10   01B10489      Recipe/Discretionary          N/A             N/A
11   01B10490      Recipe/Discretionary          N/A             N/A
12   01B10491      Recipe/Discretionary          N/A             N/A
13   01B10397             No food 

In [5]:
def create_food_comp_objects(heifa_food_df: pd.DataFrame) -> dict:
    """Build one ``FoodComposition`` per row, keyed by its HEIFA code.

    Args:
        heifa_food_df: Food composition table. It must contain the columns
            'heifa_code', '8_digit_code', 'food_group', 'serving_size' and
            'serving_measure'.

    Returns:
        A dict mapping each row's 'heifa_code' to the ``FoodComposition``
        created from that row. If a code occurs more than once, the last
        row wins. An empty frame yields an empty dict.
    """
    # Only these columns are handed to FoodComposition.
    wanted_columns = (
        'heifa_code',
        '8_digit_code',
        'food_group',
        'serving_size',
        'serving_measure',
    )

    heifa_food_dict = {}

    # Rows are iterated explicitly rather than with DataFrame.apply: on an
    # empty frame, apply still calls the function once with a placeholder
    # row of NaN values, which would register a bogus entry.
    for _, food_row in heifa_food_df.iterrows():
        info_dict = {column: food_row[column] for column in wanted_columns}
        heifa_food_dict[info_dict['heifa_code']] = FoodComposition(info_dict)

    return heifa_food_dict

# Build the lookup of FoodComposition objects, keyed by each row's 'heifa_code'.
food_composition_dict = create_food_comp_objects(heifa_food_df)

In [6]:
# Print the full details of every food composition entry.
for key in food_composition_dict:
    food_comp_obj = food_composition_dict[key]
    food_comp_obj.print_full_details()

Heifa Code: 01B10298
 8 Digit Code: 11101001
 Is a recipe: False
Serving Size: nan
 Serving measure: nan

Heifa Code: 01B10330
 8 Digit Code: 11101002
 Is a recipe: False
Serving Size: nan
 Serving measure: nan

Heifa Code: 01B10390
 8 Digit Code: 11101003
 Is a recipe: False
Serving Size: nan
 Serving measure: nan

Heifa Code: 01B10391
 8 Digit Code: 11101004
 Is a recipe: False
Serving Size: nan
 Serving measure: nan

Heifa Code: 01B10404
 8 Digit Code: 11101005
 Is a recipe: False
Serving Size: nan
 Serving measure: nan

Heifa Code: 01B10392
 8 Digit Code: 11101006
 Is a recipe: False
Serving Size: nan
 Serving measure: nan

Heifa Code: 01B10482
 8 Digit Code: 11101007
 Is a recipe: False
Serving Size: nan
 Serving measure: nan

Heifa Code: 01B10487
 8 Digit Code: 11101008
 Is a recipe: False
Serving Size: 600.0
 Serving measure: kJ

Heifa Code: 01B10480
 8 Digit Code: 11102001
 Is a recipe: True
Serving Size: nan
 Serving measure: nan

Heifa Code: 01B10488
 8 Digit Code: 11102002
 