# Completion


# Clarifications


# Questions to Ask



In [None]:
from utils import *
from pprint import pprint

import nest_asyncio
import asyncio

# Patch asyncio so that asyncio.run() can be used inside a Jupyter Notebook,
# where an event loop is already running.
# NOTE(review): this is applied unconditionally; guard it if this code is ever
# run outside a notebook environment.
nest_asyncio.apply()

In [None]:
# Load the respective files (Extract)

async def get_all_dataframes():
    """Await all five loader coroutines together and return their results.

    The results come back as a list in the same order the loaders are listed
    below (presumably one dataframe per source file -- confirm against utils).
    """
    pending_loads = (
        load_intake24(),
        load_latrobe_file(),
        load_heifa_ingredients(),
        load_heifa_recipes(),
        load_heifa_scores(),
    )
    return await asyncio.gather(*pending_loads)

# Unpacking order must match the loader order inside get_all_dataframes()
intake24_df, latrobe_df, heifa_food_df, heifa_recipes_df, heifa_scores_df = asyncio.run(get_all_dataframes())

# Breakdown of Intake 24:

The file has many users.

Each user has many surveys.

Each survey has many meal intakes.

Each intake consists of many food components.

Every food component is marked with a "Nutrition ID code".

In [None]:
# Convert each loaded dataframe into a dictionary of objects
# (helpers presumably come from the `utils` star import -- confirm).
# These dictionaries are the inputs to the serving and score calculations below.
user_dict = create_user_objects(intake24_df)
food_composition_dict = create_food_objects(heifa_food_df)
recipe_dict = create_recipe_objects(heifa_recipes_df)
heifa_scores_dict = create_scores_objects(heifa_scores_df)

In [None]:
#for user_id in user_dict.keys():

#    print(f"Printing for User {user_id}")
#    user_obj = user_dict[user_id]
#    user_obj.print_information()

# Breakdown of HEIFA (Food Composition)

Every row in the file is a unique ingredient.

Every ingredient:
- has its own attributes.
- can be mapped to an 8-digit code (for HEIFA Recipe)
- is used as a divisor for either energy (kilo joules) or grams (g)

In [None]:
# Create the objects

#for key, food_comp_obj in food_composition_dict.items():
#    food_comp_obj.print_full_details()

# Breakdown of HEIFA (Recipes)

- Every recipe has multiple ingredients
- Keys are repeated across rows (similar to Survey ID of Intake24)
- Every ingredient has respective proportion to the recipe

In [None]:
#for id, recipe_obj in recipe_dict.items():
#    print(f"Printing for ID {id}\n")
#    recipe_obj.print_ingredients_information()

## Mapping between Intake24 and HEIFA Ingredients

- For each user, extract the given nutrients and store in an array.
- This is from ALL the survey data.
- We don't care about the order here.
- The array will contain a list of dictionaries/JSON.

In the array:

- Use the HEIFA ID (from user) to map to the HEIFA Ingredients' HEIFA ID.
- Check if a result is found or not.
- Check if it requires a recipe or not.

## Mapping between Intake24 and HEIFA Recipes

This is in case a recipe is found (The second step).

- For the given recipe, extract the given nutrients ID and proportion, store in an array.
- We don't care about the order here.
- The array will contain a list of dictionaries.

In the array:

- Use the HEIFA ID (from the recipes) to map the HEIFA Ingredients' HEIFA ID.
- Check the energy and serving size.

In [None]:
# Compute the servings for the Intake24 users.
# The result is indexed as [user_id][survey_id] and each entry exposes the
# 'individual', 'total' and 'variations' dictionaries used by the display cell.
user_daily_intake = calculate_user_servings(user_dict, food_composition_dict, recipe_dict)

# Test with Samara's CSV file and post the updates here

**Assumption**: This should be the same as Intake24 file format.

## Errors encountered

**Column names between Intake24 and Latrobe**
- "Start date (AEST)" -> Different from Intake24 (used 'Start Time'). -> "RESOLVED"
- 'Nutrient table code (original)' -> Different from Intake24 (used 'Nutrient table code'). -> "RESOLVED"
- 'Energy, with dietary fibre (kJ)' -> Different from Intake24 (used 'Energy, with dietary fibre'). -> "RESOLVED"

**Nutrient ID related**
- Values of "N/A" in the Nutrient ID still present. -> "RESOLVED"
- Unknown codes still present (8416) -> "RESOLVED"
- Row difference before and after dropping: 6028 vs 5613 (415) -> "RESOLVED"

**Inside the file**:
- Some Nutrient ID values contain the food description instead of the ID (Example: Porridge, made with light milk) -> "RESOLVED"
- Nutrient ID does not have the ID from Row 3072 to 3294; it has description (same as previous reason) -> "RESOLVED"
- Some values of energy are not found; they are shown as #VALUE! (#VALUE! present in the google sheet) -> "RESOLVED"


In [None]:
# Build the user objects for the La Trobe file with the same helper used for
# the Intake24 file (the La Trobe file is assumed to share its format).
user_latrobe_dict = create_user_objects(latrobe_df)

#for user_id in user_latrobe_dict.keys():

#    print(f"Printing for User {user_id}")
#    user_obj = user_latrobe_dict[user_id]
#    user_obj.print_information()

In [None]:
# Servings for the La Trobe users, reusing the HEIFA food composition and
# recipe dictionaries built earlier.
latrobe_user_daily_intake = calculate_user_servings(user_latrobe_dict, food_composition_dict, recipe_dict)
# Scores are indexed as [user_id][survey_id] and expose 'breakdown',
# 'male_total' and 'female_total' (see the display cell below).
latrobe_user_heifa_scores = calculate_heifa_scores(heifa_scores_dict, latrobe_user_daily_intake)

In [None]:
# Display: for every La Trobe user and survey, print the serves per food group
# followed by the HEIFA score conversion for that survey.
for user_id, daily_intake_dict in latrobe_user_daily_intake.items():
    for survey_id, food_group_dict in daily_intake_dict.items():
        print(f"Breakdown of User {user_id} for Survey ID {survey_id}:")

        individual_dict = food_group_dict['individual']
        total_dict = food_group_dict['total']
        variations_dict = food_group_dict['variations']

        # Individual food groups, listed in alphabetical order
        for food_group, total_serving in sorted(individual_dict.items()):
            print(f"- {food_group}: {total_serving:.2f} serves")

        print("")

        print("***HEIFA SCORES CONVERSION (START)***\n")

        # Totals per food group, also in alphabetical order
        for food_group, total_serving in sorted(total_dict.items()):
            print(f"> {food_group}: {total_serving:.2f} serves")

            score_breakdown = latrobe_user_heifa_scores[user_id][survey_id]['breakdown']

            if food_group in score_breakdown:
                # Sub-group serves (when present) are listed before the scores
                if food_group in variations_dict:
                    for sub_group, serving_size in variations_dict[food_group].items():
                        print(f"-- {sub_group}: {serving_size:.2f} serves")

                gender_scores = score_breakdown[food_group]
                male_score = gender_scores['male_score']
                female_score = gender_scores['female_score']

                print(f"* Male score: {male_score}")
                print(f"* Female score: {female_score}")
            else:
                print("* No score")

            print("")

        # Survey-level totals for both genders
        survey_scores = latrobe_user_heifa_scores[user_id][survey_id]
        total_male_heifa = survey_scores['male_total']
        total_female_heifa = survey_scores['female_total']

        print(f"HEIFA Total (Male): {total_male_heifa}")
        print(f"HEIFA Total (Female): {total_female_heifa}")
        print("")

        print("***HEIFA SCORES CONVERSION (END)***")
        print("")
        print("=" * 20)

# Calculating the HEIFA Scores

HEIFA scores are to be calculated on a **daily basis**.

To calculate them, let's break them down:

- Break down by user
- Break down by date
- Break down by major food group (Example: Vegetables/Green -> Vegetables is the major food group)
- Break down by sub-food group of the major (Example: Vegetables/Green -> Green is the sub-food group)
- Compare the scores by gender (male and female)

There are some exceptions to the rule, based on the HEIFA scores guideline:

- Grains and cereals/Wholegrains -> This is to be calculated separately as "Grains and cereals" and "Wholegrains".

Varieties:

- Store the variety score in a different dictionary
- Find the score of each variety group separately (based on the HEIFA rule book)

In [None]:
# Create the HEIFA scores for the Intake24 users.
# The result is indexed as [user_id][survey_id] and exposes 'breakdown',
# 'male_total' and 'female_total' (see the display cell below).
user_heifa_scores = calculate_heifa_scores(heifa_scores_dict, user_daily_intake)

In [None]:
# Display: for every Intake24 user and survey, print the serves per food group
# followed by the HEIFA score conversion for that survey.
for user_id, daily_intake_dict in user_daily_intake.items():
    for survey_id, food_group_dict in daily_intake_dict.items():
        print(f"Breakdown of User {user_id} for Survey ID {survey_id}:")

        individual_dict = food_group_dict['individual']
        total_dict = food_group_dict['total']
        variations_dict = food_group_dict['variations']

        # Individual food groups, listed in alphabetical order
        for food_group, total_serving in sorted(individual_dict.items()):
            print(f"- {food_group}: {total_serving:.2f} serves")

        print("")

        print("***HEIFA SCORES CONVERSION (START)***\n")

        # Totals per food group, also in alphabetical order
        for food_group, total_serving in sorted(total_dict.items()):
            print(f"> {food_group}: {total_serving:.2f} serves")

            score_breakdown = user_heifa_scores[user_id][survey_id]['breakdown']

            if food_group in score_breakdown:
                # Sub-group serves (when present) are listed before the scores
                if food_group in variations_dict:
                    for sub_group, serving_size in variations_dict[food_group].items():
                        print(f"-- {sub_group}: {serving_size:.2f} serves")

                gender_scores = score_breakdown[food_group]
                male_score = gender_scores['male_score']
                female_score = gender_scores['female_score']

                print(f"* Male score: {male_score}")
                print(f"* Female score: {female_score}")
            else:
                print("* No score")

            print("")

        # Survey-level totals for both genders
        survey_scores = user_heifa_scores[user_id][survey_id]
        total_male_heifa = survey_scores['male_total']
        total_female_heifa = survey_scores['female_total']

        print(f"HEIFA Total (Male): {total_male_heifa}")
        print(f"HEIFA Total (Female): {total_female_heifa}")
        print("")

        print("***HEIFA SCORES CONVERSION (END)***")
        print("")
        print("=" * 20)

# The HEIFA Score CSV file

## First layer
~~Phase 1: Get the list of column names (User ID and Survey ID)~~ \
~~Phase 2: Get the list of column names (Refer to heifa_scores.csv (Food Group)) -> Whole groups~~ \
~~Phase 3: Get the list of column names (Refer to heifa_food_composition.csv (HEIFA Food Group)) -> Sub-groups (Fruits, Vegetables, Legumes)~~

## Second layer
~~Phase 4: Store the User ID and Survey ID~~ \
~~Phase 5: Store the whole group serves~~ \
~~Phase 6: Store the sub-group serves~~

## Third layer
~~Phase 7: Store the whole group Heifa scores (Male and Female)~~ \
~~Phase 8: Store the total HEIFA scores (Male and Female)~~

In [None]:
# Export the Intake24 servings and HEIFA scores.
# NOTE(review): the last argument is presumably the output file name (without
# extension) -- confirm against create_heifa_csv.
create_heifa_csv(
    heifa_scores_dict, food_composition_dict, 
    user_daily_intake, user_heifa_scores,
    'intake24_breakdown'
)

In [None]:
# Export the La Trobe servings and HEIFA scores.
# NOTE(review): the last argument is presumably the output file name (without
# extension); the name says "intake24" although the data passed in is the
# La Trobe set -- confirm this is intended.
create_heifa_csv(
    heifa_scores_dict, food_composition_dict, 
    latrobe_user_daily_intake, latrobe_user_heifa_scores,
    'cleaned_intake24_breakdown'
)