# In [22]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Resolve the project folders relative to the current working directory:
# the working directory is expected to sit two levels below the project root.
current_dir = os.getcwd()
code_dir = os.path.dirname(current_dir)
project_dir = os.path.dirname(code_dir)

# All datasets live under <project>/data, split into three sub-folders.
data_dir = os.path.join(project_dir, 'data')
raw_data_dir, ad_hoc_data_dir, processed_data_dir = (
    os.path.join(data_dir, sub_folder)
    for sub_folder in ('raw_data', 'ad_hoc_data', 'processed_data')
)

# LOAD THE DATA
# Each workbook is read from a single named sheet; the first row holds the
# column headers and `index_col` selects the column used as row labels.

# Requirements per individual (by age, sex and activity level)
file_path = os.path.join(raw_data_dir, '8.individual_requirements_short.xlsx')
individual_requirements = pd.read_excel(
    file_path,
    sheet_name='individual_requirements',
    index_col=1,
    header=0,
)

# Additional requirements for pregnant and lactating women, per pregnancy / lactation stage
file_path = os.path.join(raw_data_dir, '9.individual_requirements_plw_short.xlsx')
individual_requirements_plw = pd.read_excel(
    file_path,
    sheet_name='individual_requirements_plw',
    index_col=1,
    header=0,
)

# Nutrient matching table used to build the processed nutritional_requirements dataset
file_path = os.path.join(ad_hoc_data_dir, 'nutrient_match.xlsx')
nutrient_match = pd.read_excel(
    file_path,
    sheet_name='nutrient_match',
    index_col=0,
    header=0,
)

# Population group matching table (which individuals are averaged into each group)
file_path = os.path.join(ad_hoc_data_dir, 'population_groups_match.xlsx')
population_groups_match = pd.read_excel(
    file_path,
    sheet_name='population_groups_match',
    index_col=1,
    header=0,
)

# PREPROCESS THE DATA
# The same extraction is applied to the individual requirements and to the
# additional requirements for pregnant / lactating women, so it lives in
# shared helpers instead of two copies of the same loop.

# Exact requirements are widened into a [value - 2.5 %, value + 2.5 %] band.
_EXACT_TOLERANCE = 0.025
# Fat bounds are given as a percentage of total energy: divide by 100 for the
# percentage and by 9 to convert to grams (presumably kcal per gram of fat —
# TODO confirm).
_FAT_PERCENT_TO_GRAMS = 100 * 9


def _flag_is_set(flag):
    """Return True when a flag cell of a matching table is set.

    pandas reads empty Excel cells as NaN, and NaN is truthy in Python, so a
    bare ``if flag:`` would treat a blank cell as set. NaN/None count as
    "not set" here.
    """
    return bool(pd.notna(flag) and flag)


def _requirement_columns(nutrient_match, nutrient):
    """Find which requirement columns provide the bounds of one nutrient.

    Returns a tuple ``(exact_col, lower_col, upper_col)``. When the nutrient is
    flagged 'Exact', only ``exact_col`` is filled (from 'Exact_requirement').
    Otherwise ``lower_col`` / ``upper_col`` are taken from 'Lower_limits' /
    'Upper_limits' when the 'Lower' / 'Upper' flags are set; ``None`` marks a
    bound that does not exist.
    """
    if _flag_is_set(nutrient_match.loc[nutrient, 'Exact']):
        return nutrient_match.loc[nutrient, 'Exact_requirement'], None, None
    lower_col = None
    if _flag_is_set(nutrient_match.loc[nutrient, 'Lower']):
        lower_col = nutrient_match.loc[nutrient, 'Lower_limits']
    upper_col = None
    if _flag_is_set(nutrient_match.loc[nutrient, 'Upper']):
        upper_col = nutrient_match.loc[nutrient, 'Upper_limits']
    return None, lower_col, upper_col


def _extract_requirements(requirements, nutrient_match):
    """Extract the lower/upper bound of every nutrient for every row of `requirements`.

    Args:
        requirements: table with one row per individual (or PLW stage) and one
            column per requirement named in `nutrient_match`.
        nutrient_match: table indexed by nutrient that says which requirement
            columns hold its exact value or its lower/upper limits.

    Returns:
        Nested dict ``{individual: {nutrient: {'RNI': lower, 'UL': upper}}}``;
        a bound that does not exist is NaN.
    """
    # The column lookup depends only on the nutrient, so do it once up front
    # rather than once per individual.
    bound_columns = {
        nutrient: _requirement_columns(nutrient_match, nutrient)
        for nutrient in nutrient_match.index.to_list()
    }
    # The energy column is only needed (and only looked up) when fat is present.
    energy_col = None
    if 'Fat (g)' in bound_columns:
        energy_col = nutrient_match.loc['Energy (kcal)', 'Exact_requirement']

    requirements_dic = {}
    for individual in requirements.index.to_list():
        one_individual_dic = {}
        for nutrient, (exact_col, lower_col, upper_col) in bound_columns.items():
            if exact_col is not None:
                # Nutrients with exact values: build a tolerance band around them
                value = requirements.loc[individual, exact_col]
                lower_value = value * (1 - _EXACT_TOLERANCE)
                upper_value = value * (1 + _EXACT_TOLERANCE)
            else:
                # Nutrients with ranges: a missing limit stays NaN
                lower_value = np.nan if lower_col is None else requirements.loc[individual, lower_col]
                upper_value = np.nan if upper_col is None else requirements.loc[individual, upper_col]
            # Fat is computed as a percentage of the row's total energy
            if nutrient == 'Fat (g)':
                energy = requirements.loc[individual, energy_col]
                lower_value = lower_value * energy / _FAT_PERCENT_TO_GRAMS
                upper_value = upper_value * energy / _FAT_PERCENT_TO_GRAMS
            one_individual_dic[nutrient] = {'RNI': lower_value, 'UL': upper_value}
        requirements_dic[individual] = one_individual_dic
    return requirements_dic


def _requirements_to_frame(requirements_dic):
    """Flatten the nested requirements dict into a DataFrame.

    Rows are the outer keys; columns are a (nutrient, 'RNI' | 'UL') MultiIndex.
    """
    frame = pd.DataFrame(
        [
            {(nutrient, limit): value
             for nutrient, limits in nutrient_limits.items()
             for limit, value in limits.items()}
            for nutrient_limits in requirements_dic.values()
        ],
        index=list(requirements_dic.keys()),
    )
    frame.columns = pd.MultiIndex.from_tuples(frame.columns)
    return frame


nutrients = nutrient_match.index.to_list()
individual_population_groups = individual_requirements.index.to_list()

# Nutritional requirements of all individuals (dict and dataframe forms)
all_individual_nutritional_requirements_dic = _extract_requirements(individual_requirements, nutrient_match)
all_individual_nutritional_requirements_df = _requirements_to_frame(all_individual_nutritional_requirements_dic)

# Additional nutritional requirements for pregnant and lactating (dict and dataframe forms)
plw_groups = individual_requirements_plw.index.to_list()
plw_nutritional_requirements_dic = _extract_requirements(individual_requirements_plw, nutrient_match)
plw_nutritional_requirements_df = _requirements_to_frame(plw_nutritional_requirements_dic)

# Average the individual requirements for each population group
population_groups = population_groups_match.index.to_list()


def _group_flag_is_set(population_group, column):
    """Return True when `column` is set for `population_group` in population_groups_match.

    pandas reads empty Excel cells as NaN, and NaN is truthy in Python, so a
    blank cell must be rejected explicitly or it would count as set.
    """
    flag = population_groups_match.loc[population_group, column]
    return bool(pd.notna(flag) and flag)


# One single-row frame per population group; they are concatenated once after
# the loop instead of growing a DataFrame (seeded with an empty one) on every
# iteration.
group_frames = []
for population_group in population_groups:
    # 'From' / 'To' are the labels of the first and last individual of the group;
    # the label slice below includes both ends.
    first_individual = population_groups_match.loc[population_group, 'From']
    last_individual = population_groups_match.loc[population_group, 'To']
    group_requirements = (
        all_individual_nutritional_requirements_df
        .loc[first_individual:last_individual]
        .mean(axis=0)
    )
    # Pregnant or lactating groups get the matching additional requirements on top.
    # NOTE(review): plain `+` propagates NaN, so a limit that is missing in only
    # one of the two tables ends up missing in the result — confirm this is intended.
    if _group_flag_is_set(population_group, 'Pregnant'):
        additional_requirements = plw_nutritional_requirements_df.loc['Pregnancy, trimester average']
        group_requirements = group_requirements + additional_requirements
    elif _group_flag_is_set(population_group, 'Lactancy'):
        additional_requirements = plw_nutritional_requirements_df.loc['Lactation, semester average']
        group_requirements = group_requirements + additional_requirements
    # Turn the averaged Series into a one-row frame labelled with the group name
    mean_nutritional_requirements = group_requirements.to_frame(name=population_group).T
    group_frames.append(mean_nutritional_requirements)

if group_frames:
    nutritional_requirements_df = pd.concat(group_frames, axis=0, ignore_index=False)
else:
    # No population groups: keep the empty result the loop used to start from
    nutritional_requirements_df = pd.DataFrame()

# SAVE THE NUTRITIONAL REQUIREMENTS INTO AN EXCEL FILE
# One workbook, one sheet per dataframe; row labels are written as well.
file_path = os.path.join(processed_data_dir, 'nutritional_requirements.xlsx')
sheets = (
    ('population_groups', nutritional_requirements_df),
    ('all_individuals', all_individual_nutritional_requirements_df),
    ('plw', plw_nutritional_requirements_df),
)
with pd.ExcelWriter(file_path) as writer:
    for sheet_name, frame in sheets:
        frame.to_excel(writer, sheet_name=sheet_name, index=True)
