In [30]:
import os
import pandas as pd
from copy import copy

# Resolve the project layout from the current working directory:
# the working directory is taken as the code folder, its parent as the project root,
# and all data folders live under <project root>/data.
code_dir = os.getcwd()
project_dir = os.path.dirname(code_dir)
data_dir = os.path.join(project_dir, 'data')

# The three data sub-folders are siblings under data_dir, so build them in one pass.
raw_data_dir, ad_hoc_data_dir, processed_data_dir = (
    os.path.join(data_dir, stage_folder)
    for stage_folder in ('raw_data', 'ad_hoc_data', 'processed_data')
)

# Load the data
# Ad-hoc food items table (food matches between different databases).
# food_id is a copy of the CSES_food_item column; the 'Comments 1' and 'Comments 2'
# columns are dropped right after loading.
food_items = (
    pd.read_excel(os.path.join(ad_hoc_data_dir, 'food_items.xlsx'))
    .assign(food_id=lambda items: items['CSES_food_item'])
    .drop(columns=['Comments 1', 'Comments 2'])
)
# ENHANCE_IDs of the food items, used to filter the food composition table.
food_items_enhance_id = food_items['ENHANCE_ID'].tolist()

# Ad-hoc nutrient table (nutrient matches between different databases and information
# about lower and upper limits).
nutrient_match = pd.read_excel(os.path.join(ad_hoc_data_dir, 'nutrient_match.xlsx'))
nutrient_list = nutrient_match['nutrient'].tolist()

# Food composition data (Excel 6. Food composition)
food_composition_raw = pd.read_excel(
    os.path.join(raw_data_dir, '6.food_composition.xlsx'),
    sheet_name='6.food_composition',
    header=0,
)

# ---------------------------------------------------------------------------------------------------------------------
# Build food_items_nutritional dataframe
# Columns: food_id, ENHANCE_ID, EDIBLE, nutrients (from nutrient_match)
# Stored: food_items_nutritional.xlsx in processed_data_dir
# ---------------------------------------------------------------------------------------------------------------------

# Keep only the food composition rows whose ENHANCE_ID appears in food_items, and only the
# ENHANCE_ID, EDIBLE and nutrient columns named in nutrient_match['nutrient_name_food_composition'].
# Rows and columns are selected in a single .loc call (no chained indexing); .copy() yields an
# independent frame so relabelling its columns below cannot affect food_composition_raw.
food_composition_filtered = food_composition_raw.loc[
    food_composition_raw['ENHANCE_ID'].isin(food_items_enhance_id),
    ['ENHANCE_ID', 'EDIBLE'] + nutrient_match['nutrient_name_food_composition'].tolist(),
].copy()

# Replace the 'nutrient_name_food_composition' labels with the 'nutrient' names from nutrient_match.
# The relabelling is positional: it relies on both lists coming from the same nutrient_match rows,
# in the same order as the column selection above.
food_composition_filtered.columns = ['ENHANCE_ID', 'EDIBLE'] + nutrient_list

# Attach food_id to the nutrient data. The left join keeps every food_items row, so a food item
# whose ENHANCE_ID has no match in the food composition table ends up with NaN nutrient values.
food_items_nutritional = food_items.loc[:, ['food_id', 'ENHANCE_ID']].merge(
    food_composition_filtered, on='ENHANCE_ID', how='left'
)

# Save the food_items_nutritional dataframe to an Excel file.
# Create the output folder first: to_excel fails when the target directory does not exist
# (e.g. on a fresh checkout where processed_data has not been created yet).
os.makedirs(processed_data_dir, exist_ok=True)
food_items_nutritional.to_excel(os.path.join(processed_data_dir, 'food_items_nutritional.xlsx'), index=False)

# ---------------------------------------------------------------------------------------------------------------------
# Build 
# Columns: 
# Stored: 
# ---------------------------------------------------------------------------------------------------------------------


Unnamed: 0,food_id,ENHANCE_ID,EDIBLE,Energy (kcal),Protein (g),Fat (g),Ca (mg),FolicAcid (Âug DFE),Iron Absorbed (mg),Iron (mg),Magnesium (mg),Niacin (mg NE),PantothenicAcid (mg),Vitamin A (Âug RE),Vitamin B1 (mg),Vitamin B2 (mg),Vitamin B6 (mg),Vitamin B12(Âug),Vitamin C (mg),Zinc (mg)
0,Eggs,13726,0.87,139.0,14.489,9.0,,50.0,0.385,1.54,20.88,3.4,1.396455,165.246,0.184,0.4,0.149231,2.587273,,2.35525
1,Mudfish,14121,0.77,101.0,17.7425,3.3,103.82,,0.260313,1.04125,42.65,8.026844,0.602328,,0.04,0.05,0.241643,3.415777,0.0,0.305714
2,Catfish,103879,0.52,117.0,16.33,5.7,38.0,8.0,0.225,0.9,30.0,1.4,0.602328,25.0,0.12,0.06,0.29,2.3,0.0,0.6
3,Shrimp or lobster,54233,1.0,79.0,16.7,0.9,31.0,3.0,0.625,2.5,27.0,5.983333,0.8,51.0,0.02,0.02,0.1,1.19,2.0,1.3
4,Processed or preserved fish,53801,1.0,105.0,18.5,2.9,12.0,10.0,0.1,0.4,28.0,5.85,0.91,15.0,0.23,0.07,0.11,2.9,1.0,0.6
5,Banana,44288,0.69,92.0,1.0,0.5,6.0,19.0,0.015,0.3,29.0,0.7,0.26,4.0,0.05,0.1,0.58,0.0,9.0,0.2
6,Mango,44493,0.7,65.0,0.5,0.3,10.0,14.0,0.005,0.1,9.0,0.733333,0.16,200.0,0.06,0.06,0.13,0.0,36.0,0.0
7,Papaya,14540,0.75,33.0,0.61,0.146667,28.682,,0.017435,0.348695,10.31,0.488,0.178481,60.105833,0.08,0.03,,0.0,61.8,0.174
8,"Rice, quality 1",104945,1.0,353.0,6.141215,0.462644,10.838122,20.0,0.03651,0.730194,35.0,0.423106,1.080266,0.0,0.072783,0.027146,0.125,0.0,0.0,1.1
9,Rice noodles,94879,1.0,108.0,1.79,0.2,4.0,1.0,0.007,0.14,3.0,0.072,0.011,0.0,0.018,0.004,0.006,0.0,0.0,0.25


In [19]:
# Show the ENHANCE_ID column of food_items.
display(food_items['ENHANCE_ID'])

food_id
Eggs                                              13726
Mudfish                                           14121
Catfish                                          103879
Shrimp or lobster                                 54233
Processed or preserved fish                       53801
Banana                                            44288
Mango                                             44493
Papaya                                            14540
Rice, quality 1                                  104945
Rice noodles                                      94879
Khmer noodles                                     94850
Nuts and seeds                                    55519
Peas, beans, soybean, bean sprouts                15557
Peas, beans, soybean, bean sprouts                65322
Peas, beans, soybean, bean sprouts                96689
Pork                                              96004
Beef                                              15650
Duck                                    

In [15]:
# Preview the first five rows of the raw food composition table.
display(food_composition_raw.head(n=5))

Unnamed: 0,ENHANCE_ID,original_food_Code,FCT_ID,FCT_name,food_name_english,food_name_other,food_name_scientific,food_name_generic,FG1,FG2,...,FECOE,FEABS,FECOECAT,P,K,MN,MG,SE,SODIUM,ZN
0,15227,5227,1,Bangladesh,Salt,Lobon,,Salt,74,79.0,...,0.05,0.0,3.0,0.0,0.0,,0.0,,39340.0,0.0
1,14524,4524,1,Bangladesh,"Orange juice, raw, unsweetened",Komolar ross,Citrus reticulata,Orange,63,64.0,...,0.05,0.035,3.0,13.0,150.0,,8.0,,10.0,0.05
2,16819,6819,1,Bangladesh,"Gourd, ash, raw",Chalkumra,Benincasa hispida,Gourd,58,62.0,...,0.05,0.04,3.0,14.0,139.0,,17.423265,,38.78,0.1
3,14515,4515,1,Bangladesh,"Muskmelon, bangee, light orange flesh, ripe","Bangee, paka",Cucumis melo,Muskmelon,63,64.0,...,0.05,0.0,3.0,35.0,130.0,,20.0,,7.0,0.06
4,17246,7246,1,Bangladesh,"Tomato, red, ripe, raw",Paka tomato,Lycopersicon esculentum,Tomato,63,65.0,...,0.05,0.009861,3.0,24.0,156.31,,7.418,,6.797,0.41
