In [11]:
import os
import pandas as pd
from copy import copy

# Resolve the project folders relative to the current working directory.
# The notebook is expected to run from the code folder, a sibling of 'data'.
code_dir = os.getcwd()
project_dir = os.path.dirname(code_dir)
data_dir = os.path.join(project_dir, 'data')

# The three data sub-folders all live directly under data_dir
raw_data_dir, ad_hoc_data_dir, processed_data_dir = (
    os.path.join(data_dir, sub_folder)
    for sub_folder in ('raw_data', 'ad_hoc_data', 'processed_data')
)

# Load the data
# Ad-hoc food items data (food matches between different databases).
# food_id is a copy of CSES_food_item; the 'Comments 1' and 'Comments 2' columns are dropped.
food_items = (
    pd.read_excel(os.path.join(ad_hoc_data_dir, 'food_items.xlsx'))
    .assign(food_id=lambda items: items['CSES_food_item'])
    .drop(['Comments 1', 'Comments 2'], axis=1)
)

# Ad-hoc nutrient data (nutrient matches between different databases and information about lower and upper limits)
nutrient_match = pd.read_excel(os.path.join(ad_hoc_data_dir, 'nutrient_match.xlsx'))
nutrient_list = nutrient_match['nutrient'].to_list()

# Food composition data (Excel 6. Food composition)
food_composition_raw = pd.read_excel(
    os.path.join(raw_data_dir, '6.food_composition.xlsx'),
    sheet_name='6.food_composition',
    header=0,
)
# ENHANCE_IDs of the food items, used later to filter the composition rows
food_items_enhance_id = food_items['ENHANCE_ID'].to_list()

# ---------------------------------------------------------------------------------------------------------------------
# Build food_items_nutritional dataframe
# Rows: food items (food_id) from food_items
# Columns: food_id, ENHANCE_ID, EDIBLE, nutrients (from nutrient_match)
# Stored: food_items_nutritional.xlsx in processed_data_dir
# ---------------------------------------------------------------------------------------------------------------------

# Keep only the rows whose ENHANCE_ID is present in food_items, and only the ENHANCE_ID, EDIBLE and
# nutrient columns (named by nutrient_match['nutrient_name_food_composition']).
# A single .loc call selects rows and columns together instead of chained indexing.
food_composition_filtered = food_composition_raw.loc[
    food_composition_raw['ENHANCE_ID'].isin(food_items_enhance_id),
    ['ENHANCE_ID', 'EDIBLE'] + nutrient_match['nutrient_name_food_composition'].tolist(),
]

# Relabel positionally: the nutrient columns were selected in nutrient_match row order,
# so nutrient_list (built from the same rows) lines up one-to-one with them.
food_composition_filtered.columns = ['ENHANCE_ID', 'EDIBLE'] + nutrient_list

# Attach food_id; the left join keeps every food item even when no composition row matches.
# NOTE(review): if ENHANCE_ID is repeated in the composition sheet this join duplicates food rows — confirm uniqueness.
food_items_nutritional = food_items.loc[:, ['food_id', 'ENHANCE_ID']].merge(
    food_composition_filtered, on='ENHANCE_ID', how='left'
)

# Make sure the output folder exists so the save does not fail on a fresh checkout
os.makedirs(processed_data_dir, exist_ok=True)
# Save the food_items_nutritional dataframe to an Excel file
food_items_nutritional.to_excel(os.path.join(processed_data_dir, 'food_items_nutritional.xlsx'), index=False)

# ---------------------------------------------------------------------------------------------------------------------
# Build food_items_environmental dataframe
# Rows: food items (food_id) from food_items
# Columns: 
# Stored: 
# ---------------------------------------------------------------------------------------------------------------------


ValueError: Worksheet named '7.food_environmental_impact' not found

In [35]:
# Load the food environmental data from the '2.CSES list + EI' sheet of the Cambodia list workbook.
# NOTE(review): the earlier label "Excel 7. Food environmental impact" does not match this file name — confirm the intended source.
# NOTE(review): the recorded output reports a missing worksheet under a differently spaced name — confirm the sheet name.
food_environmental_raw = pd.read_excel(
    os.path.join(raw_data_dir, 'Cambodia list 18102023.xlsx'),
    sheet_name='2.CSES list + EI',
    header=0,
)

ValueError: Worksheet named '2. CSES list + EI' not found

In [26]:
# Display food_environmental_filtered as the cell output.
# NOTE(review): this name is not defined in any cell shown here — confirm the defining cell runs before this one.
food_environmental_filtered

Unnamed: 0,country_code,country,fbs_item_code,fbs_item,footprint_type,footprint/kg,method,extraction_factor
120,115,Cambodia,2511,Wheat and products,l_blue_green_wf,1805.034066,weighted_avg_over_coo,0.996781
125,115,Cambodia,2533,Sweet potatoes,l_blue_green_wf,1376.360504,weighted_avg_over_coo,1.000000
126,115,Cambodia,2534,"Roots, Other",l_blue_green_wf,285.949441,weighted_avg_over_coo,1.000000
127,115,Cambodia,2536,Sugar cane,l_blue_green_wf,724.636496,weighted_avg_over_coo,1.000000
130,115,Cambodia,2546,Beans,l_blue_green_wf,4635.509833,weighted_avg_over_coo,1.000000
...,...,...,...,...,...,...,...,...
355,115,Cambodia,2733,Pigmeat,l_green_wf,3396.928022,weighted_avg_over_coo,1.000000
356,115,Cambodia,2734,Poultry Meat,l_green_wf,5382.239604,weighted_avg_over_coo,1.000000
360,115,Cambodia,2744,Eggs,l_green_wf,3327.720548,weighted_avg_over_coo,1.000000
361,115,Cambodia,2805,Rice (Milled Equivalent),l_green_wf,3490.363616,weighted_avg_over_coo,0.996781
