# In [14]:
import os
import pandas as pd
import numpy as np

# Set up the directories.
# The working directory is taken to sit two levels below the project root:
# <project_dir>/<code_dir>/<current_dir>.
current_dir = os.getcwd()
code_dir = os.path.dirname(current_dir)
project_dir = os.path.dirname(code_dir)

# Every data folder lives directly under <project_dir>/data.
data_dir = os.path.join(project_dir, 'data')
raw_data_dir, ad_hoc_data_dir, processed_data_dir = (
    os.path.join(data_dir, folder)
    for folder in ('raw_data', 'ad_hoc_data', 'processed_data')
)

# LOAD THE DATA

# Food items match table. The fifth spreadsheet column (position 4) becomes
# the index; it is referred to as ENHANCE_ID further down. The 'Food group'
# and 'fbs_item' columns are discarded.
file_path = os.path.join(ad_hoc_data_dir, 'food_items_match.xlsx')
food_items_match = (
    pd.read_excel(file_path, index_col=4)
    .drop(columns=['Food group', 'fbs_item'])
)

# Food items environmental data. The second spreadsheet column (position 1)
# becomes the index -- presumably ENHANCE_ID as well, since both tables are
# merged on their indexes; confirm against the workbook. Only the four
# columns listed below are kept.
file_path = os.path.join(raw_data_dir, 'Cambodia list 18102023.xlsx')
cambodia_list_env = pd.read_excel(
    file_path,
    sheet_name='1.FNG list + EI ',  # the trailing space is part of this literal
    index_col=1,
).loc[:, [
    'kg_co2e_total',
    'l_blue_green_wf',
    'kg_co2e_total_extrapplied',
    'l_blue_green_wf_extrapplied',
]]

# PROCESS THE DATA

# Left-merge on the indexes: every row of food_items_match is kept, and the
# environmental columns are attached wherever the index values match.
food_items_environmental = food_items_match.merge(
    cambodia_list_env,
    how='left',
    left_index=True,
    right_index=True,
)
# Turn the index back into a regular column.
food_items_environmental = food_items_environmental.reset_index()
# Put the four identifying columns first, in this order; the remaining
# columns follow in their existing relative order.
id_columns = ['ENHANCE_ID', 'food_id', 'group_id', 'Food names']
other_columns = [
    name for name in food_items_environmental.columns if name not in id_columns
]
food_items_environmental = food_items_environmental[id_columns + other_columns]

# SAVE THE DATA
# Write the merged table to a single-sheet workbook, then size every column
# to its longest rendered cell value or its header, whichever is longer.
file_path = os.path.join(processed_data_dir, 'food_environmental.xlsx')

with pd.ExcelWriter(file_path, engine='xlsxwriter') as writer:
    food_items_environmental.to_excel(writer, sheet_name='food_environmental', index=False)
    worksheet = writer.sheets['food_environmental']
    for idx, col in enumerate(food_items_environmental.columns):
        # Positional access: with a duplicated label, [col] would return a
        # DataFrame instead of a single column.
        cell_lengths = food_items_environmental.iloc[:, idx].astype(str).map(len)
        # Column labels are not guaranteed to be strings (e.g. numeric headers).
        max_len = len(str(col))
        # An empty column has no maximum (NaN); fall back to the header width
        # so a NaN width is never handed to xlsxwriter.
        if not cell_lengths.empty:
            max_len = max(int(cell_lengths.max()), max_len)
        # Excel's maximum column width is 255 characters.
        worksheet.set_column(idx, idx, min(max_len, 255))