In [118]:
import pandas as pd
import numpy as np
from HEI_2015_Scoring import hei2015

In [119]:
# Load the Spain FFQ workbook; its first column becomes the row index.
ffq_path = '/projects/thdmi/metadata/nutrient_ffq_data/Spain FFQ wt corrected gr of food.xlsx'
spain_data = pd.read_excel(ffq_path, index_col=0)

# Discard the 'Columna1' column (returns a new frame instead of editing in place).
spain_data = spain_data.drop(columns=['Columna1'])

# Prefix every participant ID with '10317.' so the index can be intersected
# with the metadata index below, and trim stray whitespace from the food names.
spain_data.index = ['10317.' + participant_id for participant_id in spain_data.index]
spain_data.columns = [food_name.strip() for food_name in spain_data.columns]

# Load the sample metadata (tab-separated, first column as the index).
metadata_path = '/home/lakhatib/thdmi/pangenome_filtered/data/thdmi_metadata_valid_covariates.tsv'
md = pd.read_csv(metadata_path, sep='\t', index_col=0, low_memory=False)

# Keep only the participants that are also present in the metadata.
shared_ids = spain_data.index.intersection(md.index)
spain_data = spain_data.loc[shared_ids]

In [120]:
# Load the per-participant nutrient workbook; its first column becomes the row index.
nutrients_path = '/projects/thdmi/metadata/nutrient_ffq_data/spain_nutrients.xlsx'
spain_nutrients = pd.read_excel(nutrients_path, index_col=0)

# Apply the same '10317.' ID prefix used for the metadata index and strip
# surrounding whitespace from the nutrient column names.
spain_nutrients.index = ['10317.' + participant_id for participant_id in spain_nutrients.index]
spain_nutrients.columns = [nutrient_name.strip() for nutrient_name in spain_nutrients.columns]

# Restrict to participants that also appear in the metadata table `md`.
nutrient_ids = spain_nutrients.index.intersection(md.index)
spain_nutrients = spain_nutrients.loc[nutrient_ids]

In [121]:
# Load and clean the MPED conversions (third sheet of the HEI2015 Spain workbook).
mped_workbook_path = '/home/lakhatib/thdmi/pangenome_filtered/data/HEI2015- Spain.xlsx'
MPED_conversions = pd.read_excel(mped_workbook_path, sheet_name=2)

# Trim whitespace from the header names before looking any of them up.
MPED_conversions.columns = list(map(str.strip, MPED_conversions.columns))

# Index the table by FFQ food item and keep only columns whose name contains 'MPED_'.
mped_by_food = MPED_conversions.set_index('Food item (FFQ Spain)')
mped_mapping = mped_by_food.filter(like='MPED_')

# Trim whitespace from the food names so they can match the FFQ column names.
mped_mapping.index = list(map(str.strip, mped_mapping.index))

In [122]:
# Rescale every conversion factor from a per-100-gram basis to a per-gram basis.
# Caution: mped_mapping is rebound from itself, so running this cell a second
# time without re-running the loading cell divides by 100 again.
mped_mapping = mped_mapping.div(100)

In [123]:
# Foods present both as FFQ columns and as rows of the MPED mapping
# (ordering follows the FFQ columns, since they are the left operand).
common_foods = spain_data.columns.intersection(mped_mapping.index)

# Subset both tables to those shared foods, in the same order.
spain_data_filtered = spain_data.loc[:, common_foods]
mped_mapping_filtered = mped_mapping.loc[common_foods, :]

In [124]:
# Ensure numeric values; anything that cannot be parsed becomes NaN.
spain_data_filtered = spain_data_filtered.apply(pd.to_numeric, errors='coerce')
mped_mapping_filtered = mped_mapping_filtered.apply(pd.to_numeric, errors='coerce')

# Treat missing/unparseable intakes as zero BEFORE multiplying. NaN propagates
# through a dot product, so a single missing food would otherwise turn every
# MPED component of that participant into NaN.
# NOTE(review): NaN conversion factors in mped_mapping_filtered are left as-is
# and will still propagate — confirm whether they should also be zero-filled.
spain_data_filtered = spain_data_filtered.fillna(0)

# The product is only meaningful if both tables list the same foods in the same order.
assert spain_data_filtered.columns.equals(mped_mapping_filtered.index), (
    "FFQ columns and MPED mapping rows are not aligned"
)

# Convert dataframes to NumPy arrays
mped_mapping_array = mped_mapping_filtered.values  # Shape: (n_foods, n_components)
spain_data_array = spain_data_filtered.values  # Shape: (n_participants, n_foods)

# Matrix product: per-food intake x per-gram conversion factors, summed over foods.
mped_scores = np.dot(spain_data_array, mped_mapping_array)

# Create a DataFrame with participants as rows and MPED components as columns
mped_df = pd.DataFrame(
    mped_scores,
    index=spain_data_filtered.index,
    columns=mped_mapping_filtered.columns,
)

In [125]:
# Swap missing intake values for zero. mped_df was already built from this
# table in the previous cell, so this only affects later uses of spain_data_filtered.
spain_data_filtered = spain_data_filtered.fillna(0)

In [126]:
# Copy every nutrient column into mped_df, one column at a time.
# Assignment aligns on the participant index, so participants that are
# missing from spain_nutrients receive NaN for these columns.
for nutrient_name in spain_nutrients.columns:
    mped_df[nutrient_name] = spain_nutrients[nutrient_name]

In [127]:
# 'monopoly' = monounsaturated + polyunsaturated fats.
mufa_grams = mped_df['Monounsaturated fats (MUFA) (g)']
pufa_grams = mped_df['Polyunsaturated fats (PUFA) (g)']
mped_df['monopoly'] = mufa_grams + pufa_grams

In [128]:
# Add the legume amount to both the total-vegetable and the dark-green-vegetable components.
legume_amount = mped_df['MPED_V_LEGUMES']
mped_df['VTOTALLEG'] = mped_df['MPED_V_TOTAL'] + legume_amount
mped_df['VDRKGRLEG'] = mped_df['MPED_V_DRKGR'] + legume_amount

In [129]:
# Legumes expressed as protein-food ounce equivalents: cup equivalents times 4.
mped_df['protlegumes'] = mped_df['MPED_V_LEGUMES'].mul(4)

In [130]:
# Total protein foods including the legume contribution.
legume_protein = mped_df['protlegumes']
mped_df['PFALLPROTLEG'] = mped_df['MPED_PF_TOTAL'] + legume_protein

# Seafood + plant proteins: both seafood columns, soy, nuts/seeds and legumes,
# accumulated left to right in the listed order.
seafood_plant_total = mped_df['MPED_PF_SEAFD_HI']
for protein_column in ('MPED_PF_SEAFD_LOW', 'MPED_PF_SOY', 'MPED_PF_NUTSDS', 'protlegumes'):
    seafood_plant_total = seafood_plant_total + mped_df[protein_column]
mped_df['PFSEAPLANTLEG'] = seafood_plant_total

In [131]:
# Calculate HEI with hei2015.
# The columns below are handed to hei2015 positionally, after mped_df itself,
# in exactly this order — do not reorder the list.
hei_input_columns = [
    'Energy(Kcal)',
    'VTOTALLEG',
    'VDRKGRLEG',
    'MPED_F_TOTAL',
    'MPED_F_CITMLB',
    'MPED_G_WHOLE',
    'MPED_D_TOTAL',
    'PFALLPROTLEG',
    'PFSEAPLANTLEG',
    'monopoly',
    'Saturated fats (SFA) (g)',
    'Sodium (mg)',
    'MPED_G_REFINED',
    'MPED_ADD_SUGARS',
]
hei_mped_df = hei2015(mped_df, *[mped_df[column_name] for column_name in hei_input_columns])

In [132]:
# Keep only the columns whose name contains 'HEI'.
# (assumes hei2015 returns a DataFrame, for which filter() selects columns by default — TODO confirm)
hei_df = hei_mped_df.filter(like='HEI', axis='columns')

In [136]:
# Exclude participant 10317.DM2898999 from the final table.
# NOTE(review): the reason for this exclusion is not recorded here — please document it.
# errors='ignore' keeps the cell re-runnable: hei_df is rebound from itself, so a
# second run would otherwise raise KeyError because the row is already gone.
hei_df = hei_df.drop(index=['10317.DM2898999'], errors='ignore')

In [139]:
# Write the HEI table to disk as CSV (index included).
hei_output_path = '/home/lakhatib/thdmi/pangenome_filtered/data/spain_HEI_df.csv'
hei_df.to_csv(hei_output_path)