### MIND Diet Code

In [None]:
import pandas as pd

# Read the raw survey extract into memory, then echo its column names so the
# variable codes referenced further down can be checked against the file.
raw_csv_path = "/workspaces/enterntainment720/nutrition/data/raw/hcns2013_raw.csv"
df = pd.read_csv(raw_csv_path)
column_names = df.columns.tolist()
print(column_names)

# Lookup table of every raw column used by the MIND calculation, mapped to a
# human-readable label. It is assembled one section at a time; insertion order
# is the order in which the columns are selected from the raw frame later on.
# NOTE(review): the labels are taken as given here — confirm each variable
# code against the survey codebook.
mind_dictionary = {}

# Identifiers & demographics
mind_dictionary.update({
    'HHID': 'Household Identifier',
    'PN': 'Person Number within Household',
    'QNR13': 'Questionnaire ID',
    'AGE': 'Age of Respondent',
    'GENDER': 'Gender of Respondent',
    'RACE': 'Race of Respondent',
    'EDYRS': 'Years of Education',
    'BMI': 'Body Mass Index',
    'HCNSWGTR': 'Survey Weight',
    'HCNSWGTR_NT': 'Non-Telephone Survey Weight',
    'HCNSVERSION': 'Survey Version',
})

# Brain-healthy food groups
mind_dictionary.update({
    'C3A_FF_13': 'Leafy green vegetables (spinach, kale, etc.)',
    'C3B_FF_13': 'Other vegetables',
    'C4A_FF_13': 'Berries - strawberries',
    'C4B_FF_13': 'Berries - blueberries',
    'C5A_FF_13': 'Nuts - peanuts',
    'C5B_FF_13': 'Nuts - almonds',
    'C5C_FF_13': 'Nuts - walnuts',
    'C5D_FF_13': 'Nuts - other',
    'C6A_FF_13': 'Whole grains - brown rice',
    'C6B_FF_13': 'Whole grains - whole wheat bread',
    'C6C_FF_13': 'Whole grains - oatmeal',
    'C6D_FF_13': 'Whole grains - cereal',
    'C7A_FF_13': 'Fish - non-fried',
    'C7B_FF_13': 'Fish - other',
    'C8A_FF_13': 'Beans - pinto',
    'C8B_FF_13': 'Beans - black',
    'C8C_FF_13': 'Beans - lentils',
    'C7E_FF_13': 'Poultry - chicken',
    'C7F_FF_13': 'Poultry - turkey',
    'C9A_FF_13': 'Olive oil use (primary oil)',
    'C9B_FF_13': 'Wine (red or white)',
})

# Foods to limit
mind_dictionary.update({
    'C9C_FF_13': 'Red meat - beef',
    'C9D_FF_13': 'Red meat - pork',
    'C9E_FF_13': 'Red meat - processed',
    'C9F_FF_13': 'Butter or margarine',
    'C9G_FF_13': 'Cheese - regular',
    'C9H_FF_13': 'Cheese - processed',
    'C9I_FF_13': 'Pastries - cake, cookies',
    'C9J_FF_13': 'Candy or sweets',
    'C9K_FF_13': 'Other sweets - ice cream',
    'C9L_FF_13': 'Fried food - french fries',
    'C9M_FF_13': 'Fast food - burgers, takeout',
})

# Optional extra supporting items
mind_dictionary.update({
    'ALCO_SUM': 'Total alcohol intake (g/day)',
    'CALOR_SUM': 'Total daily caloric intake',
    'SATFAT_SUM': 'Total saturated fat intake',
    'SODIUM_SUM': 'Total sodium intake',
})

# Restrict the raw frame to the columns named in mind_dictionary. The result is
# an independent copy, so columns added to it later never touch the raw frame.
needed_columns = [*mind_dictionary]
df_mind = df.loc[:, needed_columns].copy()

# MIND diet components that earn a point for sufficient intake. Each food
# group name maps to the raw item columns that are added together for it.
positive_foods = {}
positive_foods['LeafyGreens'] = ['C3A_FF_13']
positive_foods['OtherVegetables'] = ['C3B_FF_13']
positive_foods['Berries'] = ['C4A_FF_13', 'C4B_FF_13']
positive_foods['Nuts'] = ['C5A_FF_13', 'C5B_FF_13', 'C5C_FF_13', 'C5D_FF_13']
positive_foods['WholeGrains'] = ['C6A_FF_13', 'C6B_FF_13', 'C6C_FF_13', 'C6D_FF_13']
positive_foods['Fish'] = ['C7A_FF_13', 'C7B_FF_13']
positive_foods['Beans'] = ['C8A_FF_13', 'C8B_FF_13', 'C8C_FF_13']
positive_foods['Poultry'] = ['C7E_FF_13', 'C7F_FF_13']
positive_foods['OliveOil'] = ['C9A_FF_13']
positive_foods['Wine'] = ['C9B_FF_13']

# MIND diet components that earn a point for staying under a cutoff. Each food
# group name maps to the raw item columns that are added together for it.
negative_foods = {}
negative_foods['RedMeat'] = ['C9C_FF_13', 'C9D_FF_13', 'C9E_FF_13']
negative_foods['Butter'] = ['C9F_FF_13']
negative_foods['Cheese'] = ['C9G_FF_13', 'C9H_FF_13']
negative_foods['Pastries'] = ['C9I_FF_13', 'C9J_FF_13', 'C9K_FF_13']
negative_foods['FriedFastFood'] = ['C9L_FF_13', 'C9M_FF_13']

# Summing intake per group.
# Both group maps are merged once so beneficial and limit groups share the same
# aggregation rule.
food_groups = {**positive_foods, **negative_foods}
for group, cols in food_groups.items():
    # min_count=1 leaves the total as NaN when every item of the group is
    # missing for a respondent. The default (min_count=0) would report 0, and a
    # 0 satisfies every "< cutoff" rule below, which would hand respondents with
    # no answers a free point for each food-to-limit group. Groups that are only
    # partly answered are still summed over the items that are present.
    df_mind[group] = df_mind[cols].sum(axis=1, min_count=1)

# Apply scoring rules — simplified threshold cutoffs.
# Each entry turns a group's summed intake into a boolean Series (True = point
# earned). Insertion order fixes the order of the Score_* columns: the ten
# beneficial groups first, then the five groups to limit (reverse scored).
# NOTE(review): the cutoffs are compared against raw item sums as-is — confirm
# against the codebook that the *_FF_13 items share the frequency unit each
# cutoff assumes.
scoring_rules = {
    'LeafyGreens': lambda intake: intake >= 6,
    'OtherVegetables': lambda intake: intake >= 1,
    'Berries': lambda intake: intake >= 2,
    'Nuts': lambda intake: intake >= 5,
    'WholeGrains': lambda intake: intake >= 3,
    'Fish': lambda intake: intake >= 1,
    'Beans': lambda intake: intake >= 3,
    'Poultry': lambda intake: intake >= 2,
    'OliveOil': lambda intake: intake > 0,
    'Wine': lambda intake: (intake >= 1) & (intake <= 7),
    # Reverse scores for foods to limit
    'RedMeat': lambda intake: intake < 4,
    'Butter': lambda intake: intake < 1,
    'Cheese': lambda intake: intake < 1,
    'Pastries': lambda intake: intake < 5,
    'FriedFastFood': lambda intake: intake < 1,
}

for group, rule in scoring_rules.items():
    intake = df_mind[group]
    # A comparison against NaN evaluates to False, which would silently score a
    # missing intake as 0 points. Blank those rows instead so a missing
    # component stays distinguishable from a failed one. The column is float
    # (1.0 / 0.0 / NaN) because NaN cannot be stored in an integer column.
    df_mind[f'Score_{group}'] = rule(intake).astype(float).where(intake.notna())

# Final MIND Diet score (0–15).
# The component columns are listed explicitly rather than discovered by prefix,
# so an unrelated column that happens to start with "Score_" cannot leak into
# the total. skipna=False makes the total NaN whenever any component is missing
# instead of reporting a partial sum that looks like a complete score.
score_cols = [f'Score_{group}' for group in scoring_rules]
df_mind['MIND_Diet_Score'] = df_mind[score_cols].sum(axis=1, skipna=False)

# Trim the scored frame down to respondent identifiers, basic demographics and
# the composite score, then show the first rows as a quick visual check.
final_cols = ['HHID', 'PN', 'AGE', 'GENDER', 'RACE', 'MIND_Diet_Score']
df_result = df_mind.loc[:, final_cols]

preview = df_result.head()
print(preview)

['HHID', 'PN', 'QNR13', 'HNA1_13', 'HNA2_13', 'HNA3_13', 'HNA4M1_13', 'HNA4M2_13', 'HNA4M3_13', 'HNA4M4_13', 'HNA4M5_13', 'HNA5_13', 'HNA6_13', 'HNA7_13', 'HNA8_13', 'HNA9M1_13', 'HNA9M2_13', 'HNA9M3_13', 'HNA9M4_13', 'HNA9M5_13', 'HNA9M6_13', 'HNA9M7_13', 'HNA9M8_13', 'HNA9M9_13', 'HNA9M10_13', 'HNA9M11_13', 'HNA10_13', 'HNA11_13', 'HNA12_13', 'HNA13_13', 'HNB1_13', 'HNB2_13', 'HNB3_13', 'HNB4_13', 'HNB5_13', 'HNB6M1_13', 'HNB6M2_13', 'HNB6M3_13', 'HNB6M4_13', 'HNB6M5_13', 'HNB6M6_13', 'HNB7_13', 'HNB8_13', 'HNB9_13', 'HNB10_13', 'HNB11_13', 'HNB12_13', 'HNB13_13', 'HNB14_13', 'HNB15_13', 'HNB16_13', 'HNB17_13', 'HNB18_13', 'HNB19_13', 'HNC1_13', 'HNC1A_13', 'HNC1B_13', 'HNC2M1_13', 'HNC2M2_13', 'HNC2M3_13', 'HNC2M4_13', 'HNC2M5_13', 'HNC2M6_13', 'HNC2M7_13', 'HNC2M8_13', 'HNC2M9_13', 'HNC2M10_13', 'HNC2M11_13', 'HNC2M12_13', 'HNC2AM1_13', 'HNC2AM2_13', 'HNC2AM3_13', 'HNC2AM4_13', 'HNC2AM5_13', 'HNC2AM6_13', 'HNC2AM7_13', 'HNC2AM8_13', 'HNC2AM9_13', 'HNC2AM10_13', 'HNC2AM11_13', 'HNC2

In [None]:
# Variable-code -> label lookup for the MIND diet inputs, assembled one section
# at a time. Insertion order is identifiers, brain-healthy items, foods to
# limit, then the supporting nutrient totals.
mind_dictionary = {}

# Identifiers & demographics
mind_dictionary.update({
    'HHID': 'Household Identifier',
    'PN': 'Person Number within Household',
    'QNR13': 'Questionnaire ID',
    'AGE': 'Age of Respondent',
    'GENDER': 'Gender of Respondent',
    'RACE': 'Race of Respondent',
    'EDYRS': 'Years of Education',
    'BMI': 'Body Mass Index',
    'HCNSWGTR': 'Survey Weight',
    'HCNSWGTR_NT': 'Non-Telephone Survey Weight',
    'HCNSVERSION': 'Survey Version',
})

# Brain-healthy food groups
mind_dictionary.update({
    'C3A_FF_13': 'Leafy green vegetables (spinach, kale, etc.)',
    'C3B_FF_13': 'Other vegetables',
    'C4A_FF_13': 'Berries - strawberries',
    'C4B_FF_13': 'Berries - blueberries',
    'C5A_FF_13': 'Nuts - peanuts',
    'C5B_FF_13': 'Nuts - almonds',
    'C5C_FF_13': 'Nuts - walnuts',
    'C5D_FF_13': 'Nuts - other',
    'C6A_FF_13': 'Whole grains - brown rice',
    'C6B_FF_13': 'Whole grains - whole wheat bread',
    'C6C_FF_13': 'Whole grains - oatmeal',
    'C6D_FF_13': 'Whole grains - cereal',
    'C7A_FF_13': 'Fish - non-fried',
    'C7B_FF_13': 'Fish - other',
    'C8A_FF_13': 'Beans - pinto',
    'C8B_FF_13': 'Beans - black',
    'C8C_FF_13': 'Beans - lentils',
    'C7E_FF_13': 'Poultry - chicken',
    'C7F_FF_13': 'Poultry - turkey',
    'C9A_FF_13': 'Olive oil use (primary oil)',
    'C9B_FF_13': 'Wine (red or white)',
})

# Foods to limit
mind_dictionary.update({
    'C9C_FF_13': 'Red meat - beef',
    'C9D_FF_13': 'Red meat - pork',
    'C9E_FF_13': 'Red meat - processed',
    'C9F_FF_13': 'Butter or margarine',
    'C9G_FF_13': 'Cheese - regular',
    'C9H_FF_13': 'Cheese - processed',
    'C9I_FF_13': 'Pastries - cake, cookies',
    'C9J_FF_13': 'Candy or sweets',
    'C9K_FF_13': 'Other sweets - ice cream',
    'C9L_FF_13': 'Fried food - french fries',
    'C9M_FF_13': 'Fast food - burgers, takeout',
})

# Optional extra supporting items
mind_dictionary.update({
    'ALCO_SUM': 'Total alcohol intake (g/day)',
    'CALOR_SUM': 'Total daily caloric intake',
    'SATFAT_SUM': 'Total saturated fat intake',
    'SODIUM_SUM': 'Total sodium intake',
})