In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from adjustText import adjust_text
from matplotlib.patches import Circle
import os
import pickle
from scipy.stats import mannwhitneyu, ttest_ind, kruskal, f_oneway
from statannotations.Annotator import Annotator
from itertools import combinations
from ipywidgets import interact
import ipywidgets as widgets
from IPython.display import clear_output, display
import sys
sys.path.append('../scripts') 
from functions import *
from config import nutrient_info, conversion_factors

In [2]:
df_food_scaled = pd.read_csv('../data/df_food_scaled.csv')

  df_food_scaled = pd.read_csv('../data/df_food_scaled.csv')


In [3]:
qualifying_nutrients = {nutr: info for nutr, info in nutrient_info.items() if info['type'] == 'qualifying'}
disqualifying_nutrients = {nutr: info for nutr, info in nutrient_info.items() if info['type'] == 'disqualifying'}

In [4]:
df_processed_foods = df_food_scaled[df_food_scaled['food_group'] == 'processed_foods']
df_processed_foods.drop_duplicates('combined_name')[['combined_name', 'QI', 'DI', 'NB']].sort_values('QI', ascending=False)

Unnamed: 0,combined_name,QI,DI,NB
290599,MClassic - Saladbowl Chicken,254.631479,1.645896,85.009150
291464,Betty Bossi - Chicken Caesar Salad (& caesar d...,147.293505,1.478695,78.709698
261242,"Chicken caesar salad (green salad, cheese, cro...",143.393908,1.126360,80.829971
256693,Bami goreng,86.332573,1.083334,72.360804
297433,Tandoori Wrap,64.399538,1.224223,71.323872
...,...,...,...,...
299379,Migros Daily - Salatschale gemischt mit Balsam...,0.007489,0.985452,0.748878
316141,Migros V-Love - Plant-based Schnitzel,0.000000,0.000000,0.000000
316159,Pierre Baguette - Wrap Caesar,0.000000,0.000000,0.000000
310824,Nissin Demae Ramen Beef,0.000000,0.330351,0.000000


In [5]:
high_qi_processed_foods = df_processed_foods[df_processed_foods['QI'] > 2].drop_duplicates('combined_name').sort_values('QI', ascending=False)

In [6]:
high_qi_processed_foods[['combined_name', 'eaten_quantity', 'QI', 'DI', 'NB']]

Unnamed: 0,combined_name,eaten_quantity,QI,DI,NB
290599,MClassic - Saladbowl Chicken,250.0,254.631479,1.645896,85.00915
291464,Betty Bossi - Chicken Caesar Salad (& caesar d...,260.0,147.293505,1.478695,78.709698
261242,"Chicken caesar salad (green salad, cheese, cro...",200.0,143.393908,1.12636,80.829971
256693,Bami goreng,400.0,86.332573,1.083334,72.360804
297433,Tandoori Wrap,190.0,64.399538,1.224223,71.323872
294058,COOP Spicy Chicken Avocado,235.0,63.639363,1.244381,70.529591
301176,migrolino - Avocado & Kalbsspeck Chicken Wrap,200.0,57.34259,1.231895,67.389236
281830,Chicken Ketchup Sandwich,215.0,51.153458,1.023575,62.918973
281347,Chicken Crispy Sandwich,230.0,47.803951,1.105742,61.095693
290384,"Coop, Betty Bossi - Sandwich Poulet tartare",235.0,44.197996,1.07028,59.97149


In [7]:
ratio_cols = [i for i in high_qi_processed_foods.columns if '_ratio_scaled' in i]

df_corr = high_qi_processed_foods[ratio_cols + ['QI']]
corr = df_corr.corr()['QI']
corr.sort_values(ascending=True)

vitamin_d_eaten_ratio_scaled                                -0.246694
all_trans_retinol_equivalents_activity_eaten_ratio_scaled   -0.238711
vitamin_c_eaten_ratio_scaled                                -0.186693
iron_eaten_ratio_scaled                                     -0.174035
zinc_eaten_ratio_scaled                                     -0.171968
pantothenic_acid_eaten_ratio_scaled                         -0.124503
magnesium_eaten_ratio_scaled                                -0.113676
vitamin_b12_eaten_ratio_scaled                              -0.097083
potassium_eaten_ratio_scaled                                -0.096146
vitamin_e_activity_eaten_ratio_scaled                       -0.082748
cholesterol_eaten_ratio_scaled                              -0.072221
salt_eaten_ratio_scaled                                     -0.068826
fiber_eaten_ratio_scaled                                    -0.064617
phosphorus_eaten_ratio_scaled                               -0.057383
protein_eaten_ratio_

In [8]:
def interactive_exclude_nutrient(nutrient):
    clear_output(wait=True)
    df_plot = compare_qi_excluding_nutrient(
        df=high_qi_processed_foods, 
        nutrient_to_exclude=nutrient,
        qualifying_nutrients=qualifying_nutrients, 
        scaling_factor=2000
    )

    display(df_plot[['combined_name', 'QI', f"QI_excl_{nutrient}", 'QI_diff', 'QI_pct_change']])

    print(f"\nExcluding {nutrient} for NB:")
    df_nb = compare_nb_excluding_nutrient(
        df=high_qi_processed_foods, 
        nutrient_to_exclude=nutrient,
        qualifying_nutrients=qualifying_nutrients, 
        scaling_factor=2000
    )
    display(df_nb[['combined_name', 'NB', f"NB_excl_{nutrient}", 'NB_diff', 'NB_pct_change']])

nutrient_choice = ['folate_eaten', 
                   'niacin_eaten', 
                   'pantothenic_acid_eaten', 
                   'vitamin_b2_eaten', 
                   'vitamin_b1_eaten', 
                   'all_trans_retinol_equivalents_activity_eaten', 
                   'beta_carotene_eaten', 
                   'vitamin_b12_eaten', 
                   'vitamin_b6_eaten', 
                   'vitamin_c_eaten', 
                   'vitamin_d_eaten', 
                   'vitamin_e_activity_eaten', 
                   'calcium_eaten', 
                   'iron_eaten', 
                   'magnesium_eaten', 
                   'phosphorus_eaten', 
                   'potassium_eaten', 
                   'zinc_eaten', 
                   'fiber_eaten', 
                   'protein_eaten']

In [9]:
interact(interactive_exclude_nutrient, nutrient=widgets.Dropdown(options=nutrient_choice, description='Exclude:'))

interactive(children=(Dropdown(description='Exclude:', options=('folate_eaten', 'niacin_eaten', 'pantothenic_a…

<function __main__.interactive_exclude_nutrient(nutrient)>

- vitamin b2
- vitamin b1
- vitamin b6


In [10]:
exclude_list = ['vitamin_b1_eaten', 'vitamin_b6_eaten', 'vitamin_b2_eaten']
qualifying_nutrients = list(nutrient_info.keys())

for idx, row in high_qi_processed_foods.iterrows():

    qi_incl = compute_qi_excluding_multiple(row, qualifying_nutrients, exclude_list=[])
    qi_excl = compute_qi_excluding_multiple(row, qualifying_nutrients, exclude_list=exclude_list)

    nb_incl = compute_nb_excluding_multiple(row, qualifying_nutrients, exclude_list=[]) 
    nb_excl = compute_nb_excluding_multiple(row, qualifying_nutrients, exclude_list=exclude_list)

    print(f"Food item : {row['combined_name']}")
    print(f"QI (incl. all): {qi_incl}")
    print(f"QI (excl. {', '.join(exclude_list)}): {qi_excl}")
    print(f"NB (incl. all): {nb_incl:.2f}")
    print(f"NB (excl. {', '.join(exclude_list)}): {nb_excl:.2f}")
    print("-----------")

Food item : MClassic - Saladbowl Chicken
QI (incl. all): 203.25717289336825
QI (excl. vitamin_b1_eaten, vitamin_b6_eaten, vitamin_b2_eaten): 1.9624300249514324
NB (incl. all): 83.70
NB (excl. vitamin_b1_eaten, vitamin_b6_eaten, vitamin_b2_eaten): 81.47
-----------
Food item : Betty Bossi - Chicken Caesar Salad (& caesar dressing)
QI (incl. all): 117.70222736532166
QI (excl. vitamin_b1_eaten, vitamin_b6_eaten, vitamin_b2_eaten): 1.330840233707126
NB (incl. all): 79.62
NB (excl. vitamin_b1_eaten, vitamin_b6_eaten, vitamin_b2_eaten): 76.84
-----------
Food item : Chicken caesar salad (green salad, cheese, croutons, sauce)
QI (incl. all): 114.51090197950856
QI (excl. vitamin_b1_eaten, vitamin_b6_eaten, vitamin_b2_eaten): 1.2121934740808353
NB (incl. all): 77.39
NB (excl. vitamin_b1_eaten, vitamin_b6_eaten, vitamin_b2_eaten): 74.31
-----------
Food item : Bami goreng
QI (incl. all): 68.90442175019095
QI (excl. vitamin_b1_eaten, vitamin_b6_eaten, vitamin_b2_eaten): 1.017362260899655
NB (incl