In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from adjustText import adjust_text
from matplotlib.patches import Circle
import os
import pickle
from scipy.stats import mannwhitneyu, ttest_ind, kruskal, f_oneway
from statannotations.Annotator import Annotator
from itertools import combinations
from ipywidgets import interact
import ipywidgets as widgets
from IPython.display import clear_output, display
import sys
sys.path.append('../scripts') 
from functions import *
from config import nutrient_info, conversion_factors

In [2]:
df_food_scaled = pd.read_csv('../data/df_food_scaled.csv')

  df_food_scaled = pd.read_csv('../data/df_food_scaled.csv')


In [3]:
qualifying_nutrients = {nutr: info for nutr, info in nutrient_info.items() if info['type'] == 'qualifying'}
disqualifying_nutrients = {nutr: info for nutr, info in nutrient_info.items() if info['type'] == 'disqualifying'}

In [4]:
df_cereal_grains_starches = df_food_scaled[df_food_scaled['food_group'] == 'cereal_grains_starches']
df_cereal_grains_starches.drop_duplicates('combined_name')[['combined_name', 'QI', 'DI', 'NB']].sort_values('QI', ascending=False)

Unnamed: 0,combined_name,QI,DI,NB
275126,"Pasta noodles, without gluten",9.282747,0.117102,36.191516
297368,Alnatura - Rote Linsen Spirelli,4.594666,0.078480,26.904351
296557,Spaghetti gelbe Linsen,4.587200,0.083617,26.939159
300573,Barilla : Spaghetti no.5 : Glutenfrei,4.236493,0.067197,21.535476
110733,Wheatgerm,3.236498,0.262335,72.424547
...,...,...,...,...
313255,"Carloni, Enjoy free! - Quinoa & Buchweizen Penne",0.042116,0.050463,4.211569
280767,Mei Yang Spring Roll Paper,0.036950,0.044540,3.695036
312120,Gut Bio - Mini Mais-Waffeln Popcorn (Vegan),0.033879,0.063457,3.387916
285311,noodles vegetable,0.028986,1.643139,2.898551


In [5]:
high_qi_cereal_grains_starches = df_cereal_grains_starches[df_cereal_grains_starches['QI'] > 2].drop_duplicates('combined_name').sort_values('QI', ascending=False)

In [6]:
high_qi_cereal_grains_starches[['combined_name', 'eaten_quantity', 'QI', 'DI', 'NB']]

Unnamed: 0,combined_name,eaten_quantity,QI,DI,NB
275126,"Pasta noodles, without gluten",150.0,9.282747,0.117102,36.191516
297368,Alnatura - Rote Linsen Spirelli,50.0,4.594666,0.07848,26.904351
296557,Spaghetti gelbe Linsen,184.0,4.5872,0.083617,26.939159
300573,Barilla : Spaghetti no.5 : Glutenfrei,50.0,4.236493,0.067197,21.535476
110733,Wheatgerm,15.0,3.236498,0.262335,72.424547
308195,Saitaku - Shirataki noodles,80.0,2.38587,1.658508,10.0


In [7]:
ratio_cols = [i for i in high_qi_cereal_grains_starches.columns if '_ratio_scaled' in i]

df_corr = high_qi_cereal_grains_starches[ratio_cols + ['QI']]
corr = df_corr.corr()['QI']
corr.sort_values(ascending=True)

protein_eaten_ratio_scaled                                  -0.645022
sugar_eaten_ratio_scaled                                    -0.615259
fat_eaten_ratio_scaled                                      -0.579919
fiber_eaten_ratio_scaled                                    -0.507670
fatty_acids_saturated_eaten_ratio_scaled                    -0.487101
salt_eaten_ratio_scaled                                     -0.477811
beta_carotene_eaten_ratio_scaled                            -0.303281
vitamin_e_activity_eaten_ratio_scaled                       -0.303281
vitamin_b2_eaten_ratio_scaled                               -0.303281
vitamin_b1_eaten_ratio_scaled                               -0.275525
zinc_eaten_ratio_scaled                                     -0.259552
potassium_eaten_ratio_scaled                                -0.250691
iron_eaten_ratio_scaled                                     -0.196498
phosphorus_eaten_ratio_scaled                               -0.195114
folate_eaten_ratio_s

In [8]:
def interactive_exclude_nutrient(nutrient):
    clear_output(wait=True)
    df_plot = compare_qi_excluding_nutrient(
        df=high_qi_cereal_grains_starches, 
        nutrient_to_exclude=nutrient,
        qualifying_nutrients=qualifying_nutrients, 
        scaling_factor=2000
    )

    display(df_plot[['combined_name', 'QI', f"QI_excl_{nutrient}", 'QI_diff', 'QI_pct_change']])

    print(f"\nExcluding {nutrient} for NB:")
    df_nb = compare_nb_excluding_nutrient(
        df=high_qi_cereal_grains_starches, 
        nutrient_to_exclude=nutrient,
        qualifying_nutrients=qualifying_nutrients, 
        scaling_factor=2000
    )
    display(df_nb[['combined_name', 'NB', f"NB_excl_{nutrient}", 'NB_diff', 'NB_pct_change']])

nutrient_choice = ['folate_eaten', 
                   'niacin_eaten', 
                   'pantothenic_acid_eaten', 
                   'vitamin_b2_eaten', 
                   'vitamin_b1_eaten', 
                   'all_trans_retinol_equivalents_activity_eaten', 
                   'beta_carotene_eaten', 
                   'vitamin_b12_eaten', 
                   'vitamin_b6_eaten', 
                   'vitamin_c_eaten', 
                   'vitamin_d_eaten', 
                   'vitamin_e_activity_eaten', 
                   'calcium_eaten', 
                   'iron_eaten', 
                   'magnesium_eaten', 
                   'phosphorus_eaten', 
                   'potassium_eaten', 
                   'zinc_eaten', 
                   'fiber_eaten', 
                   'protein_eaten']

In [9]:
interact(interactive_exclude_nutrient, nutrient=widgets.Dropdown(options=nutrient_choice, description='Exclude:'))

interactive(children=(Dropdown(description='Exclude:', options=('folate_eaten', 'niacin_eaten', 'pantothenic_a…

<function __main__.interactive_exclude_nutrient(nutrient)>

- vitamin b6
- fiber
- protein

In [10]:
exclude_list = ['fiber_eaten', 'vitamin_b6_eaten', 'protein_eaten']
qualifying_nutrients = list(nutrient_info.keys())

for idx, row in high_qi_cereal_grains_starches.iterrows():

    qi_incl = compute_qi_excluding_multiple(row, qualifying_nutrients, exclude_list=[])
    qi_excl = compute_qi_excluding_multiple(row, qualifying_nutrients, exclude_list=exclude_list)

    nb_incl = compute_nb_excluding_multiple(row, qualifying_nutrients, exclude_list=[]) 
    nb_excl = compute_nb_excluding_multiple(row, qualifying_nutrients, exclude_list=exclude_list)

    print(f"Food item : {row['combined_name']}")
    print(f"QI (incl. all): {qi_incl}")
    print(f"QI (excl. {', '.join(exclude_list)}): {qi_excl}")
    print(f"NB (incl. all): {nb_incl:.2f}")
    print(f"NB (excl. {', '.join(exclude_list)}): {nb_excl:.2f}")
    print("-----------")

Food item : Pasta noodles, without gluten
QI (incl. all): 7.454302070660035
QI (excl. fiber_eaten, vitamin_b6_eaten, protein_eaten): 0.25966644759717084
NB (incl. all): 31.76
NB (excl. fiber_eaten, vitamin_b6_eaten, protein_eaten): 25.57
-----------
Food item : Alnatura - Rote Linsen Spirelli
QI (incl. all): 3.6945679476944284
QI (excl. fiber_eaten, vitamin_b6_eaten, protein_eaten): 0.12962508888291718
NB (incl. all): 23.41
NB (excl. fiber_eaten, vitamin_b6_eaten, protein_eaten): 12.96
-----------
Food item : Spaghetti gelbe Linsen
QI (incl. all): 3.689828377474806
QI (excl. fiber_eaten, vitamin_b6_eaten, protein_eaten): 0.1313423602311917
NB (incl. all): 23.56
NB (excl. fiber_eaten, vitamin_b6_eaten, protein_eaten): 13.13
-----------
Food item : Barilla : Spaghetti no.5 : Glutenfrei
QI (incl. all): 3.40532140572286
QI (excl. fiber_eaten, vitamin_b6_eaten, protein_eaten): 0.12172470118273776
NB (incl. all): 18.84
NB (excl. fiber_eaten, vitamin_b6_eaten, protein_eaten): 12.17
----------