In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from adjustText import adjust_text
from matplotlib.patches import Circle
import os
import pickle
from scipy.stats import mannwhitneyu, ttest_ind, kruskal, f_oneway
from statannotations.Annotator import Annotator
from itertools import combinations
from ipywidgets import interact
import ipywidgets as widgets
from IPython.display import clear_output, display
import sys
sys.path.append('../scripts') 
from functions import *
from config import nutrient_info, conversion_factors

In [2]:
# Load the scaled food table that every later cell in this notebook filters.
# NOTE(review): the saved output echoes this statement, which looks like the
# tail of a pandas warning (possibly DtypeWarning) — confirm, and consider
# passing explicit dtypes if so.
df_food_scaled = pd.read_csv(filepath_or_buffer='../data/df_food_scaled.csv')

  df_food_scaled = pd.read_csv('../data/df_food_scaled.csv')


In [3]:
# Partition nutrient_info by each entry's 'type' field: one mapping for the
# 'qualifying' nutrients and one for the 'disqualifying' nutrients.
qualifying_nutrients = {
    name: meta
    for name, meta in nutrient_info.items()
    if meta['type'] == 'qualifying'
}
disqualifying_nutrients = {
    name: meta
    for name, meta in nutrient_info.items()
    if meta['type'] == 'disqualifying'
}

In [4]:
# Keep only the rows whose food_group is 'sweets_desserts'.
df_sweets_desserts = df_food_scaled.loc[
    df_food_scaled['food_group'].eq('sweets_desserts')
]

# Show one row per combined_name with its QI / DI / NB scores, highest QI first.
(
    df_sweets_desserts
    .drop_duplicates(subset='combined_name')
    .loc[:, ['combined_name', 'QI', 'DI', 'NB']]
    .sort_values(by='QI', ascending=False)
)

Unnamed: 0,combined_name,QI,DI,NB
310392,Flying powder (Sugarfree),111.440781,0.000000,20.000000
294423,Gerblé - Preparation without added sugars (str...,101.135828,0.134211,48.624207
298382,Yogos honey,35.602400,1.172000,54.136598
303176,Yogos with Honey,35.589892,1.172000,53.603543
297359,"Excellence - Linzentorte, extra cremig",35.353995,1.096179,54.098106
...,...,...,...,...
316815,Biscotto - Butter Karamell Waffeln,0.000000,0.000000,0.000000
312364,MClassic - Biscuits Japonais,0.000000,0.899636,0.000000
312764,Frey - Duett Noir & Lait café,0.000000,0.000000,0.000000
311979,Ricolas - Bonbons (Citron Mélisse),0.000000,0.000000,0.000000


In [5]:
# Sweets/desserts with QI strictly above 1.5, de-duplicated on combined_name and
# ordered from highest to lowest QI.
# NOTE(review): the 1.5 cut-off is not explained in this notebook — confirm its rationale.
high_qi_sweets_desserts = (
    df_sweets_desserts
    .loc[df_sweets_desserts['QI'].gt(1.5)]
    .drop_duplicates(subset='combined_name')
    .sort_values(by='QI', ascending=False)
)

In [6]:
# Inspect the high-QI items: name, eaten quantity and the three scores.
high_qi_sweets_desserts.loc[
    :, ['combined_name', 'eaten_quantity', 'QI', 'DI', 'NB']
]

Unnamed: 0,combined_name,eaten_quantity,QI,DI,NB
310392,Flying powder (Sugarfree),2.5,111.440781,0.0,20.0
294423,Gerblé - Preparation without added sugars (str...,20.0,101.135828,0.134211,48.624207
298382,Yogos honey,180.0,35.6024,1.172,54.136598
303176,Yogos with Honey,20.0,35.589892,1.172,53.603543
297359,"Excellence - Linzentorte, extra cremig",150.0,35.353995,1.096179,54.098106
290122,Yogos fig,180.0,34.894035,1.148402,55.194251
272310,"Biscuit tile, crisp (sweet)",10.0,34.463908,0.85203,44.792348
269882,Apple turnover,50.0,28.866701,1.076126,42.790425
259975,Cheesecake,120.0,22.398516,1.351955,37.341901
293430,Excellence - Kirsche mit Rahm,150.0,21.040637,0.681008,44.594824


In [7]:
# Every column whose name contains '_ratio_scaled'.
ratio_cols = [
    column
    for column in high_qi_sweets_desserts.columns
    if '_ratio_scaled' in column
]

# Correlate each of those columns with QI (DataFrame.corr default method).
# QI is part of the frame, so its self-correlation also appears in the result.
df_corr = high_qi_sweets_desserts.loc[:, ratio_cols + ['QI']]
corr = df_corr.corr()['QI']

# Display from the most negative to the most positive correlation.
corr.sort_values()

sodium_eaten_ratio_scaled                                   -0.260063
magnesium_eaten_ratio_scaled                                -0.234277
salt_eaten_ratio_scaled                                     -0.195274
potassium_eaten_ratio_scaled                                -0.190826
phosphorus_eaten_ratio_scaled                               -0.189720
sugar_eaten_ratio_scaled                                    -0.189600
vitamin_d_eaten_ratio_scaled                                -0.185815
all_trans_retinol_equivalents_activity_eaten_ratio_scaled   -0.181797
protein_eaten_ratio_scaled                                  -0.170561
vitamin_e_activity_eaten_ratio_scaled                       -0.151219
vitamin_c_eaten_ratio_scaled                                -0.140635
beta_carotene_eaten_ratio_scaled                            -0.133036
iron_eaten_ratio_scaled                                     -0.123387
folate_eaten_ratio_scaled                                   -0.108434
calcium_eaten_ratio_

In [8]:
def interactive_exclude_nutrient(nutrient):
    """Show how QI and NB change for the high-QI sweets when one nutrient is left out.

    Clears the current cell output, then displays two tables in turn: the QI
    comparison followed by the NB comparison. Both are computed on the
    notebook-level ``high_qi_sweets_desserts`` frame with the notebook-level
    ``qualifying_nutrients`` mapping and a scaling factor of 2000.

    Parameters
    ----------
    nutrient : str
        Name of the nutrient to exclude; it is passed to the helpers as
        ``nutrient_to_exclude`` and used to build the ``QI_excl_<nutrient>`` /
        ``NB_excl_<nutrient>`` column names selected for display.

    Returns
    -------
    None
        The function only produces displayed output.
    """
    # NOTE(review): compare_qi_excluding_nutrient / compare_nb_excluding_nutrient
    # are presumably provided by the star import from `functions` — confirm.
    clear_output(wait=True)

    # Both helpers receive exactly the same keyword arguments.
    shared_kwargs = dict(
        df=high_qi_sweets_desserts,
        nutrient_to_exclude=nutrient,
        qualifying_nutrients=qualifying_nutrients,
        scaling_factor=2000,
    )

    qi_columns = ['combined_name', 'QI', f"QI_excl_{nutrient}", 'QI_diff', 'QI_pct_change']
    qi_comparison = compare_qi_excluding_nutrient(**shared_kwargs)
    display(qi_comparison[qi_columns])

    print(f"\nExcluding {nutrient} for NB:")
    nb_columns = ['combined_name', 'NB', f"NB_excl_{nutrient}", 'NB_diff', 'NB_pct_change']
    nb_comparison = compare_nb_excluding_nutrient(**shared_kwargs)
    display(nb_comparison[nb_columns])

# Options offered in the "Exclude:" dropdown, in display order.
nutrient_choice = (
    # Vitamins and provitamins.
    [
        'folate_eaten',
        'niacin_eaten',
        'pantothenic_acid_eaten',
        'vitamin_b2_eaten',
        'vitamin_b1_eaten',
        'all_trans_retinol_equivalents_activity_eaten',
        'beta_carotene_eaten',
        'vitamin_b12_eaten',
        'vitamin_b6_eaten',
        'vitamin_c_eaten',
        'vitamin_d_eaten',
        'vitamin_e_activity_eaten',
    ]
    # Minerals.
    + [
        'calcium_eaten',
        'iron_eaten',
        'magnesium_eaten',
        'phosphorus_eaten',
        'potassium_eaten',
        'zinc_eaten',
    ]
    # Fiber and protein.
    + ['fiber_eaten', 'protein_eaten']
)

In [10]:
# Render a dropdown of nutrient_choice; each selection re-runs
# interactive_exclude_nutrient with the chosen nutrient.
# The trailing semicolon keeps the function object that interact() returns
# from being echoed as the cell's output.
interact(
    interactive_exclude_nutrient,
    nutrient=widgets.Dropdown(options=nutrient_choice, description='Exclude:'),
);

interactive(children=(Dropdown(description='Exclude:', options=('folate_eaten', 'niacin_eaten', 'pantothenic_a…

<function __main__.interactive_exclude_nutrient(nutrient)>

- niacin
- pantothenic acid
- vitamin B2
- vitamin B1
- vitamin B12
- vitamin B6
- phosphorus