In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from adjustText import adjust_text
from matplotlib.patches import Circle
import os
import pickle
from scipy.stats import mannwhitneyu, ttest_ind, kruskal, f_oneway
from statannotations.Annotator import Annotator
from itertools import combinations
from ipywidgets import interact
import ipywidgets as widgets
from IPython.display import clear_output, display
import sys
sys.path.append('../scripts') 
from functions import *
from config import nutrient_info, conversion_factors


In [2]:
# Load the pre-scaled food-intake table that every later cell works from.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which is the documented way to avoid columns
# ending up with mixed types.
# NOTE(review): the saved output of this cell echoes the read_csv line, which
# looks like a pandas DtypeWarning -- confirm against a fresh run.
df_food_scaled = pd.read_csv('../data/df_food_scaled.csv', low_memory=False)

  df_food_scaled = pd.read_csv('../data/df_food_scaled.csv')


In [3]:
# Split the nutrient catalogue from config into two mappings keyed by nutrient
# name, according to each entry's 'type' field.
qualifying_nutrients = {
    name: spec
    for name, spec in nutrient_info.items()
    if spec['type'] == 'qualifying'
}
disqualifying_nutrients = {
    name: spec
    for name, spec in nutrient_info.items()
    if spec['type'] == 'disqualifying'
}

In [4]:
# Keep only the rows whose food group is 'soups_sauces_condiments'.
df_soups_sauces_condiments = df_food_scaled.loc[
    lambda frame: frame['food_group'] == 'soups_sauces_condiments'
]

# Displayed result: one row per distinct combined_name, index columns only,
# ordered from highest to lowest QI.
(
    df_soups_sauces_condiments
    .drop_duplicates('combined_name')
    .loc[:, ['combined_name', 'QI', 'DI', 'NB']]
    .sort_values('QI', ascending=False)
)

Unnamed: 0,combined_name,QI,DI,NB
271629,Wine sauce,101.440674,2.046296,75.638337
300030,M Classic Mustard-honey sauce,58.776342,2.372016,42.969739
293026,La Costeña Salsa Verde,44.875017,22.182639,80.000000
271603,Sauce with mustard,38.269095,1.204271,35.795955
266884,Tarama,28.764989,0.884588,36.872999
...,...,...,...,...
308581,Hochstamm Obstessig,0.000000,0.000000,0.000000
315382,Coop Naturaplan Bio - Sugo alla toscana bio mi...,0.000000,1.443484,0.000000
315083,Coop Prix Garantie - Zitronensaft,0.000000,0.246914,0.000000
235291,Dips,0.000000,0.435192,0.000000


In [5]:
# Rows with QI above 3, reduced to one row per distinct combined_name and
# ordered from highest to lowest QI.
high_qi_soups_sauces_condiments = (
    df_soups_sauces_condiments
    .loc[lambda frame: frame['QI'] > 3]
    .drop_duplicates('combined_name')
    .sort_values('QI', ascending=False)
)

In [6]:
# Show the name, eaten quantity and the three index columns of the high-QI rows.
high_qi_soups_sauces_condiments.loc[
    :, ['combined_name', 'eaten_quantity', 'QI', 'DI', 'NB']
]

Unnamed: 0,combined_name,eaten_quantity,QI,DI,NB
271629,Wine sauce,1.0,101.440674,2.046296,75.638337
300030,M Classic Mustard-honey sauce,40.0,58.776342,2.372016,42.969739
293026,La Costeña Salsa Verde,30.0,44.875017,22.182639,80.0
271603,Sauce with mustard,1.0,38.269095,1.204271,35.795955
266884,Tarama,20.0,28.764989,0.884588,36.872999
306274,Sandro VANINI Feigensenfsauce,5.0,23.762828,1.10832,29.2232
281686,Maçarico - Piri Piri molho hot sauce,10.0,20.997048,22.256339,83.214286
252208,"Bouillon, meat",2.0,13.163585,24.75136,57.0
252376,"Bouillon, beef",2.0,13.163585,24.75136,57.0
302642,cenovis Für Salate und Suppen,7.0,12.792323,30.279594,10.540541


In [7]:
# Columns whose name contains '_ratio_scaled'.
ratio_cols = [
    column
    for column in high_qi_soups_sauces_condiments.columns
    if '_ratio_scaled' in column
]

# Correlate every scaled-ratio column with QI on the high-QI subset and show
# the coefficients from most negative to most positive.
df_corr = high_qi_soups_sauces_condiments.loc[:, [*ratio_cols, 'QI']]
corr = df_corr.corr().loc[:, 'QI']
corr.sort_values()

protein_eaten_ratio_scaled                                  -0.250787
fiber_eaten_ratio_scaled                                    -0.207419
sodium_eaten_ratio_scaled                                   -0.087510
folate_eaten_ratio_scaled                                   -0.033023
niacin_eaten_ratio_scaled                                   -0.004707
fatty_acids_saturated_eaten_ratio_scaled                     0.012956
cholesterol_eaten_ratio_scaled                               0.018886
calcium_eaten_ratio_scaled                                   0.030027
magnesium_eaten_ratio_scaled                                 0.031840
all_trans_retinol_equivalents_activity_eaten_ratio_scaled    0.068559
salt_eaten_ratio_scaled                                      0.092284
fat_eaten_ratio_scaled                                       0.095501
vitamin_c_eaten_ratio_scaled                                 0.107296
phosphorus_eaten_ratio_scaled                                0.108407
zinc_eaten_ratio_sca

In [8]:
# Rows with DI above 6, reduced to one row per distinct combined_name and
# ordered from highest to lowest DI.
high_di_soups_sauces_condiments = (
    df_soups_sauces_condiments
    .loc[lambda frame: frame['DI'] > 6]
    .drop_duplicates('combined_name')
    .sort_values('DI', ascending=False)
)

In [9]:
# Show the name, eaten quantity and the three index columns of the high-DI rows.
high_di_soups_sauces_condiments.loc[
    :, ['combined_name', 'eaten_quantity', 'QI', 'DI', 'NB']
]

Unnamed: 0,combined_name,eaten_quantity,QI,DI,NB
311683,"A.Vogel - Herbamare, Salat & Gemüse Bio",3.0,0.108696,289.650695,5.000000
316839,Herbamare - Sel marin aux légumes et herbes ar...,3.0,0.049407,236.829201,4.940711
294030,Knorr Original Aromat Streuwürze,3.0,0.218739,45.327632,6.971831
280896,Knorr - Aromat natürlich ohne Zusatz von Glutamat,10.0,0.113947,42.907646,7.077922
274895,Knorr - Original Aromat,3.0,0.218739,41.415269,6.971831
...,...,...,...,...,...
312353,Lacroix Kalbs Fond,20.0,0.181159,6.429630,5.000000
295593,Tabasco Pepper Sauce,0.1,0.454076,6.348095,10.000000
229923,"Sauce, roast",60.0,1.075158,6.250848,68.301353
307600,Chef's Best Chili,15.0,0.276408,6.119085,10.000000


In [10]:
# Columns whose name contains '_ratio_scaled'.
ratio_cols = [
    column
    for column in high_di_soups_sauces_condiments.columns
    if '_ratio_scaled' in column
]

# Correlate every scaled-ratio column with DI on the high-DI subset and show
# the coefficients from most negative to most positive.
df_corr = high_di_soups_sauces_condiments.loc[:, [*ratio_cols, 'DI']]
corr = df_corr.corr().loc[:, 'DI']
corr.sort_values()

fiber_eaten_ratio_scaled                                    -0.126925
protein_eaten_ratio_scaled                                  -0.082709
all_trans_retinol_equivalents_activity_eaten_ratio_scaled   -0.037911
sodium_eaten_ratio_scaled                                   -0.033897
calcium_eaten_ratio_scaled                                  -0.021578
zinc_eaten_ratio_scaled                                     -0.020915
phosphorus_eaten_ratio_scaled                               -0.019133
vitamin_b2_eaten_ratio_scaled                               -0.018910
vitamin_e_activity_eaten_ratio_scaled                       -0.017901
vitamin_b6_eaten_ratio_scaled                               -0.017232
iron_eaten_ratio_scaled                                     -0.012503
potassium_eaten_ratio_scaled                                -0.011862
fat_eaten_ratio_scaled                                      -0.010701
vitamin_c_eaten_ratio_scaled                                -0.008977
vitamin_d_eaten_rati

In [11]:
def interactive_exclude_nutrient(nutrient):
    """Display how QI, NB and DI change when one nutrient is left out.

    Clears the current cell output, then shows three comparison tables built
    by the ``compare_*_excluding_nutrient`` helpers.

    Parameters
    ----------
    nutrient : str
        Nutrient name (e.g. ``'folate_eaten'``) forwarded to each helper as
        ``nutrient_to_exclude`` and used to build the ``*_excl_<nutrient>``
        column names that are displayed.

    Notes
    -----
    The QI and NB tables are computed from ``high_qi_soups_sauces_condiments``;
    the DI table is computed from ``high_di_soups_sauces_condiments``.
    The function returns ``None``; its only effect is the displayed output.
    """
    clear_output(wait=True)

    # The QI and NB helpers receive exactly the same keyword arguments.
    qualifying_kwargs = dict(
        df=high_qi_soups_sauces_condiments,
        nutrient_to_exclude=nutrient,
        qualifying_nutrients=qualifying_nutrients,
        scaling_factor=2000,
    )

    qi_table = compare_qi_excluding_nutrient(**qualifying_kwargs)
    qi_columns = ['combined_name', 'QI', f"QI_excl_{nutrient}", 'QI_diff', 'QI_pct_change']
    display(qi_table[qi_columns])

    print(f"\nExcluding {nutrient} for NB:")
    nb_table = compare_nb_excluding_nutrient(**qualifying_kwargs)
    nb_columns = ['combined_name', 'NB', f"NB_excl_{nutrient}", 'NB_diff', 'NB_pct_change']
    display(nb_table[nb_columns])

    # DI uses the high-DI subset and the module-level disqualifying_nutrients
    # mapping instead of the qualifying one.
    di_table = compare_di_excluding_nutrient(
        df=high_di_soups_sauces_condiments,
        nutrient_to_exclude=nutrient,
        disqualifying_nutrients=disqualifying_nutrients,
        scaling_factor=2000,
    )
    di_columns = ['combined_name', 'DI', f"DI_excl_{nutrient}", 'DI_diff', 'DI_pct_change']
    display(di_table[di_columns])

# Nutrient names that can be selected for exclusion, in display order.
nutrient_choice = [
    # vitamins and provitamins
    'folate_eaten', 'niacin_eaten', 'pantothenic_acid_eaten',
    'vitamin_b2_eaten', 'vitamin_b1_eaten',
    'all_trans_retinol_equivalents_activity_eaten', 'beta_carotene_eaten',
    'vitamin_b12_eaten', 'vitamin_b6_eaten', 'vitamin_c_eaten',
    'vitamin_d_eaten', 'vitamin_e_activity_eaten',
    # minerals
    'calcium_eaten', 'iron_eaten', 'magnesium_eaten',
    'phosphorus_eaten', 'potassium_eaten', 'zinc_eaten',
    # fibre and protein
    'fiber_eaten', 'protein_eaten',
    # fats, cholesterol, sugar, sodium and salt
    'fat_eaten', 'fatty_acids_saturated_eaten', 'cholesterol_eaten',
    'sugar_eaten', 'sodium_eaten', 'salt_eaten',
]

In [12]:
# Render a dropdown of nutrient_choice; every selection calls
# interactive_exclude_nutrient with the chosen nutrient.
# The trailing semicolon stops the notebook from echoing interact's return
# value (the function repr) underneath the widget.
interact(
    interactive_exclude_nutrient,
    nutrient=widgets.Dropdown(options=nutrient_choice, description='Exclude:'),
);

interactive(children=(Dropdown(description='Exclude:', options=('folate_eaten', 'niacin_eaten', 'pantothenic_a…

<function __main__.interactive_exclude_nutrient(nutrient)>

In [13]:
# List every column of the full table, one name per line.
for column_name in df_food_scaled.columns.tolist():
    print(column_name)

food_id
barcode
dish_id
eaten_quantity
eaten_unit
subject_key
eaten_at
eaten_at_utc_offset
media_count
food_group_cname
type
display_name_en
display_name_fr
display_name_de
fallback_food_id
standard_portion_quantity
standard_portion_unit
specific_gravity
alcohol
all_trans_retinol_equivalents_activity
beta_carotene
beta_carotene_activity
calcium
carbohydrates
chloride
cholesterol
energy_kcal
energy_kj
fat
fatty_acids_monounsaturated
fatty_acids_polyunsaturated
fatty_acids_saturated
fiber
folate
iodide
iron
magnesium
niacin
pantothenic_acid
phosphorus
potassium
protein
salt
sodium
starch
sugar
vitamin_a_activity
vitamin_b1
vitamin_b12
vitamin_b2
vitamin_b6
vitamin_c
vitamin_d
vitamin_e_activity
water
zinc
eaten_quantity_in_gram
energy_kcal_eaten
energy_kj_eaten
carb_eaten
fat_eaten
protein_eaten
fiber_eaten
alcohol_eaten
local_eaten_at
eaten_date
combined_name
all_trans_retinol_equivalents_activity_eaten
beta_carotene_eaten
calcium_eaten
cholesterol_eaten
fatty_acids_monounsaturated_eate

In [14]:
# Print the rounded maximum of each qualifying nutrient's '<nutrient>_ratio'
# column, using one row per distinct combined_name.
# The de-duplicated frame does not depend on the nutrient, so it is built once
# here rather than once per loop iteration.
unique_foods = df_food_scaled.drop_duplicates('combined_name')
for nutrient in qualifying_nutrients:
    print(nutrient, unique_foods[f'{nutrient}_ratio'].describe().loc["max"].round())

folate_eaten 2.0
niacin_eaten 4.0
pantothenic_acid_eaten 63.0
vitamin_b2_eaten 166.0
vitamin_b1_eaten 208.0
all_trans_retinol_equivalents_activity_eaten 10.0
vitamin_b12_eaten 27.0
vitamin_b6_eaten 188.0
vitamin_c_eaten 10.0
vitamin_d_eaten 3.0
vitamin_e_activity_eaten 25.0
calcium_eaten 3.0
iron_eaten 55.0
magnesium_eaten 4.0
phosphorus_eaten 2.0
potassium_eaten 2.0
zinc_eaten 3.0
fiber_eaten 2.0
protein_eaten 4.0


- folate
- niacin
- pantothenic acid
- vitamin b1
- beta carotene
- vitamin b12
- vitamin b6
- vitamin c
- magnesium


- salt
- sodium