In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from adjustText import adjust_text
from matplotlib.patches import Circle
import os
import pickle
from scipy.stats import mannwhitneyu, ttest_ind, kruskal, f_oneway
from statannotations.Annotator import Annotator
from itertools import combinations
from ipywidgets import interact
import ipywidgets as widgets
from IPython.display import clear_output, display
import sys
sys.path.append('../scripts') 
from functions import *
from config import nutrient_info, conversion_factors



In [2]:
print(os.getcwd())


c:\Users\Sophie\Desktop\EPFL\Bachelor project\Bachelor-Project\food groups


In [3]:
df_food_scaled = pd.read_csv('../data/df_food_scaled.csv')

  df_food_scaled = pd.read_csv('../data/df_food_scaled.csv')


In [4]:
qualifying_nutrients = {nutr: info for nutr, info in nutrient_info.items() if info['type'] == 'qualifying'}
disqualifying_nutrients = {nutr: info for nutr, info in nutrient_info.items() if info['type'] == 'disqualifying'}

In [5]:
df_yogurt_fresh_dairy = df_food_scaled[df_food_scaled['food_group'] == 'yogurt_fresh_dairy']
df_yogurt_fresh_dairy.drop_duplicates('combined_name')[['combined_name', 'QI', 'DI', 'NB']].sort_values('QI', ascending=False)

Unnamed: 0,combined_name,QI,DI,NB
284621,Migros - Oh! Yogurt greek style (Nature),91.427047,0.607057,63.380585
294398,Isey skyr pommes cuites,89.839474,0.598977,63.236092
294326,Icelandic Skyr - The Original (Lemon Cheesecake),86.855694,0.583826,62.961556
303882,YoQua Moka,84.018835,0.556391,62.704731
271962,"Greek Yaourt, yahourt, yogourt ou yoghourt",75.223594,1.070352,61.913073
...,...,...,...,...
308758,"Abbot Kinney's - Coco start, coconut based yog...",0.000000,0.000000,0.000000
311905,"Nestlé - Hirz, choco",0.000000,0.000000,0.000000
313704,Milco - Yogurt à la crème de la Gruyère sur li...,0.000000,0.182708,0.000000
315265,Coop Naturaplan - Bio Jogurt (Zwetschge),0.000000,0.000000,0.000000


In [6]:
high_qi_yogurt_fresh_dairy = df_yogurt_fresh_dairy[df_yogurt_fresh_dairy['QI'] > 5].drop_duplicates('combined_name').sort_values('QI', ascending=False)


In [7]:
high_qi_yogurt_fresh_dairy[['combined_name', 'eaten_quantity', 'QI', 'DI', 'NB']]

Unnamed: 0,combined_name,eaten_quantity,QI,DI,NB
284621,Migros - Oh! Yogurt greek style (Nature),170.0,91.427047,0.607057,63.380585
294398,Isey skyr pommes cuites,170.0,89.839474,0.598977,63.236092
294326,Icelandic Skyr - The Original (Lemon Cheesecake),170.0,86.855694,0.583826,62.961556
303882,YoQua Moka,60.0,84.018835,0.556391,62.704731
271962,"Greek Yaourt, yahourt, yogourt ou yoghourt",40.0,75.223594,1.070352,61.913073
284217,"Nestlé YOGURT Greek-style, plain, unsweetened",170.0,49.524165,1.168528,59.013746
294821,Nestlé - Le Yaourt A La Grecque,150.0,48.563941,1.453769,58.845265
267589,coop naturaplan Griechischer Jogurt Nature,150.0,46.393967,1.461046,58.249494
267627,coop naturaplan Griechischer Jogurt Nature,170.0,46.393967,1.461046,58.249494
293602,Nestlé THE YOGURT WITH GREEK NATURE sweet,150.0,44.062586,1.208376,57.491045


In [8]:
ratio_cols = [i for i in high_qi_yogurt_fresh_dairy.columns if '_ratio_scaled' in i]

df_corr = high_qi_yogurt_fresh_dairy[ratio_cols + ['QI']]
corr = df_corr.corr()['QI']
corr.sort_values(ascending=True)

fat_eaten_ratio_scaled                                      -0.792821
fatty_acids_saturated_eaten_ratio_scaled                    -0.728951
fiber_eaten_ratio_scaled                                    -0.303561
sugar_eaten_ratio_scaled                                    -0.098691
salt_eaten_ratio_scaled                                      0.565620
protein_eaten_ratio_scaled                                   0.940160
vitamin_b2_eaten_ratio_scaled                                0.999998
phosphorus_eaten_ratio_scaled                                0.999998
vitamin_b1_eaten_ratio_scaled                                0.999998
pantothenic_acid_eaten_ratio_scaled                          0.999998
cholesterol_eaten_ratio_scaled                               0.999998
folate_eaten_ratio_scaled                                    0.999998
iron_eaten_ratio_scaled                                      0.999998
potassium_eaten_ratio_scaled                                 0.999998
vitamin_b6_eaten_rat

In [9]:
def interactive_exclude_nutrient(nutrient):
    clear_output(wait=True)
    df_plot = compare_qi_excluding_nutrient(
        df=high_qi_yogurt_fresh_dairy, 
        nutrient_to_exclude=nutrient,
        qualifying_nutrients=qualifying_nutrients, 
        scaling_factor=2000
    )

    display(df_plot[['combined_name', 'QI', f"QI_excl_{nutrient}", 'QI_diff', 'QI_pct_change']])

    print(f"\nExcluding {nutrient} for NB:")
    df_nb = compare_nb_excluding_nutrient(
        df=high_qi_yogurt_fresh_dairy, 
        nutrient_to_exclude=nutrient,
        qualifying_nutrients=qualifying_nutrients, 
        scaling_factor=2000
    )
    display(df_nb[['combined_name', 'NB', f"NB_excl_{nutrient}", 'NB_diff', 'NB_pct_change']])

nutrient_choice = ['folate_eaten', 
                   'niacin_eaten', 
                   'pantothenic_acid_eaten', 
                   'vitamin_b2_eaten', 
                   'vitamin_b1_eaten', 
                   'all_trans_retinol_equivalents_activity_eaten', 
                   'beta_carotene_eaten', 
                   'vitamin_b12_eaten', 
                   'vitamin_b6_eaten', 
                   'vitamin_c_eaten', 
                   'vitamin_d_eaten', 
                   'vitamin_e_activity_eaten', 
                   'calcium_eaten', 
                   'iron_eaten', 
                   'magnesium_eaten', 
                   'phosphorus_eaten', 
                   'potassium_eaten', 
                   'zinc_eaten', 
                   'fiber_eaten', 
                   'protein_eaten']

In [10]:
interact(interactive_exclude_nutrient, nutrient=widgets.Dropdown(options=nutrient_choice, description='Exclude:'))

interactive(children=(Dropdown(description='Exclude:', options=('folate_eaten', 'niacin_eaten', 'pantothenic_a…

<function __main__.interactive_exclude_nutrient(nutrient)>

- vitamin b1
- vitamin b6
- folate

In [11]:
exclude_list = ['vitamin_b1_eaten', 'vitamin_b6_eaten', 'folate_eaten']
qualifying_nutrients = list(nutrient_info.keys())

for idx, row in high_qi_yogurt_fresh_dairy.iterrows():

    qi_incl = compute_qi_excluding_multiple(row, qualifying_nutrients, exclude_list=[])
    qi_excl = compute_qi_excluding_multiple(row, qualifying_nutrients, exclude_list=exclude_list)

    nb_incl = compute_nb_excluding_multiple(row, qualifying_nutrients, exclude_list=[]) 
    nb_excl = compute_nb_excluding_multiple(row, qualifying_nutrients, exclude_list=exclude_list)

    print(f"Food item : {row['combined_name']}")
    print(f"QI (incl. all): {qi_incl}")
    print(f"QI (excl. {', '.join(exclude_list)}): {qi_excl}")
    print(f"NB (incl. all): {nb_incl:.2f}")
    print(f"NB (excl. {', '.join(exclude_list)}): {nb_excl:.2f}")
    print("-----------")

Food item : Migros - Oh! Yogurt greek style (Nature)
QI (incl. all): 73.28733089671181
QI (excl. vitamin_b1_eaten, vitamin_b6_eaten, folate_eaten): 2.0799679913458458
NB (incl. all): 62.05
NB (excl. vitamin_b1_eaten, vitamin_b6_eaten, folate_eaten): 56.87
-----------
Food item : Isey skyr pommes cuites
QI (incl. all): 72.01533366812707
QI (excl. vitamin_b1_eaten, vitamin_b6_eaten, folate_eaten): 2.0345353856720934
NB (incl. all): 61.82
NB (excl. vitamin_b1_eaten, vitamin_b6_eaten, folate_eaten): 56.61
-----------
Food item : Icelandic Skyr - The Original (Lemon Cheesecake)
QI (incl. all): 69.62467375358564
QI (excl. vitamin_b1_eaten, vitamin_b6_eaten, folate_eaten): 1.977912093721056
NB (incl. all): 61.71
NB (excl. vitamin_b1_eaten, vitamin_b6_eaten, folate_eaten): 56.49
-----------
Food item : YoQua Moka
QI (incl. all): 67.3486015287152
QI (excl. vitamin_b1_eaten, vitamin_b6_eaten, folate_eaten): 1.8798868789163183
NB (incl. all): 61.32
NB (excl. vitamin_b1_eaten, vitamin_b6_eaten, fo