In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from adjustText import adjust_text
from matplotlib.patches import Circle
import os
import pickle
from scipy.stats import mannwhitneyu, ttest_ind, kruskal, f_oneway
from statannotations.Annotator import Annotator
from itertools import combinations
from ipywidgets import interact
import ipywidgets as widgets
from IPython.display import clear_output, display
import sys
sys.path.append('../scripts') 
from functions import *
from config import nutrient_info, conversion_factors

In [2]:
df_food_scaled = pd.read_csv('../data/df_food_scaled.csv')

  df_food_scaled = pd.read_csv('../data/df_food_scaled.csv')


In [3]:
qualifying_nutrients = {nutr: info for nutr, info in nutrient_info.items() if info['type'] == 'qualifying'}
disqualifying_nutrients = {nutr: info for nutr, info in nutrient_info.items() if info['type'] == 'disqualifying'}

In [4]:
df_vegetable_fruit_juice = df_food_scaled[df_food_scaled['food_group'] == 'vegetable_fruit_juices']
df_vegetable_fruit_juice.drop_duplicates('combined_name')[['combined_name', 'QI', 'DI', 'NB']].sort_values('QI', ascending=False)

Unnamed: 0,combined_name,QI,DI,NB
293872,COOP NATURAPLAN BIO Biotta Gemüse-Cocktail,30.366425,1.928472,84.166667
298929,Migros ORGANIC Vegetable Cocktail,29.095072,2.520723,84.000000
300997,Jus de curcuma bio,22.297306,0.285589,77.944269
195496,"Juice, vegetable",22.078085,0.716485,81.429513
302968,Alnatura beetroot juice,20.217955,0.807460,81.250000
...,...,...,...,...
316659,Innocent Direktsaft Apfel & Mango,0.000000,0.521739,0.000000
316978,Coop Naturaplan Bio - Zitronensaft,0.000000,0.185507,0.000000
316889,Rabenhorst - pomegranate,0.000000,0.612022,0.000000
317505,demeter - Karottensaft (Rodelika),0.000000,0.696477,0.000000


In [5]:
high_qi_vegetable_fruit_juices = df_vegetable_fruit_juice[df_vegetable_fruit_juice['QI'] > 3].drop_duplicates('combined_name').sort_values('QI', ascending=False)

In [6]:
high_qi_vegetable_fruit_juices[['combined_name', 'QI', 'DI', 'NB']]

Unnamed: 0,combined_name,QI,DI,NB
293872,COOP NATURAPLAN BIO Biotta Gemüse-Cocktail,30.366425,1.928472,84.166667
298929,Migros ORGANIC Vegetable Cocktail,29.095072,2.520723,84.0
300997,Jus de curcuma bio,22.297306,0.285589,77.944269
195496,"Juice, vegetable",22.078085,0.716485,81.429513
302968,Alnatura beetroot juice,20.217955,0.80746,81.25
305192,coop naturaplan Randensaft,17.698268,0.655962,75.487805
292291,Pfanner Bio - Organic beetroot,16.958814,0.8233,80.232558
292435,"Juicy: pear, orange, banana, spinach, celeriac",10.360972,0.460064,72.547653
303801,Coop Naturaplan Sweet Mild Orange Mild,9.831389,0.585366,12.529162
294697,Hohes C - Milder Multivitamin 100% Saft,8.248337,0.676611,74.824992


In [7]:
ratio_cols = [i for i in high_qi_vegetable_fruit_juices.columns if '_ratio_scaled' in i]

df_corr = high_qi_vegetable_fruit_juices[ratio_cols + ['QI']]
corr = df_corr.corr()['QI']
corr.sort_values(ascending=True)

vitamin_b1_eaten_ratio_scaled                               -0.446987
sugar_eaten_ratio_scaled                                    -0.424316
niacin_eaten_ratio_scaled                                   -0.405232
vitamin_b2_eaten_ratio_scaled                               -0.364504
vitamin_e_activity_eaten_ratio_scaled                       -0.229465
vitamin_b12_eaten_ratio_scaled                              -0.183108
vitamin_c_eaten_ratio_scaled                                -0.163494
vitamin_d_eaten_ratio_scaled                                -0.109932
vitamin_b6_eaten_ratio_scaled                               -0.080208
folate_eaten_ratio_scaled                                   -0.068749
sodium_eaten_ratio_scaled                                   -0.006292
pantothenic_acid_eaten_ratio_scaled                          0.121522
fat_eaten_ratio_scaled                                       0.234113
zinc_eaten_ratio_scaled                                      0.256409
salt_eaten_ratio_sca

In [8]:
def interactive_exclude_nutrient(nutrient):
    clear_output(wait=True)
    df_plot = compare_qi_excluding_nutrient(
        df=high_qi_vegetable_fruit_juices, 
        nutrient_to_exclude=nutrient,
        qualifying_nutrients=qualifying_nutrients, 
        scaling_factor=2000
    )
    display(df_plot[['combined_name', 'QI', f"QI_excl_{nutrient}", 'QI_diff', 'QI_pct_change']])

    print(f"\nExcluding {nutrient} for NB:")
    df_nb = compare_nb_excluding_nutrient(
        df=high_qi_vegetable_fruit_juices, 
        nutrient_to_exclude=nutrient,
        qualifying_nutrients=qualifying_nutrients, 
        scaling_factor=2000
    )
    display(df_nb[['combined_name', 'NB', f"NB_excl_{nutrient}", 'NB_diff', 'NB_pct_change']])

nutrient_choice = ['folate_eaten', 
                   'niacin_eaten', 
                   'pantothenic_acid_eaten', 
                   'vitamin_b2_eaten', 
                   'vitamin_b1_eaten', 
                   'all_trans_retinol_equivalents_activity_eaten', 
                   'beta_carotene_eaten', 
                   'vitamin_b12_eaten', 
                   'vitamin_b6_eaten', 
                   'vitamin_c_eaten', 
                   'vitamin_d_eaten', 
                   'vitamin_e_activity_eaten', 
                   'calcium_eaten', 
                   'iron_eaten', 
                   'magnesium_eaten', 
                   'phosphorus_eaten', 
                   'potassium_eaten', 
                   'zinc_eaten', 
                   'fiber_eaten', 
                   'protein_eaten']

In [9]:
interact(interactive_exclude_nutrient, nutrient=widgets.Dropdown(options=nutrient_choice, description='Exclude:'))

interactive(children=(Dropdown(description='Exclude:', options=('folate_eaten', 'niacin_eaten', 'pantothenic_a…

<function __main__.interactive_exclude_nutrient(nutrient)>

- beta carotene
- vitamin c
- vitamin e

In [10]:
exclude_list = ['beta_carotene_eaten', 'vitamin_c_eaten', 'vitamin_e_eaten']
qualifying_nutrients = list(nutrient_info.keys())

for idx, row in high_qi_vegetable_fruit_juices.iterrows():

    qi_incl = compute_qi_excluding_multiple(row, qualifying_nutrients, exclude_list=[])
    qi_excl = compute_qi_excluding_multiple(row, qualifying_nutrients, exclude_list=exclude_list)

    nb_incl = compute_nb_excluding_multiple(row, qualifying_nutrients, exclude_list=[]) 
    nb_excl = compute_nb_excluding_multiple(row, qualifying_nutrients, exclude_list=exclude_list)

    print(f"Food item : {row['combined_name']}")
    print(f"QI (incl. all): {qi_incl}")
    print(f"QI (excl. {', '.join(exclude_list)}): {qi_excl}")
    print(f"NB (incl. all): {nb_incl:.2f}")
    print(f"NB (excl. {', '.join(exclude_list)}): {nb_excl:.2f}")
    print("-----------")

Food item : COOP NATURAPLAN BIO Biotta Gemüse-Cocktail
QI (incl. all): 2.946449599934103
QI (excl. beta_carotene_eaten, vitamin_c_eaten, vitamin_e_eaten): 2.7451442591906163
NB (incl. all): 75.33
NB (excl. beta_carotene_eaten, vitamin_c_eaten, vitamin_e_eaten): 74.31
-----------
Food item : Migros ORGANIC Vegetable Cocktail
QI (incl. all): 2.943888520771037
QI (excl. beta_carotene_eaten, vitamin_c_eaten, vitamin_e_eaten): 2.7554394313587185
NB (incl. all): 81.25
NB (excl. beta_carotene_eaten, vitamin_c_eaten, vitamin_e_eaten): 80.47
-----------
Food item : Jus de curcuma bio
QI (incl. all): 17.90638665041428
QI (excl. beta_carotene_eaten, vitamin_c_eaten, vitamin_e_eaten): 18.62795051759549
NB (incl. all): 69.21
NB (excl. beta_carotene_eaten, vitamin_c_eaten, vitamin_e_eaten): 69.64
-----------
Food item : Juice, vegetable
QI (incl. all): 1.9729527714395252
QI (excl. beta_carotene_eaten, vitamin_c_eaten, vitamin_e_eaten): 1.8194689012259364
NB (incl. all): 72.99
NB (excl. beta_carotene

In [11]:
df_vegetable_fruit_juice[df_vegetable_fruit_juice['combined_name'] == 'Jus de curcuma bio'].drop_duplicates('combined_name')[[i for i in df_vegetable_fruit_juice.columns if '_ratio_scaled' in i]].T

Unnamed: 0,300997
folate_eaten_ratio_scaled,3.154574
niacin_eaten_ratio_scaled,6.083822
pantothenic_acid_eaten_ratio_scaled,6.81388
vitamin_b2_eaten_ratio_scaled,8.603384
vitamin_b1_eaten_ratio_scaled,3.049422
all_trans_retinol_equivalents_activity_eaten_ratio_scaled,0.0
beta_carotene_eaten_ratio_scaled,0.0
vitamin_b12_eaten_ratio_scaled,0.0
vitamin_b6_eaten_ratio_scaled,5.33851
vitamin_c_eaten_ratio_scaled,0.588854


In [12]:
row = df_vegetable_fruit_juice[df_vegetable_fruit_juice['combined_name'] == 'Jus de curcuma bio'].iloc[0]

exclude_list = ['iron_eaten', 'fiber_eaten']
qualifying_nutrients = list(nutrient_info.keys())

qi_excl = compute_qi_excluding_multiple(row, qualifying_nutrients, exclude_list, scaling_factor=2000)
print(f"QI excluding {exclude_list}: {qi_excl:.2f}")

QI excluding ['iron_eaten', 'fiber_eaten']: 8.59
