In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from adjustText import adjust_text
from matplotlib.patches import Circle
import os
import pickle
from scipy.stats import mannwhitneyu, ttest_ind, kruskal, f_oneway
from statannotations.Annotator import Annotator
from itertools import combinations
from ipywidgets import interact
import ipywidgets as widgets
from IPython.display import clear_output, display
import sys
sys.path.append('../scripts') 
from functions import *
from config import nutrient_info, conversion_factors

In [2]:
# Load the scaled food table.
# NOTE(review): the saved output of this cell echoes the source line the way a
# Python warning does -- presumably pandas' DtypeWarning about mixed-type
# columns. low_memory=False makes pandas infer each column's dtype from the
# whole file instead of chunk by chunk, which avoids that warning. If the
# offending columns are known, passing an explicit `dtype=` mapping is stricter.
df_food_scaled = pd.read_csv('../data/df_food_scaled.csv', low_memory=False)

  df_food_scaled = pd.read_csv('../data/df_food_scaled.csv')


In [3]:
# Split the nutrient metadata into two dicts according to each entry's 'type' field.
qualifying_nutrients = {
    name: meta
    for name, meta in nutrient_info.items()
    if meta['type'] == 'qualifying'
}
disqualifying_nutrients = {
    name: meta
    for name, meta in nutrient_info.items()
    if meta['type'] == 'disqualifying'
}

In [4]:
# Rows of the food table whose food_group is 'sweetened_beverages'.
df_sweetened_beverages = df_food_scaled.loc[
    df_food_scaled['food_group'].eq('sweetened_beverages')
]

# Preview: one row per combined_name, index columns only, highest QI first.
(
    df_sweetened_beverages
    .drop_duplicates(subset='combined_name')
    .loc[:, ['combined_name', 'QI', 'DI', 'NB']]
    .sort_values(by='QI', ascending=False)
)

Unnamed: 0,combined_name,QI,DI,NB
301657,Coca-Cola Energy,209.000523,15.079365,20.0
317152,Red Bull Zero Calories,185.734636,1.851852,20.0
277171,OK zero,167.161172,7.777778,20.0
310452,Denner - E-Zero,167.161172,5.688889,20.0
301207,Monster Energy - Ultra citron,155.209707,5.555556,20.0
...,...,...,...,...
316775,"Enertea by Rivella - Guayusa, Herbes des Alpes",0.000000,0.571429,0.0
316740,M Budget - Energy Drink,0.000000,0.666667,0.0
317676,Vita Malz - Das Original Alkoholfrei,0.000000,0.501587,0.0
317694,"Nastea Energy - Tea, Guarana (Green apple & pe...",0.000000,0.634722,0.0


In [5]:
# Sweetened beverages with QI above 2: one row per combined_name, highest QI first.
high_qi_sweetened_beverages = (
    df_sweetened_beverages
    .loc[df_sweetened_beverages['QI'].gt(2)]
    .drop_duplicates(subset='combined_name')
    .sort_values(by='QI', ascending=False)
)

In [6]:
# Show only the product name and the three index columns for the high-QI subset.
high_qi_sweetened_beverages.loc[:, ['combined_name', 'QI', 'DI', 'NB']]

Unnamed: 0,combined_name,QI,DI,NB
301657,Coca-Cola Energy,209.000523,15.079365,20.000000
317152,Red Bull Zero Calories,185.734636,1.851852,20.000000
277171,OK zero,167.161172,7.777778,20.000000
310452,Denner - E-Zero,167.161172,5.688889,20.000000
301207,Monster Energy - Ultra citron,155.209707,5.555556,20.000000
...,...,...,...,...
289527,Caotina original,2.356001,0.707224,72.039811
279433,Nestle Nesquik,2.194968,0.478740,59.450531
293541,Capri-sun multi-vitamin,2.185182,0.762916,71.414942
308734,original CAOTINA SWISS CHOCOLATE PURE SENSATION,2.091287,0.705439,52.794233


In [7]:
# Columns of the high-QI subset whose name contains '_ratio_scaled'.
ratio_cols = [
    col
    for col in high_qi_sweetened_beverages.columns
    if '_ratio_scaled' in col
]

# Correlation matrix over those columns plus QI; only the QI column is kept.
df_corr = high_qi_sweetened_beverages[[*ratio_cols, 'QI']]
corr = df_corr.corr().loc[:, 'QI']

# Display from the most negative to the most positive correlation with QI.
corr.sort_values()

sugar_eaten_ratio_scaled                                    -0.298810
vitamin_e_activity_eaten_ratio_scaled                       -0.177651
fatty_acids_saturated_eaten_ratio_scaled                    -0.164504
vitamin_b2_eaten_ratio_scaled                               -0.154554
vitamin_b1_eaten_ratio_scaled                               -0.144916
phosphorus_eaten_ratio_scaled                               -0.135109
calcium_eaten_ratio_scaled                                  -0.133451
vitamin_c_eaten_ratio_scaled                                -0.130853
magnesium_eaten_ratio_scaled                                -0.125957
zinc_eaten_ratio_scaled                                     -0.100206
folate_eaten_ratio_scaled                                   -0.096320
beta_carotene_eaten_ratio_scaled                            -0.092900
potassium_eaten_ratio_scaled                                -0.088669
iron_eaten_ratio_scaled                                     -0.085730
fiber_eaten_ratio_sc

In [8]:
# Sweetened beverages with DI above 2: one row per combined_name, highest DI first.
high_di_sweetened_beverages = (
    df_sweetened_beverages
    .loc[df_sweetened_beverages['DI'].gt(2)]
    .drop_duplicates(subset='combined_name')
    .sort_values(by='DI', ascending=False)
)

In [9]:
# Show only the product name and the three index columns for the high-DI subset.
high_di_sweetened_beverages.loc[:, ['combined_name', 'QI', 'DI', 'NB']]

Unnamed: 0,combined_name,QI,DI,NB
308540,Bundaberg - Passionfruit sparkling drink,0.0,50.093075,0.0
301657,Coca-Cola Energy,209.000523,15.079365,20.0
314664,Capri-sun Mango & Maracuja,0.543478,13.077778,5.0
294175,Sinalco - Cola Zero,3.623188,11.613248,5.0
299977,Munster - Ultra zero sucre,112.161172,9.722222,20.0
312075,"PowerBar - 5 Electrolytes (mango, passionfruit)",1.797321,8.477551,10.0
277171,OK zero,167.161172,7.777778,20.0
309121,Dame Gingembre - Ginger shot (citron),0.217391,7.764724,5.0
312252,7Up Free Zero Sugar,0.543478,6.944444,5.0
312179,"7Up free, zero sugar",0.543478,6.944444,5.0


In [10]:
# Columns of the high-DI subset whose name contains '_ratio_scaled'.
# Note: this rebinds ratio_cols, df_corr and corr, which an earlier cell also assigns.
ratio_cols = [
    col
    for col in high_di_sweetened_beverages.columns
    if '_ratio_scaled' in col
]

# Correlation matrix over those columns plus DI; only the DI column is kept.
df_corr = high_di_sweetened_beverages[[*ratio_cols, 'DI']]
corr = df_corr.corr().loc[:, 'DI']

# Display from the most negative to the most positive correlation with DI.
corr.sort_values()

salt_eaten_ratio_scaled                                     -0.165700
phosphorus_eaten_ratio_scaled                               -0.138077
potassium_eaten_ratio_scaled                                -0.135205
fiber_eaten_ratio_scaled                                    -0.109745
folate_eaten_ratio_scaled                                   -0.083073
vitamin_b2_eaten_ratio_scaled                               -0.061941
iron_eaten_ratio_scaled                                     -0.046141
zinc_eaten_ratio_scaled                                     -0.038907
vitamin_e_activity_eaten_ratio_scaled                       -0.038907
vitamin_b1_eaten_ratio_scaled                               -0.038907
vitamin_c_eaten_ratio_scaled                                -0.038828
magnesium_eaten_ratio_scaled                                -0.017046
vitamin_b6_eaten_ratio_scaled                                0.024397
calcium_eaten_ratio_scaled                                   0.033144
niacin_eaten_ratio_s

In [11]:
def interactive_exclude_nutrient(nutrient):
    """Display how QI, NB and DI change when one nutrient is excluded.

    Intended as the callback of an ``ipywidgets.interact`` dropdown. Each call
    clears the previous cell output and then shows three labelled tables:

    1. QI for ``high_qi_sweetened_beverages`` (``compare_qi_excluding_nutrient``)
    2. NB for ``high_qi_sweetened_beverages`` (``compare_nb_excluding_nutrient``)
    3. DI for ``high_di_sweetened_beverages`` (``compare_di_excluding_nutrient``)

    Parameters
    ----------
    nutrient : str
        Name of the nutrient to exclude, e.g. ``'folate_eaten'``. It is passed
        to the helpers as ``nutrient_to_exclude`` and used to build the
        ``*_excl_{nutrient}`` column names that are displayed.

    Returns
    -------
    None
        Results are rendered with ``display``; nothing is returned.

    Notes
    -----
    NOTE(review): all three helpers are called for every choice, although the
    QI/NB helpers receive ``qualifying_nutrients`` and the DI helper receives
    ``disqualifying_nutrients``. Confirm that each helper still returns the
    ``*_excl_{nutrient}`` column when ``nutrient`` is not in the dict it is given.
    """
    # The helpers are not defined in this notebook (presumably they come from
    # the star import of `functions`).
    # NOTE(review): 2000 is the scaling factor the original passed to all three
    # helpers; its meaning (e.g. a reference energy intake) is not visible here.
    scaling_factor = 2000

    clear_output(wait=True)

    # --- QI ------------------------------------------------------------
    print(f"Excluding {nutrient} for QI:")
    df_plot = compare_qi_excluding_nutrient(
        df=high_qi_sweetened_beverages,
        nutrient_to_exclude=nutrient,
        qualifying_nutrients=qualifying_nutrients,
        scaling_factor=scaling_factor,
    )
    display(df_plot[['combined_name', 'QI', f"QI_excl_{nutrient}", 'QI_diff', 'QI_pct_change']])

    # --- NB ------------------------------------------------------------
    print(f"\nExcluding {nutrient} for NB:")
    df_nb = compare_nb_excluding_nutrient(
        df=high_qi_sweetened_beverages,
        nutrient_to_exclude=nutrient,
        qualifying_nutrients=qualifying_nutrients,
        scaling_factor=scaling_factor,
    )
    display(df_nb[['combined_name', 'NB', f"NB_excl_{nutrient}", 'NB_diff', 'NB_pct_change']])

    # --- DI (uses the high-DI subset, not the high-QI one) --------------
    print(f"\nExcluding {nutrient} for DI:")
    df_di = compare_di_excluding_nutrient(
        df=high_di_sweetened_beverages,
        nutrient_to_exclude=nutrient,
        disqualifying_nutrients=disqualifying_nutrients,
        scaling_factor=scaling_factor,
    )
    display(df_di[['combined_name', 'DI', f"DI_excl_{nutrient}", 'DI_diff', 'DI_pct_change']])

# Options offered by the "Exclude:" dropdown, in display order.
nutrient_choice = [
    # Vitamins
    "folate_eaten",
    "niacin_eaten",
    "pantothenic_acid_eaten",
    "vitamin_b2_eaten",
    "vitamin_b1_eaten",
    "all_trans_retinol_equivalents_activity_eaten",
    "beta_carotene_eaten",
    "vitamin_b12_eaten",
    "vitamin_b6_eaten",
    "vitamin_c_eaten",
    "vitamin_d_eaten",
    "vitamin_e_activity_eaten",
    # Minerals
    "calcium_eaten",
    "iron_eaten",
    "magnesium_eaten",
    "phosphorus_eaten",
    "potassium_eaten",
    "zinc_eaten",
    # Fiber, protein, fats, cholesterol, sugar, sodium and salt
    "fiber_eaten",
    "protein_eaten",
    "fat_eaten",
    "fatty_acids_saturated_eaten",
    "cholesterol_eaten",
    "sugar_eaten",
    "sodium_eaten",
    "salt_eaten",
]

In [12]:
# Dropdown over nutrient_choice; each selection calls interactive_exclude_nutrient.
# interact() returns the callback, so without the trailing semicolon the notebook
# echoes the function's repr underneath the widget.
interact(
    interactive_exclude_nutrient,
    nutrient=widgets.Dropdown(options=nutrient_choice, description='Exclude:'),
);

interactive(children=(Dropdown(description='Exclude:', options=('folate_eaten', 'niacin_eaten', 'pantothenic_a…

<function __main__.interactive_exclude_nutrient(nutrient)>