In [2]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from adjustText import adjust_text
from matplotlib.patches import Circle
import os
import pickle
from scipy.stats import mannwhitneyu, ttest_ind, kruskal, f_oneway
from statannotations.Annotator import Annotator
from itertools import combinations
from ipywidgets import interact
import ipywidgets as widgets

In [3]:
# Load the food table from a CSV expected in the current working directory.
# Later cells read its 'food_group', 'combined_name', 'QI', 'DI' and 'NB' columns.
# NOTE(review): the captured output of this cell echoes the read_csv line, which
# looks like a pandas warning (possibly mixed column dtypes) — confirm and, if so,
# pass explicit dtypes here.
df_food_scaled = pd.read_csv('df_food_scaled.csv')

  df_food_scaled = pd.read_csv('df_food_scaled.csv')


In [4]:
# Reference table keyed by nutrient column name. Every entry carries:
#   'unit'   - unit string the target is expressed in
#   'target' - numeric reference amount
#   'type'   - 'qualifying' or 'disqualifying'
# NOTE(review): the targets look like adult daily reference intakes; confirm the
# source, and confirm the 'IU' unit on the retinol-activity entry.
_QUALIFYING_TARGETS = (
    ('folate_eaten', 'mcg', 400),
    ('niacin_eaten', 'mg', 14),
    ('pantothenic_acid_eaten', 'mg', 5),
    ('vitamin_b2_eaten', 'mg', 1.1),
    ('vitamin_b1_eaten', 'mg', 1.2),
    ('all_trans_retinol_equivalents_activity_eaten', 'IU', 700),
    ('beta_carotene_eaten', 'mcg', 700),
    ('vitamin_b12_eaten', 'mcg', 2.4),
    ('vitamin_b6_eaten', 'mg', 1.3),
    ('vitamin_c_eaten', 'mg', 75),
    ('vitamin_d_eaten', 'mcg', 7.5),
    ('vitamin_e_activity_eaten', 'TAE', 15),
    ('calcium_eaten', 'mg', 1000),
    ('iron_eaten', 'mg', 18),
    ('magnesium_eaten', 'mg', 320),
    ('phosphorus_eaten', 'mg', 700),
    ('potassium_eaten', 'mg', 4700),
    ('zinc_eaten', 'mg', 8),
    ('fiber_eaten', 'g', 25),
    ('protein_eaten', 'g', 46),
)

_DISQUALIFYING_TARGETS = (
    ('fat_eaten', 'g', 78),
    ('fatty_acids_saturated_eaten', 'g', 22),
    ('cholesterol_eaten', 'mg', 300),
    ('sugar_eaten', 'g', 125),
    ('sodium_eaten', 'mg', 2400),
    ('salt_eaten', 'g', 6),
)

# Qualifying entries first, then disqualifying ones, so insertion order is stable.
nutrient_info = {
    name: {'unit': unit, 'target': target, 'type': kind}
    for kind, table in (
        ('qualifying', _QUALIFYING_TARGETS),
        ('disqualifying', _DISQUALIFYING_TARGETS),
    )
    for name, unit, target in table
}


# Multipliers keyed by the unit strings that appear in `nutrient_info`.
# NOTE(review): the values look like "units per gram" (1 g = 1000 mg = 1e6 mcg),
# with IU and TAE mapped to 1 — confirm the intended direction of the conversion;
# no cell visible here reads this table.
conversion_factors = dict(mg=1000, g=1, mcg=1000000, IU=1, TAE=1)

In [5]:
# Split the reference table by its 'type' field. Both results keep the original
# insertion order and share the per-nutrient info dicts with `nutrient_info`.
qualifying_nutrients = dict(
    filter(lambda item: item[1]['type'] == 'qualifying', nutrient_info.items())
)
disqualifying_nutrients = dict(
    filter(lambda item: item[1]['type'] == 'disqualifying', nutrient_info.items())
)

In [6]:
# Keep only the rows whose food group is 'processed_meats'.
is_processed_meat = df_food_scaled['food_group'] == 'processed_meats'
df_processed_meats = df_food_scaled[is_processed_meat]

# One row per product name (first occurrence kept), ordered by QI, highest first.
(
    df_processed_meats
    .drop_duplicates('combined_name')
    .loc[:, ['combined_name', 'QI', 'DI', 'NB']]
    .sort_values('QI', ascending=False)
)

Unnamed: 0,combined_name,QI,DI,NB
271105,Merguez,18.655773,1.723488,62.564156
298449,Naturafarm Spread liver sausage,17.065704,2.858173,71.697812
281808,Beef product cooked in jelly,6.467694,2.893778,77.733333
294537,Original Le Parfait,3.929853,2.633597,74.625165
273748,Duck confit,3.904066,3.492710,57.028149
...,...,...,...,...
308967,Le Patron Goose liver mousse,0.072464,1.062330,5.000000
276739,Madrange - Mousse de canard au Porto,0.054556,1.263369,5.455571
311105,Yolo - Cold meat slices (nature),0.035723,0.315889,3.572268
108291,Parfait,0.033811,1.084117,3.381103


In [7]:
# Processed meats with QI strictly above 3, one row per product name,
# ordered by QI with the highest first.
qi_above_three = df_processed_meats['QI'] > 3
high_qi_processed_meats = (
    df_processed_meats[qi_above_three]
    .drop_duplicates('combined_name')
    .sort_values('QI', ascending=False)
)
high_qi_processed_meats.loc[:, ['combined_name', 'QI', 'DI', 'NB']]

Unnamed: 0,combined_name,QI,DI,NB
271105,Merguez,18.655773,1.723488,62.564156
298449,Naturafarm Spread liver sausage,17.065704,2.858173,71.697812
281808,Beef product cooked in jelly,6.467694,2.893778,77.733333
294537,Original Le Parfait,3.929853,2.633597,74.625165
273748,Duck confit,3.904066,3.49271,57.028149
152582,"Ham, back, pieces put together",3.588889,3.21512,61.991758
293132,Fleury Michon - Le rôti de porc cuit,3.53509,2.029512,65.172573
300624,Coop naturafarm Hinterschinken,3.520721,2.838928,63.078609
151636,Ham,3.438286,3.047861,63.002396
294038,Herta - Le Bon Paris,3.438286,3.047861,63.002396


In [8]:
# Every column whose name contains '_ratio_scaled'.
ratio_cols = [
    column
    for column in high_qi_processed_meats.columns
    if '_ratio_scaled' in column
]

# Correlate each scaled ratio with QI across the high-QI products and list the
# coefficients from most negative to most positive.
df_corr = high_qi_processed_meats[[*ratio_cols, 'QI']]
corr = df_corr.corr()['QI']
corr.sort_values()

protein_eaten_ratio_scaled                                  -0.587263
phosphorus_eaten_ratio_scaled                               -0.466066
magnesium_eaten_ratio_scaled                                -0.390530
salt_eaten_ratio_scaled                                     -0.384756
sodium_eaten_ratio_scaled                                   -0.373388
potassium_eaten_ratio_scaled                                -0.364286
vitamin_c_eaten_ratio_scaled                                -0.313610
niacin_eaten_ratio_scaled                                   -0.244357
zinc_eaten_ratio_scaled                                     -0.201545
vitamin_b6_eaten_ratio_scaled                               -0.198339
fiber_eaten_ratio_scaled                                    -0.165679
calcium_eaten_ratio_scaled                                  -0.044547
sugar_eaten_ratio_scaled                                     0.031549
iron_eaten_ratio_scaled                                      0.069354
cholesterol_eaten_ra

In [9]:
def compute_index(row, nutrient_cols, scaling_factor=2000):
    """Return the energy-scaled mean of a row's ``<nutrient>_ratio`` values.

    Parameters
    ----------
    row : mapping-like
        Must provide ``row[nutr + '_ratio']`` for every nutrient in
        ``nutrient_cols`` and ``row['energy_kcal_eaten']``.
    nutrient_cols : sized iterable of str
        Base nutrient names; iterating a dict uses its keys.
    scaling_factor : number, default 2000
        Numerator of the energy scaling term ``scaling_factor / energy``.

    Returns
    -------
    number
        ``(scaling_factor / energy_kcal_eaten) * (sum of ratios / count)``.
    """
    ratio_total = sum(row[name + '_ratio'] for name in nutrient_cols)
    energy_scale = scaling_factor / row['energy_kcal_eaten']
    return energy_scale * (ratio_total / len(nutrient_cols))

In [10]:
def compute_qi_excluding(row, nutrient_list, exclude=None, scaling_factor=2000):
    """Compute the index for ``row`` with one nutrient optionally left out.

    Parameters
    ----------
    row : mapping-like
        Row handed on to ``compute_index``.
    nutrient_list : iterable of str
        Base nutrient names; iterating a dict uses its keys.
    exclude : str or None, default None
        Name to drop from ``nutrient_list``. With ``None`` the list is passed
        through untouched.
    scaling_factor : number, default 2000
        Forwarded to ``compute_index``.

    Returns
    -------
    Whatever ``compute_index`` returns for the remaining nutrients.
    """
    remaining = (
        nutrient_list
        if exclude is None
        else [name for name in nutrient_list if name != exclude]
    )
    return compute_index(row, remaining, scaling_factor=scaling_factor)

In [11]:
def compare_qi_excluding_nutrient(df, nutrient_to_exclude, qualifying_nutrients, new_col_name=None, scaling_factor=2000):
    """Plot and tabulate QI with and without one qualifying nutrient.

    A QI value that ignores ``nutrient_to_exclude`` is computed for every row
    and written into ``df`` (the caller's frame is modified). One row per
    ``combined_name`` is then drawn as a grouped bar chart next to the existing
    ``QI`` column.

    NOTE(review): the baseline bars use the pre-existing ``QI`` column, whereas
    the excluded value is recomputed through ``compute_qi_excluding`` — confirm
    both rely on the same formula and inputs.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``combined_name``, ``QI`` and whatever columns
        ``compute_qi_excluding`` reads from each row.
    nutrient_to_exclude : str
        Base nutrient name to leave out.
    qualifying_nutrients : iterable of str
        Full set of qualifying nutrient names (a dict is iterated by key).
    new_col_name : str or None, default None
        Column that receives the recomputed QI; defaults to
        ``QI_excl_<nutrient_to_exclude>``.
    scaling_factor : number, default 2000
        Forwarded to ``compute_qi_excluding``.

    Returns
    -------
    pandas.DataFrame
        Columns ``combined_name``, ``QI``, the new column, ``QI_diff``
        (QI minus the new column) and ``QI_pct_change`` (diff as % of QI).
    """
    excl_col = f"QI_excl_{nutrient_to_exclude}" if new_col_name is None else new_col_name

    def _qi_without_nutrient(row):
        return compute_qi_excluding(
            row,
            qualifying_nutrients,
            exclude=nutrient_to_exclude,
            scaling_factor=scaling_factor,
        )

    # Written onto the caller's frame on purpose: the column stays available afterwards.
    df[excl_col] = df.apply(_qi_without_nutrient, axis=1)

    unique_items = df.drop_duplicates('combined_name').copy()
    df_plot = unique_items[['combined_name', 'QI', excl_col]]

    item_names = df_plot['combined_name'].tolist()
    positions = np.arange(len(item_names))
    bar_width = 0.35

    # Two bars per item: original QI on the left, recomputed QI on the right.
    fig, ax = plt.subplots(figsize=(16, 8))
    ax.bar(positions - bar_width/2, df_plot['QI'], bar_width,
           label='QI (incl. ' + nutrient_to_exclude + ')', color='skyblue')
    ax.bar(positions + bar_width/2, df_plot[excl_col], bar_width,
           label=f"QI (excl. {nutrient_to_exclude})", color='deeppink')

    ax.set_xlabel('Food Item')
    ax.set_ylabel('QI Value')
    ax.set_title(f"Comparison of QI with and without {nutrient_to_exclude}")
    ax.set_xticks(positions)
    ax.set_xticklabels(item_names, rotation=45, ha='right')
    ax.legend()
    fig.tight_layout()
    plt.show()

    # Absolute and relative drop caused by removing the nutrient.
    df_plot['QI_diff'] = df_plot['QI'] - df_plot[excl_col]
    df_plot['QI_pct_change'] = (df_plot['QI_diff'] / df_plot['QI']) * 100

    return df_plot

In [12]:
def compute_nb(row, nutrient_cols, scaling_factor=2000):
    """Return the mean of a row's capped ``<nutrient>_ratio_scaled`` values, in percent.

    Each ratio above 1.0 is replaced by 1 before averaging, so a single nutrient
    can contribute at most its full share.

    Parameters
    ----------
    row : mapping-like
        Must provide ``row[nutr + '_ratio_scaled']`` for every nutrient.
    nutrient_cols : sized iterable of str
        Base nutrient names; iterating a dict uses its keys.
    scaling_factor : number, default 2000
        Accepted for signature parity with the other helpers; not used here.

    Returns
    -------
    number
        ``sum(capped ratios) / len(nutrient_cols) * 100``.
    """
    scaled_values = (row[name + '_ratio_scaled'] for name in nutrient_cols)
    capped = [1 if value > 1.0 else value for value in scaled_values]
    return (sum(capped) / len(nutrient_cols)) * 100

In [13]:
def compute_nb_excluding(row, nutrient_list, exclude=None, scaling_factor=2000):
    """Compute NB for ``row`` with one nutrient optionally left out.

    Parameters
    ----------
    row : mapping-like
        Row handed on to ``compute_nb``.
    nutrient_list : iterable of str
        Base nutrient names; iterating a dict uses its keys.
    exclude : str or None, default None
        Name to drop from ``nutrient_list``. With ``None`` the list is passed
        through untouched.
    scaling_factor : number, default 2000
        Forwarded to ``compute_nb``.

    Returns
    -------
    Whatever ``compute_nb`` returns for the remaining nutrients.
    """
    remaining = (
        nutrient_list
        if exclude is None
        else [name for name in nutrient_list if name != exclude]
    )
    return compute_nb(row, remaining, scaling_factor=scaling_factor)


In [14]:
def compare_nb_excluding_nutrient(df, nutrient_to_exclude, qualifying_nutrients, new_col_name=None, scaling_factor=2000):
    """Plot and tabulate NB with and without one qualifying nutrient.

    NB is recomputed for every row over all qualifying nutrients and once more
    without ``nutrient_to_exclude``. Both results are written into ``df``: the
    caller's ``NB`` column is overwritten and a new column is added. One row
    per ``combined_name`` is then drawn as a grouped bar chart.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``combined_name`` and whatever columns ``compute_nb`` reads from
        each row.
    nutrient_to_exclude : str
        Base nutrient name to leave out.
    qualifying_nutrients : iterable of str
        Full set of qualifying nutrient names (a dict is iterated by key).
    new_col_name : str or None, default None
        Column that receives the recomputed NB; defaults to
        ``NB_excl_<nutrient_to_exclude>``.
    scaling_factor : number, default 2000
        Forwarded to the NB helpers.

    Returns
    -------
    pandas.DataFrame
        Columns ``combined_name``, ``NB``, the new column, ``NB_diff``
        (NB minus the new column) and ``NB_pct_change`` (diff as % of NB).
    """
    excl_col = f"NB_excl_{nutrient_to_exclude}" if new_col_name is None else new_col_name

    def _nb_all(row):
        return compute_nb(row, qualifying_nutrients, scaling_factor=scaling_factor)

    def _nb_without_nutrient(row):
        return compute_nb_excluding(
            row,
            qualifying_nutrients,
            exclude=nutrient_to_exclude,
            scaling_factor=scaling_factor,
        )

    # Baseline first, then the reduced variant; both land on the caller's frame.
    df['NB'] = df.apply(_nb_all, axis=1)
    df[excl_col] = df.apply(_nb_without_nutrient, axis=1)

    unique_items = df.drop_duplicates('combined_name').copy()
    df_plot = unique_items[['combined_name', 'NB', excl_col]]

    item_names = df_plot['combined_name'].tolist()
    positions = np.arange(len(item_names))
    bar_width = 0.35

    # Two bars per item: full NB on the left, reduced NB on the right.
    fig, ax = plt.subplots(figsize=(16, 8))
    ax.bar(positions - bar_width/2, df_plot['NB'], bar_width,
           label='NB (incl. all)', color='skyblue')
    ax.bar(positions + bar_width/2, df_plot[excl_col], bar_width,
           label=f"NB (excl. {nutrient_to_exclude})", color='deeppink')
    ax.set_xlabel('Food Item')
    ax.set_ylabel('NB Value (%)')
    ax.set_title(f"Comparison of NB with and without {nutrient_to_exclude}")
    ax.set_xticks(positions)
    ax.set_xticklabels(item_names, rotation=45, ha='right')
    ax.legend()
    fig.tight_layout()
    plt.show()

    # Absolute and relative change caused by removing the nutrient.
    df_plot['NB_diff'] = df_plot['NB'] - df_plot[excl_col]
    df_plot['NB_pct_change'] = (df_plot['NB_diff'] / df_plot['NB']) * 100
    return df_plot


In [15]:
def compute_nb_excluding_multiple(row, nutrient_list, exclude_list, scaling_factor=2000):
    """Compute NB for ``row`` with several nutrients left out.

    The remaining nutrients are collected into a fresh list, so
    ``nutrient_list`` is never modified and may be any iterable of names. In
    particular it may be a dict such as ``qualifying_nutrients`` (iterated by
    key), which the previous ``copy()`` / ``remove()`` implementation rejected
    with ``AttributeError``. Every occurrence of an excluded name is dropped,
    matching ``compute_nb_excluding``.

    Parameters
    ----------
    row : mapping-like
        Row handed on to ``compute_nb``.
    nutrient_list : iterable of str
        Base nutrient names.
    exclude_list : iterable of str
        Names to drop; names that are not in ``nutrient_list`` are ignored.
    scaling_factor : number, default 2000
        Forwarded to ``compute_nb``.

    Returns
    -------
    Whatever ``compute_nb`` returns for the remaining nutrients.
    """
    excluded = set(exclude_list)
    new_list = [nutr for nutr in nutrient_list if nutr not in excluded]
    return compute_nb(row, new_list, scaling_factor=scaling_factor)

In [16]:
def interactive_exclude_nutrient(nutrient):
    """Show how QI and NB change when one qualifying nutrient is left out.

    Runs the QI comparison and then the NB comparison on the notebook-level
    ``high_qi_processed_meats`` frame, using ``qualifying_nutrients`` as the
    nutrient set, and displays each resulting summary table.

    Both comparison helpers write columns into the frame they receive, and the
    NB helper also overwrites ``NB``. Each helper therefore gets its own copy,
    so that picking options in the widget never alters
    ``high_qi_processed_meats`` and the output does not depend on which
    nutrients were selected earlier.

    Parameters
    ----------
    nutrient : str
        Base nutrient name to exclude, e.g. ``'vitamin_b1_eaten'``.
    """
    df_plot = compare_qi_excluding_nutrient(
        df=high_qi_processed_meats.copy(),  # keep the shared frame untouched
        nutrient_to_exclude=nutrient,
        qualifying_nutrients=qualifying_nutrients,
        scaling_factor=2000
    )

    display(df_plot[['combined_name', 'QI', f"QI_excl_{nutrient}", 'QI_diff', 'QI_pct_change']])

    print(f"\nExcluding {nutrient} for NB:")
    df_nb = compare_nb_excluding_nutrient(
        df=high_qi_processed_meats.copy(),  # keep the shared frame untouched
        nutrient_to_exclude=nutrient,
        qualifying_nutrients=qualifying_nutrients,
        scaling_factor=2000
    )
    display(df_nb[['combined_name', 'NB', f"NB_excl_{nutrient}", 'NB_diff', 'NB_pct_change']])

# Options offered in the "Exclude" dropdown: the qualifying nutrients, in the
# same order as they appear in `nutrient_info`.
nutrient_choice = [
    # vitamins and related compounds
    'folate_eaten', 'niacin_eaten', 'pantothenic_acid_eaten',
    'vitamin_b2_eaten', 'vitamin_b1_eaten',
    'all_trans_retinol_equivalents_activity_eaten', 'beta_carotene_eaten',
    'vitamin_b12_eaten', 'vitamin_b6_eaten', 'vitamin_c_eaten',
    'vitamin_d_eaten', 'vitamin_e_activity_eaten',
    # minerals
    'calcium_eaten', 'iron_eaten', 'magnesium_eaten',
    'phosphorus_eaten', 'potassium_eaten', 'zinc_eaten',
    # macronutrients
    'fiber_eaten', 'protein_eaten',
]

In [17]:


# Dropdown listing the nutrients in `nutrient_choice`; the selected value is
# passed to interactive_exclude_nutrient as its `nutrient` argument.
exclude_dropdown = widgets.Dropdown(options=nutrient_choice, description='Exclude:')
interact(interactive_exclude_nutrient, nutrient=exclude_dropdown)

interactive(children=(Dropdown(description='Exclude:', options=('folate_eaten', 'niacin_eaten', 'pantothenic_a…

<function __main__.interactive_exclude_nutrient(nutrient)>

- Merguez: vitamin B1
- Naturafarm Spread liver sausage: retinol activity equivalents (RAE)


- Vitamin B12
- Vitamin C
- Phosphorus
- Zinc
- Protein