In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from adjustText import adjust_text
from matplotlib.patches import Circle
import os
import pickle
from scipy.stats import mannwhitneyu, ttest_ind, kruskal, f_oneway
from statannotations.Annotator import Annotator
from itertools import combinations

In [2]:
# Load the filtered food records; the first CSV column is used as the row index.
df_food = pd.read_csv("df_food_filtered_with_food_group.csv",index_col=0)
# Load the pickled OpenFoodFacts-merged table.
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# if it comes from a trusted source.
# encoding="latin1" is the setting usually needed for pickles written by
# Python 2 -- presumably that is why it is passed here; confirm.
with open("df_food_openfoodfacts_merged.pkl", "rb") as f:
    df_food_openfoodfacts = pickle.load(f, encoding="latin1")

  df_food = pd.read_csv("df_food_filtered_with_food_group.csv",index_col=0)


In [5]:
# Keep only records whose eaten mass and eaten energy are both strictly
# positive (rows with a missing value in either column are dropped as well).
df_food = df_food.loc[
    lambda frame: (frame["eaten_quantity_in_gram"] > 0)
    & (frame["energy_kcal_eaten"] > 0)
]

In [6]:
# Reference table of the nutrients that enter the indices.
# Each row is (nutrient column name, unit of the target, target amount).
# NOTE(review): the targets look like daily reference intakes -- confirm the source.
_QUALIFYING_TARGETS = [
    ('folate_eaten', 'mcg', 400),
    ('niacin_eaten', 'mg', 14),
    ('pantothenic_acid_eaten', 'mg', 5),
    ('vitamin_b2_eaten', 'mg', 1.1),
    ('vitamin_b1_eaten', 'mg', 1.2),
    ('all_trans_retinol_equivalents_activity_eaten', 'IU', 700),
    ('beta_carotene_eaten', 'mcg', 700),
    ('vitamin_b12_eaten', 'mcg', 2.4),
    ('vitamin_b6_eaten', 'mg', 1.3),
    ('vitamin_c_eaten', 'mg', 75),
    ('vitamin_d_eaten', 'mcg', 7.5),
    ('vitamin_e_activity_eaten', 'TAE', 15),
    ('calcium_eaten', 'mg', 1000),
    ('iron_eaten', 'mg', 18),
    ('magnesium_eaten', 'mg', 320),
    ('phosphorus_eaten', 'mg', 700),
    ('potassium_eaten', 'mg', 4700),
    ('zinc_eaten', 'mg', 8),
    ('fiber_eaten', 'g', 25),
    ('protein_eaten', 'g', 46),
]

_DISQUALIFYING_TARGETS = [
    ('fat_eaten', 'g', 78),
    ('fatty_acids_saturated_eaten', 'g', 22),
    ('cholesterol_eaten', 'mg', 300),
    ('sugar_eaten', 'g', 125),
    ('sodium_eaten', 'mg', 2400),
    ('salt_eaten', 'g', 6),
]

# nutrient name -> {'unit', 'target', 'type'}; qualifying entries come first,
# then the disqualifying ones, each group in the order listed above.
nutrient_info = {
    name: {'unit': unit, 'target': target, 'type': kind}
    for kind, rows in (
        ('qualifying', _QUALIFYING_TARGETS),
        ('disqualifying', _DISQUALIFYING_TARGETS),
    )
    for name, unit, target in rows
}


# Number of each mass unit per gram (1 g = 1000 mg = 1,000,000 mcg); the
# 'IU' and 'TAE' units are mapped to 1. Where these factors are applied is
# not visible in this part of the notebook.
conversion_factors = dict(
    mg=1000,
    g=1,
    mcg=1_000_000,
    IU=1,
    TAE=1,
)

In [7]:
def _nutrients_of_type(kind):
    """Return the entries of ``nutrient_info`` whose 'type' equals ``kind``."""
    return {
        name: spec
        for name, spec in nutrient_info.items()
        if spec['type'] == kind
    }


# Split the reference table into its two groups (insertion order is kept).
qualifying_nutrients = _nutrients_of_type('qualifying')
disqualifying_nutrients = _nutrients_of_type('disqualifying')



In [8]:
# Load the scaled food table from disk. No index_col is passed here.
# NOTE(review): how 'df_food_scaled.csv' was produced is not shown in this
# part of the notebook -- presumably an earlier step wrote it; confirm.
df_food_scaled = pd.read_csv('df_food_scaled.csv')

  df_food_scaled = pd.read_csv('df_food_scaled.csv')


In [9]:
# Median QI / DI / NB per food group, ordered from lowest to highest median QI.
(
    df_food_scaled[['QI', 'DI', 'NB', 'combined_name', 'food_group']]
    .groupby('food_group')
    .median(numeric_only=True)
    .sort_values('QI')
)

Unnamed: 0_level_0,QI,DI,NB
food_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
alcoholic_beverages,0.038127,0.007668,3.812709
sweetened_beverages,0.041806,0.6643,4.180602
others,0.105016,0.666667,5.474351
soups_sauces_condiments,0.117561,1.068889,6.402525
salty_snacks,0.121828,0.535766,10.0
cooked_grains_pasta,0.135563,0.517566,9.358974
sweets_desserts,0.157373,0.85203,12.639102
vegetable_oils,0.245871,0.694173,6.645221
butter_margarine_spreads,0.284294,1.837749,15.794434
soy_granules_textured_protein,0.31086,0.904191,10.0


In [10]:
# Restrict the scaled table to the rows of the 'meat_fish_seafood' food group.
df_meat_fish_seafood = df_food_scaled.loc[
    df_food_scaled['food_group'] == 'meat_fish_seafood'
]

In [11]:
# Show one row per distinct 'combined_name' (the first occurrence is kept).
df_meat_fish_seafood.drop_duplicates(subset='combined_name')

Unnamed: 0,food_id,barcode,dish_id,eaten_quantity,eaten_unit,subject_key,eaten_at,eaten_at_utc_offset,media_count,food_group_cname,...,potassium_eaten_ratio_scaled,zinc_eaten_ratio_scaled,fiber_eaten_ratio_scaled,protein_eaten_ratio_scaled,fat_eaten_ratio_scaled,fatty_acids_saturated_eaten_ratio_scaled,cholesterol_eaten_ratio_scaled,sugar_eaten_ratio_scaled,sodium_eaten_ratio_scaled,salt_eaten_ratio_scaled
0,13,0,335994,110.0,g,bjsqab,2022-02-15 18:06:27,60,1,dairy_products_meat_fish_eggs_tofu,...,0.856776,7.214765,0.000000,7.936971,0.774393,1.037218,3.937360,0.000000,0.430649,0.000000
56,27,0,118524,60.0,g,nwfrgc,2020-05-11 17:53:04,120,1,dairy_products_meat_fish_eggs_tofu,...,0.584522,2.335165,0.000000,6.306737,1.183432,1.448551,2.783883,0.008791,22.756410,0.000000
800,71,0,372924,100.0,g,7wuem4,2022-07-06 18:42:58,120,1,dairy_products_meat_fish_eggs_tofu,...,0.368107,5.017301,0.000000,4.889424,1.570402,2.359232,2.029988,0.000000,0.129758,0.000000
802,82,0,102980,120.0,g,cbqxxj,2020-03-23 11:04:30,60,1,dairy_products_meat_fish_eggs_tofu,...,0.697593,6.420765,0.000000,7.555239,0.868712,1.490313,2.295082,0.000000,0.223133,0.000000
815,83,0,113397,250.0,g,8vtq3c,2020-04-27 17:30:00,120,0,dairy_products_meat_fish_eggs_tofu,...,0.788022,12.500000,0.000000,8.051530,0.743906,0.953984,3.497942,0.000000,0.303498,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
317129,33144,7613312222911,386534,155.0,g,x4xuqb,2022-09-22 15:21:21,120,0,unclassified,...,0.000000,0.000000,0.000000,6.769071,1.074774,1.088732,0.000000,0.047904,0.000000,1.896208
317360,33417,7613269437260,393863,300.0,g,vxyht3,2022-10-21 20:15:55,120,0,unclassified,...,0.000000,0.000000,0.317460,5.521049,0.712251,0.721501,0.000000,0.825397,0.000661,0.000000
317488,33609,3660088100350,400544,90.0,g,7mm74m,2022-11-16 11:25:53,60,0,unclassified,...,0.000000,0.000000,0.000000,3.801242,1.846154,1.641558,0.000000,0.036571,0.000000,0.761905
317589,46072,7613312352717,406386,125.0,g,48wbxy,2022-12-08 11:10:36,60,0,unclassified,...,0.000000,0.000000,0.000000,3.881988,1.831502,1.217532,0.000000,0.000000,0.000000,0.148810


In [25]:
# Items of the group with QI above 10: one row per 'combined_name'
# (first occurrence kept), ordered from highest to lowest QI.
high_qi_meat_fish_seafood = (
    df_meat_fish_seafood.loc[df_meat_fish_seafood['QI'] > 10]
    .drop_duplicates(subset='combined_name')
    .sort_values(by='QI', ascending=False)
)

In [42]:
# Compact view of the high-QI items: name, eaten quantity and the three indices.
high_qi_meat_fish_seafood.loc[
    :, ['combined_name', 'eaten_quantity', 'QI', 'DI', 'NB']
]

Unnamed: 0,combined_name,eaten_quantity,QI,DI,NB
260724,Perch fillets (lake),120.0,61.567363,1.045119,80.933707
154859,Liver,30.0,31.765218,3.232951,77.415456
294168,"Sushi Nigiri ""saumon & thon""",90.0,17.293963,0.484705,62.241389
276920,GRAN MAR Polypfisch in Marinade,57.5,12.878941,1.905372,82.178683
285020,Octopus,110.0,12.093442,1.193991,81.31993


In [43]:
def compute_index(row, nutrient_cols, scaling_factor=2000):
    """Return the energy-scaled mean of the ``<nutrient>_ratio`` values of one row.

    Parameters
    ----------
    row : mapping-like (for example a DataFrame row)
        Must provide ``'energy_kcal_eaten'`` and one ``'<nutrient>_ratio'``
        entry for every name in ``nutrient_cols``.
    nutrient_cols : sized iterable of str
        Nutrient names without the ``'_ratio'`` suffix.
    scaling_factor : number, default 2000
        Numerator of the energy term ``scaling_factor / energy_kcal_eaten``.

    Returns
    -------
    number
        ``(scaling_factor / row['energy_kcal_eaten'])`` multiplied by the
        arithmetic mean of the selected ratio values.
    """
    total_ratio = 0
    for nutrient in nutrient_cols:
        total_ratio += row[nutrient + '_ratio']

    # Energy term first, then the mean, then their product.
    energy_scale = scaling_factor / row['energy_kcal_eaten']
    mean_ratio = total_ratio / len(nutrient_cols)
    return energy_scale * mean_ratio

In [44]:
def compute_qi_excluding(row, nutrient_list, exclude=None, scaling_factor=2000):
    """Compute the index of ``row`` with one nutrient optionally left out.

    Parameters
    ----------
    row : mapping-like
        Food record forwarded unchanged to ``compute_index``.
    nutrient_list : iterable of str
        Nutrient names to average over.
    exclude : str or None, default None
        Name to drop from ``nutrient_list``; with ``None`` the list is
        passed through as given.
    scaling_factor : number, default 2000
        Forwarded to ``compute_index``.

    Returns
    -------
    The value returned by ``compute_index`` for the remaining nutrients.
    """
    remaining = (
        nutrient_list
        if exclude is None
        else [name for name in nutrient_list if name != exclude]
    )
    return compute_index(row, remaining, scaling_factor=scaling_factor)

In [45]:
# All '*_ratio_scaled' columns of 'Perch fillets (lake)', transposed so that
# each nutrient is shown on its own row.
high_qi_meat_fish_seafood.loc[
    high_qi_meat_fish_seafood['combined_name'] == 'Perch fillets (lake)',
    [col for col in df_meat_fish_seafood.columns if '_ratio_scaled' in col],
].T

Unnamed: 0,260724
folate_eaten_ratio_scaled,0.672269
niacin_eaten_ratio_scaled,3.577431
pantothenic_acid_eaten_ratio_scaled,2.722689
vitamin_b2_eaten_ratio_scaled,1176.470588
vitamin_b1_eaten_ratio_scaled,1.260504
all_trans_retinol_equivalents_activity_eaten_ratio_scaled,0.0
beta_carotene_eaten_ratio_scaled,0.0
vitamin_b12_eaten_ratio_scaled,5.812325
vitamin_b6_eaten_ratio_scaled,1.680672
vitamin_c_eaten_ratio_scaled,0.851541


In [46]:
# Leave-one-out check for 'Perch fillets (lake)': recompute the QI with each
# qualifying nutrient removed in turn and compare it with the stored QI.
food_item = high_qi_meat_fish_seafood.loc[
    high_qi_meat_fish_seafood['combined_name'] == 'Perch fillets (lake)'
].iloc[0]
qi_including = food_item['QI']

results = []
for nutr in qualifying_nutrients:
    qi_excluding = compute_qi_excluding(
        food_item,
        qualifying_nutrients,
        exclude=nutr,
        scaling_factor=2000,
    )
    # Positive change: the QI is higher with the nutrient than without it.
    abs_diff = qi_including - qi_excluding
    pct_diff = (abs_diff / qi_including) * 100
    results.append(
        dict(
            zip(
                ('Nutrient Excluded', 'QI (incl.)', 'QI (excl.)',
                 'Absolute Change', 'Percent Change'),
                (nutr, qi_including, qi_excluding, abs_diff, pct_diff),
            )
        )
    )

# One row per excluded nutrient, largest QI drop first.
df_results = pd.DataFrame(results)
df_results.sort_values('Absolute Change', ascending=False)

Unnamed: 0,Nutrient Excluded,QI (incl.),QI (excl.),Absolute Change,Percent Change
3,vitamin_b2_eaten,61.567363,2.888246,58.679117,95.308804
10,vitamin_d_eaten,61.567363,63.746273,-2.17891,-3.539067
19,protein_eaten,61.567363,64.382775,-2.815412,-4.572897
7,vitamin_b12_eaten,61.567363,64.501839,-2.934476,-4.766285
1,niacin_eaten,61.567363,64.619465,-3.052102,-4.957337
2,pantothenic_acid_eaten,61.567363,64.664451,-3.097088,-5.030406
15,phosphorus_eaten,61.567363,64.675066,-3.107703,-5.047647
8,vitamin_b6_eaten,61.567363,64.719294,-3.151931,-5.119484
17,zinc_eaten,61.567363,64.722611,-3.155248,-5.124871
11,vitamin_e_activity_eaten,61.567363,64.729909,-3.162546,-5.136725


We observe that for 'Perch fillets (lake)', vitamin B2 is the only micronutrient that boosts the QI value: excluding it drops the QI from about 61.6 to about 2.9 (a 95% decrease).

In [47]:
# All '*_ratio_scaled' columns of 'Liver', transposed so that each nutrient
# is shown on its own row.
high_qi_meat_fish_seafood.loc[
    high_qi_meat_fish_seafood['combined_name'] == 'Liver',
    [col for col in df_meat_fish_seafood.columns if '_ratio_scaled' in col],
].T

Unnamed: 0,154859
folate_eaten_ratio_scaled,7.621951
niacin_eaten_ratio_scaled,13.066202
pantothenic_acid_eaten_ratio_scaled,20.0
vitamin_b2_eaten_ratio_scaled,32.150776
vitamin_b1_eaten_ratio_scaled,2.54065
all_trans_retinol_equivalents_activity_eaten_ratio_scaled,242.160279
beta_carotene_eaten_ratio_scaled,0.0
vitamin_b12_eaten_ratio_scaled,279.471545
vitamin_b6_eaten_ratio_scaled,8.255159
vitamin_c_eaten_ratio_scaled,3.414634


In [48]:
# Leave-one-out check for 'Liver': recompute the QI with each qualifying
# nutrient removed in turn and compare it with the stored QI.
food_item = high_qi_meat_fish_seafood.loc[
    high_qi_meat_fish_seafood['combined_name'] == 'Liver'
].iloc[0]
qi_including = food_item['QI']

results = []
for nutr in qualifying_nutrients:
    qi_excluding = compute_qi_excluding(
        food_item,
        qualifying_nutrients,
        exclude=nutr,
        scaling_factor=2000,
    )
    # Positive change: the QI is higher with the nutrient than without it.
    abs_diff = qi_including - qi_excluding
    pct_diff = (abs_diff / qi_including) * 100
    results.append(
        dict(
            zip(
                ('Nutrient Excluded', 'QI (incl.)', 'QI (excl.)',
                 'Absolute Change', 'Percent Change'),
                (nutr, qi_including, qi_excluding, abs_diff, pct_diff),
            )
        )
    )

# One row per excluded nutrient, largest QI drop first.
df_results = pd.DataFrame(results)
df_results.sort_values('Absolute Change', ascending=False)

Unnamed: 0,Nutrient Excluded,QI (incl.),QI (excl.),Absolute Change,Percent Change
7,vitamin_b12_eaten,31.765218,18.728043,13.037175,41.042297
5,all_trans_retinol_equivalents_activity_eaten,31.765218,20.691794,11.073424,34.860218
3,vitamin_b2_eaten,31.765218,31.744925,0.020293,0.063883
2,pantothenic_acid_eaten,31.765218,32.38444,-0.619222,-1.949371
1,niacin_eaten,31.765218,32.749377,-0.984159,-3.098228
8,vitamin_b6_eaten,31.765218,33.002589,-1.237371,-3.895366
0,folate_eaten,31.765218,33.035916,-1.270698,-4.000282
17,zinc_eaten,31.765218,33.100101,-1.334883,-4.202342
19,protein_eaten,31.765218,33.103589,-1.338371,-4.213324
13,iron_eaten,31.765218,33.130411,-1.365193,-4.297759


Similarly, vitamin B12 and retinol activity equivalents (RAE) are the two micronutrients that most influence the QI value of 'Liver'.

In [49]:
# All '*_ratio_scaled' columns of the salmon & tuna nigiri item, transposed
# so that each nutrient is shown on its own row.
high_qi_meat_fish_seafood.loc[
    high_qi_meat_fish_seafood['combined_name'] == 'Sushi Nigiri "saumon & thon"',
    [col for col in df_meat_fish_seafood.columns if '_ratio_scaled' in col],
].T

Unnamed: 0,294168
folate_eaten_ratio_scaled,0.387435
niacin_eaten_ratio_scaled,1.323859
pantothenic_acid_eaten_ratio_scaled,0.879581
vitamin_b2_eaten_ratio_scaled,266.539743
vitamin_b1_eaten_ratio_scaled,0.52356
all_trans_retinol_equivalents_activity_eaten_ratio_scaled,0.0
beta_carotene_eaten_ratio_scaled,0.629768
vitamin_b12_eaten_ratio_scaled,5.453752
vitamin_b6_eaten_ratio_scaled,1.77205
vitamin_c_eaten_ratio_scaled,0.273647


In [51]:
# Leave-one-out check for the salmon & tuna nigiri item: recompute the QI
# with each qualifying nutrient removed in turn and compare it with the
# stored QI.
food_item = high_qi_meat_fish_seafood.loc[
    high_qi_meat_fish_seafood['combined_name'] == 'Sushi Nigiri "saumon & thon"'
].iloc[0]
qi_including = food_item['QI']

results = []
for nutr in qualifying_nutrients:
    qi_excluding = compute_qi_excluding(
        food_item,
        qualifying_nutrients,
        exclude=nutr,
        scaling_factor=2000,
    )
    # Positive change: the QI is higher with the nutrient than without it.
    abs_diff = qi_including - qi_excluding
    pct_diff = (abs_diff / qi_including) * 100
    results.append(
        dict(
            zip(
                ('Nutrient Excluded', 'QI (incl.)', 'QI (excl.)',
                 'Absolute Change', 'Percent Change'),
                (nutr, qi_including, qi_excluding, abs_diff, pct_diff),
            )
        )
    )

# One row per excluded nutrient, largest QI drop first.
df_results = pd.DataFrame(results)
df_results.sort_values('Absolute Change', ascending=False)

Unnamed: 0,Nutrient Excluded,QI (incl.),QI (excl.),Absolute Change,Percent Change
3,vitamin_b2_eaten,17.293963,4.175764,13.118199,75.854209
11,vitamin_e_activity_eaten,17.293963,14.934217,2.359746,13.644914
7,vitamin_b12_eaten,17.293963,17.917132,-0.623169,-3.60339
8,vitamin_b6_eaten,17.293963,18.110906,-0.816943,-4.723861
19,protein_eaten,17.293963,18.111321,-0.817358,-4.726259
15,phosphorus_eaten,17.293963,18.134101,-0.840138,-4.857985
1,niacin_eaten,17.293963,18.134495,-0.840532,-4.860261
10,vitamin_d_eaten,17.293963,18.145386,-0.851423,-4.923238
2,pantothenic_acid_eaten,17.293963,18.157878,-0.863915,-4.995471
6,beta_carotene_eaten,17.293963,18.171026,-0.877063,-5.071497


For 'Sushi Nigiri "saumon & thon"', vitamin B2 is the nutrient with the most influence on the QI value.