Skip to content

Commit

Permalink
fix: load both ciqual + calnut tables (#8702)
Browse files Browse the repository at this point in the history
Originally we loaded only the "extended" CALNUT table, which has estimates for all nutrients but is missing some categories, so we now also load the regular Ciqual table.
  • Loading branch information
stephanegigandet committed Jul 19, 2023
1 parent b219145 commit e449fa8
Show file tree
Hide file tree
Showing 8 changed files with 3,377 additions and 10 deletions.
1 change: 1 addition & 0 deletions external-data/ciqual/ciqual/CIQUAL.csv
3,187 changes: 3,187 additions & 0 deletions external-data/ciqual/ciqual/CIQUAL2020_ENG_2020_07_07.csv

Large diffs are not rendered by default.

Binary file not shown.
1 change: 1 addition & 0 deletions external-data/ciqual/ciqual/CIQUAL_version.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2020
146 changes: 136 additions & 10 deletions lib/ProductOpener/NutritionCiqual.pm
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,147 @@ Hash table with the Ciqual ingredient id as a key, mapped to a hash of Open Food
=head2 load_ciqual_data()
Loads the Ciqual CALNUT database.
Loads the Ciqual table + the Ciqual CALNUT extended table.
=cut

sub load_ciqual_data() {

# Entry point: loads both Ciqual sources, in a fixed order, by calling the two loaders below.
# Takes no arguments and returns nothing.

# Step 1: load the Ciqual table, which contains some nutrients for all Ciqual categories
load_ciqual_table();

# Step 2: load the extended Ciqual CALNUT table, which contains all nutrients but only for some Ciqual categories.
# The order matters: CALNUT is loaded last so that, when it has data for a category,
# it overrides the possibly partial data loaded from the Ciqual table in step 1.
load_ciqual_calnut_table();

return;
}

# Unit factors shared by the functions that load the Ciqual and CALNUT tables.
# A value read from a table is divided by the factor of its unit: mass units
# are thereby expressed in grams, while energy units (factor 1) are kept as-is.

my %unit_factor = (
    g    => 1,
    mg   => 1_000,
    mcg  => 1_000_000,
    'µg' => 1_000_000,
    kj   => 1,
    kcal => 1,
);

=head2 load_ciqual_table()

Loads the Ciqual table into %ciqual_data, keyed by the Ciqual food code (alim_code column).
Some Ciqual categories may have missing values for some nutrients.

Each entry is a hash with the English name of the food (name_en) and a hash of nutrients.
Nutrient values are divided by the factor of the unit found in the column header
(so that mg and µg values are expressed in g), energy values are stored as-is.

Dies if the version file or the CSV file cannot be opened, or if the CSV file has no header row.

=cut

sub load_ciqual_table() {
    my $ciqual_csv_file = $data_root . "/external-data/ciqual/ciqual/CIQUAL.csv";
    my $ciqual_version_file = $data_root . "/external-data/ciqual/ciqual/CIQUAL_version.txt";

    my $encoding = "UTF-8";

    open(my $version_file, "<:encoding($encoding)", $ciqual_version_file)
        or die("Cannot open $ciqual_version_file: " . $! . "\n");
    my $ciqual_version = <$version_file>;
    close($version_file);

    # An empty version file yields undef: use an empty string so that chomp and the log call do not warn
    $ciqual_version = '' if not defined $ciqual_version;
    chomp($ciqual_version);

    $log->debug("opening ciqual CSV file", {file => $ciqual_csv_file, version => $ciqual_version})
        if $log->is_debug();

    # The file is tab separated, with one header row followed by one row per food.
    # The first columns identify the food:
    # alim_grp_code alim_ssgrp_code alim_ssssgrp_code alim_grp_nom_eng alim_ssgrp_nom_eng alim_ssssgrp_nom_eng alim_code alim_nom_eng alim_nom_sci
    # The other columns are nutrients, with headers of the form "Name (unit/100g)", e.g.:
    # Energy, Regulation EU No 1169/2011 (kJ/100g)   Energy, Regulation EU No 1169/2011 (kcal/100g)
    # Energy, N x Jones' factor, with fibres (kJ/100g)   Energy, N x Jones' factor, with fibres (kcal/100g)
    # Water (g/100g)   Protein (g/100g)   FA 18:2 9c,12c (n-6) (g/100g)   Calcium (mg/100g)   Selenium (µg/100g)   Vitamin B12 (µg/100g)

    my $csv_options_ref = {binary => 1, sep_char => "\t"};    # should set binary attribute.

    my $csv = Text::CSV->new($csv_options_ref)
        or die("Cannot use CSV: " . Text::CSV->error_diag());

    open(my $io, "<:encoding($encoding)", $ciqual_csv_file)
        or die("Could not open CIQUAL CSV $ciqual_csv_file: $!");

    # getline() returns undef for an empty file: fail with an explicit message
    # instead of dereferencing undef below
    my $header_row_ref = $csv->getline($io)
        or die("Could not read header row of CIQUAL CSV $ciqual_csv_file: " . $csv->error_diag());

    # this array will contain hashmaps with a column number, corresponding nid and unit
    my @nutrients = ();

    # read headers to populate @nutrients, corresponding to each columns
    foreach my $col (0 .. $#{$header_row_ref}) {

        # Work on a copy, so that the header row itself is left untouched
        my $nutrient = $header_row_ref->[$col];
        next if not defined $nutrient;

        # Energy, Regulation EU No 1169/2011 (kJ/100g)	Energy, Regulation EU No 1169/2011 (kcal/100g)
        # -> Energy
        # Note: all Energy columns end up with the same name, so for a given food,
        # the value of a later Energy column replaces the value of an earlier one.
        $nutrient =~ s/^Energy.*\((.*)$/Energy ($1/;

        # Split "Name (unit/100g)" into the nutrient name and the unit.
        # The micro sign of µg is matched by code point (U+00B5 micro sign or U+03BC Greek mu),
        # so that the match does not depend on how this source file is decoded.
        # NOTE(review): the match is case sensitive and the header uses "kJ", so the kJ energy columns
        # do not match the "kj" alternative and only the kcal energy columns are loaded.
        # Left unchanged as it determines which value is stored as energy: confirm that this is intended.
        next if $nutrient !~ /^(.*?)\s+\((g|mg|mcg|[\x{b5}\x{3bc}]g|kj|kcal)\/100g\)/s;
        my ($nutrient_name, $unit) = ($1, $2);

        # µg and mcg are the same unit: use the mcg spelling to look up the unit factor
        $unit = 'mcg' if $unit =~ /^[\x{b5}\x{3bc}]g$/;

        # Check if we recognize the name of the nutrient
        my $exists_in_taxonomy;
        my $nid = canonicalize_taxonomy_tag("en", "nutrients", $nutrient_name, \$exists_in_taxonomy);
        if ($exists_in_taxonomy) {
            $nid =~ s/^zz://;
            push @nutrients,
                {
                col => $col,
                nid => $nid,
                unit => $unit,
                };
        }
        else {
            # TODO: some nutrients are not automatically recognized yet
            # (e.g. fatty acids identified with column names like "FA 18:3 c9,c12,c15 (n-3)")
            $log->warning("unrecognized column name (nutrient) in CIQUAL table", {column_name => $nutrient})
                if $log->is_warning();
        }
    }

    while (my $row_ref = $csv->getline($io)) {
        my $ciqual_id = $row_ref->[6];    # alim_code
        my $name_en = $row_ref->[7];    # alim_nom_eng

        # Skip blank or truncated lines that do not have a food code
        next if ((not defined $ciqual_id) or ($ciqual_id eq ""));

        $ciqual_data{$ciqual_id} = {
            name_en => $name_en,
            nutrients => {}
        };

        # fetch each nutrients we need
        foreach my $nutrient_ref (@nutrients) {
            my $value = $row_ref->[$nutrient_ref->{col}];

            # a row shorter than the header row has no value for the last columns
            next if not defined $value;

            # convert values like < 0.2 (or <0.2) to 0.2
            $value =~ s/^<\s*//;

            # convert "traces" to 0
            $value =~ s/^traces$/0/i;

            # an empty value or a dash - indicates a missing value
            next if (($value eq "") or ($value eq '-'));

            $ciqual_data{$ciqual_id}{nutrients}{$nutrient_ref->{nid}}
                = convert_string_to_number($value) / $unit_factor{$nutrient_ref->{unit}};
        }
    }

    # getline() returns undef at the end of the file but also on a parse error:
    # report the latter instead of silently loading a truncated table
    if (not $csv->eof()) {
        $log->error("error while parsing CIQUAL CSV file",
            {file => $ciqual_csv_file, error => "" . $csv->error_diag()})
            if $log->is_error();
    }

    close($io);

    return;
}

=head2 load_ciqual_calnut_table()
Loads the extended Ciqual CALNUT table. The CALNUT table contains values for all nutrients (with missing values extrapolated),
but it does not contain data for all CIQUAL categories.
Documentation of Ciqual CALNUT: https://ciqual.anses.fr/cms/sites/default/files/inline-files/Table%20CALNUT%202020_doc_FR_2020%2007%2007.pdf
=cut

sub load_ciqual_data() {
sub load_ciqual_calnut_table() {
my $ciqual_csv_file = $data_root . "/external-data/ciqual/calnut/CALNUT.csv.0";
my $ciqual_version_file = $data_root . "/external-data/ciqual/calnut/CALNUT_version.txt";

Expand Down Expand Up @@ -113,14 +247,6 @@ sub load_ciqual_data() {
my @nutrients = ();
my $col = 0;

my %unit_factor = (
'g' => 1,
'mg' => 1000,
'mcg' => 1000 * 1000,
'kj' => 1,
'kcal' => 1,
);

# read headers to populate @nutrients, corresponding to each columns
foreach my $nutrient (@$header_row_ref) {
# nrj_kj -> energy-kj_kj
Expand Down
2 changes: 2 additions & 0 deletions taxonomies/ingredients.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85520,6 +85520,8 @@ wikipedia:en:https://en.wikipedia.org/wiki/Tomato_sauce
carbon_footprint_fr_foodges_ingredient:fr:Sauce tomate
carbon_footprint_fr_foodges_value:fr:2.9
# ingredient/fr:préparation-à-base-de-tomates has 91 products in 5 languages @2018-12-26
ciqual_proxy_food_code:en:11107
ciqual_proxy_food_name:en:Tomato sauce, with onions, prepacked

<en:tomato sauce
fr:sauce tomate cuite
Expand Down
45 changes: 45 additions & 0 deletions tests/unit/expected_test_results/nutrition_estimation/frik.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"estimated_nutrients" : {
"nutrients" : {
"alcohol" : 0,
"calcium" : 0.0503,
"carbohydrates" : 55.8,
"chloride" : 0.089,
"copper" : 0.001,
"energy" : 321,
"fat" : 2.25,
"fiber" : 19.3,
"fructose" : 0.5,
"glucose" : 0.5,
"iron" : 0.0052,
"lactose" : 0.5,
"magnesium" : 0.114,
"maltose" : 0.5,
"manganese" : 0.0036,
"phosphorus" : 0.321,
"polyols" : 0,
"potassium" : 0.512,
"proteins" : 9.62,
"salt" : 0.011,
"sodium" : 0.0042,
"starch" : 30.2,
"sucrose" : 0.5,
"vitamin-b6" : 0.00014,
"vitamin-e" : 0.0001,
"water" : 11.3,
"zinc" : 0.0036
},
"total" : 100,
"total_with_nutrients" : 100,
"unknown_ingredients" : {}
},
"ingredients" : [
{
"id" : "en:freekeh",
"percent_estimate" : 100,
"percent_max" : 100,
"percent_min" : 100,
"text" : "frik"
}
]
}
5 changes: 5 additions & 0 deletions tests/unit/nutrition_estimation.t
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ my @tests = (
desc => 'unknown ingredients',
product => {lc => "en", ingredients_text => "sugar 50%, strange ingredient, stranger ingredient"},
},
{
id => 'frik',
desc => 'ingredient in CIQUAL table but not in CALNUT extended table',
product => {lc => "en", ingredients_text => "frik"},
},
);

foreach my $test_ref (@tests) {
Expand Down

0 comments on commit e449fa8

Please sign in to comment.