Skip to content

Commit

Permalink
fix: assume unrecognized ingredients are not palm oil (#6713)
Browse files Browse the repository at this point in the history
* fix: assume unrecognized ingredients are not palm oil #6700

* update tests

* keep and display unknown ingredients even for palm oil free products

* update tests

* small fixes and test updates
  • Loading branch information
stephanegigandet committed May 10, 2022
1 parent 37c76c1 commit d5b9b9e
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 8 deletions.
29 changes: 24 additions & 5 deletions lib/ProductOpener/Ingredients.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1641,7 +1641,7 @@ sub parse_ingredients_text($) {
next if (length($maybe_origin) < 4);

my $origin_id = canonicalize_taxonomy_tag($product_lc, "origins", $maybe_origin);
if (exists_taxonomy_tag("origins", $origin_id)) {
if ((exists_taxonomy_tag("origins", $origin_id)) and ($origin_id ne "en:unknown")) {

$debug_ingredients and $log->debug("ingredient includes known origin", { ingredient => $ingredient, new_ingredient => $maybe_ingredient, origin_id => $origin_id }) if $log->is_debug();

Expand Down Expand Up @@ -2831,10 +2831,26 @@ sub analyze_ingredients($) {
$property_value = "en:may-contain-" . $from_what_with_dashes ; # en:may-contain-palm-oil
$ingredients_analysis_ref->{$property_value} = $values{maybe};
}
# If some ingredients are not recognized, there is a possibility that they could be palm oil or contain palm oil
# As there are relatively few ingredients with palm oil, we assume we are able to recognize them with the taxonomy
# and that unrecognized ingredients do not contain palm oil.
# --> We mark the product as palm oil free
# Exception: if there are a lot of unrecognized ingredients (more than 30%), the ingredients list may be
# bogus (e.g. OCR errors), and the likelihood of missing a palm oil ingredient increases.
# --> In this case, we mark the product as palm oil content unknown
elsif (defined $values{unknown_ingredients}) {
# Some ingredients were not recognized
$property_value = "en:" . $from_what_with_dashes . "-content-unknown"; # en:palm-oil-content-unknown
$ingredients_analysis_ref->{$property_value} = $values{unknown_ingredients};
$log->debug("analyze_ingredients - unknown ingredients", { unknown_ingredients_n => (scalar @{$values{unknown_ingredients}}), ingredients_n => (scalar(@{$product_ref->{ingredients}})) }) if $log->is_debug();
my $unknown_rate = (scalar @{$values{unknown_ingredients}}) / (scalar @{$product_ref->{ingredients}});
# For palm oil, as there are few products containing it, we consider the status to be unknown only if more than 30% of the ingredients are unknown (which may indicate a bogus ingredient list, e.g. OCR errors)
if (($from_what_with_dashes eq "palm-oil") and ($unknown_rate <= 0.3)) {
$property_value = "en:" . $from_what_with_dashes . "-free"; # en:palm-oil-free
}
else {
$property_value = "en:" . $from_what_with_dashes . "-content-unknown"; # en:palm-oil-content-unknown
}
# In all cases, keep track of the unknown ingredients
$ingredients_analysis_ref->{"en:" . $from_what_with_dashes . "-content-unknown"} = $values{unknown_ingredients};
}
else {
# no yes, maybe or unknown ingredients
Expand Down Expand Up @@ -2904,15 +2920,18 @@ sub analyze_ingredients($) {
$product_ref->{ingredients_analysis} = {};

foreach my $property (@properties) {
my $property_value = $ingredients_analysis_properties_ref->{$property};
my $property_value = $ingredients_analysis_properties_ref->{$property};
if (defined $property_value) {
# Store the property value in the ingredients_analysis_tags list
push @{$product_ref->{ingredients_analysis_tags}}, $property_value;
# Store the list of ingredients that caused a product to be non vegan/vegetarian/palm oil free
# (no list when a product is vegan/vegetarian/palm oil free)
if (defined $ingredients_analysis_ref->{$property_value}) {
$product_ref->{ingredients_analysis}{$property_value} = $ingredients_analysis_ref->{$property_value};
}
# for palm-oil-free products, we can have a fraction of ingredients that have palm-oil-content-unknown
elsif (($property_value =~ /-free$/) and (defined $ingredients_analysis_ref->{$` . '-content-unknown'})) {
$product_ref->{ingredients_analysis}{$` . '-content-unknown'} = $ingredients_analysis_ref->{$` . '-content-unknown'};
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
]
},
"ingredients_analysis_tags" : [
"en:palm-oil-content-unknown",
"en:palm-oil-free",
"en:vegan",
"en:vegetarian"
],
Expand Down
2 changes: 1 addition & 1 deletion t/expected_test_results/nutriscore/fr-gaspacho.json
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@
]
},
"ingredients_analysis_tags" : [
"en:palm-oil-content-unknown",
"en:palm-oil-free",
"en:vegan-status-unknown",
"en:vegetarian-status-unknown"
],
Expand Down
2 changes: 1 addition & 1 deletion t/expected_test_results/recipes/nectars.guava-nectar.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
]
},
"ingredients_analysis_tags" : [
"en:palm-oil-content-unknown",
"en:palm-oil-free",
"en:vegan-status-unknown",
"en:vegetarian-status-unknown"
],
Expand Down
9 changes: 9 additions & 0 deletions t/ingredients_analysis.t
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,15 @@ my @tests = (
# check that the label overrides the en:non-vegan for "miel" / honey
# (just for testing, it should not happen)
[ { lc => "fr", labels_tags => ["en:vegan"], ingredients_text => "miel" }, [ "en:palm-oil-free", "en:vegan", "en:vegetarian"] ],

# unknown ingredients

[ { lc => "en", ingredients_text => "" }, undef ],
[ { lc => "en", ingredients_text => "unknown ingredient" }, ["en:palm-oil-content-unknown", "en:vegan-status-unknown", "en:vegetarian-status-unknown"] ],
[ { lc => "en", ingredients_text => "flour, unknown ingredient" }, ["en:palm-oil-content-unknown", "en:vegan-status-unknown", "en:vegetarian-status-unknown"] ],
# mark the product as palm oil free even though there is one unknown ingredient (out of many ingredients)
[ { lc => "en", ingredients_text => "flour, sugar, eggs, milk, salt, water, unknown ingredient" }, ["en:palm-oil-free", "en:non-vegan", "en:vegetarian-status-unknown"] ],

);


Expand Down
1 change: 1 addition & 0 deletions t/update_tests_results.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@ perl packaging.t --results expected_test_results/packaging/
perl recipes.t --results expected_test_results/recipes/
perl export.t --update-expected-results
perl import_convert_carrefour_france.t --update-expected-results
perl import_csv_file.t --update-expected-results

Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,13 @@
"html": `
[% IF panel.evaluation == "good" %]
[% lang(panel.ingredients_title_id) %]
[% property_unknown_ingredients = property %]
[% property_unknown_ingredients = property_unknown_ingredients.replace('-free', '-content-unknown') %]
[% IF product.ingredients_analysis.$property_unknown_ingredients.defined %]
<br><br>
[% lang("unrecognized_ingredients") %][% sep %]:
[% display_taxonomy_tags_list("ingredients", product.ingredients_analysis.$property_unknown_ingredients) %]
[% END %]
[% ELSIF product.ingredients_analysis.$property.defined %]
<strong>[% lang(panel.ingredients_title_id) %][% sep %]:</strong>
[% display_taxonomy_tags_list("ingredients", product.ingredients_analysis.$property) %]
Expand Down

0 comments on commit d5b9b9e

Please sign in to comment.