diff --git a/lib/ProductOpener/Ingredients.pm b/lib/ProductOpener/Ingredients.pm
index b026b0f478349..83969d5e9b571 100644
--- a/lib/ProductOpener/Ingredients.pm
+++ b/lib/ProductOpener/Ingredients.pm
@@ -1641,7 +1641,7 @@ sub parse_ingredients_text($) {
next if (length($maybe_origin) < 4);
my $origin_id = canonicalize_taxonomy_tag($product_lc, "origins", $maybe_origin);
- if (exists_taxonomy_tag("origins", $origin_id)) {
+ if ((exists_taxonomy_tag("origins", $origin_id)) and ($origin_id ne "en:unknown")) {
$debug_ingredients and $log->debug("ingredient includes known origin", { ingredient => $ingredient, new_ingredient => $maybe_ingredient, origin_id => $origin_id }) if $log->is_debug();
@@ -2831,10 +2831,26 @@ sub analyze_ingredients($) {
$property_value = "en:may-contain-" . $from_what_with_dashes ; # en:may-contain-palm-oil
$ingredients_analysis_ref->{$property_value} = $values{maybe};
}
+ # If some ingredients are not recognized, there is a possibility that they could be palm oil or contain palm oil
+ # As there are relatively few ingredients with palm oil, we assume we are able to recognize them with the taxonomy
+ # and that unrecognized ingredients do not contain palm oil.
+ # --> We mark the product as palm oil free
+ # Exception: if many ingredients are unrecognized (more than 30% of them), it may be that the ingredients list
+ # is bogus (e.g. OCR errors), and the likelihood of missing a palm oil ingredient increases.
+ # --> In this case, we mark the product as palm oil content unknown
elsif (defined $values{unknown_ingredients}) {
# Some ingredients were not recognized
- $property_value = "en:" . $from_what_with_dashes . "-content-unknown"; # en:palm-oil-content-unknown
- $ingredients_analysis_ref->{$property_value} = $values{unknown_ingredients};
+ $log->debug("analyze_ingredients - unknown ingredients", { unknown_ingredients_n => (scalar @{$values{unknown_ingredients}}), ingredients_n => (scalar(@{$product_ref->{ingredients}})) }) if $log->is_debug();
+ my $unknown_rate = (scalar @{$values{unknown_ingredients}}) / (scalar @{$product_ref->{ingredients}});
+ # for palm-oil, as few products contain it, we consider the status to be unknown only if more than 30% of the ingredients are unknown (which may indicate a bogus ingredient list, e.g. OCR errors)
+ if (($from_what_with_dashes eq "palm-oil") and ($unknown_rate <= 0.3)) {
+ $property_value = "en:" . $from_what_with_dashes . "-free"; # en:palm-oil-free
+ }
+ else {
+ $property_value = "en:" . $from_what_with_dashes . "-content-unknown"; # en:palm-oil-content-unknown
+ }
+ # In all cases, keep track of the unknown ingredients
+ $ingredients_analysis_ref->{"en:" . $from_what_with_dashes . "-content-unknown"} = $values{unknown_ingredients};
}
else {
# no yes, maybe or unknown ingredients
@@ -2904,15 +2920,18 @@ sub analyze_ingredients($) {
$product_ref->{ingredients_analysis} = {};
foreach my $property (@properties) {
- my $property_value = $ingredients_analysis_properties_ref->{$property};
+ my $property_value = $ingredients_analysis_properties_ref->{$property};
if (defined $property_value) {
# Store the property value in the ingredients_analysis_tags list
push @{$product_ref->{ingredients_analysis_tags}}, $property_value;
# Store the list of ingredients that caused a product to be non vegan/vegetarian/palm oil free
- # (no list when a product is vegan/vegetarian/palm oil free)
if (defined $ingredients_analysis_ref->{$property_value}) {
$product_ref->{ingredients_analysis}{$property_value} = $ingredients_analysis_ref->{$property_value};
}
+ # a product tagged palm-oil-free can still have some unrecognized ingredients: keep their list under palm-oil-content-unknown
+ elsif (($property_value =~ /-free$/) and (defined $ingredients_analysis_ref->{$` . '-content-unknown'})) {
+ $product_ref->{ingredients_analysis}{$` . '-content-unknown'} = $ingredients_analysis_ref->{$` . '-content-unknown'};
+ }
}
}
}
diff --git a/t/expected_test_results/ingredients/en-vegetal-ingredients.json b/t/expected_test_results/ingredients/en-vegetal-ingredients.json
index 3af7054b96e22..49e7e67f3ef76 100644
--- a/t/expected_test_results/ingredients/en-vegetal-ingredients.json
+++ b/t/expected_test_results/ingredients/en-vegetal-ingredients.json
@@ -57,7 +57,7 @@
]
},
"ingredients_analysis_tags" : [
- "en:palm-oil-content-unknown",
+ "en:palm-oil-free",
"en:vegan",
"en:vegetarian"
],
diff --git a/t/expected_test_results/nutriscore/fr-gaspacho.json b/t/expected_test_results/nutriscore/fr-gaspacho.json
index 3e44f2a36c8e0..d14d2a01b5210 100644
--- a/t/expected_test_results/nutriscore/fr-gaspacho.json
+++ b/t/expected_test_results/nutriscore/fr-gaspacho.json
@@ -156,7 +156,7 @@
]
},
"ingredients_analysis_tags" : [
- "en:palm-oil-content-unknown",
+ "en:palm-oil-free",
"en:vegan-status-unknown",
"en:vegetarian-status-unknown"
],
diff --git a/t/expected_test_results/recipes/nectars.guava-nectar.json b/t/expected_test_results/recipes/nectars.guava-nectar.json
index 7a8343304c796..c73ee16cefed6 100644
--- a/t/expected_test_results/recipes/nectars.guava-nectar.json
+++ b/t/expected_test_results/recipes/nectars.guava-nectar.json
@@ -61,7 +61,7 @@
]
},
"ingredients_analysis_tags" : [
- "en:palm-oil-content-unknown",
+ "en:palm-oil-free",
"en:vegan-status-unknown",
"en:vegetarian-status-unknown"
],
diff --git a/t/ingredients_analysis.t b/t/ingredients_analysis.t
index f8c769976c290..541898ac7e287 100644
--- a/t/ingredients_analysis.t
+++ b/t/ingredients_analysis.t
@@ -44,6 +44,15 @@ my @tests = (
# check that the label overrides the en:non-vegan for "miel" / honey
# (just for testing, it should not happen)
[ { lc => "fr", labels_tags => ["en:vegan"], ingredients_text => "miel" }, [ "en:palm-oil-free", "en:vegan", "en:vegetarian"] ],
+
+# unknown ingredients
+
+[ { lc => "en", ingredients_text => "" }, undef ],
+[ { lc => "en", ingredients_text => "unknown ingredient" }, ["en:palm-oil-content-unknown", "en:vegan-status-unknown", "en:vegetarian-status-unknown"] ],
+[ { lc => "en", ingredients_text => "flour, unknown ingredient" }, ["en:palm-oil-content-unknown", "en:vegan-status-unknown", "en:vegetarian-status-unknown"] ],
+# mark the product as palm oil free even though there is one unknown ingredient (out of many ingredients)
+[ { lc => "en", ingredients_text => "flour, sugar, eggs, milk, salt, water, unknown ingredient" }, ["en:palm-oil-free", "en:non-vegan", "en:vegetarian-status-unknown"] ],
+
);
diff --git a/t/update_tests_results.sh b/t/update_tests_results.sh
index 23d3c76c470d8..ffb82309c7b2d 100755
--- a/t/update_tests_results.sh
+++ b/t/update_tests_results.sh
@@ -20,4 +20,5 @@ perl packaging.t --results expected_test_results/packaging/
perl recipes.t --results expected_test_results/recipes/
perl export.t --update-expected-results
perl import_convert_carrefour_france.t --update-expected-results
+perl import_csv_file.t --update-expected-results
diff --git a/templates/api/knowledge-panels/health/ingredients/ingredients_analysis_property.tt.json b/templates/api/knowledge-panels/health/ingredients/ingredients_analysis_property.tt.json
index ee6774aadd249..dac1040462a12 100644
--- a/templates/api/knowledge-panels/health/ingredients/ingredients_analysis_property.tt.json
+++ b/templates/api/knowledge-panels/health/ingredients/ingredients_analysis_property.tt.json
@@ -20,6 +20,13 @@
"html": `
[% IF panel.evaluation == "good" %]
[% lang(panel.ingredients_title_id) %]
+ [% property_unknown_ingredients = property %]
+ [% property_unknown_ingredients = property_unknown_ingredients.replace('-free', '-content-unknown') %]
+ [% IF product.ingredients_analysis.$property_unknown_ingredients.defined %]
+
+ [% lang("unrecognized_ingredients") %][% sep %]:
+ [% display_taxonomy_tags_list("ingredients", product.ingredients_analysis.$property_unknown_ingredients) %]
+ [% END %]
[% ELSIF product.ingredients_analysis.$property.defined %]
[% lang(panel.ingredients_title_id) %][% sep %]:
[% display_taxonomy_tags_list("ingredients", product.ingredients_analysis.$property) %]