diff --git a/lib/ProductOpener/Ingredients.pm b/lib/ProductOpener/Ingredients.pm index b026b0f478349..83969d5e9b571 100644 --- a/lib/ProductOpener/Ingredients.pm +++ b/lib/ProductOpener/Ingredients.pm @@ -1641,7 +1641,7 @@ sub parse_ingredients_text($) { next if (length($maybe_origin) < 4); my $origin_id = canonicalize_taxonomy_tag($product_lc, "origins", $maybe_origin); - if (exists_taxonomy_tag("origins", $origin_id)) { + if ((exists_taxonomy_tag("origins", $origin_id)) and ($origin_id ne "en:unknown")) { $debug_ingredients and $log->debug("ingredient includes known origin", { ingredient => $ingredient, new_ingredient => $maybe_ingredient, origin_id => $origin_id }) if $log->is_debug(); @@ -2831,10 +2831,26 @@ sub analyze_ingredients($) { $property_value = "en:may-contain-" . $from_what_with_dashes ; # en:may-contain-palm-oil $ingredients_analysis_ref->{$property_value} = $values{maybe}; } + # If some ingredients are not recognized, there is a possibility that they could be palm oil or contain palm oil + # As there are relatively few ingredients with palm oil, we assume we are able to recognize them with the taxonomy + # and that unrecognized ingredients do not contain palm oil. + # --> We mark the product as palm oil free + # Exception: If there are lots of unrecognized ingredients though (e.g. more than 30%), it may be that the ingredients list + # is bogus (e.g. OCR errors) and the likelihood of missing a palm oil ingredient increases. + # --> In this case, we mark the product as palm oil content unknown elsif (defined $values{unknown_ingredients}) { # Some ingredients were not recognized - $property_value = "en:" . $from_what_with_dashes . 
"-content-unknown"; # en:palm-oil-content-unknown - $ingredients_analysis_ref->{$property_value} = $values{unknown_ingredients}; + $log->debug("analyze_ingredients - unknown ingredients", { unknown_ingredients_n => (scalar @{$values{unknown_ingredients}}), ingredients_n => (scalar(@{$product_ref->{ingredients}})) }) if $log->is_debug(); + my $unknown_rate = (scalar @{$values{unknown_ingredients}}) / (scalar @{$product_ref->{ingredients}}); + # for palm-oil, as there are few products containing it, we consider the status to be unknown only if more than 30% of the ingredients are unknown (which may indicate a bogus ingredient list, e.g. OCR errors) + if (($from_what_with_dashes eq "palm-oil") and ($unknown_rate <= 0.3)) { + $property_value = "en:" . $from_what_with_dashes . "-free"; # en:palm-oil-free + } + else { + $property_value = "en:" . $from_what_with_dashes . "-content-unknown"; # en:palm-oil-content-unknown + } + # In all cases, keep track of the unknown ingredients + $ingredients_analysis_ref->{"en:" . $from_what_with_dashes . 
"-content-unknown"} = $values{unknown_ingredients}; } else { # no yes, maybe or unknown ingredients @@ -2904,15 +2920,18 @@ sub analyze_ingredients($) { $product_ref->{ingredients_analysis} = {}; foreach my $property (@properties) { - my $property_value = $ingredients_analysis_properties_ref->{$property}; + my $property_value = $ingredients_analysis_properties_ref->{$property}; if (defined $property_value) { # Store the property value in the ingredients_analysis_tags list push @{$product_ref->{ingredients_analysis_tags}}, $property_value; # Store the list of ingredients that caused a product to be non vegan/vegetarian/palm oil free - # (no list when a product is vegan/vegetarian/palm oil free) if (defined $ingredients_analysis_ref->{$property_value}) { $product_ref->{ingredients_analysis}{$property_value} = $ingredients_analysis_ref->{$property_value}; } + # for palm-oil-free products, we can have a fraction of ingredients that have palm-oil-content-unknown + elsif (($property_value =~ /-free$/) and (defined $ingredients_analysis_ref->{$` . '-content-unknown'})) { + $product_ref->{ingredients_analysis}{$` . '-content-unknown'} = $ingredients_analysis_ref->{$` . 
'-content-unknown'}; + } } } } diff --git a/t/expected_test_results/ingredients/en-vegetal-ingredients.json b/t/expected_test_results/ingredients/en-vegetal-ingredients.json index 3af7054b96e22..49e7e67f3ef76 100644 --- a/t/expected_test_results/ingredients/en-vegetal-ingredients.json +++ b/t/expected_test_results/ingredients/en-vegetal-ingredients.json @@ -57,7 +57,7 @@ ] }, "ingredients_analysis_tags" : [ - "en:palm-oil-content-unknown", + "en:palm-oil-free", "en:vegan", "en:vegetarian" ], diff --git a/t/expected_test_results/nutriscore/fr-gaspacho.json b/t/expected_test_results/nutriscore/fr-gaspacho.json index 3e44f2a36c8e0..d14d2a01b5210 100644 --- a/t/expected_test_results/nutriscore/fr-gaspacho.json +++ b/t/expected_test_results/nutriscore/fr-gaspacho.json @@ -156,7 +156,7 @@ ] }, "ingredients_analysis_tags" : [ - "en:palm-oil-content-unknown", + "en:palm-oil-free", "en:vegan-status-unknown", "en:vegetarian-status-unknown" ], diff --git a/t/expected_test_results/recipes/nectars.guava-nectar.json b/t/expected_test_results/recipes/nectars.guava-nectar.json index 7a8343304c796..c73ee16cefed6 100644 --- a/t/expected_test_results/recipes/nectars.guava-nectar.json +++ b/t/expected_test_results/recipes/nectars.guava-nectar.json @@ -61,7 +61,7 @@ ] }, "ingredients_analysis_tags" : [ - "en:palm-oil-content-unknown", + "en:palm-oil-free", "en:vegan-status-unknown", "en:vegetarian-status-unknown" ], diff --git a/t/ingredients_analysis.t b/t/ingredients_analysis.t index f8c769976c290..541898ac7e287 100644 --- a/t/ingredients_analysis.t +++ b/t/ingredients_analysis.t @@ -44,6 +44,15 @@ my @tests = ( # check that the label overrides the en:non-vegan for "miel" / honey # (just for testing, it should not happen) [ { lc => "fr", labels_tags => ["en:vegan"], ingredients_text => "miel" }, [ "en:palm-oil-free", "en:vegan", "en:vegetarian"] ], + +# unknown ingredients + +[ { lc => "en", ingredients_text => "" }, undef ], +[ { lc => "en", ingredients_text => "unknown ingredient" 
}, ["en:palm-oil-content-unknown", "en:vegan-status-unknown", "en:vegetarian-status-unknown"] ], +[ { lc => "en", ingredients_text => "flour, unknown ingredient" }, ["en:palm-oil-content-unknown", "en:vegan-status-unknown", "en:vegetarian-status-unknown"] ], +# mark the product as palm oil free even though there is one unknown ingredient (out of many ingredients) +[ { lc => "en", ingredients_text => "flour, sugar, eggs, milk, salt, water, unknown ingredient" }, ["en:palm-oil-free", "en:non-vegan", "en:vegetarian-status-unknown"] ], + ); diff --git a/t/update_tests_results.sh b/t/update_tests_results.sh index 23d3c76c470d8..ffb82309c7b2d 100755 --- a/t/update_tests_results.sh +++ b/t/update_tests_results.sh @@ -20,4 +20,5 @@ perl packaging.t --results expected_test_results/packaging/ perl recipes.t --results expected_test_results/recipes/ perl export.t --update-expected-results perl import_convert_carrefour_france.t --update-expected-results +perl import_csv_file.t --update-expected-results diff --git a/templates/api/knowledge-panels/health/ingredients/ingredients_analysis_property.tt.json b/templates/api/knowledge-panels/health/ingredients/ingredients_analysis_property.tt.json index ee6774aadd249..dac1040462a12 100644 --- a/templates/api/knowledge-panels/health/ingredients/ingredients_analysis_property.tt.json +++ b/templates/api/knowledge-panels/health/ingredients/ingredients_analysis_property.tt.json @@ -20,6 +20,13 @@ "html": ` [% IF panel.evaluation == "good" %] [% lang(panel.ingredients_title_id) %] + [% property_unknown_ingredients = property %] + [% property_unknown_ingredients = property_unknown_ingredients.replace('-free', '-content-unknown') %] + [% IF product.ingredients_analysis.$property_unknown_ingredients.defined %] +

+ [% lang("unrecognized_ingredients") %][% sep %]: + [% display_taxonomy_tags_list("ingredients", product.ingredients_analysis.$property_unknown_ingredients) %] + [% END %] [% ELSIF product.ingredients_analysis.$property.defined %] [% lang(panel.ingredients_title_id) %][% sep %]: [% display_taxonomy_tags_list("ingredients", product.ingredients_analysis.$property) %]