From 415d68c15428fd0aa97f053e0054478c1b9472e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Gigandet?= Date: Thu, 6 Jul 2023 15:47:45 +0200 Subject: [PATCH] feat: quality test for nutriscore on olive oils (#8360) Olive oils have a well-defined nutriscore - spot when they do not match it. --- lib/ProductOpener/DataQualityFood.pm | 43 +++++++ lib/ProductOpener/Tags.pm | 30 +++++ taxonomies/categories.txt | 10 ++ taxonomies/data_quality.txt | 16 +++ tests/unit/dataqualityfood.t | 174 +++++++++++++++++++++++++++ 5 files changed, 273 insertions(+) diff --git a/lib/ProductOpener/DataQualityFood.pm b/lib/ProductOpener/DataQualityFood.pm index a56459859f743..f3525b2b31669 100644 --- a/lib/ProductOpener/DataQualityFood.pm +++ b/lib/ProductOpener/DataQualityFood.pm @@ -915,6 +915,49 @@ sub check_nutrition_data ($product_ref) { push @{$product_ref->{data_quality_warnings_tags}}, "en:nutrition-value-under-0-1-g-salt"; } } + + # some categories have expected nutriscore grade - push data quality error if calculated nutriscore grade differs from expected nutriscore grade or if it is not calculated + my $expected_nutriscore_grade + = get_inherited_property_from_categories_tags($product_ref, "expected_nutriscore_grade:en"); + + # we expect single letter a, b, c, d, e for nutriscore grade in the taxonomy. Case insensitive (/i). 
+ if ((defined $expected_nutriscore_grade) and ($expected_nutriscore_grade =~ /^[a-e]$/i)) { + if ( + # nutriscore not calculated but the category has an expected nutriscore grade + (not(defined $product_ref->{nutrition_grade_fr})) + # nutriscore calculated but different from the expected grade (compared in lowercase, as the taxonomy value is accepted case insensitively) + or ( (defined $product_ref->{nutrition_grade_fr}) + and (lc($product_ref->{nutrition_grade_fr}) ne lc($expected_nutriscore_grade))) + ) + { + push @{$product_ref->{data_quality_errors_tags}}, + "en:nutri-score-grade-from-category-does-not-match-calculated-grade"; + } + } + + # some categories have an expected ingredient - push data quality warning if the product has no ingredients, data quality error if the ingredients differ from the expected ingredient + # note: we currently support only 1 expected ingredient + my $expected_ingredients = get_inherited_property_from_categories_tags($product_ref, "expected_ingredients:en"); + + if ((defined $expected_ingredients)) { + $expected_ingredients = canonicalize_taxonomy_tag("en", "ingredients", $expected_ingredients); + my $number_of_ingredients = (defined $product_ref->{ingredients}) ? @{$product_ref->{ingredients}} : 0; + + if ($number_of_ingredients == 0) { + push @{$product_ref->{data_quality_warnings_tags}}, + "en:ingredients-single-ingredient-from-category-missing"; + } + elsif ( + # more than 1 ingredient + ($number_of_ingredients > 1) + # single ingredient that is neither the expected ingredient nor one of its children (is_a) + or not(is_a("ingredients", $product_ref->{ingredients}[0]{id}, $expected_ingredients)) + ) + { + push @{$product_ref->{data_quality_errors_tags}}, + "en:ingredients-single-ingredient-from-category-does-not-match-actual-ingredients"; + } + } } $log->debug("has_prepared_data: " . 
$has_prepared_data) if $log->debug(); diff --git a/lib/ProductOpener/Tags.pm b/lib/ProductOpener/Tags.pm index 0d8d149b3208f..ec47ffa4dbe5a 100644 --- a/lib/ProductOpener/Tags.pm +++ b/lib/ProductOpener/Tags.pm @@ -1339,6 +1339,36 @@ sub build_tags_taxonomy ($tagtype, $publish) { } } + elsif ($line =~ /^expected_nutriscore_grade:en:/) { + # the rest of the line should be a single nutriscore grade letter: a, b, c, d or e (case insensitive) + my $nutriscore_grade = $'; # everything after the matched string + + if ($nutriscore_grade !~ /^[a-e]$/i) { + my $msg + = "expected_nutriscore_grade:en: in " + . $tagtype + . " should be followed by a single letter between a and e. expected_nutriscore_grade:en: " + . $nutriscore_grade + . " is incorrect\n"; + + $errors .= "ERROR - " . $msg; + } + } + elsif ($line =~ /^expected_ingredients:en:/) { + # the rest of the line should contain a single ingredient: a comma means several were listed + my $expected_ingredients = $'; # everything after the matched string + + if ($expected_ingredients =~ /,/) { + my $msg + = "expected_ingredients:en: in " + . $tagtype + . " should contain a single ingredient. expected_ingredients:en: " + . $expected_ingredients + . " is incorrect\n"; + + $errors .= "ERROR - " . 
$msg; + } + } else { $log->info("unrecognized line in taxonomy", {tagtype => $tagtype, line => $line}) if $log->is_info(); } diff --git a/taxonomies/categories.txt b/taxonomies/categories.txt index 30e5c14b25aef..c39f7fd32c53d 100644 --- a/taxonomies/categories.txt +++ b/taxonomies/categories.txt @@ -36,6 +36,14 @@ stopwords:nl:bevat,en stopwords:nl_be:bevat,en stopwords:de:und,mit,von +# add following tag for category having always same nutriscore grade +# only 1 letter is allowed +# expected_nutriscore_grade:en:c +# add following tag for category having always same ingredient +# only 1 ingredient tag is allowed (use "en:olive-oil" and not "en:Olive oil") +# expected_ingredients:en: en:olive-oil + + # add following tag to ignore "Energy value in kJ does not correspond to the value calculated from the other nutrients error # only for categories having nutrients that are not displayed in the nutrition table and contributing to the energy # for example, lemon juices containing organic acid, it is forbidden to display organic acid in nutrition tables but @@ -53132,6 +53140,8 @@ agribalyse_food_code:en:17270 ciqual_food_code:en:17270 ciqual_food_name:en:Olive oil, extra virgin ciqual_food_name:fr:Huile d'olive vierge extra +expected_nutriscore_grade:en:c +expected_ingredients:en: en:olive-oil [ + 'en:plant-based-foods-and-beverages', 'en:plant-based-foods', + 'en:fats', 'en:vegetable-fats', + 'en:olive-tree-products', 'en:vegetable-oils', + 'en:olive-oils', 'en:virgin-olive-oils', + 'en:extra-virgin-olive-oils' + ], + nutrition_grade_fr => "d" +}; +ProductOpener::DataQuality::check_quality($product_ref); +check_quality_and_test_product_has_quality_tag( + $product_ref, + 'en:nutri-score-grade-from-category-does-not-match-calculated-grade', + 'Calculate nutriscore grade should be the same as the one provided in the taxonomy for this category', 1 +); +# category with expected nutriscore grade. Different nutriscore grade as compared to the expected nutriscore grade. 
Two specific categories +$product_ref = { + categories_tags => [ + "en:plant-based-foods-and-beverages", "en:plant-based-foods", + "en:desserts", "en:fats", + "en:frozen-foods", "en:vegetable-fats", + "en:frozen-desserts", "en:olive-tree-products", + "en:vegetable-oils", "en:ice-creams-and-sorbets", + "en:olive-oils", "en:ice-creams", + "en:ice-cream-tubs", "en:virgin-olive-oils", + "en:extra-virgin-olive-oils", "fr:glace-aux-calissons" + ], + nutrition_grade_fr => "d" +}; +ProductOpener::DataQuality::check_quality($product_ref); +check_quality_and_test_product_has_quality_tag( + $product_ref, + 'en:nutri-score-grade-from-category-does-not-match-calculated-grade', + 'Calculate nutriscore grade should be the same as the one provided in the taxonomy for this category even if some other categories tags do not have expected nutriscore grade', + 1 +); +# category with expected nutriscore grade. Not calculated (missing nutriscore grade) +$product_ref = { + categories_tags => [ + 'en:plant-based-foods-and-beverages', 'en:plant-based-foods', + 'en:fats', 'en:vegetable-fats', + 'en:olive-tree-products', 'en:vegetable-oils', + 'en:olive-oils', 'en:virgin-olive-oils', + 'en:extra-virgin-olive-oils' + ] +}; +ProductOpener::DataQuality::check_quality($product_ref); +check_quality_and_test_product_has_quality_tag( + $product_ref, + 'en:nutri-score-grade-from-category-does-not-match-calculated-grade', + 'Calculate nutriscore grade should be the same as the one provided in the taxonomy for this category', 1 +); +# category with expected nutriscore grade. 
Same nutriscore grade as compared to the expected nutriscore grade +$product_ref = { + categories_tags => [ + 'en:plant-based-foods-and-beverages', 'en:plant-based-foods', + 'en:fats', 'en:vegetable-fats', + 'en:olive-tree-products', 'en:vegetable-oils', + 'en:olive-oils', 'en:virgin-olive-oils', + 'en:extra-virgin-olive-oils' + ], + nutrition_grade_fr => "c" +}; +ProductOpener::DataQuality::check_quality($product_ref); +check_quality_and_test_product_has_quality_tag( + $product_ref, + 'en:nutri-score-grade-from-category-does-not-match-calculated-grade', + 'Calculate nutriscore grade should be the same as the one provided in the taxonomy for this category', 0 +); + +# category with expected ingredient. Prerequisite: "expected_ingredients:en: en:olive-oil" under "en:Extra-virgin olive oils" category, in the taxonomy +# category with expected ingredient. Missing ingredients +$product_ref = { + categories_tags => [ + 'en:plant-based-foods-and-beverages', 'en:plant-based-foods', + 'en:fats', 'en:vegetable-fats', + 'en:olive-tree-products', 'en:vegetable-oils', + 'en:olive-oils', 'en:virgin-olive-oils', + 'en:extra-virgin-olive-oils' + ], + # Missing ingredients + # ingredients => [ + # {id => "en:olive-oil"} + # ] +}; +ProductOpener::DataQuality::check_quality($product_ref); +check_quality_and_test_product_has_quality_tag( + $product_ref, + 'en:ingredients-single-ingredient-from-category-missing', + 'We expect the ingredient given in the taxonomy for this product', 1 +); +# category with expected ingredient. 
More than one ingredient +$product_ref = { + categories_tags => [ + 'en:plant-based-foods-and-beverages', 'en:plant-based-foods', + 'en:fats', 'en:vegetable-fats', + 'en:olive-tree-products', 'en:vegetable-oils', + 'en:olive-oils', 'en:virgin-olive-oils', + 'en:extra-virgin-olive-oils' + ], + ingredients => [{id => "en:extra-virgin-olive-oil"}, {id => "en:virgin-olive-oil"}] +}; +ProductOpener::DataQuality::check_quality($product_ref); +check_quality_and_test_product_has_quality_tag( + $product_ref, + 'en:ingredients-single-ingredient-from-category-does-not-match-actual-ingredients', + 'We expect the ingredient given in the taxonomy for this product', 1 +); +# category with expected ingredient. Single ingredient that is a child of the expected one. +$product_ref = { + categories_tags => [ + 'en:plant-based-foods-and-beverages', 'en:plant-based-foods', + 'en:fats', 'en:vegetable-fats', + 'en:olive-tree-products', 'en:vegetable-oils', + 'en:olive-oils', 'en:virgin-olive-oils', + 'en:extra-virgin-olive-oils' + ], + ingredients => [{id => 'en:extra-virgin-olive-oil'}] +}; +ProductOpener::DataQuality::check_quality($product_ref); +check_quality_and_test_product_has_quality_tag( + $product_ref, + 'en:ingredients-single-ingredient-from-category-does-not-match-actual-ingredients', + 'We expect the ingredient given in the taxonomy for this product', 0 +); +# category with expected ingredient. Single ingredient that is a child of the expected one. 
Two specific categories +$product_ref = { + categories_tags => [ + "en:plant-based-foods-and-beverages", "en:plant-based-foods", + "en:desserts", "en:fats", + "en:frozen-foods", "en:vegetable-fats", + "en:frozen-desserts", "en:olive-tree-products", + "en:vegetable-oils", "en:ice-creams-and-sorbets", + "en:olive-oils", "en:ice-creams", + "en:ice-cream-tubs", "en:virgin-olive-oils", + "en:extra-virgin-olive-oils", "fr:glace-aux-calissons" + ], + ingredients => [{id => 'en:extra-virgin-olive-oil'}] +}; +ProductOpener::DataQuality::check_quality($product_ref); +check_quality_and_test_product_has_quality_tag( + $product_ref, + 'en:ingredients-single-ingredient-from-category-does-not-match-actual-ingredients', + 'We expect the ingredient given in the taxonomy for this product', 0 +); +# category with expected ingredient. Single ingredient identical to the expected one. Two specific categories +$product_ref = { + categories_tags => [ + "en:plant-based-foods-and-beverages", "en:plant-based-foods", + "en:desserts", "en:fats", + "en:frozen-foods", "en:vegetable-fats", + "en:frozen-desserts", "en:olive-tree-products", + "en:vegetable-oils", "en:ice-creams-and-sorbets", + "en:olive-oils", "en:ice-creams", + "en:ice-cream-tubs", "en:virgin-olive-oils", + "en:extra-virgin-olive-oils", "fr:glace-aux-calissons" + ], + ingredients => [{id => 'en:olive-oil'}] +}; +ProductOpener::DataQuality::check_quality($product_ref); +check_quality_and_test_product_has_quality_tag( + $product_ref, + 'en:ingredients-single-ingredient-from-category-does-not-match-actual-ingredients', + 'We expect the ingredient given in the taxonomy for this product', 0 +); + done_testing();