From 39a7ae2dbbef1a6c39158d5f4791e5af79763a94 Mon Sep 17 00:00:00 2001 From: moon-rabbitOFF <34795011+moon-rabbitOFF@users.noreply.github.com> Date: Thu, 16 Nov 2023 20:29:10 +0100 Subject: [PATCH] feat: adding German plurals to the oils in Ingredients.pm (#9304) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * adding German * Update Ingredients.pm * refactor to improve support for German * fix typos * fixes * Update lib/ProductOpener/Ingredients.pm Co-authored-by: benbenben2 <110821832+benbenben2@users.noreply.github.com> * fixes and tests --------- Co-authored-by: Stéphane Gigandet Co-authored-by: benbenben2 <110821832+benbenben2@users.noreply.github.com> --- lib/ProductOpener/Ingredients.pm | 260 ++++++++++++------ stop_words.txt | 4 + taxonomies/labels.txt | 4 +- .../ingredients/en-category-types.json | 107 +++++++ tests/unit/ingredients.t | 8 + tests/unit/ingredients_parsing.t | 10 +- 6 files changed, 301 insertions(+), 92 deletions(-) create mode 100644 tests/unit/expected_test_results/ingredients/en-category-types.json diff --git a/lib/ProductOpener/Ingredients.pm b/lib/ProductOpener/Ingredients.pm index a69cc02805e57..2641866854b64 100644 --- a/lib/ProductOpener/Ingredients.pm +++ b/lib/ProductOpener/Ingredients.pm @@ -3965,8 +3965,7 @@ sub normalize_fr_a_de_b ($a, $b) { } } -=head2 normalize_a_of_b ($lc, $a, $b, $of_bool) - +=head2 normalize_a_of_b ( $lc, $a, $b, $of_bool, $alternate_names_ref ) This function is called by normalize_enumeration() @@ -3991,6 +3990,15 @@ string, category as defined in %ingredients_categories_and_types, example: 'oil' string, type as defined in %ingredients_categories_and_types, example: 'sunflower' or 'olive' or 'palm' for 'oil (sunflower, olive and palm)' +=head4 $of_bool - indicate if we want to construct entries like "<category> of <type>" + +e.g. in French we combine "huile" and "olive" to "huile d'olive" +but we combine "poivron" and "rouge" to "poivron rouge".
+ +=head4 $alternate_names_ref + +Reference to an array of alternate names for the category + =head3 Return value =head4 combined $a and $b (or $b and $a, depending of the language), that is expected to be an ingredient @@ -3999,36 +4007,66 @@ string, comma-joined category and type, example: 'palm vegetal oil' or 'sunflowe =cut -sub normalize_a_of_b ($lc, $a, $b, $of_bool) { +sub normalize_a_of_b ($lc, $a, $b, $of_bool, $alternate_names_ref = undef) { $a =~ s/\s+$//; $b =~ s/^\s+//; + my $a_of_b; + if (($lc eq "en") or ($lc eq "hr")) { - return $b . " " . $a; + $a_of_b = $b . " " . $a; } elsif ($lc eq "es") { - return $a . " de " . $b; + $a_of_b = $a . " de " . $b; } elsif ($lc eq "fr") { $b =~ s/^(de |d')//; if (($b =~ /^(a|e|i|o|u|y|h)/i) && ($of_bool == 1)) { - return $a . " d'" . $b; + $a_of_b = $a . " d'" . $b; } elsif ($of_bool == 1) { - return $a . " de " . $b; + $a_of_b = $a . " de " . $b; } else { - return $a . " " . $b; + $a_of_b = $a . " " . $b; } } - elsif (($lc eq "pl") or ($lc eq "ru")) { - return $a . " " . $b; + elsif (($lc eq "de") or ($lc eq "ru") or ($lc eq "pl")) { + $a_of_b = $a . " " . 
$b; + } + else { + die("unsupported language in normalize_a_of_b: $lc, $a, $b"); + } + + # If we have alternate categories, check if $a_of_b is an existing taxonomy entry, + # otherwise check if we have entries with one of the alternate categories + + if (defined $alternate_names_ref) { + + my $name_exists; + canonicalize_taxonomy_tag($lc, "ingredients", $a_of_b, \$name_exists); + + if (not $name_exists) { + foreach my $alternate_name (@{$alternate_names_ref}) { + my $alternate_name_copy + = $alternate_name; # make a copy so that we can modify it without changing the array entry + $alternate_name_copy =~ s/<type>/$b/; + my $alternate_name_exists; + canonicalize_taxonomy_tag($lc, "ingredients", $alternate_name_copy, \$alternate_name_exists); + if ($alternate_name_exists) { + $a_of_b = $alternate_name_copy; + last; + } + } + } } + + return $a_of_b; } -=head2 normalize_enumeration ($lc, $category, $types, $of_bool) +=head2 normalize_enumeration ($lc, $category, $types, $of_bool, $alternate_names_ref = undef) This function is called by develop_ingredients_categories_and_types() @@ -4062,7 +4100,7 @@ string, comma-joined category with all elements of the types, example: 'sunflowe =cut -sub normalize_enumeration ($lc, $category, $types, $of_bool) { +sub normalize_enumeration ($lc, $category, $types, $of_bool, $alternate_names_ref = undef) { $log->debug("normalize_enumeration", {category => $category, types => $types}) if $log->is_debug(); # If there is a trailing space, save it and output it @@ -4076,7 +4114,8 @@ sub normalize_enumeration ($lc, $category, $types, $of_bool) { my @list = split(/$obrackets|$cbrackets|\/| \/ | $dashes |$commas |$commas|$and/i, $types); - return join(", ", map {normalize_a_of_b($lc, $category, $_, $of_bool)} @list) . $trailing_space; + return + join(", ", map {normalize_a_of_b($lc, $category, $_, $of_bool, $alternate_names_ref)} @list) .
$trailing_space; } # iodure et hydroxide de potassium @@ -5066,23 +5105,60 @@ This function lists each individual ingredient: =cut -# simple plural (just an additional "s" at the end) will be added in the regexp +=head3 %ingredients_categories_and_types + +For each language, we list the categories and types of ingredients that can be combined when the ingredient list +contains something like "<category> (<type1>, <type2> and <type3>)" + +We can also provide a list of alternate_names, so that we can have a category like "oils and fats" and generate +entries like "sunflower oil", "cocoa fat" when the ingredients list contains "oils and fats (sunflower, cocoa)". + +Alternate names need to contain "<type>" which will be replaced by the type. + +This can be especially useful in languages like German where we can create compound words with the type and the category* +like "Kokosnussöl" or "Sonnenblumenfett": + + de => [ + { + categories => ["pflanzliches Fett", "pflanzliche Öle", "pflanzliche Öle und Fette", "Fett", "Öle"], + types => ["Kokosnuss", "Palm", "Palmkern", "Raps", "Shea", "Sonnenblumen",], + # Kokosnussöl, Sonnenblumenfett + alternate_names => ["<type>fett", "<type>öl"], + }, + ], + +Simple plural (just an additional "s" at the end) will be added in the regexp. + +Note that a "<category> ([list of types])" enumeration will be developed only if all the types can be matched +to the specified types in ingredients_categories_and_types.
+ +=cut + my %ingredients_categories_and_types = ( en => [ # oils - [ + { # categories - ["oil", "vegetable oil", "vegetal oil",], + categories => ["oil", "vegetable oil", "vegetal oil",], # types - ["colza", "olive", "palm", "rapeseed", "sunflower",], - ], + types => ["colza", "olive", "palm", "rapeseed", "sunflower",], + }, + ], + + de => [ + { + categories => ["pflanzliches Fett", "pflanzliche Öle", "pflanzliche Öle und Fette", "Fett", "Öle"], + types => ["Kokosnuss", "Palm", "Palmkern", "Raps", "Shea", "Sonnenblumen",], + # Kokosnussöl, Sonnenblumenfett + alternate_names => ["<type>fett", "<type>öl"], + }, ], fr => [ # huiles - [ - [ + { + categories => [ "huile", "huile végétale", "huiles végétales", @@ -5094,7 +5170,7 @@ my %ingredients_categories_and_types = ( "graisse végétale", "graisses végétales", ], - [ + types => [ "arachide", "avocat", "chanvre", "coco", "colza", "illipe", "karité", "lin", "mangue", "noisette", "noix", "noyaux de mangue", @@ -5103,28 +5179,31 @@ my %ingredients_categories_and_types = ( "sal", "sésame", "soja", "tournesol", "tournesol oléique", ] - ], + }, # (natural) extract - [ - ["extrait", "extrait naturel",], - [ + { + categories => ["extrait", "extrait naturel",], + types => [ "café", "chicorée", "curcuma", "houblon", "levure", "malt", "muscade", "poivre", "poivre noir", "romarin", "thé", "thé vert", "thym", ] - ], + }, # lecithin - [["lécithine",], ["colza", "soja", "soja sans ogm", "tournesol",]], + { + categories => ["lécithine",], + types => ["colza", "soja", "soja sans ogm", "tournesol",] + }, # natural flavouring - [ - [ + { + categories => [ "arôme naturel", "arômes naturels", "arôme artificiel", "arômes artificiels", "arômes naturels et artificiels", "arômes", ], - [ + types => [ "abricot", "ail", "amande", "amande amère", "agrumes", "aneth", "boeuf", "cacao", "cannelle", "caramel", "carotte", "carthame", @@ -5144,103 +5223,97 @@ my %ingredients_categories_and_types = ( "sauge", "saumon", "sureau", "thé", "thym", "vanille", "vanille
de Madagascar", "autres agrumes", ] - ], + }, # chemical substances - [ - [ + { + categories => [ "carbonate", "carbonates acides", "chlorure", "citrate", "iodure", "nitrate", "diphosphate", "diphosphate", "phosphate", "sélénite", "sulfate", "hydroxyde", "sulphate", ], - [ + types => [ "aluminium", "ammonium", "calcium", "cuivre", "fer", "magnésium", "manganèse", "potassium", "sodium", "zinc", ] - ], + }, # peppers - [["piment", "poivron"], ["vert", "jaune", "rouge",], 0,], + {categories => ["piment", "poivron"], types => ["vert", "jaune", "rouge",], of_bool => 0,}, ], lt => [ #oils - [ - # categories - ["aliejai", "augaliniai aliejai",], - # types - ["palmių", "rapsų", "saulėgrąžų",], - ], + { + categories => ["aliejai", "augaliniai aliejai",], + types => ["palmių", "rapsų", "saulėgrąžų",], + }, ], hr => [ # malts - [ - # categories - ["slad",], - # types - ["ječmeni", "pšenični",] - ], + { + categories => ["slad",], + types => ["ječmeni", "pšenični",] + }, ], pl => [ # oils and fats - [ - # categories - ["olej", "olej roślinny", "oleje", "oleje roślinne", "tłuszcze", "tłuszcze roślinne", "tłuszcz roślinny",], - # types - [ + { + categories => [ + "olej", + "olej roślinny", + "oleje", + "oleje roślinne", + "tłuszcze", + "tłuszcze roślinne", + "tłuszcz roślinny", + ], + types => [ "rzepakowy", "z oliwek", "palmowy", "słonecznikowy", "kokosowy", "sojowy", "shea", "palmowy utwardzony", "palmowy nieutwardzony", ], - ], + }, # concentrates - [ - # categories - [ + { + categories => [ "koncentraty", "koncentraty roślinne", "soki z zagęszczonych soków z", "soki owocowe", "przeciery", "przeciery z", "soki owocowe z zagęszczonych soków owocowych", ], - # types - [ + types => [ "jabłek", "pomarańczy", "marchwi", "bananów", "brzoskwiń", "gujawy", "papai", "ananasów", "mango", "marakui", "liczi", "kiwi", "limonek", "jabłkowy", "marchwiowy", "bananowy", "pomarańczowy" ], - ], + }, # flours - [ - # categories - ["mąki", "mąka"], - # types - [ + { + categories => ["mąki", 
"mąka"], + types => [ "pszenna", "kukurydziana", "ryżowa", "pszenna pełnoziarnista", "orkiszowa", "żytnia", "jęczmienna", "owsiana", "jaglana", "gryczana", ], - ], + }, #meat - [ - # categories - ["mięso", "mięsa"], - # types - ["wieprzowe", "wołowe", "drobiowe", "z kurczaka", "z indyka", "cielęce"], - ], + { + categories => ["mięso", "mięsa"], + types => ["wieprzowe", "wołowe", "drobiowe", "z kurczaka", "z indyka", "cielęce"], + }, ], ru => [ # oils - [ - # categories - ["масло", "масло растительное",], - # types - [ + { + categories => ["масло", "масло растительное",], + types => [ "Подсолнечное", "Пальмовое", "Рапсовое", "Кокосовое", "горчицы", "Соевое", "Пальмоядровое", "Оливковое", "пальм", ], - ], + }, ], ); @@ -5256,7 +5329,7 @@ sub develop_ingredients_categories_and_types ($ingredients_lc, $text) { foreach my $categories_and_types_ref (@{$ingredients_categories_and_types{$ingredients_lc}}) { my $category_regexp = ""; - foreach my $category (@{$categories_and_types_ref->[0]}) { + foreach my $category (@{$categories_and_types_ref->{categories}}) { $category_regexp .= '|' . $category . '|' . $category . 's'; my $unaccented_category = unac_string_perl($category); if ($unaccented_category ne $category) { @@ -5278,7 +5351,7 @@ sub develop_ingredients_categories_and_types ($ingredients_lc, $text) { } my $type_regexp = ""; - foreach my $type (@{$categories_and_types_ref->[1]}) { + foreach my $type (@{$categories_and_types_ref->{types}}) { $type_regexp .= '|' . $type . '|' . $type . 
's'; my $unaccented_type = unac_string_perl($type); if ($unaccented_type ne $type) { @@ -5288,8 +5361,8 @@ sub develop_ingredients_categories_and_types ($ingredients_lc, $text) { $type_regexp =~ s/^\|//; my $of_bool = 1; - if (defined $categories_and_types_ref->[2]) { - $of_bool = $categories_and_types_ref->[2]; + if (defined $categories_and_types_ref->{of_bool}) { + $of_bool = $categories_and_types_ref->{of_bool}; } # arôme naturel de citron-citron vert et d'autres agrumes @@ -5314,24 +5387,25 @@ sub develop_ingredients_categories_and_types ($ingredients_lc, $text) { } if ( ($ingredients_lc eq "en") + or ($ingredients_lc eq "de") or ($ingredients_lc eq "hr") or ($ingredients_lc eq "ru") or ($ingredients_lc eq "pl")) { # vegetable oil (palm, sunflower and olive) $text - =~ s/($category_regexp)(?::|\(|\[| | $of )+((($type_regexp)($symbols_regexp|\s)*( |\/| \/ | - |,|, |$and|$of|$and_of|$and_or)+)+($type_regexp)($symbols_regexp|\s)*)\b(\s?(\)|\]))?/normalize_enumeration($ingredients_lc,$1,$2,$of_bool)/ieg; + =~ s/($category_regexp)(?::|\(|\[| | $of )+((($type_regexp)($symbols_regexp|\s)*( |\/| \/ | - |,|, |$and|$of|$and_of|$and_or)+)+($type_regexp)($symbols_regexp|\s)*)\b(\s?(\)|\]))?/normalize_enumeration($ingredients_lc,$1,$2,$of_bool, $categories_and_types_ref->{alternate_names})/ieg; # vegetable oil (palm) $text - =~ s/($category_regexp)\s?(?:\(|\[)\s?($type_regexp)\b(\s?(\)|\]))/normalize_enumeration($ingredients_lc,$1,$2,$of_bool)/ieg; + =~ s/($category_regexp)\s?(?:\(|\[)\s?($type_regexp)\b(\s?(\)|\]))/normalize_enumeration($ingredients_lc,$1,$2,$of_bool,$categories_and_types_ref->{alternate_names})/ieg; # vegetable oil: palm $text - =~ s/($category_regexp)\s?(?::)\s?($type_regexp)(?=$separators|.|$)/normalize_enumeration($ingredients_lc,$1,$2,$of_bool)/ieg; + =~ s/($category_regexp)\s?(?::)\s?($type_regexp)(?=$separators|.|$)/normalize_enumeration($ingredients_lc,$1,$2,$of_bool,$categories_and_types_ref->{alternate_names})/ieg; # ječmeni i pšenični slad 
(barley and wheat malt) $text - =~ s/((?:(?:$type_regexp)(?: |\/| \/ | - |,|, |$and|$of|$and_of|$and_or)+)+(?:$type_regexp))\s*($category_regexp)/normalize_enumeration($ingredients_lc,$2,$1,$of_bool)/ieg; + =~ s/((?:(?:$type_regexp)(?: |\/| \/ | - |,|, |$and|$of|$and_of|$and_or)+)+(?:$type_regexp))\s*($category_regexp)/normalize_enumeration($ingredients_lc,$2,$1,$of_bool,$categories_and_types_ref->{alternate_names})/ieg; } elsif ($ingredients_lc eq "fr") { # arôme naturel de pomme avec d'autres âromes @@ -5340,21 +5414,27 @@ sub develop_ingredients_categories_and_types ($ingredients_lc, $text) { $text =~ s/($category_regexp) et ($category_regexp)(?:$of)?($type_regexp)/normalize_fr_a_et_b_de_c($1, $2, $3)/ieg; - # Huiles végétales de palme, de colza et de tournesol # Carbonate de magnésium, fer élémentaire -> should not trigger carbonate de fer élémentaire. Bug #3838 # TODO 18/07/2020 remove when we have a better solution $text =~ s/fer (é|e)l(é|e)mentaire/fer_élémentaire/ig; + # $text =~ s/($category_regexp)(?::|\(|\[| | de | d')+((($type_regexp)($symbols_regexp|\s)*( |\/| \/ | - |,|, | et | de | et de | et d'| d')+)+($type_regexp)($symbols_regexp|\s)*)\b(\s?(\)|\]))?/normalize_enumeration($ingredients_lc,$1,$2,$of_bool, $categories_and_types_ref->{alternate_names})/ieg; + # Huiles végétales de palme, de colza et de tournesol $text - =~ s/($category_regexp)(?::|\(|\[| | de | d')+((($type_regexp)($symbols_regexp|\s)*( |\/| \/ | - |,|, | et | de | et de | et d'| d')+)+($type_regexp)($symbols_regexp|\s)*)\b(\s?(\)|\]))?/normalize_enumeration($ingredients_lc,$1,$2,$of_bool)/ieg; + =~ s/($category_regexp)(?::| | de | d')+((($type_regexp)($symbols_regexp|\s)*( |\/| \/ | - |,|, | et | de | et de | et d'| d')+)+($type_regexp)($symbols_regexp|\s)*)\b/normalize_enumeration($ingredients_lc,$1,$2,$of_bool, $categories_and_types_ref->{alternate_names})/ieg; + + # Huiles végétales (palme, colza et tournesol) + $text + =~ s/($category_regexp)(?:\(|\[)(?:de 
|d')?((($type_regexp)($symbols_regexp|\s)*( |\/| \/ | - |,|, | et | de | et de | et d'| d')+)+($type_regexp)($symbols_regexp|\s)*)\b(\s?(\)|\]))/normalize_enumeration($ingredients_lc,$1,$2,$of_bool, $categories_and_types_ref->{alternate_names})/ieg; + $text =~ s/fer_élémentaire/fer élémentaire/ig; # huile végétale (colza) $text - =~ s/($category_regexp)\s?(?:\(|\[)\s?($type_regexp)\b(\s?(\)|\]))/normalize_enumeration($ingredients_lc,$1,$2,$of_bool)/ieg; + =~ s/($category_regexp)\s?(?:\(|\[)\s?($type_regexp)\b(\s?(\)|\]))/normalize_enumeration($ingredients_lc,$1,$2,$of_bool, $categories_and_types_ref->{alternate_names})/ieg; # huile végétale : colza, $text - =~ s/($category_regexp)\s?(?::)\s?($type_regexp)(?=$separators|.|$)/normalize_enumeration($ingredients_lc,$1,$2,$of_bool)/ieg; + =~ s/($category_regexp)\s?(?::)\s?($type_regexp)(?=$separators|.|$)/normalize_enumeration($ingredients_lc,$1,$2,$of_bool, $categories_and_types_ref->{alternate_names})/ieg; } } diff --git a/stop_words.txt b/stop_words.txt index 5b1419f6fec65..2dac8524223a8 100644 --- a/stop_words.txt +++ b/stop_words.txt @@ -126,6 +126,7 @@ JSON jsonp kcal kJ +Kokosnussöl l'acérola lang lc @@ -177,6 +178,7 @@ PNG PNNS po poire +poivron porc Porc poudre @@ -200,6 +202,8 @@ scrypt Scrypt sirop slad +Sonnenblumen +Sonnenblumenfett sprintf ssconvert stabilisant diff --git a/taxonomies/labels.txt b/taxonomies/labels.txt index bf1c049f76556..dd6e9b7549567 100644 --- a/taxonomies/labels.txt +++ b/taxonomies/labels.txt @@ -19202,10 +19202,12 @@ origins:en: en:france fr:Blé français, Blé exclusivement cultivé en France, Blé Français sélectionné, Blé cultivé en France, 100% blé de France, Blé 100% Français, Blé de France pt:Trigo francês, trigo de França, trigo da França origins:en: en:france +ingredients:en: en:wheat -fr:Farine de blé français +fr:Farine de blé français, farine de blé de France, farine blé de France, farine de blé française pt:Farinha de trigo francesa, farinha de trigo de França, farinha de 
trigo da França origins:en: en:france +ingredients:en: en:wheat-flour fr:Soja Français, Soja de France, Filière Soja Français pt:Soja francêsa, soja de França, soja da França diff --git a/tests/unit/expected_test_results/ingredients/en-category-types.json b/tests/unit/expected_test_results/ingredients/en-category-types.json new file mode 100644 index 0000000000000..6a5c3e9b6347b --- /dev/null +++ b/tests/unit/expected_test_results/ingredients/en-category-types.json @@ -0,0 +1,107 @@ +{ + "ingredients" : [ + { + "from_palm_oil" : "no", + "id" : "en:rapeseed-oil", + "percent_estimate" : 62.5, + "percent_max" : 100, + "percent_min" : 25, + "text" : "Rapsöl", + "vegan" : "yes", + "vegetarian" : "yes" + }, + { + "from_palm_oil" : "yes", + "id" : "en:palm-fat", + "percent_estimate" : 18.75, + "percent_max" : 50, + "percent_min" : 0, + "text" : "Palmfett", + "vegan" : "yes", + "vegetarian" : "yes" + }, + { + "from_palm_oil" : "no", + "id" : "en:shea-butter", + "percent_estimate" : 9.375, + "percent_max" : 33.3333333333333, + "percent_min" : 0, + "text" : "Sheafett", + "vegan" : "yes", + "vegetarian" : "yes" + }, + { + "from_palm_oil" : "no", + "id" : "en:sunflower-fat", + "percent_estimate" : 9.375, + "percent_max" : 25, + "percent_min" : 0, + "text" : "Sonnenblumenfett", + "vegan" : "yes", + "vegetarian" : "yes" + } + ], + "ingredients_analysis" : { + "en:palm-oil" : [ + "en:palm-fat" + ] + }, + "ingredients_analysis_tags" : [ + "en:palm-oil", + "en:vegan", + "en:vegetarian" + ], + "ingredients_hierarchy" : [ + "en:rapeseed-oil", + "en:oil-and-fat", + "en:vegetable-oil-and-fat", + "en:palm-fat", + "en:palm-oil-and-fat", + "en:shea-butter", + "en:vegetable-fat", + "en:sunflower-fat" + ], + "ingredients_n" : 4, + "ingredients_n_tags" : [ + "4", + "1-10" + ], + "ingredients_original_tags" : [ + "en:rapeseed-oil", + "en:palm-fat", + "en:shea-butter", + "en:sunflower-fat" + ], + "ingredients_percent_analysis" : 1, + "ingredients_tags" : [ + "en:rapeseed-oil", + 
"en:oil-and-fat", + "en:vegetable-oil-and-fat", + "en:palm-fat", + "en:palm-oil-and-fat", + "en:shea-butter", + "en:vegetable-fat", + "en:sunflower-fat" + ], + "ingredients_text" : "pflanzliche Öle und Fette (Raps, Palm, Shea, Sonnenblumen)", + "ingredients_with_specified_percent_n" : 0, + "ingredients_with_specified_percent_sum" : 0, + "ingredients_with_unspecified_percent_n" : 4, + "ingredients_with_unspecified_percent_sum" : 100, + "ingredients_without_ciqual_codes" : [ + "en:palm-fat", + "en:rapeseed-oil", + "en:shea-butter", + "en:sunflower-fat" + ], + "ingredients_without_ciqual_codes_n" : 4, + "known_ingredients_n" : 8, + "lc" : "de", + "nutriments" : { + "fruits-vegetables-legumes-estimate-from-ingredients_100g" : 0, + "fruits-vegetables-legumes-estimate-from-ingredients_serving" : 0, + "fruits-vegetables-nuts-estimate-from-ingredients_100g" : 62.5, + "fruits-vegetables-nuts-estimate-from-ingredients_serving" : 62.5 + }, + "unknown_ingredients_n" : 0 +} diff --git a/tests/unit/ingredients.t b/tests/unit/ingredients.t index dee6a7c861a4c..1eb1820771b28 100755 --- a/tests/unit/ingredients.t +++ b/tests/unit/ingredients.t @@ -703,6 +703,14 @@ puffed orange and caramelized unknown_fruit4.", "fruits (apple, banana and dried cherry), vegetables (pitted avocado, peeled black radish).", } ], + # category / types enumeration + [ + "en-category-types", + { + lc => "de", + ingredients_text => "pflanzliche Öle und Fette (Raps, Palm, Shea, Sonnenblumen)", + } + ], [ "fr-viande-de-boeuf-issue-d-animaux-nourris-sans-ogm", { diff --git a/tests/unit/ingredients_parsing.t b/tests/unit/ingredients_parsing.t index b882b39df5943..c1b8777200a73 100755 --- a/tests/unit/ingredients_parsing.t +++ b/tests/unit/ingredients_parsing.t @@ -326,7 +326,7 @@ my @lists = ( [ "fr", "huiles* (tournesol*, olive vierge extra), sel marin. *issus de l'agriculture biologique.", - "huiles Bio de tournesol Bio, huiles Bio d'olive vierge extra), sel marin." 
+ "huiles Bio de tournesol Bio, huiles Bio d'olive vierge extra, sel marin." ], ["fr", "riz de Camargue (1), sel. (1): IGP : Indication Géographique Protégée.", "riz de Camargue IGP, sel."], [ @@ -628,6 +628,12 @@ my @lists = ( ["sk", "syr, E470 a E470a, mlieko.", "syr, e470, e470a, mlieko."], # normalize category and types ["fr", "Piments (vert, rouge, jaune)", "Piments vert, Piments rouge, Piments jaune"], + # New feature: + ["de", "pflanzliches Fett (Kokosnuss, Palmkern)", "Kokosnussfett, Palmkernfett"], + [ + "de", "pflanzliche Öle und Fette (Raps, Palm, Shea, Sonnenblumen)", + "Rapsöl, Palmfett, Sheafett, Sonnenblumenfett" + ], [ "fr", "Huiles végétales de palme, de colza et de tournesol", @@ -642,6 +648,8 @@ my @lists = ( ["en", "Vegetal oil (sunflower, olive and palm)", "sunflower vegetal oil, olive vegetal oil, palm vegetal oil"], ["en", "vegetable oil (palm)", "palm vegetable oil"], ["en", "vegetable oil: palm", "palm vegetable oil"], + # Should not develop the enumeration if it contains unknown types (like "sel" here) + ["fr", "Piments (vert, rouge, jaune, sel)", "Piments (vert, rouge, jaune, sel)"], );