Skip to content

Commit

Permalink
fix: use normalized ids for untaxonomized ingredients and specific ingredients (#7131)
Browse files Browse the repository at this point in the history

* fix: use normalized ids for untaxonomized ingredients and specific ingredients #7063

* fix issue with specific ingredients

* fix tests

Co-authored-by: Alex Garel <alex@garel.org>
  • Loading branch information
stephanegigandet and alexgarel committed Jul 26, 2022
1 parent ad78f9e commit d57cd90
Show file tree
Hide file tree
Showing 19 changed files with 286 additions and 161 deletions.
10 changes: 3 additions & 7 deletions lib/ProductOpener/Ingredients.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1137,7 +1137,7 @@ sub parse_specific_ingredients_from_text($$$) {

# If we found an ingredient, save it in specific_ingredients
if (defined $ingredient) {
my $ingredient_id = canonicalize_taxonomy_tag($product_lc, "ingredients", $ingredient);
my $ingredient_id = get_taxonomyid($product_lc, canonicalize_taxonomy_tag($product_lc, "ingredients", $ingredient));

$matched_text =~ s/^\s+//;

Expand Down Expand Up @@ -1271,7 +1271,7 @@ sub parse_origins_from_text($$) {

my $matched_text = $matched_ingredient_ref->{matched_text};
my $ingredient = $matched_ingredient_ref->{ingredient};
my $ingredient_id = canonicalize_taxonomy_tag($product_lc, "ingredients", $ingredient);
my $ingredient_id = get_taxonomyid($product_lc, canonicalize_taxonomy_tag($product_lc, "ingredients", $ingredient));

# Remove extra spaces
$ingredient =~ s/\s+$//;
Expand Down Expand Up @@ -2075,7 +2075,7 @@ sub parse_ingredients_text($) {
if (not $skip_ingredient) {

my %ingredient = (
id => $ingredient_id,
id => get_taxonomyid($product_ref->{lc},$ingredient_id),
text => $ingredient
);

Expand Down Expand Up @@ -5749,14 +5749,10 @@ sub extract_ingredients_classes_from_text($) {
delete $product_ref->{$field . "_next_tags" };
}




if ((defined $product_ref->{ingredients_that_may_be_from_palm_oil_n}) or (defined $product_ref->{ingredients_from_palm_oil_n})) {
$product_ref->{ingredients_from_or_that_may_be_from_palm_oil_n} = $product_ref->{ingredients_that_may_be_from_palm_oil_n} + $product_ref->{ingredients_from_palm_oil_n};
}


delete $product_ref->{with_sweeteners};
if (defined $product_ref->{'additives_tags'}) {
foreach my $additive (@{$product_ref->{'additives_tags'}}) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@
"vegetarian" : "yes"
},
{
"id" : "en:some unknown ingredient",
"id" : "en:some-unknown-ingredient",
"percent_estimate" : 21.875,
"percent_max" : 50,
"percent_min" : 0,
"text" : "some unknown ingredient"
},
{
"id" : "en:another unknown ingredient",
"id" : "en:another-unknown-ingredient",
"percent_estimate" : 10.9375,
"percent_max" : 33.3333333333333,
"percent_min" : 0,
Expand Down Expand Up @@ -74,16 +74,16 @@
"en:milk"
],
"en:palm-oil-content-unknown" : [
"en:some unknown ingredient",
"en:another unknown ingredient"
"en:some-unknown-ingredient",
"en:another-unknown-ingredient"
],
"en:vegan-status-unknown" : [
"en:some unknown ingredient",
"en:another unknown ingredient"
"en:some-unknown-ingredient",
"en:another-unknown-ingredient"
],
"en:vegetarian-status-unknown" : [
"en:some unknown ingredient",
"en:another unknown ingredient"
"en:some-unknown-ingredient",
"en:another-unknown-ingredient"
]
},
"ingredients_analysis_tags" : [
Expand All @@ -94,8 +94,8 @@
"ingredients_hierarchy" : [
"en:milk",
"en:dairy",
"en:some unknown ingredient",
"en:another unknown ingredient",
"en:some-unknown-ingredient",
"en:another-unknown-ingredient",
"en:salt",
"en:sugar",
"en:added-sugar",
Expand All @@ -113,8 +113,8 @@
],
"ingredients_original_tags" : [
"en:milk",
"en:some unknown ingredient",
"en:another unknown ingredient",
"en:some-unknown-ingredient",
"en:another-unknown-ingredient",
"en:salt",
"en:sugar",
"en:pepper",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
{
"ingredients" : [
{
"id" : "en:peach",
"origins" : "en:spain",
"percent_estimate" : 66.6666666666667,
"percent_max" : 100,
"percent_min" : 33.3333333333333,
"text" : "Peaches",
"vegan" : "yes",
"vegetarian" : "yes"
},
{
"id" : "en:some-unknown-ingredient",
"origins" : "en:france",
"percent_estimate" : 16.6666666666667,
"percent_max" : 50,
"percent_min" : 0,
"text" : "Some unknown ingredient"
},
{
"id" : "en:another-unknown-ingredient",
"origins" : "en:malta",
"percent_estimate" : 16.6666666666667,
"percent_max" : 33.3333333333333,
"percent_min" : 0,
"text" : "another unknown ingredient"
}
],
"ingredients_analysis" : {
"en:palm-oil-content-unknown" : [
"en:some-unknown-ingredient",
"en:another-unknown-ingredient"
],
"en:vegan-status-unknown" : [
"en:some-unknown-ingredient",
"en:another-unknown-ingredient"
],
"en:vegetarian-status-unknown" : [
"en:some-unknown-ingredient",
"en:another-unknown-ingredient"
]
},
"ingredients_analysis_tags" : [
"en:palm-oil-content-unknown",
"en:vegan-status-unknown",
"en:vegetarian-status-unknown"
],
"ingredients_hierarchy" : [
"en:peach",
"en:fruit",
"en:some-unknown-ingredient",
"en:another-unknown-ingredient"
],
"ingredients_n" : 3,
"ingredients_n_tags" : [
"3",
"1-10"
],
"ingredients_original_tags" : [
"en:peach",
"en:some-unknown-ingredient",
"en:another-unknown-ingredient"
],
"ingredients_percent_analysis" : 1,
"ingredients_tags" : [
"en:peach",
"en:fruit",
"en:some-unknown-ingredient",
"en:another-unknown-ingredient"
],
"ingredients_text" : "Peaches. Some unknown ingredient, another unknown ingredient.\nOrigin of peaches: Spain. Origin of some unknown ingredient: France. origin of Another Unknown Ingredient: Malta",
"ingredients_with_specified_percent_n" : 0,
"ingredients_with_specified_percent_sum" : 0,
"ingredients_with_unspecified_percent_n" : 3,
"ingredients_with_unspecified_percent_sum" : 100,
"known_ingredients_n" : 2,
"lc" : "en",
"nutriments" : {
"fruits-vegetables-nuts-estimate-from-ingredients_100g" : 33.3333333333333,
"fruits-vegetables-nuts-estimate-from-ingredients_serving" : 33.3333333333333
},
"specific_ingredients" : [
{
"id" : "en:peach",
"ingredient" : "peaches",
"origins" : "en:spain",
"text" : "Origin of peaches: Spain."
},
{
"id" : "en:some-unknown-ingredient",
"ingredient" : "some unknown ingredient",
"origins" : "en:france",
"text" : "Origin of some unknown ingredient: France."
},
{
"id" : "en:another-unknown-ingredient",
"ingredient" : "Another Unknown Ingredient",
"origins" : "en:malta",
"text" : "origin of Another Unknown Ingredient: Malta"
}
],
"unknown_ingredients_n" : 2
}
24 changes: 12 additions & 12 deletions tests/unit/expected_test_results/ingredients/en-origins-u.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"ingredients" : [
{
"id" : "en:Something",
"id" : "en:something",
"ingredients" : [
{
"id" : "en:U",
"id" : "en:u",
"percent_estimate" : 100,
"percent_max" : 100,
"percent_min" : 100,
Expand All @@ -19,16 +19,16 @@
],
"ingredients_analysis" : {
"en:palm-oil-content-unknown" : [
"en:Something",
"en:U"
"en:something",
"en:u"
],
"en:vegan-status-unknown" : [
"en:Something",
"en:U"
"en:something",
"en:u"
],
"en:vegetarian-status-unknown" : [
"en:Something",
"en:U"
"en:something",
"en:u"
]
},
"ingredients_analysis_tags" : [
Expand All @@ -37,17 +37,17 @@
"en:vegetarian-status-unknown"
],
"ingredients_hierarchy" : [
"en:Something",
"en:U"
"en:something",
"en:u"
],
"ingredients_n" : 2,
"ingredients_n_tags" : [
"2",
"1-10"
],
"ingredients_original_tags" : [
"en:Something",
"en:U"
"en:something",
"en:u"
],
"ingredients_percent_analysis" : 1,
"ingredients_tags" : [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"vegetarian" : "en:yes"
},
{
"id" : "en:Charcoal",
"id" : "en:charcoal",
"labels" : "en:vegan",
"percent_estimate" : 20,
"percent_max" : 50,
Expand Down Expand Up @@ -53,7 +53,7 @@
],
"ingredients_analysis" : {
"en:palm-oil-content-unknown" : [
"en:Charcoal"
"en:charcoal"
]
},
"ingredients_analysis_tags" : [
Expand All @@ -63,7 +63,7 @@
],
"ingredients_hierarchy" : [
"en:e428",
"en:Charcoal",
"en:charcoal",
"en:ferment",
"en:rennet",
"en:enzyme",
Expand All @@ -77,7 +77,7 @@
],
"ingredients_original_tags" : [
"en:e428",
"en:Charcoal",
"en:charcoal",
"en:ferment",
"en:rennet",
"en:flavouring"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,21 @@
"id" : "en:egg",
"ingredients" : [
{
"id" : "fr:d'élevage au sol",
"id" : "fr:d-elevage-au-sol",
"percent_estimate" : 66.6666666666667,
"percent_max" : 100,
"percent_min" : 33.3333333333333,
"text" : "d'élevage au sol"
},
{
"id" : "fr:Suisse",
"id" : "fr:suisse",
"percent_estimate" : 16.6666666666667,
"percent_max" : 50,
"percent_min" : 0,
"text" : "Suisse"
},
{
"id" : "fr:France",
"id" : "fr:france",
"percent_estimate" : 16.6666666666667,
"percent_max" : 33.3333333333333,
"percent_min" : 0,
Expand All @@ -38,19 +38,19 @@
"en:egg"
],
"en:palm-oil-content-unknown" : [
"fr:d'élevage au sol",
"fr:Suisse",
"fr:France"
"fr:d-elevage-au-sol",
"fr:suisse",
"fr:france"
],
"en:vegan-status-unknown" : [
"fr:d'élevage au sol",
"fr:Suisse",
"fr:France"
"fr:d-elevage-au-sol",
"fr:suisse",
"fr:france"
],
"en:vegetarian-status-unknown" : [
"fr:d'élevage au sol",
"fr:Suisse",
"fr:France"
"fr:d-elevage-au-sol",
"fr:suisse",
"fr:france"
]
},
"ingredients_analysis_tags" : [
Expand All @@ -60,9 +60,9 @@
],
"ingredients_hierarchy" : [
"en:egg",
"fr:d'élevage au sol",
"fr:Suisse",
"fr:France"
"fr:d-elevage-au-sol",
"fr:suisse",
"fr:france"
],
"ingredients_n" : 4,
"ingredients_n_tags" : [
Expand All @@ -71,9 +71,9 @@
],
"ingredients_original_tags" : [
"en:egg",
"fr:d'élevage au sol",
"fr:Suisse",
"fr:France"
"fr:d-elevage-au-sol",
"fr:suisse",
"fr:france"
],
"ingredients_percent_analysis" : 1,
"ingredients_tags" : [
Expand Down
Loading

0 comments on commit d57cd90

Please sign in to comment.