Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: use normalized ids for untaxonomized ingredients and specific ingredients #7131

Merged
merged 5 commits into from
Jul 26, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 3 additions & 7 deletions lib/ProductOpener/Ingredients.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1137,7 +1137,7 @@ sub parse_specific_ingredients_from_text($$$) {

# If we found an ingredient, save it in specific_ingredients
if (defined $ingredient) {
- my $ingredient_id = canonicalize_taxonomy_tag($product_lc, "ingredients", $ingredient);
+ my $ingredient_id = get_taxonomyid($product_lc, canonicalize_taxonomy_tag($product_lc, "ingredients", $ingredient));

$matched_text =~ s/^\s+//;

Expand Down Expand Up @@ -1271,7 +1271,7 @@ sub parse_origins_from_text($$) {

my $matched_text = $matched_ingredient_ref->{matched_text};
my $ingredient = $matched_ingredient_ref->{ingredient};
- my $ingredient_id = canonicalize_taxonomy_tag($product_lc, "ingredients", $ingredient);
+ my $ingredient_id = get_taxonomyid($product_lc, canonicalize_taxonomy_tag($product_lc, "ingredients", $ingredient));

# Remove extra spaces
$ingredient =~ s/\s+$//;
Expand Down Expand Up @@ -2075,7 +2075,7 @@ sub parse_ingredients_text($) {
if (not $skip_ingredient) {

my %ingredient = (
- id => $ingredient_id,
+ id => get_taxonomyid($product_ref->{lc},$ingredient_id),
text => $ingredient
);

Expand Down Expand Up @@ -5749,14 +5749,10 @@ sub extract_ingredients_classes_from_text($) {
delete $product_ref->{$field . "_next_tags" };
}




if ((defined $product_ref->{ingredients_that_may_be_from_palm_oil_n}) or (defined $product_ref->{ingredients_from_palm_oil_n})) {
$product_ref->{ingredients_from_or_that_may_be_from_palm_oil_n} = $product_ref->{ingredients_that_may_be_from_palm_oil_n} + $product_ref->{ingredients_from_palm_oil_n};
}


delete $product_ref->{with_sweeteners};
if (defined $product_ref->{'additives_tags'}) {
foreach my $additive (@{$product_ref->{'additives_tags'}}) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@
"vegetarian" : "yes"
},
{
- "id" : "en:some unknown ingredient",
+ "id" : "en:some-unknown-ingredient",
"percent_estimate" : 21.875,
"percent_max" : 50,
"percent_min" : 0,
"text" : "some unknown ingredient"
},
{
- "id" : "en:another unknown ingredient",
+ "id" : "en:another-unknown-ingredient",
"percent_estimate" : 10.9375,
"percent_max" : 33.3333333333333,
"percent_min" : 0,
Expand Down Expand Up @@ -74,16 +74,16 @@
"en:milk"
],
"en:palm-oil-content-unknown" : [
- "en:some unknown ingredient",
- "en:another unknown ingredient"
+ "en:some-unknown-ingredient",
+ "en:another-unknown-ingredient"
],
"en:vegan-status-unknown" : [
- "en:some unknown ingredient",
- "en:another unknown ingredient"
+ "en:some-unknown-ingredient",
+ "en:another-unknown-ingredient"
],
"en:vegetarian-status-unknown" : [
- "en:some unknown ingredient",
- "en:another unknown ingredient"
+ "en:some-unknown-ingredient",
+ "en:another-unknown-ingredient"
]
},
"ingredients_analysis_tags" : [
Expand All @@ -94,8 +94,8 @@
"ingredients_hierarchy" : [
"en:milk",
"en:dairy",
- "en:some unknown ingredient",
- "en:another unknown ingredient",
+ "en:some-unknown-ingredient",
+ "en:another-unknown-ingredient",
"en:salt",
"en:sugar",
"en:added-sugar",
Expand All @@ -113,8 +113,8 @@
],
"ingredients_original_tags" : [
"en:milk",
- "en:some unknown ingredient",
- "en:another unknown ingredient",
+ "en:some-unknown-ingredient",
+ "en:another-unknown-ingredient",
"en:salt",
"en:sugar",
"en:pepper",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
{
"ingredients" : [
{
"id" : "en:peach",
"origins" : "en:spain",
"percent_estimate" : 66.6666666666667,
"percent_max" : 100,
"percent_min" : 33.3333333333333,
"text" : "Peaches",
"vegan" : "yes",
"vegetarian" : "yes"
},
{
"id" : "en:some-unknown-ingredient",
"origins" : "en:france",
"percent_estimate" : 16.6666666666667,
"percent_max" : 50,
"percent_min" : 0,
"text" : "Some unknown ingredient"
},
{
"id" : "en:another-unknown-ingredient",
"origins" : "en:malta",
"percent_estimate" : 16.6666666666667,
"percent_max" : 33.3333333333333,
"percent_min" : 0,
"text" : "another unknown ingredient"
}
],
"ingredients_analysis" : {
"en:palm-oil-content-unknown" : [
"en:some-unknown-ingredient",
"en:another-unknown-ingredient"
],
"en:vegan-status-unknown" : [
"en:some-unknown-ingredient",
"en:another-unknown-ingredient"
],
"en:vegetarian-status-unknown" : [
"en:some-unknown-ingredient",
"en:another-unknown-ingredient"
]
},
"ingredients_analysis_tags" : [
"en:palm-oil-content-unknown",
"en:vegan-status-unknown",
"en:vegetarian-status-unknown"
],
"ingredients_hierarchy" : [
"en:peach",
"en:fruit",
"en:some-unknown-ingredient",
"en:another-unknown-ingredient"
],
"ingredients_n" : 3,
"ingredients_n_tags" : [
"3",
"1-10"
],
"ingredients_original_tags" : [
"en:peach",
"en:some-unknown-ingredient",
"en:another-unknown-ingredient"
],
"ingredients_percent_analysis" : 1,
"ingredients_tags" : [
"en:peach",
"en:fruit",
"en:some-unknown-ingredient",
"en:another-unknown-ingredient"
],
"ingredients_text" : "Peaches. Some unknown ingredient, another unknown ingredient.\nOrigin of peaches: Spain. Origin of some unknown ingredient: France. origin of Another Unknown Ingredient: Malta",
"ingredients_with_specified_percent_n" : 0,
"ingredients_with_specified_percent_sum" : 0,
"ingredients_with_unspecified_percent_n" : 3,
"ingredients_with_unspecified_percent_sum" : 100,
"known_ingredients_n" : 2,
"lc" : "en",
"nutriments" : {
"fruits-vegetables-nuts-estimate-from-ingredients_100g" : 33.3333333333333,
"fruits-vegetables-nuts-estimate-from-ingredients_serving" : 33.3333333333333
},
"specific_ingredients" : [
{
"id" : "en:peach",
"ingredient" : "peaches",
"origins" : "en:spain",
"text" : "Origin of peaches: Spain."
},
{
"id" : "en:some-unknown-ingredient",
"ingredient" : "some unknown ingredient",
"origins" : "en:france",
"text" : "Origin of some unknown ingredient: France."
},
{
"id" : "en:another-unknown-ingredient",
"ingredient" : "Another Unknown Ingredient",
"origins" : "en:malta",
"text" : "origin of Another Unknown Ingredient: Malta"
}
],
"unknown_ingredients_n" : 2
}
24 changes: 12 additions & 12 deletions tests/unit/expected_test_results/ingredients/en-origins-u.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"ingredients" : [
{
- "id" : "en:Something",
+ "id" : "en:something",
"ingredients" : [
{
- "id" : "en:U",
+ "id" : "en:u",
"percent_estimate" : 100,
"percent_max" : 100,
"percent_min" : 100,
Expand All @@ -19,16 +19,16 @@
],
"ingredients_analysis" : {
"en:palm-oil-content-unknown" : [
- "en:Something",
- "en:U"
+ "en:something",
+ "en:u"
],
"en:vegan-status-unknown" : [
- "en:Something",
- "en:U"
+ "en:something",
+ "en:u"
],
"en:vegetarian-status-unknown" : [
- "en:Something",
- "en:U"
+ "en:something",
+ "en:u"
]
},
"ingredients_analysis_tags" : [
Expand All @@ -37,17 +37,17 @@
"en:vegetarian-status-unknown"
],
"ingredients_hierarchy" : [
- "en:Something",
- "en:U"
+ "en:something",
+ "en:u"
],
"ingredients_n" : 2,
"ingredients_n_tags" : [
"2",
"1-10"
],
"ingredients_original_tags" : [
- "en:Something",
- "en:U"
+ "en:something",
+ "en:u"
],
"ingredients_percent_analysis" : 1,
"ingredients_tags" : [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"vegetarian" : "en:yes"
},
{
- "id" : "en:Charcoal",
+ "id" : "en:charcoal",
"labels" : "en:vegan",
"percent_estimate" : 20,
"percent_max" : 50,
Expand Down Expand Up @@ -53,7 +53,7 @@
],
"ingredients_analysis" : {
"en:palm-oil-content-unknown" : [
- "en:Charcoal"
+ "en:charcoal"
]
},
"ingredients_analysis_tags" : [
Expand All @@ -63,7 +63,7 @@
],
"ingredients_hierarchy" : [
"en:e428",
- "en:Charcoal",
+ "en:charcoal",
"en:ferment",
"en:rennet",
"en:enzyme",
Expand All @@ -77,7 +77,7 @@
],
"ingredients_original_tags" : [
"en:e428",
- "en:Charcoal",
+ "en:charcoal",
"en:ferment",
"en:rennet",
"en:flavouring"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,21 @@
"id" : "en:egg",
"ingredients" : [
{
- "id" : "fr:d'élevage au sol",
+ "id" : "fr:d-elevage-au-sol",
"percent_estimate" : 66.6666666666667,
"percent_max" : 100,
"percent_min" : 33.3333333333333,
"text" : "d'élevage au sol"
},
{
- "id" : "fr:Suisse",
+ "id" : "fr:suisse",
"percent_estimate" : 16.6666666666667,
"percent_max" : 50,
"percent_min" : 0,
"text" : "Suisse"
},
{
- "id" : "fr:France",
+ "id" : "fr:france",
"percent_estimate" : 16.6666666666667,
"percent_max" : 33.3333333333333,
"percent_min" : 0,
Expand All @@ -38,19 +38,19 @@
"en:egg"
],
"en:palm-oil-content-unknown" : [
- "fr:d'élevage au sol",
- "fr:Suisse",
- "fr:France"
+ "fr:d-elevage-au-sol",
+ "fr:suisse",
+ "fr:france"
],
"en:vegan-status-unknown" : [
- "fr:d'élevage au sol",
- "fr:Suisse",
- "fr:France"
+ "fr:d-elevage-au-sol",
+ "fr:suisse",
+ "fr:france"
],
"en:vegetarian-status-unknown" : [
- "fr:d'élevage au sol",
- "fr:Suisse",
- "fr:France"
+ "fr:d-elevage-au-sol",
+ "fr:suisse",
+ "fr:france"
]
},
"ingredients_analysis_tags" : [
Expand All @@ -60,9 +60,9 @@
],
"ingredients_hierarchy" : [
"en:egg",
- "fr:d'élevage au sol",
- "fr:Suisse",
- "fr:France"
+ "fr:d-elevage-au-sol",
+ "fr:suisse",
+ "fr:france"
],
"ingredients_n" : 4,
"ingredients_n_tags" : [
Expand All @@ -71,9 +71,9 @@
],
"ingredients_original_tags" : [
"en:egg",
- "fr:d'élevage au sol",
- "fr:Suisse",
- "fr:France"
+ "fr:d-elevage-au-sol",
+ "fr:suisse",
+ "fr:france"
],
"ingredients_percent_analysis" : 1,
"ingredients_tags" : [
Expand Down
Loading