diff --git a/data/grammars/traces.lark b/data/grammars/traces.lark
new file mode 100644
index 0000000000..e449919c7d
--- /dev/null
+++ b/data/grammars/traces.lark
@@ -0,0 +1,99 @@
+// Grammar detecting allergen/trace statements in French and English text,
+// e.g. "Peut contenir des traces de ..." or "May contain traces of ...".
+//
+// OTHER matches any non-whitespace word that no other rule/terminal detected.
+// It has priority 0, lower than the priority-1 terminals declared below, so it
+// only matches words that those terminals did not detect.
+OTHER: /[^\s]+/
+
+
+// ## FR ##
+
+// Allergen names; most accept an optional plural "s" and unaccented spellings.
+INGREDIENTS_FR.1: /\bfruits? [aà] coques?\b/
+                | /\bcacahu[èe]tes?\b/
+                | /\bnoix\b/
+                | /\b(graines? de )?s[ée]sames?\b/
+                | /\bsulfites?\b/
+                | /\bc[ée]r[ée]ales?\b/
+                | /\barachides?\b/
+                | /\bgluten\b/
+                | /\blait\b/
+                | /\bsoja\b/
+                | /\blupin\b/
+                | /\boeufs?\b/
+                | /\bœufs?\b/
+                | /\bc[ée]leris?\b/
+                | /\bcrustac[ée]s?\b/
+                | /\bmollusques?\b/
+                | /\bpoissons?\b/
+                | /\bavoines?\b/
+                | /\bmoutardes?\b/
+                | /\bnoisettes?\b/
+                | /\bpistaches?\b/
+                | /\bamandes?\b/
+
+
+// Articles/prepositions allowed before an allergen: "de", "des", "d'", "d", "du"
+OF_FR.1: /\bdes?\b/
+       | /\bd\'/
+       | /\bd\b/
+       | /\bdu\b/
+
+POSSIBLE_FR.1: /\b[ée]ventuelles?\b/
+             | /\bpossibles?\b/
+
+// Case-insensitive: when the optional leading "produit" of manufactured_in_fr
+// is absent, this word opens the sentence and can be capitalised.
+PRODUCTED_FR.1: /\bfabriqu[ée]\b/i
+              | /\bélabor[ée]\b/i
+
+THAT_USES_FR.1: /\bqui utilise\b/
+              | /\butilisant\b/
+
+// e.g. "Produit fabriqué dans un atelier qui utilise : lait, soja"
+manufactured_in_fr: ("produit"i WS)? PRODUCTED_FR WS "dans" WS "un" WS "atelier" WS THAT_USES_FR WS? (":" WS)? trace_list_fr
+// e.g. "Peut contenir des traces de fruits à coque, de cacahuete de sésame, de sulfites et de gluten"
+can_contain_fr: ("peut"i WS "contenir" WS "des" WS)? "traces"i WS (POSSIBLE_FR? WS)? ("de"? WS? ":" WS?)? trace_list_fr
+// e.g. "Peut contenir : lait, soja"
+can_contain_2_fr: "peut"i WS "contenir" WS? (":" WS?)? trace_list_fr
+// e.g. "Contient naturellement des sulfites"
+contains_fr: "contient"i (WS "naturellement")? WS trace_list_fr
+
+// Allergen list: items may be separated by whitespace, a comma and/or "et",
+// each optionally preceded by an OF_FR article.
+trace_list_fr: (OF_FR WS?)? INGREDIENTS_FR (WS? ("," WS?)? (("et" WS)? (OF_FR WS?)?)? INGREDIENTS_FR)*
+traces_fr: can_contain_fr | can_contain_2_fr | contains_fr | manufactured_in_fr
+
+// ## EN ##
+
+INGREDIENTS_EN.1: /\bnuts?\b/
+                | /\bpeanuts?\b/
+                | /\bsesame\b/
+                | /\bsulphites?\b/
+                | /\bgluten\b/
+                | /\bmilk\b/
+                | /\bsoya?\b/
+                | /\bwheat\b/
+                | /\beggs?\b/
+                | /\bmustard\b/
+                | /\bcelery\b/
+                | /\bbarley\b/
+
+// e.g. "It may contain traces of nuts, peanuts, sesame, sulphites and gluten."
+// "may" is case-insensitive because it opens the sentence when "it" is omitted.
+can_contain_en: ("it"i WS)? "may"i WS "contain" WS ("traces" WS "of" WS)? trace_list_en
+// e.g. "Contains traces of: milk"
+contain_en: "contains"i (WS "traces" WS "of")? (WS? ":" WS?)? trace_list_en
+// e.g. "Prepared in premises where traces of nuts are used"
+manufactured_in_en: "prepared"i WS "in" WS "premises" WS "where" WS "traces" WS "of" WS trace_list_en WS "are" WS "used"
+// The whitespace after a comma is optional, as in trace_list_fr.
+trace_list_en: INGREDIENTS_EN (WS? ("," WS?)? (("and" WS)? ("of" WS)?)? INGREDIENTS_EN)*
+traces_en: can_contain_en | contain_en | manufactured_in_en
+
+traces: traces_fr | traces_en
+start: (traces | junk | WS)+
+// all other words
+junk: OTHER+
+
+%import common.WS