Skip to content

Commit

Permalink
feat: script to filter and generate raw categories (with translations)
Browse files Browse the repository at this point in the history
  • Loading branch information
raphodn committed Feb 4, 2024
1 parent 15bdfdc commit 7bed8b9
Show file tree
Hide file tree
Showing 187 changed files with 346 additions and 2 deletions.
159 changes: 159 additions & 0 deletions data/categories/filter_categories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import json
from openfoodfacts.taxonomy import get_taxonomy


# Taxonomy ids of the root categories. They are resolved to taxonomy nodes and
# passed to filter_categories(), which collects every node that has one of
# them in its parents hierarchy.
# NOTE(review): the trailing numbers look like per-root descendant counts at
# the time of writing -- confirm before relying on them.
PARENT_CATEGORIES_ID = [
    "en:vegetables",  # 391
    "en:fruits",  # 287
    "en:culinary-plants",  # 152
    "en:nuts",  # 77
    "en:potatoes",  # 27
    "en:textured-vegetable-protein",  # 2
]

# Ids of individual categories that filter_categories() appends to the
# selection in addition to the nodes found under the parent categories.
EXTRA_CHILDREN = [
    "en:rolled-oats",
    "en:ginger",
    "en:mushrooms",
]

# English-name prefixes: filter_categories() drops every node whose English
# name starts with one of these. The trailing space is part of the prefix.
ADDITIONAL_FILTERING = [
    "Cooked ",
    "Fresh ",
]


def get_languages():
    """Load and return the parsed content of ``src/i18n/data/languages.json``.

    The path is relative to the current working directory. The file is
    decoded as UTF-8 explicitly so the result does not depend on the
    platform's locale encoding.

    Returns:
        The deserialized JSON document. ``write_categories_to_files`` iterates
        it and reads a ``"code"`` key from each entry.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open("src/i18n/data/languages.json", encoding="utf-8") as f:
        return json.load(f)


def get_category_taxonomy():
    """Return the taxonomy that ``get_taxonomy`` provides for ``"category"``."""
    taxonomy_name = "category"
    return get_taxonomy(taxonomy_name)


def get_taxonomy_node_by_id(taxonomy, node_id):
    """Return the first node of ``taxonomy`` whose ``id`` equals ``node_id``.

    Nodes are visited in the order produced by ``taxonomy.iter_nodes()``.
    Returns ``None`` when no node matches.
    """
    for candidate in taxonomy.iter_nodes():
        if candidate.id == node_id:
            return candidate
    return None


def get_taxonomy_node_list_by_id_list(taxonomy, node_id_list):
    """Resolve each id of ``node_id_list`` to its taxonomy node.

    Ids are looked up with ``get_taxonomy_node_by_id`` in the given order.
    Ids for which the lookup yields a falsy result (e.g. ``None`` for an
    unknown id) are skipped, so the result may be shorter than the input.
    """
    lookups = (
        get_taxonomy_node_by_id(taxonomy, node_id) for node_id in node_id_list
    )
    return [found for found in lookups if found]


def taxonomy_node_list_to_dict_list(node_list, delete_parents=False):
    """Convert taxonomy nodes into plain dictionaries.

    Each resulting dict starts with an ``"id"`` key holding ``node.id`` and is
    then completed with (and, on key clashes, overridden by) the content of
    ``node.to_dict()``.

    Args:
        node_list: iterable of nodes exposing ``id`` and ``to_dict()``.
        delete_parents: when true, the ``"parents"`` key is deleted from every
            dict; a node dict without that key raises ``KeyError``.

    Returns:
        A list with one dict per node, in input order.
    """

    def _as_entry(taxonomy_node):
        entry = {"id": taxonomy_node.id, **taxonomy_node.to_dict()}
        if delete_parents:
            del entry["parents"]
        return entry

    return [_as_entry(taxonomy_node) for taxonomy_node in node_list]


def get_taxonomy_node_children_full_list(taxonomy, node_parent):
    """Return every node of ``taxonomy`` that has ``node_parent`` as an ancestor.

    A node is selected when ``node_parent`` compares equal to one of the
    entries returned by its ``get_parents_hierarchy()``.

    Args:
        taxonomy: object exposing ``iter_nodes()``.
        node_parent: the node to look for in each node's parents hierarchy.

    Returns:
        The matching nodes, in ``taxonomy.iter_nodes()`` order.
    """
    # any() tests the comparison result itself. The previous
    # next(<matching node>, None) form tested the truthiness of the matched
    # node, which silently skips matches whose node object is falsy.
    return [
        node
        for node in taxonomy.iter_nodes()
        if any(ancestor == node_parent for ancestor in node.get_parents_hierarchy())
    ]


def filter_categories(categories, parent_categories):
    """Select the taxonomy nodes to export.

    The selection is built in five steps:
      1. collect the nodes found under every node of ``parent_categories``
         (via ``get_taxonomy_node_children_full_list``);
      2. append the nodes listed in ``EXTRA_CHILDREN`` that exist;
      3. drop nodes whose English name starts with one of the
         ``ADDITIONAL_FILTERING`` prefixes;
      4. remove duplicates, keeping the first occurrence of each node;
      5. keep only nodes whose id starts with ``"en:"``.

    Args:
        categories: the full taxonomy (exposes ``iter_nodes()``).
        parent_categories: iterable of parent nodes to start from.

    Returns:
        A list of unique nodes, in first-seen order.

    How to run the script:
    > pip install openfoodfacts
    > python data/categories/filter_categories.py
    """
    # get child nodes of parent_categories
    selected_nodes = []
    for parent_node in parent_categories:
        selected_nodes.extend(
            get_taxonomy_node_children_full_list(categories, parent_node)
        )

    # add extra nodes
    print("Add extra nodes:", EXTRA_CHILDREN)
    for extra_id in EXTRA_CHILDREN:  # not named `id`: that shadows the builtin
        extra_node = get_taxonomy_node_by_id(categories, extra_id)
        if extra_node is not None:
            selected_nodes.append(extra_node)

    # additional filtering
    print("Additional filtering on:", ADDITIONAL_FILTERING)
    excluded_prefixes = tuple(ADDITIONAL_FILTERING)  # str.startswith needs a tuple
    kept_nodes = [
        node
        for node in selected_nodes
        if not node.get_localized_name("en").startswith(excluded_prefixes)
    ]

    # remove duplicates: dict.fromkeys() keeps insertion order, so the result
    # is the same on every run (list(set(...)) returned an arbitrary order)
    unique_nodes = list(dict.fromkeys(kept_nodes))

    # keep only ids starting with "en:"
    return [node for node in unique_nodes if node.id.startswith("en:")]


def write_categories_to_files(categories):
    """Write one ``src/data/categories/<code>.json`` file per language.

    For every language returned by ``get_languages()``, each category is
    reduced to ``{"id": ..., "name": ...}`` where the name is the translation
    for that language code, or the English name when no translation exists.
    The entries are sorted by name and dumped as JSON.

    Args:
        categories: iterable of dicts with an ``"id"`` key and a ``"name"``
            dict mapping language codes to names.

    Raises:
        KeyError: if a category has neither a name for the language nor an
            ``"en"`` name.
        OSError: if an output file cannot be written.
    """
    languages = get_languages()
    for language in languages:
        language_code = language["code"]
        language_categories = []
        # for each category, get translation (or default to en)
        for category in categories:
            names = category["name"]
            language_category_name = (
                names[language_code] if language_code in names else names["en"]
            )
            language_categories.append(
                {"id": category["id"], "name": language_category_name}
            )
        # order by name
        language_categories.sort(key=lambda entry: entry["name"])
        # write to file; ensure_ascii=False emits raw non-ASCII characters, so
        # the encoding is pinned to UTF-8 instead of the locale default
        with open(
            f"src/data/categories/{language_code}.json", "w", encoding="utf-8"
        ) as f:
            json.dump(language_categories, f, ensure_ascii=False)


def compare_new_categories_with_old_categories():
    """Print the differences between the old and the new category lists.

    Reads ``src/data/category-tags.json`` (old list) and
    ``src/data/categories/en.json`` (new list), both expected to be JSON
    arrays of objects with an ``"id"`` key, and prints:
      - the size of each list;
      - the old categories whose id is absent from the new list;
      - the new categories whose id is absent from the old list.

    Raises:
        OSError: if one of the files cannot be opened.
        json.JSONDecodeError: if one of the files is not valid JSON.
    """
    with open("src/data/category-tags.json", encoding="utf-8") as f:
        old_categories = json.load(f)
    print("old_categories", len(old_categories))

    with open("src/data/categories/en.json", encoding="utf-8") as f:
        new_categories = json.load(f)
    print("new_categories", len(new_categories))

    # id sets give constant-time membership tests instead of rescanning a list
    old_ids = {category["id"] for category in old_categories}
    new_ids = {category["id"] for category in new_categories}

    # check missing in new
    category_missing_in_new_list = [
        category for category in old_categories if category["id"] not in new_ids
    ]
    print("missing in new", len(category_missing_in_new_list))
    print(category_missing_in_new_list)

    # check missing in old: iterate the NEW list and look each id up in the
    # old one (the previous code compared the old list with itself, so this
    # count was always 0)
    category_missing_in_old_list = [
        category for category in new_categories if category["id"] not in old_ids
    ]
    print("missing in old", len(category_missing_in_old_list))
    print(category_missing_in_old_list)


if __name__ == "__main__":
    # Load the full category taxonomy and resolve the configured parent ids.
    full_taxonomy = get_category_taxonomy()
    print("Total number of categories:", len(full_taxonomy))
    parent_nodes = get_taxonomy_node_list_by_id_list(
        full_taxonomy, PARENT_CATEGORIES_ID
    )
    print("Filter with the following parent categories:", parent_nodes)

    # Narrow the taxonomy down and turn the nodes into plain dicts
    # (without their "parents" entry).
    selected_nodes = filter_categories(full_taxonomy, parent_nodes)
    category_dicts = taxonomy_node_list_to_dict_list(
        selected_nodes, delete_parents=True
    )
    print("Categories remaining:", len(category_dicts))

    # Emit one JSON file per language.
    write_categories_to_files(category_dicts)
    print("Wrote to language files")

    # Optional check against the previous category list; uncomment to run it.
    # compare_new_categories_with_old_categories()
1 change: 1 addition & 0 deletions src/data/categories/aa.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ab.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/af.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ak.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/am.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/an.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ar.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/as.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/av.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ay.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/az.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ba.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/be.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/bg.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/bh.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/bi.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/bm.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/bn.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/bo.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/br.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/bs.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ca.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ce.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ch.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/co.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/cr.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/cs.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/cu.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/cv.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/cy.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/da.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/de.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/dv.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/dz.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ee.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/el.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/en.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/eo.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/es.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/et.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/eu.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/fa.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ff.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/fi.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/fj.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/fo.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/fr.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/fy.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ga.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/gd.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/gl.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/gn.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/gu.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/gv.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ha.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/he.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/hi.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ho.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/hr.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ht.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/hu.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/hy.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/hz.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ia.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/id.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ie.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ig.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ii.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ik.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/io.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/is.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/it.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/iu.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ja.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/jv.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ka.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/kg.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ki.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/kj.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/kk.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/kl.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/km.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/kn.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ko.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/kr.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ks.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ku.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/kv.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/kw.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ky.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/la.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/lb.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/lg.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/li.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ln.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/lo.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/lt.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/lu.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/lv.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/mg.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/mh.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/mi.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/mk.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ml.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/mn.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/mo.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/mr.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ms.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/mt.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/my.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/na.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/nb.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/nd.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ne.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ng.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/nl.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/nn.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/no.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/nr.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/nv.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ny.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/oc.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/oj.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/om.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/or.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/os.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/pa.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/pi.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/pl.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ps.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/pt.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/qu.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/rm.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/rn.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ro.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ru.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/rw.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/sa.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/sc.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/sd.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/se.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/sg.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/sh.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/si.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/sk.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/sl.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/sm.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/sn.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/so.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/sq.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/sr.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ss.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/st.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/su.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/sv.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/sw.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ta.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/te.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/tg.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/th.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ti.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/tk.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/tl.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/tn.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/to.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/tr.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ts.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/tt.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/tw.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ty.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ug.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/uk.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ur.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/uz.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/ve.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/vi.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/vo.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/wa.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/wo.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/xh.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/yi.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/yo.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/za.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/zh.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/data/categories/zu.json

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/views/UserSettings.vue
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@
import { mapStores } from 'pinia'
import { useAppStore } from '../store'
import constants from '../constants'
import localeManager from "../i18n/localeManager.js"
import languageData from '../i18n/data/languages.json';
import localeManager from '../i18n/localeManager.js'
import languageData from '../i18n/data/languages.json'
export default {
Expand Down

0 comments on commit 7bed8b9

Please sign in to comment.