Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Adding a Knowledge Panel for data-quality #10

Merged
merged 22 commits into from
Jul 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 49 additions & 3 deletions app/knowledge_panels.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from typing import Union

from urllib.parse import urlencode

from .models import HungerGameFilter
from .models import HungerGameFilter, country_to_ISO_code
from .off import dataQuality


def hunger_game_kp(
Expand Down Expand Up @@ -39,3 +38,50 @@ def hunger_game_kp(
],
},
}


def data_quality_kp(
    facet,
    value: Union[str, None] = None,
    country: Union[str, None] = None,
):
    """
    Build the "Quality" knowledge panel listing data-quality issues.

    The panel is scoped by an optional country, which selects the
    ``<code>-en.openfoodfacts.org`` host instead of ``world``, and by an
    optional ``facet``/``value`` pair, which selects the path on that host.

    Args:
        facet: facet name such as ``"brand"``. ``"country"`` is special-cased:
            its ``value`` is used as the country and no facet path is added.
        value: facet value, e.g. ``"lidl"``; only used together with a facet.
        country: country name understood by ``country_to_ISO_code``.

    Returns:
        A dict with a single ``"Quality"`` key holding ``title``,
        ``subtitle``, ``source_url`` and a one-item ``elements`` list that
        carries the HTML produced by ``dataQuality``.

    Raises:
        Any exception raised by ``country_to_ISO_code`` or ``dataQuality``.
    """
    # A facet may arrive as a member of a ``str``-mixin Enum; on recent Python
    # versions such members can format as "Class.member" inside f-strings, so
    # reduce them to their plain string value before building any text.
    facet = getattr(facet, "value", facet)

    if facet == "country":
        # The facet itself names the country: it takes precedence over the
        # ``country`` argument and contributes no facet path of its own.
        # Without a value, fall back to the ``country`` argument.
        if value is not None:
            country = value
        facet = value = None

    if country is None:
        # No trailing slash: "/data-quality.json" is appended downstream and
        # a trailing slash here would produce "//data-quality.json".
        url = "https://world.openfoodfacts.org"
    else:
        country_code = country_to_ISO_code(value=country)
        url = f"https://{country_code}-en.openfoodfacts.org"

    # A value is only meaningful below its facet.
    path_parts = []
    if facet is not None:
        path_parts.append(facet)
        if value is not None:
            path_parts.append(value)
    path = "/".join(path_parts)

    # Space-separate the scope so country and facet do not run together.
    scope_parts = ([] if country is None else [country]) + path_parts
    scope = " ".join(scope_parts)
    description = f"Data-quality issues related to {scope}"

    (quality_html, source_url) = dataQuality(url=url, path=path)

    return {
        "Quality": {
            "title": "Data-quality issues",
            "subtitle": f"{description}",
            "source_url": f"{source_url}/data-quality",
            "elements": [
                {
                    "element_type": "text",
                    "text_element": quality_html,
                }
            ],
        },
    }
17 changes: 12 additions & 5 deletions app/main.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import logging
from typing import Union

from fastapi import FastAPI

from .knowledge_panels import hunger_game_kp
from .knowledge_panels import data_quality_kp, hunger_game_kp
from .models import FacetName, HungerGameFilter

app = FastAPI()
Expand All @@ -21,14 +20,22 @@ def knowledge_panel(
facet_value: Union[str, None] = None,
country: Union[str, None] = None,
):
# FacetName is the model that have list of values
# facet_value are the list of values connecting to FacetName eg:- category/beer, here beer is the value
"""
FacetName is the model that have list of values
facet_value are the list of values connecting to FacetName eg:- category/beer, here beer is the value
"""
panels = []
if facet_name in HungerGameFilter.list():
panels.append(
hunger_game_kp(
hunger_game_filter=facet_name, value=facet_value, country=country
)
)
try:
panels.append(
data_quality_kp(facet=facet_name, value=facet_value, country=country)
)
except Exception as Argument:
logging.exception("error occued while appending data-quality-kp")

return {"knowledge_panels": panels}
12 changes: 10 additions & 2 deletions app/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from enum import Enum

from pydantic import BaseModel
import pycountry


class FacetName(str, Enum):
Expand Down Expand Up @@ -38,3 +37,12 @@ class HungerGameFilter(str, Enum):
@staticmethod
def list():
    """Return the value of every ``HungerGameFilter`` member as a list."""
    return [member.value for member in HungerGameFilter]


def country_to_ISO_code(value: str):
    """
    Return the lower-cased ISO 3166-1 alpha-2 code for a country name.

    Args:
        value: country name as known to ``pycountry`` (e.g. ``"Turkey"``).

    Returns:
        The two-letter country code in lower case (e.g. ``"tr"``).

    Raises:
        ValueError: if ``pycountry`` has no country with that name.
    """
    country_data = pycountry.countries.get(name=value)
    if country_data is None:
        # ``get`` signals "not found" by returning None; fail with a clear
        # message instead of an AttributeError on ``None.alpha_2``.
        raise ValueError(f"Unknown country name: {value!r}")
    return country_data.alpha_2.lower()
21 changes: 21 additions & 0 deletions app/off.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from urllib.parse import urljoin
import requests


def dataQuality(url, path):
    """
    Fetch the data-quality facet of an Open Food Facts page and render it as HTML.

    Args:
        url: base URL of the site, e.g. ``https://world.openfoodfacts.org``.
        path: facet path relative to ``url`` (``""`` for the whole site).

    Returns:
        A ``(html, source_url)`` tuple: an HTML snippet giving the total issue
        count followed by the first three tags of the response, and the page
        URL (without trailing slash) the data was requested for.

    Raises:
        KeyError: if the JSON payload lacks ``count``, ``tags`` or a tag field.
        Any exception raised by ``requests.get`` or by decoding the response.
    """
    from html import escape

    # Drop a trailing slash so appending "/data-quality.json" never yields "//".
    source_url = urljoin(url, path).rstrip("/")
    quality_url = f"{source_url}/data-quality.json"
    response_API = requests.get(quality_url)
    data = response_API.json()
    total_issues = data["count"]
    tags = data["tags"]
    # Tag fields come from a remote service: escape them before embedding
    # them in markup so they cannot break out of the attribute or element.
    items_html = "\n".join(
        '<li><a href="{url}">{products} products with {name}</a></li>'.format(
            url=escape(str(tag["url"])),
            products=escape(str(tag["products"])),
            name=escape(str(tag["name"])),
        )
        for tag in tags[0:3]
    )
    expected_html = f"<p>The total number of issues are {total_issues},here couples of issues</p><ul>{items_html}</ul>"

    return expected_html, source_url
Binary file modified requirements.txt
Binary file not shown.
137 changes: 137 additions & 0 deletions tests/test_knowledge_panels.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from app.main import hunger_game_kp
import requests
import app.main
from .test_utils import mock_get_factory, tidy_html


def test_hunger_game_kp_with_filter_value_and_country():
Expand Down Expand Up @@ -91,3 +94,137 @@ def test_hunger_game_kp_label_with_value():
]
}
}


def test_data_quality_kp_with_country(monkeypatch):
    """With facet ``country`` the facet value, not the ``country`` argument, selects the site."""
    quality_url = "https://tr-en.openfoodfacts.org/data-quality.json"
    payload = {
        "count": 125,
        "tags": [
            {
                "id": "en:ecoscore-production-system-no-label",
                "known": 0,
                "name": "ecoscore-production-system-no-label",
                "products": 1407,
                "url": "https://tr-en.openfoodfacts.org/data-quality/ecoscore-production-system-no-label",
            },
            {
                "id": "en:no-packaging-data",
                "known": 0,
                "name": "no-packaging-data",
                "products": 1358,
                "url": "https://tr-en.openfoodfacts.org/data-quality/no-packaging-data",
            },
            {
                "id": "en:ecoscore-packaging-packaging-data-missing",
                "known": 0,
                "name": "ecoscore-packaging-packaging-data-missing",
                "products": 1341,
                "url": "https://tr-en.openfoodfacts.org/data-quality/ecoscore-packaging-packaging-data-missing",
            },
        ],
    }
    expected_html = """
<p>
The total number of issues are 125,here couples of issues
</p>
<ul>
<li>
<a href="https://tr-en.openfoodfacts.org/data-quality/ecoscore-production-system-no-label">1407 products with ecoscore-production-system-no-label</a>
</li>
<li>
<a href="https://tr-en.openfoodfacts.org/data-quality/no-packaging-data">1358 products with no-packaging-data</a>
</li>
<li>
<a href="https://tr-en.openfoodfacts.org/data-quality/ecoscore-packaging-packaging-data-missing">1341 products with ecoscore-packaging-packaging-data-missing</a>
</li>
</ul>
"""
    expected_panel = {
        "Quality": {
            "title": "Data-quality issues",
            "subtitle": "Data-quality issues related to Turkey",
            "source_url": "https://tr-en.openfoodfacts.org/data-quality",
            "elements": [
                {
                    "element_type": "text",
                    "text_element": "ok",
                }
            ],
        }
    }

    monkeypatch.setattr(requests, "get", mock_get_factory(quality_url, payload))
    panel = app.main.data_quality_kp(
        facet="country", value="Turkey", country="Hungary"
    )

    element = panel["Quality"]["elements"][0]
    element["text_element"] = tidy_html(element["text_element"])
    # Check the HTML on its own first: a mismatch is easier to read that way.
    assert element["text_element"] == tidy_html(expected_html)
    # Then swap it for a short marker to keep the structural comparison concise.
    element["text_element"] = "ok"
    assert panel == expected_panel


def test_data_quality_kp_with_all_three_values(monkeypatch):
    """A facet and value without a country are looked up on the ``world`` site."""
    quality_url = "https://world.openfoodfacts.org/brand/lidl/data-quality.json"
    payload = {
        "count": 173,
        "tags": [
            {
                "id": "en:ecoscore-origins-of-ingredients-origins-are-100-percent-unknown",
                "known": 0,
                "name": "ecoscore-origins-of-ingredients-origins-are-100-percent-unknown",
                "products": 6473,
                "url": "https://world.openfoodfacts.org/brand/lidl/data-quality/ecoscore-origins-of-ingredients-origins-are-100-percent-unknown",
            },
            {
                "id": "en:ecoscore-production-system-no-label",
                "known": 0,
                "name": "ecoscore-production-system-no-label",
                "products": 6467,
                "url": "https://world.openfoodfacts.org/brand/lidl/data-quality/ecoscore-production-system-no-label",
            },
            {
                "id": "en:no-packaging-data",
                "known": 0,
                "name": "no-packaging-data",
                "products": 5041,
                "url": "https://world.openfoodfacts.org/brand/lidl/data-quality/no-packaging-data",
            },
        ],
    }
    expected_html = """
<p>The total number of issues are 173,here couples of issues</p>
<ul>
<li>
<a href="https://world.openfoodfacts.org/brand/lidl/data-quality/ecoscore-origins-of-ingredients-origins-are-100-percent-unknown">6473 products with ecoscore-origins-of-ingredients-origins-are-100-percent-unknown</a>
</li>
<li>
<a href="https://world.openfoodfacts.org/brand/lidl/data-quality/ecoscore-production-system-no-label">6467 products with ecoscore-production-system-no-label</a>
</li>
<li>
<a href="https://world.openfoodfacts.org/brand/lidl/data-quality/no-packaging-data">5041 products with no-packaging-data</a>
</li>
</ul>
"""
    expected_panel = {
        "Quality": {
            "title": "Data-quality issues",
            "subtitle": "Data-quality issues related to brand lidl",
            "source_url": "https://world.openfoodfacts.org/brand/lidl/data-quality",
            "elements": [
                {
                    "element_type": "text",
                    "text_element": "ok",
                }
            ],
        }
    }

    monkeypatch.setattr(requests, "get", mock_get_factory(quality_url, payload))
    panel = app.main.data_quality_kp(facet="brand", value="lidl")

    element = panel["Quality"]["elements"][0]
    element["text_element"] = tidy_html(element["text_element"])
    # Check the HTML on its own first: a mismatch is easier to read that way.
    assert element["text_element"] == tidy_html(expected_html)
    # Then swap it for a short marker to keep the structural comparison concise.
    element["text_element"] = "ok"
    assert panel == expected_panel
58 changes: 10 additions & 48 deletions tests/test_main.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from curses import panel
from urllib import response
import json

from app.main import app, knowledge_panel
import app.main
from app.main import app
from fastapi.testclient import TestClient

from .test_utils import mock_get_factory

client = TestClient(app)


Expand All @@ -19,54 +19,16 @@ def test_hello():
def test_knowledge_panel():
    """The endpoint answers 200 with an empty panel list for the ``origin`` facet."""
    reply = client.get("/knowledge_panel?facet_name=origin")
    assert reply.status_code == 200
    assert reply.json()["knowledge_panels"] == []


def test_knowledge_panel_badendpoint():
    """An unknown route is answered with 404."""
    reply = client.get("/knowledge_panel_bad")
    assert reply.status_code == 404


def test_knowledge_panel_ctegory_with_value_and_country():
    """Category facet with a value and a country yields the hunger-game panel."""
    expected = {
        "knowledge_panels": [
            {
                "hunger-game": {
                    "elements": [
                        {
                            "element_type": "text",
                            "text_element": {
                                "html": "<p><a href='https://hunger.openfoodfacts.org/questions?country=belgium&type=category&value_tag=chocolate'>Answer robotoff questions about chocolate category</a></p>\n"
                            },
                        }
                    ]
                }
            }
        ]
    }
    actual = knowledge_panel(
        facet_name="category", facet_value="chocolate", country="belgium"
    )
    assert actual == expected


def test_knowledge_panel_ctegory_with_country():
    """Category facet with only a country yields the hunger-game panel."""
    expected = {
        "knowledge_panels": [
            {
                "hunger-game": {
                    "elements": [
                        {
                            "element_type": "text",
                            "text_element": {
                                "html": "<p><a href='https://hunger.openfoodfacts.org/questions?country=india&type=category'>Answer robotoff questions about category</a></p>\n"
                            },
                        }
                    ]
                }
            }
        ]
    }
    actual = knowledge_panel(facet_name="category", country="india")
    assert actual == expected


def test_knowledge_panel_with_allergen():
    """The ``allergen`` facet is expected to produce no panels."""
    expected = {"knowledge_panels": []}
    actual = knowledge_panel(facet_name="allergen")
    assert actual == expected
def test_knowledge_panel_with_facet(monkeypatch):
    """
    The endpoint returns the data-quality panel for a facet, value and country.

    The outgoing HTTP call is replaced by a canned payload, so the test does
    not depend on the live Open Food Facts API. It also asserts on the panel
    itself rather than on the truthiness of the response body, which is
    truthy even when no panel could be built.
    """
    expected_url = (
        "https://de-en.openfoodfacts.org/packaging/plastic-box/data-quality.json"
    )
    payload = {
        "count": 1,
        "tags": [
            {
                "id": "en:no-packaging-data",
                "known": 0,
                "name": "no-packaging-data",
                "products": 3,
                "url": "https://de-en.openfoodfacts.org/packaging/plastic-box/data-quality/no-packaging-data",
            },
        ],
    }
    # Patch by dotted name: this module does not import ``requests`` itself.
    monkeypatch.setattr("requests.get", mock_get_factory(expected_url, payload))

    response = client.get(
        "/knowledge_panel?facet_name=packaging&facet_value=plastic-box&country=Germany"
    )
    assert response.status_code == 200
    panels = response.json()["knowledge_panels"]
    assert any("Quality" in panel for panel in panels)
29 changes: 29 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from bs4 import BeautifulSoup


class MockResponse:
    """Minimal stand-in for the response object returned by ``requests.get``."""

    def __init__(self, json_content, status_code=200):
        # Payload handed back verbatim by ``json()``.
        self.json_content = json_content
        # Exposed so code under test can inspect the status like on a real response.
        self.status_code = status_code

    def json(self):
        """Return the canned JSON payload."""
        return self.json_content

    def raise_for_status(self):
        """Do nothing: the mocked request is always treated as successful."""
        return None


def mock_get_factory(target_url, json_content):
    """
    Generate a mock to patch ``requests.get`` with a JSON response.

    Args:
        target_url: the only URL the mock accepts; any other URL fails an
            assertion inside the mock.
        json_content: payload returned by the response's ``json()``.

    Returns:
        A callable taking the URL plus any extra positional or keyword
        arguments (``timeout=``, ``params=`` ...), which are accepted and
        ignored so that callers are free to pass them.
    """

    def mock_get(url, *args, **kwargs):
        assert url == target_url
        return MockResponse(json_content)

    return mock_get


def tidy_html(html):
    """
    Helper function that returns prettified HTML without surrounding whitespace
    """
    soup = BeautifulSoup(html, "html.parser")
    return soup.prettify().strip()