Skip to content

Commit

Permalink
change the NINJS ingest parser to use the translations of the respect…
Browse files Browse the repository at this point in the history
…ive language from the respective CV [SDESK-7169] (#2525)

* change the NINJS ingest parser to use the translations of the respective language from the respective CV [SDESK-7169]

* update types

* update logic for anpa_category and update tests

* fix mypy

* update types

* address comment

* update func to retrieve translations attribute

* optimize code

* remove unused imports
  • Loading branch information
devketanpro committed Feb 21, 2024
1 parent 8b4d2b1 commit caa7480
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 8 deletions.
23 changes: 18 additions & 5 deletions superdesk/io/feed_parsers/ninjs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from superdesk.io.feed_parsers import FeedParser
from superdesk.utc import utc
from superdesk.metadata.utils import generate_tag_from_url
from typing import Optional, Dict, List, Any
from superdesk import get_resource_service

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -97,7 +99,7 @@ def _transform_from_ninjs(self, ninjs):
item["genre"] = self._format_qcodes(ninjs["genre"])

if ninjs.get("service"):
item["anpa_category"] = self._format_qcodes(ninjs["service"])
item["anpa_category"] = self._format_qcodes(ninjs["service"], "categories")

if ninjs.get("subject"):
item["subject"] = self._format_qcodes(ninjs["subject"])
Expand Down Expand Up @@ -182,12 +184,23 @@ def parse_renditions(self, renditions):
rend[rendition_name] = parsed_rendition
return rend

def _format_qcodes(self, items: List[Dict[str, Any]], cv_name: Optional[str] = None) -> List[Dict[str, Any]]:
    """Convert ninjs ``code``/``name`` entries into ``qcode``/``name`` dicts.

    When ``cv_name`` is given, the vocabulary with that ``_id`` is looked up
    and any entry whose ``qcode`` matches the incoming ``code`` is used as
    the base of the result, so values stored in the CV (for example its
    ``translations``) take precedence over the ingested ones.

    :param items: ninjs entries; each may carry ``code``, ``name``,
        ``scheme`` and ``translations``.
    :param cv_name: ``_id`` of the vocabulary to match against, or ``None``
        to skip the vocabulary lookup entirely.
    :return: one dict per input entry, in the same order.
    """
    cv_items: Dict[str, Dict[str, Any]] = {}
    if cv_name:
        # Only query the vocabularies service when there is a CV to match against.
        cv = get_resource_service("vocabularies").find_one(req=None, _id=cv_name) or {}
        # Some vocabularies hold items without a qcode; those can never match.
        cv_items = {entry["qcode"]: entry for entry in cv.get("items") or [] if entry.get("qcode")}

    subjects = []
    for item in items or []:
        cv_item = cv_items.get(item.get("code"))
        if cv_item:
            # Copy so that filling in missing keys below never alters the CV entry
            # and repeated codes do not share one dict in the output.
            subject = dict(cv_item)
        else:
            subject = {"name": item.get("name"), "qcode": item.get("code")}

        # Fall back to the ingested values only where the result has none.
        for key in ("translations", "scheme"):
            if not subject.get(key) and item.get(key):
                subject[key] = item[key]

        subjects.append(subject)

    return subjects
Expand Down
21 changes: 20 additions & 1 deletion tests/io/feed_parsers/ninjs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,19 @@


class NINJSTestCase(TestCase):
vocab = [{"_id": "genre", "items": [{"name": "Current"}]}]
vocab = [
{"_id": "genre", "items": [{"name": "Current"}]},
{
"_id": "categories",
"items": [
{
"name": "Advisory",
"qcode": "m",
"translations": {"name": {"en": "Advisory", "fr": "Avis"}},
},
],
},
]

def setUp(self):
with self.app.app_context():
Expand Down Expand Up @@ -46,6 +58,13 @@ def test_headline(self):
self.assertEqual(self.items[0]["original_source"], "AAP")
self.assertEqual("2017-08-24T04:38:34+00:00", self.items[0]["versioncreated"].isoformat())

def test_translated_value(self):
# Checks the first entry of self.items (presumably the parsed fixture items; populated outside this view).
# The headline must be the literal string "headline".
self.assertEqual(self.items[0].get("headline"), "headline")
# anpa_category must equal the "categories" vocabulary entry with qcode "m",
# including its "translations" mapping, as declared in the class-level vocab.
self.assertEqual(
self.items[0].get("anpa_category"),
[{"name": "Advisory", "qcode": "m", "translations": {"name": {"en": "Advisory", "fr": "Avis"}}}],
)


class AssociatedTestCase(NINJSTestCase):
filename = "ninjs2.json"
Expand Down
5 changes: 3 additions & 2 deletions tests/io/fixtures/ninjs1.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,11 @@
"firstcreated": "2017-08-24T04:37:19+0000",
"description_html": "<p>abstract</p>",
"slugline": "slugline",
"language": "fr",
"service": [
{
"code": "i",
"name": "International News"
"name": "Advisory",
"code": "m"
}
],
"usageterms": "Usage",
Expand Down

0 comments on commit caa7480

Please sign in to comment.