-
Notifications
You must be signed in to change notification settings - Fork 81
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #556 from marwoodandrew/sd-5402
[SD-5402] Category mapping for PA ingest
- Loading branch information
Showing
3 changed files
with
83 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8; -*- | ||
# | ||
# This file is part of Superdesk. | ||
# | ||
# Copyright 2013, 2014, 2015 Sourcefabric z.u. and contributors. | ||
# | ||
# For the full copyright and license information, please see the | ||
# AUTHORS and LICENSE files distributed with this source code, or | ||
# at https://www.sourcefabric.org/superdesk/license | ||
|
||
from superdesk.io.feed_parsers.nitf import NITFFeedParser | ||
from superdesk.io import register_feed_parser | ||
|
||
|
||
class PAFeedParser(NITFFeedParser):
    """
    NITF parser variant for Press Association (PA) content.

    On top of the inherited NITF handling it installs an ``anpa_category``
    mapping that turns the PA ``category`` meta tag into an ANPA category.
    """

    NAME = 'pa_nitf'

    def _category_mapping(self, elem):
        """
        Translate the PA category carried by ``elem`` into an ANPA category list.

        Only the first character of the ``content`` attribute is inspected,
        case-insensitively: ``S``, ``R`` and ``F`` give ``S``; ``Z`` gives ``V``;
        anything else, including a missing attribute, gives ``I``.

        :param elem: element exposing the category through ``get('content')``
        :return: single-item list holding the qcode dict
        """
        content = elem.get('content')
        if content is None:
            return [{'qcode': 'I'}]

        initial = content[:1].upper()
        if initial == 'Z':
            qcode = 'V'
        elif initial in {'S', 'R', 'F'}:
            qcode = 'S'
        else:
            qcode = 'I'
        return [{'qcode': qcode}]

    def __init__(self):
        # MAPPING is assigned before the parent initialiser runs, as in the original.
        category_rule = {'xpath': "head/meta/[@name='category']", 'filter': self._category_mapping}
        self.MAPPING = {'anpa_category': category_rule}
        super().__init__()
|
||
# Import-time side effect: build one PAFeedParser and register it under PAFeedParser.NAME.
register_feed_parser(PAFeedParser.NAME, PAFeedParser()) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8; -*- | ||
# | ||
# This file is part of Superdesk. | ||
# | ||
# Copyright 2013, 2014, 2015 Sourcefabric z.u. and contributors. | ||
# | ||
# For the full copyright and license information, please see the | ||
# AUTHORS and LICENSE files distributed with this source code, or | ||
# at https://www.sourcefabric.org/superdesk/license | ||
|
||
import os | ||
from test_factory import SuperdeskTestCase | ||
from superdesk.io.feed_parsers.pa_nitf import PAFeedParser | ||
from superdesk.etree import etree | ||
|
||
|
||
class PANITFFileTestCase(SuperdeskTestCase):
    """
    Base case that parses a PA NITF fixture with ``PAFeedParser``.

    Subclasses supply ``filename``, resolved against the ``../fixtures``
    directory relative to this test module.
    """

    vocab = [{'_id': 'genre', 'items': [{'name': 'Current'}]}]

    def setUp(self):
        super().setUp()

        # Seed the vocabularies collection before anything is parsed.
        with self.app.app_context():
            self.app.data.insert('vocabularies', self.vocab)

        here = os.path.dirname(os.path.realpath(__file__))
        fixture_path = os.path.normpath(os.path.join(here, '../fixtures', self.filename))

        with open(fixture_path) as fixture_file:
            # Keep the raw text on the instance alongside the parsed item.
            self.nitf = fixture_file.read()
            self.item = PAFeedParser().parse(etree.fromstring(self.nitf), {'name': 'Test'})
|
||
|
||
class PAFileWithNoSubjects(PANITFFileTestCase):
    """Assertions on the item parsed from the ``pa2.xml`` fixture."""

    filename = 'pa2.xml'

    def test_headline(self):
        # The headline is expected to come through from the fixture unchanged.
        expected_headline = '1 SOCCER INT-Teams'
        self.assertEqual(self.item.get('headline'), expected_headline)

    def test_anpa_category(self):
        # The fixture's PA category is expected to map to the 'S' qcode.
        expected_category = [{'qcode': 'S'}]
        self.assertEqual(self.item.get('anpa_category'), expected_category)