Skip to content

Commit

Permalink
Merge pull request #556 from marwoodandrew/sd-5402
Browse files: browse the repository at this point in the history
[SD-5402] Category mapping for PA ingest
  • Loading branch information
ioanpocol committed Sep 6, 2016
2 parents 6b47760 + 5f40bbf commit 7c77d1e
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 0 deletions.
1 change: 1 addition & 0 deletions superdesk/io/feed_parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,4 @@ class EmailFeedParser(FeedParser, metaclass=ABCMeta):
import superdesk.io.feed_parsers.afp_newsml_1_2 # NOQA
import superdesk.io.feed_parsers.scoop_newsml_2_0 # NOQA
import superdesk.io.feed_parsers.ap_anpa # NOQA
import superdesk.io.feed_parsers.pa_nitf # NOQA
40 changes: 40 additions & 0 deletions superdesk/io/feed_parsers/pa_nitf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env python
# -*- coding: utf-8; -*-
#
# This file is part of Superdesk.
#
# Copyright 2013, 2014, 2015 Sourcefabric z.u. and contributors.
#
# For the full copyright and license information, please see the
# AUTHORS and LICENSE files distributed with this source code, or
# at https://www.sourcefabric.org/superdesk/license

from superdesk.io.feed_parsers.nitf import NITFFeedParser
from superdesk.io import register_feed_parser


class PAFeedParser(NITFFeedParser):
    """NITF feed parser variant for Press Association (PA) content.

    It supplies a custom ``anpa_category`` mapping so that the value of the
    PA ``category`` meta tag is translated into an anpa category qcode.
    """

    NAME = 'pa_nitf'

    def __init__(self):
        # MAPPING is assigned first and the parent initialiser runs afterwards;
        # presumably the parent reads MAPPING while initialising - keep this order.
        self.MAPPING = {
            'anpa_category': {
                'xpath': "head/meta/[@name='category']",
                'filter': self._category_mapping,
            },
        }
        super().__init__()

    def _category_mapping(self, elem):
        """Translate the PA category into a best-guess anpa category.

        Only the first character of the ``content`` attribute is considered,
        case-insensitively: ``S``, ``R`` and ``F`` give ``S``; ``Z`` gives
        ``V``; anything else, including a missing attribute, gives ``I``.

        :param elem: element whose ``content`` attribute carries the category
        :return: single-item list holding the anpa category qcode
        """
        content = elem.get('content')
        qcode = 'I'
        if content is not None:
            first_letter = content[:1].upper()
            if first_letter == 'Z':
                qcode = 'V'
            elif first_letter in {'S', 'R', 'F'}:
                qcode = 'S'
        return [{'qcode': qcode}]

# Register a PAFeedParser instance under the parser's NAME ('pa_nitf').
register_feed_parser(PAFeedParser.NAME, PAFeedParser())
42 changes: 42 additions & 0 deletions tests/io/feed_parsers/pa_nitf_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env python
# -*- coding: utf-8; -*-
#
# This file is part of Superdesk.
#
# Copyright 2013, 2014, 2015 Sourcefabric z.u. and contributors.
#
# For the full copyright and license information, please see the
# AUTHORS and LICENSE files distributed with this source code, or
# at https://www.sourcefabric.org/superdesk/license

import os
from test_factory import SuperdeskTestCase
from superdesk.io.feed_parsers.pa_nitf import PAFeedParser
from superdesk.etree import etree


class PANITFFileTestCase(SuperdeskTestCase):
    """Base case that parses a PA NITF fixture before each test.

    Subclasses set ``filename`` to a file inside the ``../fixtures`` directory
    (relative to this module); the raw text ends up in ``self.nitf`` and the
    parsed result in ``self.item``.
    """

    vocab = [{'_id': 'genre', 'items': [{'name': 'Current'}]}]

    def setUp(self):
        """Insert the vocabulary, then read and parse the fixture file."""
        super().setUp()
        with self.app.app_context():
            self.app.data.insert('vocabularies', self.vocab)

        here = os.path.dirname(os.path.realpath(__file__))
        fixture_path = os.path.normpath(os.path.join(here, '../fixtures', self.filename))
        source = {'name': 'Test'}
        with open(fixture_path) as fixture_file:
            self.nitf = fixture_file.read()
            parser = PAFeedParser()
            tree = etree.fromstring(self.nitf)
            self.item = parser.parse(tree, source)


class PAFileWithNoSubjects(PANITFFileTestCase):
    """Checks the item parsed from the ``pa2.xml`` fixture."""

    filename = 'pa2.xml'

    def test_headline(self):
        """The parsed item carries the expected headline."""
        headline = self.item.get('headline')
        self.assertEqual(headline, '1 SOCCER INT-Teams')

    def test_anpa_category(self):
        """The parsed item is assigned the 'S' anpa category."""
        categories = self.item.get('anpa_category')
        self.assertEqual(categories, [{'qcode': 'S'}])

0 comments on commit 7c77d1e

Please sign in to comment.