Skip to content

Commit

Permalink
doc(ingest): add initial ingest docs
Browse files Browse the repository at this point in the history
  • Loading branch information
petrjasek committed Oct 5, 2016
1 parent 28ae1a4 commit b2d1639
Show file tree
Hide file tree
Showing 8 changed files with 134 additions and 35 deletions.
7 changes: 0 additions & 7 deletions docs/apps.publish.formatters.newsml_g2_formatter.rst

This file was deleted.

1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Contents:
.. toctree::
:maxdepth: 3

ingest
publish
cache

Expand Down
86 changes: 86 additions & 0 deletions docs/ingest.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
Ingest
======

.. module:: superdesk.io

With ingest you can import content into Superdesk. It supports multiple
formats and ways of delivery.

Ingest runs as a Celery task; an update is triggered every 30 seconds.

.. autofunction:: update_ingest

It iterates over all providers and checks that each provider is not closed,
then uses its ``last_updated`` time and schedule to decide whether the provider
should be updated now or later. If now, it starts a separate Celery task for
each provider, so multiple updates can run in parallel.

.. autofunction:: update_provider

Once a provider is updated, its ``last_updated`` time is updated and the
provider is skipped for some time, according to its ``schedule``.

Ingest Provider
---------------

An ingest provider specifies the configuration for a single ingest channel.

.. autoclass:: IngestProviderResource

Feeding Services
----------------

Handle transport protocols when ingesting.

.. module:: superdesk.io.feeding_services

.. autoclass:: EmailFeedingService

.. autoclass:: FileFeedingService

.. autoclass:: FTPFeedingService

.. autoclass:: HTTPFeedingService

.. autoclass:: RSSFeedingService

Add new Service
^^^^^^^^^^^^^^^

.. autofunction:: superdesk.io.register_feeding_service

Feed Parsers
------------

Parse items from services.

.. module:: superdesk.io.feed_parsers

.. autoclass:: ANPAFeedParser

.. autoclass:: IPTC7901FeedParser

.. autoclass:: NewsMLOneFeedParser

.. autoclass:: NewsMLTwoFeedParser

.. autoclass:: NITFFeedParser

.. autoclass:: EMailRFC822FeedParser

.. autoclass:: WENNFeedParser

.. autoclass:: DPAIPTC7901FeedParser

.. autoclass:: AFPNewsMLOneFeedParser

.. autoclass:: ScoopNewsMLTwoFeedParser

.. autoclass:: AP_ANPAFeedParser

.. autoclass:: PAFeedParser

Add new Parser
^^^^^^^^^^^^^^

.. autofunction:: superdesk.io.register_feed_parser
5 changes: 3 additions & 2 deletions superdesk/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@
from superdesk.io.register import feeding_service_errors, publish_errors # noqa

from superdesk.io.commands.add_provider import AddProvider # noqa
from superdesk.io.commands.update_ingest import UpdateIngest
from superdesk.io.commands.update_ingest import UpdateIngest, update_provider # noqa
from superdesk.io.commands.remove_expired_content import RemoveExpiredContent
from superdesk.io.ingest_provider_model import IngestProviderResource, IngestProviderService


logger = logging.getLogger(__name__)


def init_app(app):
from .ingest_provider_model import IngestProviderResource, IngestProviderService
endpoint_name = 'ingest_providers'
service = IngestProviderService(endpoint_name, backend=superdesk.get_backend())
IngestProviderResource(endpoint_name, app=app, service=service)
Expand Down Expand Up @@ -103,6 +103,7 @@ def register_feed_parser(parser_name, parser_class):

@celery.task(soft_time_limit=15)
def update_ingest():
"""Check ingest providers and trigger an update when appropriate."""
UpdateIngest().run()


Expand Down
11 changes: 3 additions & 8 deletions superdesk/io/commands/update_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,18 +196,13 @@ def run(self, provider_name=None):
update_provider.apply_async(expires=get_task_ttl(provider), kwargs=kwargs)


@celery.task(soft_time_limit=1800, bind=True)
def update_provider(self, provider, rule_set=None, routing_scheme=None):
@celery.task(soft_time_limit=1800)
def update_provider(provider, rule_set=None, routing_scheme=None):
"""Fetch items from ingest provider, ingest them into Superdesk and update the provider.
:param self:
:type self:
:param provider: Ingest Provider Details
:type provider: dict :py:class:`superdesk.io.ingest_provider_model.IngestProviderResource`
:param provider: Ingest Provider data
:param rule_set: Translation Rule Set if one is associated with Ingest Provider.
:type rule_set: dict :py:class:`apps.rules.rule_sets.RuleSetsResource`
:param routing_scheme: Routing Scheme if one is associated with Ingest Provider.
:type routing_scheme: dict :py:class:`apps.rules.routing_rules.RoutingRuleSchemeResource`
"""
lock_name = get_lock_id('ingest', provider['name'], provider[superdesk.config.ID_FIELD])

Expand Down
24 changes: 12 additions & 12 deletions superdesk/io/feed_parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,15 +134,15 @@ class EmailFeedParser(FeedParser, metaclass=ABCMeta):


# must be imported for registration
import superdesk.io.feed_parsers.anpa # NOQA
import superdesk.io.feed_parsers.iptc7901 # NOQA
import superdesk.io.feed_parsers.newsml_1_2 # NOQA
import superdesk.io.feed_parsers.newsml_2_0 # NOQA
import superdesk.io.feed_parsers.nitf # NOQA
import superdesk.io.feed_parsers.rfc822 # NOQA
import superdesk.io.feed_parsers.wenn_parser # NOQA
import superdesk.io.feed_parsers.dpa_iptc7901 # NOQA
import superdesk.io.feed_parsers.afp_newsml_1_2 # NOQA
import superdesk.io.feed_parsers.scoop_newsml_2_0 # NOQA
import superdesk.io.feed_parsers.ap_anpa # NOQA
import superdesk.io.feed_parsers.pa_nitf # NOQA
from superdesk.io.feed_parsers.anpa import ANPAFeedParser # NOQA
from superdesk.io.feed_parsers.iptc7901 import IPTC7901FeedParser # NOQA
from superdesk.io.feed_parsers.newsml_1_2 import NewsMLOneFeedParser # NOQA
from superdesk.io.feed_parsers.newsml_2_0 import NewsMLTwoFeedParser # NOQA
from superdesk.io.feed_parsers.nitf import NITFFeedParser # NOQA
from superdesk.io.feed_parsers.rfc822 import EMailRFC822FeedParser # NOQA
from superdesk.io.feed_parsers.wenn_parser import WENNFeedParser # NOQA
from superdesk.io.feed_parsers.dpa_iptc7901 import DPAIPTC7901FeedParser # NOQA
from superdesk.io.feed_parsers.afp_newsml_1_2 import AFPNewsMLOneFeedParser # NOQA
from superdesk.io.feed_parsers.scoop_newsml_2_0 import ScoopNewsMLTwoFeedParser # NOQA
from superdesk.io.feed_parsers.ap_anpa import AP_ANPAFeedParser # NOQA
from superdesk.io.feed_parsers.pa_nitf import PAFeedParser # NOQA
10 changes: 5 additions & 5 deletions superdesk/io/feeding_services/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ def get_feed_parser(self, provider, article=None):


# must be imported for registration
import superdesk.io.feeding_services.email # NOQA
import superdesk.io.feeding_services.file_service # NOQA
import superdesk.io.feeding_services.ftp # NOQA
import superdesk.io.feeding_services.http_service # NOQA
import superdesk.io.feeding_services.rss # NOQA
from superdesk.io.feeding_services.email import EmailFeedingService # NOQA
from superdesk.io.feeding_services.file_service import FileFeedingService # NOQA
from superdesk.io.feeding_services.ftp import FTPFeedingService # NOQA
from superdesk.io.feeding_services.http_service import HTTPFeedingService # NOQA
from superdesk.io.feeding_services.rss import RSSFeedingService # NOQA
25 changes: 24 additions & 1 deletion superdesk/io/ingest_provider_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,29 @@


class IngestProviderResource(Resource):
"""Ingest provider model
:param name: provider name
:param source: populates item source field
:param feeding_service: feeding service name
:param feed_parser: feed parser name
:param content_types: list of content types of items to ingest from provider
:param allow_remove_ingested: allow deleting of items from ingest
:param content_expiry: ttl for ingested items in minutes
:param config: provider specific config
:param ingested_count: number of items ingested so far
:param tokens: auth tokens used by provider
:param is_closed: provider closed status
:param update_schedule: update schedule, will run every x hours x minutes x seconds
:param idle_time: usual idle time for provider, if there is no item after that it will warn
:param last_updated: last update timestamp
:param rule_set: rule sets used when ingesting
:param routing_scheme: routing scheme used when ingesting
:param notifications: set when notification should be sent for this provider
:param last_closed: info when and by whom provider was closed last time
:param last_opened: info when and by whom provider was opened last time
:param critical_errors: error codes which are considered critical and should close provider
"""

def __init__(self, endpoint_name, app, service, endpoint_schema=None):
self.schema = {
Expand Down Expand Up @@ -98,6 +121,7 @@ def __init__(self, endpoint_name, app, service, endpoint_schema=None):
'last_updated': {'type': 'datetime'},
'last_item_update': {'type': 'datetime'},
'rule_set': Resource.rel('rule_sets', nullable=True),
'routing_scheme': Resource.rel('routing_schemes', nullable=True),
'notifications': {
'type': 'dict',
'schema': {
Expand All @@ -107,7 +131,6 @@ def __init__(self, endpoint_name, app, service, endpoint_schema=None):
'on_error': {'type': 'boolean', 'default': True}
}
},
'routing_scheme': Resource.rel('routing_schemes', nullable=True),
'last_closed': {
'type': 'dict',
'schema': {
Expand Down

0 comments on commit b2d1639

Please sign in to comment.