Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SDESK-7156] Allow FeedingServices to provide request kwargs for association downloading #2555

Merged
merged 8 commits into from
Apr 16, 2024
7 changes: 3 additions & 4 deletions superdesk/io/commands/update_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,12 +628,11 @@ def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=N
_ingest_cancel(item, feeding_service)

rend = item.get("renditions", {})
request_kwargs = feeding_service.get_request_kwargs()
if rend:
baseImageRend = rend.get("baseImage") or next(iter(rend.values()))
if baseImageRend and not baseImageRend.get("media"): # if there is media should be processed already
href = feeding_service.prepare_href(baseImageRend["href"], rend.get("mimetype"))
update_renditions(item, href, old_item, request_kwargs)
update_renditions(item, href, old_item, feeding_service=feeding_service)

# if the item has associated media
for key, assoc in item.get("associations", {}).items():
Expand All @@ -651,7 +650,7 @@ def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=N
if _is_new_version(assoc, ingested) and assoc.get("renditions"): # new version
logger.info("new assoc version - re-transfer renditions for %s", assoc_name)
try:
transfer_renditions(assoc["renditions"], request_kwargs)
transfer_renditions(assoc["renditions"], feeding_service=feeding_service)
except SuperdeskApiError:
logger.exception(
"failed to update associated item renditions",
Expand All @@ -667,7 +666,7 @@ def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=N
if assoc.get("renditions") and has_system_renditions(assoc): # all set, just download
logger.info("new association with system renditions - transfer %s", assoc_name)
try:
transfer_renditions(assoc["renditions"], request_kwargs)
transfer_renditions(assoc["renditions"], feeding_service=feeding_service)
except SuperdeskApiError:
logger.exception(
"failed to download renditions",
Expand Down
13 changes: 11 additions & 2 deletions superdesk/io/feeding_services/http_base_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
# AUTHORS and LICENSE files distributed with this source code, or
# at https://www.sourcefabric.org/superdesk/license

from typing import List, Dict, Optional, Union
from typing import List, Dict, Optional, Tuple, Any
from io import BytesIO
import traceback
import requests
from superdesk.errors import IngestApiError, SuperdeskIngestError
from superdesk.io.feeding_services import FeedingService
from superdesk.media.media_operations import download_file_from_url


class HTTPFeedingServiceBase(FeedingService):
Expand Down Expand Up @@ -111,6 +113,7 @@ class HTTPFeedingServiceBase(FeedingService):
def __init__(self):
super().__init__()
self.token = None
self.session = requests.Session()

@property
def auth_info(self):
Expand Down Expand Up @@ -190,7 +193,7 @@ def get_url(self, url=None, **kwargs):
request_kwargs.setdefault("timeout", self.HTTP_TIMEOUT)

try:
response = requests.get(url, **request_kwargs)
response = self.session.get(url, **request_kwargs)
except requests.exceptions.Timeout as exception:
raise IngestApiError.apiTimeoutError(exception, self.provider)
except requests.exceptions.ConnectionError as exception:
Expand All @@ -212,6 +215,12 @@ def get_url(self, url=None, **kwargs):

return response

def download_file(self, url: str, **kwargs: Any) -> Tuple[BytesIO, str, str]:
    """Download a file from ``url`` using this service's request settings.

    Starts from ``self.get_request_kwargs()``, overlays the caller's keyword
    arguments, falls back to ``self.HTTP_TIMEOUT`` when no ``timeout`` is set,
    and then delegates to ``download_file_from_url`` together with
    ``self.session``.

    :param url: location of the file to download
    :param kwargs: extra request keyword arguments; these override the
        values returned by ``get_request_kwargs()``
    :return: the result of ``download_file_from_url``, annotated as a
        ``(BytesIO, str, str)`` tuple
    """
    # NOTE(review): update()/setdefault() mutate the dict returned by
    # get_request_kwargs(); this assumes a fresh dict per call - confirm.
    request_kwargs = self.get_request_kwargs()
    # Caller-supplied values take precedence over the service defaults.
    request_kwargs.update(kwargs)
    request_kwargs.setdefault("timeout", self.HTTP_TIMEOUT)
    return download_file_from_url(url, request_kwargs, self.session)

def update(self, provider, update):
self.provider = provider
self.validate_config()
Expand Down
20 changes: 16 additions & 4 deletions superdesk/media/media_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# AUTHORS and LICENSE files distributed with this source code, or
# at https://www.sourcefabric.org/superdesk/license


from typing import Dict, Any, Optional, Tuple
import arrow
import magic
import base64
Expand All @@ -28,6 +28,7 @@
from superdesk.errors import SuperdeskApiError
from flask import current_app as app
from mimetypes import guess_extension
from superdesk import __version__ as superdesk_version

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -55,21 +56,32 @@ def fix_content_type(content_type, content):
return str(content_type)


def download_file_from_url(url, request_kwargs=None):
def download_file_from_url(
url: str, request_kwargs: Optional[Dict[str, Any]] = None, session: Optional[requests.Session] = None
) -> Tuple[BytesIO, str, str]:
"""Download file from given url.

In case url is relative it will prefix it with current host.

:param url: file url
:param request_kwargs: Additional keyword arguments to pass to requests.Session.request
:param session: requests.Session instance (one will be created if not supplied)
"""

if not request_kwargs:
request_kwargs = {}

request_kwargs.setdefault("timeout", (5, 25))
request_kwargs.setdefault("headers", {})
request_kwargs["headers"]["User-Agent"] = f"Superdesk-{superdesk_version}"

if session is None:
session = requests.Session()

try:
rv = requests.get(url, headers={"User-Agent": "Superdesk-1.0"}, timeout=(5, 25), **request_kwargs)
rv = session.get(url, **request_kwargs)
except requests.exceptions.MissingSchema: # any route will do here, we only need host
rv = requests.get(urljoin(url_for("static", filename="x", _external=True), url), timeout=15, **request_kwargs)
rv = session.get(urljoin(url_for("static", filename="x", _external=True), url), timeout=15, **request_kwargs)
if rv.status_code not in (200, 201):
raise SuperdeskApiError.internalError("Failed to retrieve file from URL: %s" % url)
content = BytesIO(rv.content)
Expand Down
14 changes: 10 additions & 4 deletions superdesk/media/renditions.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ def get_renditions_spec(without_internal_renditions=False, no_custom_crops=False
return rendition_spec


def update_renditions(item, href, old_item, request_kwargs=None):
def update_renditions(item, href, old_item, request_kwargs=None, feeding_service=None):
"""Update renditions for an item.

If the old_item has renditions uploaded in to media then the old rendition details are
Expand All @@ -367,7 +367,10 @@ def update_renditions(item, href, old_item, request_kwargs=None):
item["filemeta_json"] = old_item.get("filemeta_json")
return

content, filename, content_type = download_file_from_url(href, request_kwargs)
if feeding_service is not None and getattr(feeding_service, "download_file"):
content, filename, content_type = feeding_service.download_file(href, **request_kwargs or {})
else:
content, filename, content_type = download_file_from_url(href, request_kwargs)
file_type, ext = content_type.split("/")
metadata = process_file(content, file_type)
file_guid = app.media.put(content, filename=filename, content_type=content_type, metadata=metadata)
Expand All @@ -386,7 +389,7 @@ def update_renditions(item, href, old_item, request_kwargs=None):
raise


def transfer_renditions(renditions, request_kwargs=None):
def transfer_renditions(renditions, request_kwargs=None, feeding_service=None):
"""Transfer the passed renditions to localy held renditions

Download the renditions as passed and upload them to this instances storage
Expand All @@ -403,7 +406,10 @@ def transfer_renditions(renditions, request_kwargs=None):
rend["href"] = app.media.url_for_media(rend["media"], local.content_type)
continue

content, filename, content_type = download_file_from_url(rend.get("href"), request_kwargs)
if feeding_service is not None and getattr(feeding_service, "download_file"):
content, filename, content_type = feeding_service.download_file(rend.get("href"), **request_kwargs or {})
else:
content, filename, content_type = download_file_from_url(rend.get("href"), request_kwargs)
MarkLark86 marked this conversation as resolved.
Show resolved Hide resolved
file_type, ext = content_type.split("/")
metadata = process_file(content, file_type)
file_guid = app.media.put(content, filename=filename, content_type=content_type, metadata=metadata)
Expand Down