From 427785d2fa1bef60b9bedb784194c02c5e0183a6 Mon Sep 17 00:00:00 2001 From: Yohanna Lisnichuk Date: Wed, 27 May 2020 16:24:14 -0400 Subject: [PATCH 1/3] Add sentry logger as extension Signed-off-by: Yohanna Lisnichuk --- kingfisher_scrapy/extensions.py | 20 ++++++++++++++++++++ kingfisher_scrapy/settings.py | 4 ++++ requirements.in | 3 ++- requirements.txt | 10 ++++------ requirements_dev.txt | 3 ++- 5 files changed, 32 insertions(+), 8 deletions(-) diff --git a/kingfisher_scrapy/extensions.py b/kingfisher_scrapy/extensions.py index 10d77e243..8583a9818 100644 --- a/kingfisher_scrapy/extensions.py +++ b/kingfisher_scrapy/extensions.py @@ -1,10 +1,13 @@ # https://docs.scrapy.org/en/latest/topics/extensions.html#writing-your-own-extension import json +import logging import os +import sentry_sdk from scrapy import signals from scrapy.exceptions import NotConfigured +from sentry_sdk.integrations.logging import LoggingIntegration from kingfisher_scrapy.items import File, FileError, FileItem from kingfisher_scrapy.kingfisher_process import Client @@ -157,3 +160,20 @@ def _request(self, item, spider, method, *args, name='API'): if not response.ok: spider.logger.warning( 'Failed to post [{}]. {} status code: {}'.format(item['url'], name, response.status_code)) + + +# https://stackoverflow.com/questions/25262765/handle-all-exception-in-scrapy-with-sentry +class SentryLogging: + """ + Send exceptions and errors to Sentry. 
+ """ + + @classmethod + def from_crawler(cls, crawler): + sentry_dsn = crawler.settings.get('SENTRY_DSN', None) + if sentry_dsn is None: + raise NotConfigured + extension = cls() + # by default only the errors are sent to Sentry: https://docs.sentry.io/platforms/python/logging/ + sentry_sdk.init(sentry_dsn) + return extension diff --git a/kingfisher_scrapy/settings.py b/kingfisher_scrapy/settings.py index a37532416..577bb8dbe 100644 --- a/kingfisher_scrapy/settings.py +++ b/kingfisher_scrapy/settings.py @@ -71,6 +71,8 @@ # request is sent to Kingfisher Process. 'kingfisher_scrapy.extensions.KingfisherFilesStore': 100, 'kingfisher_scrapy.extensions.KingfisherProcessAPI': 500, + # before any other extension + 'kingfisher_scrapy.extensions.SentryLogging': -1, } # Configure item pipelines @@ -79,6 +81,8 @@ # 'kingfisher_scrapy.pipelines.KingfisherScrapyPipeline': 300, #} +# To send the logs to sentry +SENTRY_DSN = os.getenv('SENTRY_DSN') # To send items to Kingfishet Process, set this to, for example, "http://kingfisher.example.com" (no trailing slash). KINGFISHER_API_URI = os.getenv('KINGFISHER_API_URI') # Set this to the same value as Kingfisher Process' `API_KEYS` setting. 
diff --git a/requirements.in b/requirements.in index 34c50ac3d..7232d9e51 100644 --- a/requirements.in +++ b/requirements.in @@ -5,4 +5,5 @@ rarfile requests Scrapy scrapyd-client -ijson>=3 \ No newline at end of file +ijson>=3 +sentry-sdk \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 16b7f6ad4..0a6b82762 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,10 +6,9 @@ # attrs==19.3.0 # via automat, service-identity, twisted automat==0.8.0 # via twisted -certifi==2019.11.28 # via requests +certifi==2019.11.28 # via requests, sentry-sdk cffi==1.13.2 # via cryptography chardet==3.0.4 # via requests -click==7.1.2 # via pip-tools constantly==15.1.0 # via twisted cryptography==2.8 # via pyopenssl, scrapy, service-identity cssselect==1.1.0 # via parsel, scrapy @@ -19,7 +18,6 @@ ijson==3.0.3 incremental==17.5.0 # via twisted lxml==4.4.2 # via parsel, scrapy parsel==1.5.2 # via scrapy -pip-tools==5.1.0 protego==0.1.16 # via scrapy pyasn1-modules==0.2.7 # via service-identity pyasn1==0.4.8 # via pyasn1-modules, service-identity @@ -32,13 +30,13 @@ rarfile==3.1 requests==2.22.0 scrapy==1.8.0 scrapyd-client==1.1.0 +sentry-sdk==0.14.4 service-identity==18.1.0 # via scrapy -six==1.13.0 # via automat, cryptography, parsel, pip-tools, protego, pyhamcrest, pyopenssl, scrapy, scrapyd-client, w3lib +six==1.13.0 # via automat, cryptography, parsel, protego, pyhamcrest, pyopenssl, scrapy, scrapyd-client, w3lib twisted==20.3.0 # via scrapy -urllib3==1.25.7 # via requests +urllib3==1.25.7 # via requests, sentry-sdk w3lib==1.21.0 # via parsel, scrapy zope.interface==4.7.1 # via scrapy, twisted # The following packages are considered to be unsafe in a requirements file: -# pip # setuptools diff --git a/requirements_dev.txt b/requirements_dev.txt index 1be8d3f0c..644d918fd 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -9,7 +9,7 @@ automat==0.8.0 certifi==2019.11.28 cffi==1.13.2 chardet==3.0.4 -click==7.1.2 +click==7.1.2 # via 
pip-tools constantly==15.1.0 coverage==5.0.3 # via coveralls, pytest-cov coveralls==2.0.0 @@ -49,6 +49,7 @@ rarfile==3.1 requests==2.22.0 scrapy==1.8.0 scrapyd-client==1.1.0 +sentry-sdk==0.14.4 service-identity==18.1.0 six==1.13.0 twisted==20.3.0 From 65df39500f5aba4d09d74d994cbddccd59c5503a Mon Sep 17 00:00:00 2001 From: Yohanna Lisnichuk Date: Wed, 27 May 2020 16:26:58 -0400 Subject: [PATCH 2/3] Remove unused imports from extensions Signed-off-by: Yohanna Lisnichuk --- kingfisher_scrapy/extensions.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/kingfisher_scrapy/extensions.py b/kingfisher_scrapy/extensions.py index 8583a9818..2417890ff 100644 --- a/kingfisher_scrapy/extensions.py +++ b/kingfisher_scrapy/extensions.py @@ -1,13 +1,11 @@ # https://docs.scrapy.org/en/latest/topics/extensions.html#writing-your-own-extension import json -import logging import os import sentry_sdk from scrapy import signals from scrapy.exceptions import NotConfigured -from sentry_sdk.integrations.logging import LoggingIntegration from kingfisher_scrapy.items import File, FileError, FileItem from kingfisher_scrapy.kingfisher_process import Client From 9975b29a1de2938d99b18b93548e5248756f5dd6 Mon Sep 17 00:00:00 2001 From: Yohanna Lisnichuk Date: Wed, 27 May 2020 16:33:13 -0400 Subject: [PATCH 3/3] Update test_extension indentation Signed-off-by: Yohanna Lisnichuk --- tests/test_extensions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 216dff325..332001430 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -6,7 +6,7 @@ import pytest from scrapy.exceptions import NotConfigured -from kingfisher_scrapy.extensions import KingfisherProcessAPI, KingfisherFilesStore +from kingfisher_scrapy.extensions import KingfisherFilesStore, KingfisherProcessAPI from kingfisher_scrapy.items import FileError from tests import spider_with_crawler