
Commit

Merge pull request #718 from open-contracting/remove-kingfisher-prefix
chore: Remove Kingfisher prefix
yolile committed May 21, 2021
2 parents c7343ba + 3fc06b0 commit 4265ea3
Showing 10 changed files with 37 additions and 37 deletions.
2 changes: 1 addition & 1 deletion kingfisher_scrapy/commands/crawlall.py
@@ -40,7 +40,7 @@ def run(self, args, opts):
         if opts.dry_run:
             kwargs['sample'] = 1
         else:
-            extensions['kingfisher_scrapy.extensions.KingfisherFilesStore'] = 100
+            extensions['kingfisher_scrapy.extensions.FilesStore'] = 100
 
         if opts.sample:
             kwargs['sample'] = opts.sample
2 changes: 1 addition & 1 deletion kingfisher_scrapy/commands/pluck.py
@@ -36,7 +36,7 @@ def run(self, args, opts):
         # Disable Telnet extensions.
         self.settings.set('EXTENSIONS', {
             'scrapy.extensions.telnet.TelnetConsole': None,
-            'kingfisher_scrapy.extensions.KingfisherPluck': 1,
+            'kingfisher_scrapy.extensions.Pluck': 1,
         })
         if opts.max_bytes:
             self.settings.set('KINGFISHER_PLUCK_MAX_BYTES', opts.max_bytes)
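As context for the hunk above: the pluck command swaps the project's full extension list for the renamed Pluck extension alone. A crawl configured the same way by hand might look like this sketch; the CrawlerProcess usage, the byte cap, and the spider name are illustrative assumptions, not part of this commit:

    from scrapy.crawler import CrawlerProcess
    from scrapy.utils.project import get_project_settings

    # Sketch: mirror what run() does above: disable the Telnet console,
    # enable only the Pluck extension, and cap downloaded bytes.
    settings = get_project_settings()  # requires running inside the project
    settings.set('EXTENSIONS', {
        'scrapy.extensions.telnet.TelnetConsole': None,
        'kingfisher_scrapy.extensions.Pluck': 1,
    })
    settings.set('KINGFISHER_PLUCK_MAX_BYTES', 10000)  # hypothetical cap

    process = CrawlerProcess(settings)
    process.crawl('example_spider')  # spider name is made up
    process.start()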
6 changes: 3 additions & 3 deletions kingfisher_scrapy/extensions.py
@@ -15,7 +15,7 @@
 
 
 # https://docs.scrapy.org/en/latest/topics/extensions.html#writing-your-own-extension
-class KingfisherPluck:
+class Pluck:
     def __init__(self, directory, max_bytes):
         self.directory = directory
         self.max_bytes = max_bytes
@@ -75,7 +75,7 @@ def _write(self, spider, value):
             f.write(f'{value},{spider.name}\n')
 
 
-class KingfisherFilesStore:
+class FilesStore:
     def __init__(self, directory):
         self.directory = directory
 
@@ -133,7 +133,7 @@ def _write_file(self, path, data):
             json.dump(data, f, default=util.default)
 
 
-class KingfisherItemCount:
+class ItemCount:
     def __init__(self, stats):
         self.stats = stats
 
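The hunks above keep only each renamed class's constructor; the from_crawler wiring is collapsed. As a reminder of the pattern described by the Scrapy documentation linked above, a minimal extension looks roughly like the following generic sketch (not this project's actual code, though the NotConfigured message matches the FilesStore test further down):

    from scrapy import signals
    from scrapy.exceptions import NotConfigured


    class ExampleExtension:
        # Generic sketch of the extension pattern, not kingfisher-scrapy's code.
        def __init__(self, directory):
            self.directory = directory

        @classmethod
        def from_crawler(cls, crawler):
            directory = crawler.settings['FILES_STORE']
            if not directory:
                # An extension opts out by raising NotConfigured.
                raise NotConfigured('FILES_STORE is not set.')
            extension = cls(directory)
            # Subscribe the handler to the item_scraped signal.
            crawler.signals.connect(extension.item_scraped, signal=signals.item_scraped)
            return extension

        def item_scraped(self, item, spider):
            ...  # handle each scraped item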
12 changes: 6 additions & 6 deletions kingfisher_scrapy/items.py
@@ -3,34 +3,34 @@
 import scrapy
 
 
-class KingfisherItem(scrapy.Item):
+class Item(scrapy.Item):
     file_name = scrapy.Field()
     url = scrapy.Field()
     validate = True
 
 
-class File(KingfisherItem):
+class File(Item):
     data = scrapy.Field()
     data_type = scrapy.Field()
     encoding = scrapy.Field()
 
-    # Added by the KingfisherFilesStore extension, for the KingfisherProcessAPI extension to read the file.
+    # Added by the FilesStore extension, for the KingfisherProcessAPI extension to read the file.
     path = scrapy.Field()
     files_store = scrapy.Field()
 
 
-class FileItem(KingfisherItem):
+class FileItem(Item):
     number = scrapy.Field()
     data = scrapy.Field()
     data_type = scrapy.Field()
     encoding = scrapy.Field()
 
-    # Added by the KingfisherFilesStore extension, for the KingfisherProcessAPI extension to read the file.
+    # Added by the FilesStore extension, for the KingfisherProcessAPI extension to read the file.
     path = scrapy.Field()
     files_store = scrapy.Field()
 
 
-class FileError(KingfisherItem):
+class FileError(Item):
     errors = scrapy.Field()
 
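After the rename, the hierarchy reads Item, with File, FileItem and FileError as subclasses, and validate = True is inherited by all of them (a validation step elsewhere in the project consumes it; that code is not part of this diff). Constructing the renamed items, with field values borrowed from the tests further down, might look like:

    from kingfisher_scrapy.items import File, FileError

    # Illustrative only; the field values come from the tests below, and
    # the encoding is an assumption.
    file = File({
        'file_name': 'file.json',
        'url': 'https://example.com/remote.json',
        'data': b'{"key": "value"}',
        'data_type': 'release_package',
        'encoding': 'utf-8',
    })

    error = FileError({
        'file_name': 'file.json',
        'url': 'https://example.com/remote.json',
        'errors': {'http_code': 404},
    })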
4 changes: 2 additions & 2 deletions kingfisher_scrapy/log_formatter.py
@@ -1,8 +1,8 @@
 # https://docs.scrapy.org/en/latest/topics/logging.html#custom-log-formats
-from scrapy.logformatter import LogFormatter
+from scrapy.logformatter import LogFormatter as _LogFormatter
 
 
-class KingfisherLogFormatter(LogFormatter):
+class LogFormatter(_LogFormatter):
     # https://docs.scrapy.org/en/latest/_modules/scrapy/logformatter.html#LogFormatter.scraped
     def scraped(self, item, *args):
         return self._omit_data('scraped', item, *args)
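Note the aliased import: once the class is renamed to LogFormatter, an unaliased import would shadow the Scrapy base class of the same name. The _omit_data helper is collapsed out of this hunk; a plausible reconstruction, an assumption based on the method's name and call site rather than code from this commit, is:

    from scrapy.logformatter import LogFormatter as _LogFormatter


    class LogFormatter(_LogFormatter):
        # Hypothetical body for the elided helper: delegate to the base
        # class after dropping the potentially large `data` field, so item
        # payloads do not flood the logs.
        def _omit_data(self, verb, item, *args):
            item = {key: value for key, value in item.items() if key != 'data'}
            return getattr(super(), verb)(item, *args)

        def scraped(self, item, *args):
            return self._omit_data('scraped', item, *args)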
10 changes: 5 additions & 5 deletions kingfisher_scrapy/settings.py
@@ -78,12 +78,12 @@
 #}
 EXTENSIONS = {
     'kingfisher_scrapy.extensions.SentryLogging': -1,
-    'kingfisher_scrapy.extensions.KingfisherPluck': 1,
-    # `KingfisherFilesStore` must run before `KingfisherProcessAPI`, because the file needs to be written before the
+    'kingfisher_scrapy.extensions.Pluck': 1,
+    # `FilesStore` must run before `KingfisherProcessAPI`, because the file needs to be written before the
     # request is sent to Kingfisher Process.
-    'kingfisher_scrapy.extensions.KingfisherFilesStore': 100,
+    'kingfisher_scrapy.extensions.FilesStore': 100,
     'kingfisher_scrapy.extensions.KingfisherProcessAPI': 500,
-    'kingfisher_scrapy.extensions.KingfisherItemCount': 600,
+    'kingfisher_scrapy.extensions.ItemCount': 600,
 }
 
 # Configure item pipelines
@@ -107,7 +107,7 @@
 # instead of files to Kingfisher Process' API. To enable that, set this to the absolute path to the `FILES_STORE`.
 KINGFISHER_API_LOCAL_DIRECTORY = os.getenv('KINGFISHER_API_LOCAL_DIRECTORY')
 
-LOG_FORMATTER = 'kingfisher_scrapy.log_formatter.KingfisherLogFormatter'
+LOG_FORMATTER = 'kingfisher_scrapy.log_formatter.LogFormatter'
 
 KINGFISHER_PARAGUAY_HACIENDA_REQUEST_TOKEN = os.getenv('KINGFISHER_PARAGUAY_HACIENDA_REQUEST_TOKEN')
 KINGFISHER_PARAGUAY_HACIENDA_CLIENT_SECRET = os.getenv('KINGFISHER_PARAGUAY_HACIENDA_CLIENT_SECRET')
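The ordering comment survives the rename for a reason: Scrapy loads components in ascending order of their value, and signal handlers fire in the order they were connected, so FilesStore (100) writes the file before KingfisherProcessAPI (500) tries to read it. A minimal demonstration of that dispatch behaviour, separate from this commit and resting on the stated assumption about SignalManager:

    from scrapy import signals
    from scrapy.signalmanager import SignalManager

    calls = []

    def files_store_handler(item, spider):
        calls.append('FilesStore')  # stands in for the order-100 extension

    def process_api_handler(item, spider):
        calls.append('KingfisherProcessAPI')  # stands in for order 500

    manager = SignalManager()
    # Connect in the same relative order as the EXTENSIONS values above.
    manager.connect(files_store_handler, signal=signals.item_scraped)
    manager.connect(process_api_handler, signal=signals.item_scraped)
    manager.send_catch_log(signals.item_scraped, item={}, spider=None)
    print(calls)  # ['FilesStore', 'KingfisherProcessAPI']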
10 changes: 5 additions & 5 deletions tests/extensions/test_kingfisher_files_store.py
@@ -5,7 +5,7 @@
 import pytest
 from scrapy.exceptions import NotConfigured
 
-from kingfisher_scrapy.extensions import KingfisherFilesStore
+from kingfisher_scrapy.extensions import FilesStore
 from kingfisher_scrapy.items import File, FileItem
 from tests import spider_with_crawler, spider_with_files_store
 
@@ -14,7 +14,7 @@ def test_from_crawler_missing_arguments():
     spider = spider_with_crawler()
 
     with pytest.raises(NotConfigured) as excinfo:
-        KingfisherFilesStore.from_crawler(spider.crawler)
+        FilesStore.from_crawler(spider.crawler)
 
     assert str(excinfo.value) == 'FILES_STORE is not set.'
 
@@ -25,7 +25,7 @@ def test_from_crawler_missing_arguments():
 ])
 def test_item_scraped_with_build_file_from_response(sample, path, tmpdir):
     spider = spider_with_files_store(tmpdir, sample=sample)
-    extension = KingfisherFilesStore.from_crawler(spider.crawler)
+    extension = FilesStore.from_crawler(spider.crawler)
 
     response = Mock()
     response.body = b'{"key": "value"}'
@@ -55,7 +55,7 @@ def test_item_scraped_with_build_file_from_response(sample, path, tmpdir):
 ])
 def test_item_scraped_with_file_and_file_item(sample, directory, data, item, expected_file_name, tmpdir):
     spider = spider_with_files_store(tmpdir, sample=sample)
-    extension = KingfisherFilesStore.from_crawler(spider.crawler)
+    extension = FilesStore.from_crawler(spider.crawler)
     path = os.path.join(directory, expected_file_name)
     original_file_name = item['file_name']
     item['data'] = data
@@ -72,7 +72,7 @@ def test_item_scraped_with_build_file_and_existing_directory():
     with TemporaryDirectory() as tmpdirname:
         files_store = os.path.join(tmpdirname, 'data')
         spider = spider_with_crawler(settings={'FILES_STORE': files_store})
-        extension = KingfisherFilesStore.from_crawler(spider.crawler)
+        extension = FilesStore.from_crawler(spider.crawler)
         item = spider.build_file(file_name='file.json', data=b'{"key": "value"}')
 
         os.makedirs(os.path.join(files_store, 'test', '20010203_040506'))
8 changes: 4 additions & 4 deletions tests/extensions/test_kingfisher_item_count.py
@@ -1,11 +1,11 @@
-from kingfisher_scrapy.extensions import KingfisherItemCount
+from kingfisher_scrapy.extensions import ItemCount
 from kingfisher_scrapy.items import FileError, FileItem
 from tests import spider_with_crawler
 
 
 def test_item_scraped_file(caplog):
     spider = spider_with_crawler()
-    item_extension = KingfisherItemCount.from_crawler(spider.crawler)
+    item_extension = ItemCount.from_crawler(spider.crawler)
     item = spider.build_file(file_name='file.json', url='https://example.com/remote.json', data=b'{"key": "value"}',
                              data_type='release_package')
 
@@ -18,7 +18,7 @@ def test_item_scraped_file(caplog):
 
 def test_item_scraped_file_item(caplog):
     spider = spider_with_crawler()
-    item_extension = KingfisherItemCount.from_crawler(spider.crawler)
+    item_extension = ItemCount.from_crawler(spider.crawler)
     item = FileItem({
         'number': 1,
         'file_name': 'file.json',
@@ -37,7 +37,7 @@ def test_item_scraped_file_item(caplog):
 
 def test_item_scraped_file_error(caplog):
     spider = spider_with_crawler()
-    item_extension = KingfisherItemCount.from_crawler(spider.crawler)
+    item_extension = ItemCount.from_crawler(spider.crawler)
     item = FileError({
         'url': 'https://example.com/remote.json',
         'errors': {'http_code': 404},
16 changes: 8 additions & 8 deletions tests/extensions/test_kingfisher_pluck.py
@@ -7,15 +7,15 @@
 from scrapy.exceptions import StopDownload
 
 from kingfisher_scrapy.base_spider import BaseSpider, CompressedFileSpider
-from kingfisher_scrapy.extensions import KingfisherPluck
+from kingfisher_scrapy.extensions import Pluck
 from kingfisher_scrapy.items import PluckedItem
 from tests import spider_with_crawler
 
 
 def test_disabled():
     with TemporaryDirectory() as tmpdirname:
         spider = spider_with_crawler(settings={'KINGFISHER_PLUCK_PATH': tmpdirname})
-        extension = KingfisherPluck.from_crawler(spider.crawler)
+        extension = Pluck.from_crawler(spider.crawler)
         item = PluckedItem({'value': '2020-10-01'})
 
         extension.item_scraped(item, spider)
@@ -27,7 +27,7 @@ def test_disabled():
 def test_item_scraped():
     with TemporaryDirectory() as tmpdirname:
         spider = spider_with_crawler(settings={'KINGFISHER_PLUCK_PATH': tmpdirname}, release_pointer='/date')
-        extension = KingfisherPluck.from_crawler(spider.crawler)
+        extension = Pluck.from_crawler(spider.crawler)
         item = PluckedItem({'value': '2020-10-01'})
 
         extension.item_scraped(item, spider)
@@ -45,7 +45,7 @@ def test_item_scraped():
 def test_spider_closed_with_items():
     with TemporaryDirectory() as tmpdirname:
         spider = spider_with_crawler(settings={'KINGFISHER_PLUCK_PATH': tmpdirname}, release_pointer='/date')
-        extension = KingfisherPluck.from_crawler(spider.crawler)
+        extension = Pluck.from_crawler(spider.crawler)
         item = PluckedItem({'value': '2020-10-01'})
 
         extension.item_scraped(item, spider)
@@ -58,7 +58,7 @@ def test_spider_closed_with_items():
 def test_spider_closed_without_items():
     with TemporaryDirectory() as tmpdirname:
         spider = spider_with_crawler(settings={'KINGFISHER_PLUCK_PATH': tmpdirname}, release_pointer='/date')
-        extension = KingfisherPluck.from_crawler(spider.crawler)
+        extension = Pluck.from_crawler(spider.crawler)
 
         extension.spider_closed(spider, 'itemcount')
 
@@ -70,7 +70,7 @@ def test_bytes_received_stop_download():
     with TemporaryDirectory() as tmpdirname:
         spider = spider_with_crawler(settings={'KINGFISHER_PLUCK_PATH': tmpdirname,
                                                'KINGFISHER_PLUCK_MAX_BYTES': 1}, release_pointer='/date')
-        extension = KingfisherPluck.from_crawler(spider.crawler)
+        extension = Pluck.from_crawler(spider.crawler)
         request = Request('http://example.com', meta={'file_name': 'test.json'})
 
         with pytest.raises(StopDownload):
@@ -83,7 +83,7 @@ def test_bytes_received_dont_stop_download():
     with TemporaryDirectory() as tmpdirname:
         spider = spider_with_crawler(settings={'KINGFISHER_PLUCK_PATH': tmpdirname,
                                                'KINGFISHER_PLUCK_MAX_BYTES': 10}, release_pointer='/date')
-        extension = KingfisherPluck.from_crawler(spider.crawler)
+        extension = Pluck.from_crawler(spider.crawler)
         request = Request('http://example.com', meta={'file_name': 'test.json'})
 
         extension.bytes_received(data=b'12345', spider=spider, request=request)
@@ -107,7 +107,7 @@ def test_bytes_received_ignored_requests(test_request, spider_class, attributes):
     for attr, value in attributes.items():
         setattr(spider, attr, value)
 
-    extension = KingfisherPluck.from_crawler(spider.crawler)
+    extension = Pluck.from_crawler(spider.crawler)
 
     extension.bytes_received(data=b'12345', spider=spider, request=test_request)
 
4 changes: 2 additions & 2 deletions tests/extensions/test_kingfisher_process_api.py
@@ -8,7 +8,7 @@
 from scrapy.http import Request, Response
 from twisted.python.failure import Failure
 
-from kingfisher_scrapy.extensions import KingfisherFilesStore, KingfisherProcessAPI
+from kingfisher_scrapy.extensions import FilesStore, KingfisherProcessAPI
 from kingfisher_scrapy.items import FileError, FileItem
 from tests import spider_with_crawler, spider_with_files_store
 
@@ -78,7 +78,7 @@ def test_item_scraped_file(sample, is_sample, path, note, encoding, encoding2, d
         **kwargs,
     )
 
-    store_extension = KingfisherFilesStore.from_crawler(spider.crawler)
+    store_extension = FilesStore.from_crawler(spider.crawler)
     store_extension.item_scraped(item, spider)
 
     response = yield extension.item_scraped(item, spider)
