From 0aaae11afbb3c7716af22cfd2d4cba9b5a0495b0 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Tue, 16 Apr 2024 14:55:05 -0400 Subject: [PATCH 1/2] chore(ecuador_sercop_bulk): Use join() instead of overriding build_request --- kingfisher_scrapy/spiders/ecuador_sercop_bulk.py | 7 ++----- kingfisher_scrapy/util.py | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/kingfisher_scrapy/spiders/ecuador_sercop_bulk.py b/kingfisher_scrapy/spiders/ecuador_sercop_bulk.py index ea6e5d40..f1078619 100644 --- a/kingfisher_scrapy/spiders/ecuador_sercop_bulk.py +++ b/kingfisher_scrapy/spiders/ecuador_sercop_bulk.py @@ -1,5 +1,5 @@ from kingfisher_scrapy.base_spiders import CompressedFileSpider, PeriodicSpider -from kingfisher_scrapy.util import components +from kingfisher_scrapy.util import components, join class EcuadorSERCOPBulk(CompressedFileSpider, PeriodicSpider): @@ -29,7 +29,4 @@ class EcuadorSERCOPBulk(CompressedFileSpider, PeriodicSpider): # PeriodicSpider pattern = 'https://datosabiertos.compraspublicas.gob.ec/PLATAFORMA/download'\ '?type=json&year={0:%Y}&month={0:%m}&method=all' - formatter = staticmethod(components(-1)) - - def build_request(self, url, formatter, **kwargs): - return super().build_request(url, formatter, meta={'file_name': f'{formatter(url)}.zip'}, **kwargs) + formatter = staticmethod(join(components(-1), extension='zip')) diff --git a/kingfisher_scrapy/util.py b/kingfisher_scrapy/util.py index d64adfb1..73e95dc1 100644 --- a/kingfisher_scrapy/util.py +++ b/kingfisher_scrapy/util.py @@ -56,7 +56,7 @@ def wrapper(url): def join(*functions, extension=None): """ - Returns a function that joins the given functions' outputs. + Returns a function that joins the given functions' outputs and sets the file extension, if provided. >>> join(components(-1), parameters('page'))('http://example.com/api/planning.json?page=1') 'planning-page-1' From 2ffdaa5998d60ec241197ba9f922aab1d8625184 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Tue, 16 Apr 2024 14:58:41 -0400 Subject: [PATCH 2/2] docs: Add note about syncing with other parts of code --- kingfisher_scrapy/base_spiders/base_spider.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kingfisher_scrapy/base_spiders/base_spider.py b/kingfisher_scrapy/base_spiders/base_spider.py index 957ff4f7..5fa90033 100644 --- a/kingfisher_scrapy/base_spiders/base_spider.py +++ b/kingfisher_scrapy/base_spiders/base_spider.py @@ -283,6 +283,7 @@ def build_request(self, url, formatter, **kwargs): assert kwargs['meta']['file_name'] else: meta['file_name'] = formatter(url) + # Other extensions are related to the Unflatten pipeline and CompressedFileSpider base class. if not meta['file_name'].endswith(('.json', '.csv', '.xlsx', '.rar', '.zip')): meta['file_name'] += '.json' if 'meta' in kwargs: