diff --git a/kingfisher_scrapy/base_spider.py b/kingfisher_scrapy/base_spider.py index 45346f40..89fcd49f 100644 --- a/kingfisher_scrapy/base_spider.py +++ b/kingfisher_scrapy/base_spider.py @@ -214,7 +214,7 @@ def parse_zipfile(self, response, data_type, file_format=None, encoding='utf-8') encoding=encoding, file_name=filename) else: yield self.build_file(data.read(), filename, data_type=data_type, url=response.request.url, - encoding=encoding) + encoding=encoding) else: yield self.build_file_error_from_response(response) diff --git a/kingfisher_scrapy/items.py b/kingfisher_scrapy/items.py index 070d009c..6a9740b9 100644 --- a/kingfisher_scrapy/items.py +++ b/kingfisher_scrapy/items.py @@ -14,7 +14,6 @@ class File(scrapy.Item): files_store = scrapy.Field() - class FileItem(scrapy.Item): number = scrapy.Field() file_name = scrapy.Field() diff --git a/kingfisher_scrapy/spiders/afghanistan_records.py b/kingfisher_scrapy/spiders/afghanistan_records.py index 3de94ef5..3fb4d385 100644 --- a/kingfisher_scrapy/spiders/afghanistan_records.py +++ b/kingfisher_scrapy/spiders/afghanistan_records.py @@ -46,10 +46,10 @@ def parse_record(self, response): url = response.request.url # This is dangerous as we might get stuck in a loop here if we always get a 429 response. Try this for now. yield scrapy.Request( - url=url, - meta={'kf_filename': url.split('/')[-1]+'.json'}, - callback=self.parse_record, - dont_filter=True, - ) + url=url, + meta={'kf_filename': url.split('/')[-1]+'.json'}, + callback=self.parse_record, + dont_filter=True, + ) else: yield self.build_file_error_from_response(response) diff --git a/kingfisher_scrapy/spiders/armenia.py b/kingfisher_scrapy/spiders/armenia.py index dfd9eb05..98a68043 100644 --- a/kingfisher_scrapy/spiders/armenia.py +++ b/kingfisher_scrapy/spiders/armenia.py @@ -20,7 +20,7 @@ def parse(self, response): if response.status == 200: yield self.build_file_from_response(response, response.request.meta['kf_filename'], - data_type='release_package') + data_type='release_package') json_data = json.loads(response.text) if not (self.sample): diff --git a/kingfisher_scrapy/spiders/australia_nsw.py b/kingfisher_scrapy/spiders/australia_nsw.py index 53a101b0..b16b270e 100644 --- a/kingfisher_scrapy/spiders/australia_nsw.py +++ b/kingfisher_scrapy/spiders/australia_nsw.py @@ -67,7 +67,7 @@ def parse_list(self, response): def parse(self, response): if response.status == 200: yield self.build_file_from_response(response, response.request.meta['kf_filename'], - data_type='release_package') + data_type='release_package') else: yield self.build_file_error_from_response(response) diff --git a/kingfisher_scrapy/spiders/canada_buyandsell.py b/kingfisher_scrapy/spiders/canada_buyandsell.py index 8c74db39..a2e9d2d0 100644 --- a/kingfisher_scrapy/spiders/canada_buyandsell.py +++ b/kingfisher_scrapy/spiders/canada_buyandsell.py @@ -30,6 +30,6 @@ def start_requests(self): def parse(self, response): if response.status == 200: yield self.build_file_from_response(response, response.request.meta['kf_filename'], - data_type='release_package') + data_type='release_package') else: yield self.build_file_error_from_response(response) diff --git a/kingfisher_scrapy/spiders/chile_base.py b/kingfisher_scrapy/spiders/chile_base.py index c4846767..b04d113a 100644 --- a/kingfisher_scrapy/spiders/chile_base.py +++ b/kingfisher_scrapy/spiders/chile_base.py @@ -27,9 +27,9 @@ def get_year_month_until(self): def get_sample_request(self): return scrapy.Request( - url=self.base_list_url.format(2017, 10, 0, 10), - meta={'year': 2017, 'month': 10} - ) + url=self.base_list_url.format(2017, 10, 0, 10), + meta={'year': 2017, 'month': 10} + ) def start_requests(self): if self.sample: @@ -85,4 +85,4 @@ def base_parse(self, response, package_type): return [self.build_file_error_from_response(response, errors={'http_code': data['status']})] else: return [self.build_file_from_response(response, response.request.meta['kf_filename'], - data_type='%s_package' % package_type)] + data_type='%s_package' % package_type)] diff --git a/kingfisher_scrapy/spiders/colombia.py b/kingfisher_scrapy/spiders/colombia.py index 28d0b7c9..5f6328da 100644 --- a/kingfisher_scrapy/spiders/colombia.py +++ b/kingfisher_scrapy/spiders/colombia.py @@ -45,7 +45,7 @@ def parse(self, response): elif response.status == 200: yield self.build_file_from_response(response, response.request.meta['kf_filename'], - data_type='release_package') + data_type='release_package') if not self.sample: yield self.next_link(response) diff --git a/kingfisher_scrapy/spiders/dominican_republic.py b/kingfisher_scrapy/spiders/dominican_republic.py index e5119f85..c40f75f1 100644 --- a/kingfisher_scrapy/spiders/dominican_republic.py +++ b/kingfisher_scrapy/spiders/dominican_republic.py @@ -38,7 +38,7 @@ def parse(self, response): for f in tmpfile.infolist(): with tmpfile.open(f) as jsonFile: yield self.build_file(jsonFile.read(), f.filename, data_type='release_package', - url=response.request.url) + url=response.request.url) os.remove(file.name) else: filename = response.request.url.split('/')[-1] diff --git a/kingfisher_scrapy/spiders/mexico_cdmx.py b/kingfisher_scrapy/spiders/mexico_cdmx.py index f5a6eb14..9021e2ab 100644 --- a/kingfisher_scrapy/spiders/mexico_cdmx.py +++ b/kingfisher_scrapy/spiders/mexico_cdmx.py @@ -34,6 +34,6 @@ def parse_list(self, response): def parse_record(self, response): if response.status == 200: yield self.build_file_from_response(response, response.request.meta['kf_filename'], - data_type='release_package') + data_type='release_package') else: yield self.build_file_error_from_response(response) diff --git a/kingfisher_scrapy/spiders/mexico_grupo_aeroporto.py b/kingfisher_scrapy/spiders/mexico_grupo_aeroporto.py index 016b9e77..dff27093 100644 --- a/kingfisher_scrapy/spiders/mexico_grupo_aeroporto.py +++ b/kingfisher_scrapy/spiders/mexico_grupo_aeroporto.py @@ -15,7 +15,7 @@ def start_requests(self): def parse(self, response): if response.status == 200: yield self.build_file_from_response(response, response.request.meta['kf_filename'], - data_type='release_package') + data_type='release_package') else: yield self.build_file_error_from_response(response) diff --git a/kingfisher_scrapy/spiders/mexico_jalisco.py b/kingfisher_scrapy/spiders/mexico_jalisco.py index 2a573736..bbdcdeef 100644 --- a/kingfisher_scrapy/spiders/mexico_jalisco.py +++ b/kingfisher_scrapy/spiders/mexico_jalisco.py @@ -41,13 +41,13 @@ def parse_record_package(self, response): callback=self.parse_release_package ) yield self.build_file_from_response(response, response.request.meta['kf_filename'], - data_type='record_package') + data_type='record_package') else: yield self.build_file_error_from_response(response) def parse_release_package(self, response): if response.status == 200: yield self.build_file_from_response(response, response.request.meta['kf_filename'], - data_type='release_package') + data_type='release_package') else: yield self.build_file_error_from_response(response) diff --git a/kingfisher_scrapy/spiders/moldova.py b/kingfisher_scrapy/spiders/moldova.py index 23088091..1e9e36bf 100644 --- a/kingfisher_scrapy/spiders/moldova.py +++ b/kingfisher_scrapy/spiders/moldova.py @@ -27,7 +27,7 @@ def parse(self, response): if response.status == 200: if response.request.meta['data']: yield self.build_file_from_response(response, response.request.meta['kf_filename'], - data_type='record_package') + data_type='record_package') else: self.build_file_from_response(response, response.request.meta['kf_filename']) json_data = json.loads(response.text) diff --git a/kingfisher_scrapy/spiders/moldova_old.py b/kingfisher_scrapy/spiders/moldova_old.py index b14ca93e..332e6777 100644 --- a/kingfisher_scrapy/spiders/moldova_old.py +++ b/kingfisher_scrapy/spiders/moldova_old.py @@ -22,6 +22,6 @@ def start_requests(self): def parse(self, response): if response.status == 200: yield self.build_file_from_response(response, response.request.meta['kf_filename'], - data_type='release_package') + data_type='release_package') else: yield self.build_file_error_from_response(response) diff --git a/kingfisher_scrapy/spiders/paraguay_hacienda.py b/kingfisher_scrapy/spiders/paraguay_hacienda.py index b9f1f25e..dc84a7a9 100644 --- a/kingfisher_scrapy/spiders/paraguay_hacienda.py +++ b/kingfisher_scrapy/spiders/paraguay_hacienda.py @@ -21,7 +21,7 @@ class ParaguayHacienda(BaseSpider): custom_settings = { 'DOWNLOADER_MIDDLEWARES': { - 'kingfisher_scrapy.middlewares.ParaguayAuthMiddleware': 543, + 'kingfisher_scrapy.middlewares.ParaguayAuthMiddleware': 543, }, 'CONCURRENT_REQUESTS': 1, } @@ -81,7 +81,7 @@ def parse(self, response): ) else: yield self.build_file_from_response(response, response.request.meta['kf_filename'], - data_type='release_package') + data_type='release_package') else: yield self.build_file_error_from_response(response) diff --git a/kingfisher_scrapy/spiders/scotland.py b/kingfisher_scrapy/spiders/scotland.py index 255d7eb2..9b9ff623 100644 --- a/kingfisher_scrapy/spiders/scotland.py +++ b/kingfisher_scrapy/spiders/scotland.py @@ -54,6 +54,6 @@ def start_requests(self): def parse(self, response): if response.status == 200: yield self.build_file_from_response(response, response.request.meta['kf_filename'], - data_type='release_package') + data_type='release_package') else: yield self.build_file_error_from_response(response) diff --git a/kingfisher_scrapy/spiders/test_fail.py b/kingfisher_scrapy/spiders/test_fail.py index 374bfd34..97b73489 100644 --- a/kingfisher_scrapy/spiders/test_fail.py +++ b/kingfisher_scrapy/spiders/test_fail.py @@ -35,7 +35,7 @@ def start_requests(self): def parse(self, response): if response.status == 200: yield self.build_file_from_response(response, response.request.meta['kf_filename'], - data_type='release_package') + data_type='release_package') else: diff --git a/tests/test_extensions.py b/tests/test_extensions.py index b4a0a7e8..827c86ad 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -70,7 +70,7 @@ def test_item_scraped_file(sample, is_sample, path, note, encoding, encoding2, d if encoding: kwargs['encoding'] = encoding item = spider.build_file(b'{"key": "value"}', 'file.json', url='https://example.com/remote.json', - data_type='release_package', post_to_api=post_to_api, **kwargs) + data_type='release_package', post_to_api=post_to_api, **kwargs) store_extension.item_scraped(item, spider)