Remove keyword for positional "url" argument in scrapy.Request

open-contracting · May 30, 2020 · b27b868
1 parent d3810c0
commit b27b868
Show file tree

Hide file tree

Showing 32 changed files with 64 additions and 106 deletions.
diff --git a/docs/writing-spiders.rst b/docs/writing-spiders.rst
@@ -60,7 +60,7 @@ Here is a sample:
         def start_requests(self):
             # This API only has one URL to get. Make a request for that, and set a filename
             yield scrapy.Request(
-                url='https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-13-14.json',
+                'https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-13-14.json',
                 meta={'kf_filename': '13-14.json'}
             )
 

diff --git a/kingfisher_scrapy/base_spider.py b/kingfisher_scrapy/base_spider.py
@@ -274,10 +274,7 @@ class MySpider(LinksSpider):
             data_type = 'release_package'
 
             def start_requests(self):
-                yield scrapy.Request(
-                    url='https://example.com/api/packages.json',
-                    meta={'kf_filename': 'page1.json'}
-                )
+                yield scrapy.Request('https://example.com/api/packages.json', meta={'kf_filename': 'page1.json'})
     """
 
     @handle_error

diff --git a/kingfisher_scrapy/spiders/afghanistan_records.py b/kingfisher_scrapy/spiders/afghanistan_records.py
@@ -12,7 +12,7 @@ class AfghanistanRecords(BaseSpider):
 
     def start_requests(self):
         yield scrapy.Request(
-            url='https://ocds.ageops.net/api/ocds/records',
+            'https://ocds.ageops.net/api/ocds/records',
             meta={'kf_filename': 'list.json'},
             callback=self.parse_list
         )
@@ -24,10 +24,7 @@ def parse_list(self, response):
             files_urls = [files_urls[0]]
 
         for file_url in files_urls:
-            yield scrapy.Request(
-                url=file_url,
-                meta={'kf_filename': file_url.split('/')[-1] + '.json'},
-            )
+            yield scrapy.Request(file_url, meta={'kf_filename': file_url.split('/')[-1] + '.json'})
 
     @handle_error
     def parse(self, response):

diff --git a/kingfisher_scrapy/spiders/afghanistan_releases.py b/kingfisher_scrapy/spiders/afghanistan_releases.py
@@ -12,7 +12,7 @@ class AfghanistanReleases(BaseSpider):
 
     def start_requests(self):
         yield scrapy.Request(
-            url='https://ocds.ageops.net/api/ocds/releases/dates',
+            'https://ocds.ageops.net/api/ocds/releases/dates',
             meta={'kf_filename': 'list.json'},
             callback=self.parse_list
         )
@@ -25,7 +25,7 @@ def parse_list(self, response):
 
         for file_url in files_urls:
             yield scrapy.Request(
-                url=file_url,
+                file_url,
                 meta={'kf_filename': file_url.split('/')[-1] + '.json'},
                 callback=self.parse_release_list
             )
@@ -37,10 +37,7 @@ def parse_release_list(self, response):
             files_urls = [files_urls[0]]
 
         for file_url in files_urls:
-            yield scrapy.Request(
-                url=file_url,
-                meta={'kf_filename': file_url.split('/')[-1] + '.json'},
-            )
+            yield scrapy.Request(file_url, meta={'kf_filename': file_url.split('/')[-1] + '.json'})
 
     @handle_error
     def parse(self, response):

diff --git a/kingfisher_scrapy/spiders/argentina_buenos_aires.py b/kingfisher_scrapy/spiders/argentina_buenos_aires.py
@@ -25,7 +25,7 @@ class ArgentinaBuenosAires(ZipSpider):
 
     def start_requests(self):
         yield scrapy.Request(
-            url='https://data.buenosaires.gob.ar/api/3/action/package_show?id=buenos-aires-compras',
+            'https://data.buenosaires.gob.ar/api/3/action/package_show?id=buenos-aires-compras',
             meta={'kf_filename': 'list.json'},
             callback=self.parse_list
         )

diff --git a/kingfisher_scrapy/spiders/argentina_vialidad.py b/kingfisher_scrapy/spiders/argentina_vialidad.py
@@ -9,7 +9,7 @@ class ArgentinaVialidad(BaseSpider):
 
     def start_requests(self):
         yield scrapy.Request(
-            url='https://datosabiertos.vialidad.gob.ar/api/ocds/package/all',
+            'https://datosabiertos.vialidad.gob.ar/api/ocds/package/all',
             meta={'kf_filename': 'all.json'}
         )
 

diff --git a/kingfisher_scrapy/spiders/armenia.py b/kingfisher_scrapy/spiders/armenia.py
@@ -11,10 +11,7 @@ class Armenia(BaseSpider):
     name = 'armenia'
 
     def start_requests(self):
-        yield scrapy.Request(
-            url='https://armeps.am/ocds/release',
-            meta={'kf_filename': 'page1.json'}
-        )
+        yield scrapy.Request('https://armeps.am/ocds/release', meta={'kf_filename': 'page1.json'})
 
     @handle_error
     def parse(self, response):
@@ -24,7 +21,4 @@ def parse(self, response):
         if not (self.sample):
             if 'next_page' in json_data and 'uri' in json_data['next_page']:
                 url = json_data['next_page']['uri']
-                yield scrapy.Request(
-                    url=url,
-                    meta={'kf_filename': hashlib.md5(url.encode('utf-8')).hexdigest()+'.json'}
-                )
+                yield scrapy.Request(url, meta={'kf_filename': hashlib.md5(url.encode('utf-8')).hexdigest()+'.json'})
diff --git a/kingfisher_scrapy/spiders/australia.py b/kingfisher_scrapy/spiders/australia.py
@@ -10,17 +10,17 @@ class Australia(LinksSpider):
     data_type = 'release_package'
 
     def start_requests(self):
+        url_prefix = 'https://api.tenders.gov.au/ocds/findByDates/contractPublished/'
+
         if self.sample:
             yield scrapy.Request(
-                url='https://api.tenders.gov.au/ocds/findByDates/contractPublished/2018-01-01T00:00:00Z/2018-12-31T23'
-                    ':59:59Z',
+                url_prefix + '2018-01-01T00:00:00Z/2018-12-31T23:59:59Z',
                 meta={'kf_filename': 'year-2018.json'}
             )
         else:
             current_year = datetime.datetime.now().year + 1
             for year in range(2004, current_year):
                 yield scrapy.Request(
-                    url='https://api.tenders.gov.au/ocds/findByDates/contractPublished/'
-                        '{}-01-01T00:00:00Z/{}-12-31T23:59:59Z'.format(year, year),
+                    url_prefix + '{}-01-01T00:00:00Z/{}-12-31T23:59:59Z'.format(year, year),
                     meta={'kf_filename': 'year-{}.json'.format(year)}
                 )
diff --git a/kingfisher_scrapy/spiders/canada_buyandsell.py b/kingfisher_scrapy/spiders/canada_buyandsell.py
@@ -9,21 +9,21 @@ class CanadaBuyAndSell(BaseSpider):
 
     def start_requests(self):
         yield scrapy.Request(
-            url='https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-13-14.json',
+            'https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-13-14.json',
             meta={'kf_filename': '13-14.json'}
         )
         if self.sample:
             return
         yield scrapy.Request(
-            url='https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-14-15.json',
+            'https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-14-15.json',
             meta={'kf_filename': '14-15.json'}
         )
         yield scrapy.Request(
-            url='https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-15-16.json',
+            'https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-15-16.json',
             meta={'kf_filename': '15-16.json'}
         )
         yield scrapy.Request(
-            url='https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-16-17.json',
+            'https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-16-17.json',
             meta={'kf_filename': '16-17.json'}
         )
 

diff --git a/kingfisher_scrapy/spiders/canada_montreal.py b/kingfisher_scrapy/spiders/canada_montreal.py
@@ -12,7 +12,7 @@ class CanadaMontreal(BaseSpider):
 
     def start_requests(self):
         yield scrapy.Request(
-            url='https://ville.montreal.qc.ca/vuesurlescontrats/api/releases.json?limit=%d' % self.page_limit,
+            'https://ville.montreal.qc.ca/vuesurlescontrats/api/releases.json?limit=%d' % self.page_limit,
             meta={'kf_filename': 'page0.json'}
         )
 
@@ -30,7 +30,7 @@ def parse(self, response):
                 url = 'https://ville.montreal.qc.ca/vuesurlescontrats/api/releases.json?limit=%d&offset=%d' % \
                       (self.page_limit, offset)
                 yield scrapy.Request(
-                    url=url,
+                    url,
                     meta={'kf_filename': 'page' + str(offset) + '.json'}
                 )
                 offset += self.page_limit
diff --git a/kingfisher_scrapy/spiders/chile_base.py b/kingfisher_scrapy/spiders/chile_base.py
@@ -28,7 +28,7 @@ def get_year_month_until(self):
     def start_requests(self):
         if self.sample:
             yield scrapy.Request(
-                url=self.base_list_url.format(2017, 10, 0, 10),
+                self.base_list_url.format(2017, 10, 0, 10),
                 meta={'kf_filename': 'list-2017-10.json', 'year': 2017, 'month': 10},
             )
             return
@@ -40,7 +40,7 @@ def start_requests(self):
                 if (until_year - 1) == year and month > until_month:
                     break
                 yield scrapy.Request(
-                    url=self.base_list_url.format(year, month, 0, self.limit),
+                    self.base_list_url.format(year, month, 0, self.limit),
                     meta={'kf_filename': 'list-{}-{:02d}.json'.format(year, month), 'year': year, 'month': month},
                 )
 
@@ -51,7 +51,7 @@ def base_parse(self, response, package_type):
             for data_item in data['data']:
                 if package_type == 'record':
                     yield_list.append(scrapy.Request(
-                        url=self.record_url % data_item['ocid'].replace('ocds-70d2nz-', ''),
+                        self.record_url % data_item['ocid'].replace('ocds-70d2nz-', ''),
                         meta={'kf_filename': 'data-%s-%s.json' % (data_item['ocid'], package_type)}
                     ))
                 else:
@@ -67,15 +67,15 @@ def base_parse(self, response, package_type):
                         if 'url' in stage:
                             name = stage.replace('url', '')
                             yield_list.append(scrapy.Request(
-                                url=data_item[stage],
+                                data_item[stage],
                                 meta={'kf_filename': 'data-%s-%s.json' % (data_item['ocid'], name)}
                             ))
             if 'pagination' in data and (data['pagination']['offset'] + self.limit) < data['pagination']['total']:
                 year = response.request.meta['year']
                 month = response.request.meta['month']
                 offset = data['pagination']['offset']
                 yield_list.append(scrapy.Request(
-                    url=self.base_list_url.format(year, month, self.limit + offset, self.limit),
+                    self.base_list_url.format(year, month, self.limit + offset, self.limit),
                     meta={'year': year, 'month': month}
                 ))
             return yield_list

diff --git a/kingfisher_scrapy/spiders/colombia.py b/kingfisher_scrapy/spiders/colombia.py
@@ -21,10 +21,7 @@ def start_requests(self):
         start_page = 1
         if hasattr(self, 'page'):
             start_page = int(self.page)
-        yield scrapy.Request(
-            url=base_url % start_page,
-            meta={'kf_filename': 'page{}.json'.format(start_page)}
-        )
+        yield scrapy.Request(base_url % start_page, meta={'kf_filename': 'page{}.json'.format(start_page)})
 
     def parse(self, response):
         # In Colombia, every day at certain hour they run a process in their system that drops the database and make

diff --git a/kingfisher_scrapy/spiders/colombia_bulk.py b/kingfisher_scrapy/spiders/colombia_bulk.py
@@ -27,7 +27,7 @@ class ColombiaBulk(ZipSpider):
 
     def start_requests(self):
         yield scrapy.Request(
-            url='https://www.colombiacompra.gov.co/transparencia/datos-json',
+            'https://www.colombiacompra.gov.co/transparencia/datos-json',
             meta={'kf_filename': 'list.html'},
             callback=self.parse_list,
         )

diff --git a/kingfisher_scrapy/spiders/france.py b/kingfisher_scrapy/spiders/france.py
@@ -12,7 +12,7 @@ class France(BaseSpider):
 
     def start_requests(self):
         yield scrapy.Request(
-            url='https://www.data.gouv.fr/api/1/datasets/?organization=534fff75a3a7292c64a77de4',
+            'https://www.data.gouv.fr/api/1/datasets/?organization=534fff75a3a7292c64a77de4',
             meta={'kf_filename': 'list.json'},
             callback=self.parse_list,
         )

diff --git a/kingfisher_scrapy/spiders/georgia_opendata.py b/kingfisher_scrapy/spiders/georgia_opendata.py
@@ -12,7 +12,4 @@ class GeorgiaOpenData(ZipSpider):
     download_timeout = 1200  # 20min
 
     def start_requests(self):
-        yield scrapy.Request(
-            url='http://opendata.spa.ge/json/allTenders.zip',
-            meta={'kf_filename': 'all.json'}
-        )
+        yield scrapy.Request('http://opendata.spa.ge/json/allTenders.zip', meta={'kf_filename': 'all.json'})
diff --git a/kingfisher_scrapy/spiders/georgia_records.py b/kingfisher_scrapy/spiders/georgia_records.py
@@ -8,7 +8,4 @@ class GeorgiaRecords(LinksSpider):
     data_type = 'record_package'
 
     def start_requests(self):
-        yield scrapy.Request(
-            url='https://odapi.spa.ge/api/records.json',
-            meta={'kf_filename': 'page1.json'}
-        )
+        yield scrapy.Request('https://odapi.spa.ge/api/records.json', meta={'kf_filename': 'page1.json'})
diff --git a/kingfisher_scrapy/spiders/georgia_releases.py b/kingfisher_scrapy/spiders/georgia_releases.py
@@ -8,7 +8,4 @@ class GeorgiaReleases(LinksSpider):
     data_type = 'release_package'
 
     def start_requests(self):
-        yield scrapy.Request(
-            url='https://odapi.spa.ge/api/releases.json',
-            meta={'kf_filename': 'page1.json'}
-        )
+        yield scrapy.Request('https://odapi.spa.ge/api/releases.json', meta={'kf_filename': 'page1.json'})
diff --git a/kingfisher_scrapy/spiders/mexico_administracion_publica_federal.py b/kingfisher_scrapy/spiders/mexico_administracion_publica_federal.py
@@ -14,7 +14,7 @@ class MexicoAdministracionPublicaFederal(BaseSpider):
 
     def start_requests(self):
         yield scrapy.Request(
-            url='https://api.datos.gob.mx/v1/contratacionesabiertas',
+            'https://api.datos.gob.mx/v1/contratacionesabiertas',
             meta={'kf_filename': 'page1.json'}
         )
 
@@ -32,7 +32,7 @@ def parse(self, response):
             limit = data['pagination']['pageSize']
             while ((page - 1) * limit) < total:
                 yield scrapy.Request(
-                    url='https://api.datos.gob.mx/v1/contratacionesabiertas?page=%d' % page,
+                    'https://api.datos.gob.mx/v1/contratacionesabiertas?page=%d' % page,
                     meta={'kf_filename': 'page' + str(page) + '.json'}
                 )
                 page += 1
diff --git a/kingfisher_scrapy/spiders/mexico_cdmx.py b/kingfisher_scrapy/spiders/mexico_cdmx.py
@@ -11,7 +11,7 @@ class MexicoCDMXSource(BaseSpider):
 
     def start_requests(self):
         yield scrapy.Request(
-            url='http://www.contratosabiertos.cdmx.gob.mx/api/contratos/todos',
+            'http://www.contratosabiertos.cdmx.gob.mx/api/contratos/todos',
             meta={'kf_filename': 'list.json'},
             callback=self.parse_list
         )
@@ -24,7 +24,7 @@ def parse_list(self, response):
 
         for data_item in data:
             yield scrapy.Request(
-                url=data_item['uri'],
+                data_item['uri'],
                 meta={'kf_filename': 'id%s.json' % data_item['id']},
                 callback=self.parse_record
             )

diff --git a/kingfisher_scrapy/spiders/mexico_grupo_aeroporto.py b/kingfisher_scrapy/spiders/mexico_grupo_aeroporto.py
@@ -9,7 +9,7 @@ class MexicoGrupoAeroporto(BaseSpider):
 
     def start_requests(self):
         yield scrapy.Request(
-            url='http://gacmda.gacm.mx:8880/files/opendata/coleccion/concentrado05032019RELEASE.json',
+            'http://gacmda.gacm.mx:8880/files/opendata/coleccion/concentrado05032019RELEASE.json',
             meta={'kf_filename': 'concentrado05032019RELEASE.json'}
         )
 

diff --git a/kingfisher_scrapy/spiders/mexico_inai.py b/kingfisher_scrapy/spiders/mexico_inai.py
@@ -12,7 +12,7 @@ class MexicoINAI(BaseSpider):
 
     def start_requests(self):
         yield scrapy.Request(
-            url='https://datos.gob.mx/busca/api/3/action/package_search?q=organization:inai&rows=500',
+            'https://datos.gob.mx/busca/api/3/action/package_search?q=organization:inai&rows=500',
             meta={'kf_filename': 'list.json'},
             callback=self.parse_list
         )
@@ -25,7 +25,7 @@ def parse_list(self, response):
                 if resource['format'] == 'JSON':
                     kf_filename = 'redirect-' + hashlib.md5(resource['url'].encode('utf-8')).hexdigest() + '.json'
                     yield scrapy.Request(
-                        url=resource['url'],
+                        resource['url'],
                         meta={
                             'kf_filename': kf_filename,
                             'dont_redirect': True
@@ -37,7 +37,7 @@ def parse_redirect(self, response):
         if response.status == 301:
             url = response.headers['Location'].decode("utf-8").replace("open?", "uc?export=download&")
             yield scrapy.Request(
-                url=url,
+                url,
                 meta={'kf_filename': 'data-' + hashlib.md5(url.encode('utf-8')).hexdigest() + '.json'},
                 callback=self.parse
             )

diff --git a/kingfisher_scrapy/spiders/mexico_jalisco.py b/kingfisher_scrapy/spiders/mexico_jalisco.py
@@ -12,7 +12,7 @@ class MexicoJalisco(BaseSpider):
 
     def start_requests(self):
         yield scrapy.Request(
-            url='https://contratacionesabiertas.jalisco.gob.mx/OCApi/2017/contracts',
+            'https://contratacionesabiertas.jalisco.gob.mx/OCApi/2017/contracts',
             meta={'kf_filename': 'list.json'},
             callback=self.parse_list
         )
@@ -24,7 +24,7 @@ def parse_list(self, response):
             datas = [datas[0]]
         for data in datas:
             yield scrapy.Request(
-                url=data['URIContract'],
+                data['URIContract'],
                 meta={'kf_filename': 'id%s.json' % data['ocid']},
                 callback=self.parse_record_package
             )
@@ -35,7 +35,7 @@ def parse_record_package(self, response):
         if 'packages' in json_data:
             for url in json_data['packages']:
                 yield scrapy.Request(
-                    url=url,
+                    url,
                     meta={'kf_filename': 'packages-%s.json' % hashlib.md5(url.encode('utf-8')).hexdigest()},
                     callback=self.parse_release_package
                 )