Skip to content

Commit

Permalink
Remove keyword for positional "url" argument in scrapy.Request
Browse files: browse the repository at this point in the history
  • Loading branch information
jpmckinney committed May 30, 2020
1 parent d3810c0 commit b27b868
Show file tree
Hide file tree
Showing 32 changed files with 64 additions and 106 deletions.
2 changes: 1 addition & 1 deletion docs/writing-spiders.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ Here is a sample:
def start_requests(self):
# This API only has one URL to get. Make a request for that, and set a filename
yield scrapy.Request(
url='https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-13-14.json',
'https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-13-14.json',
meta={'kf_filename': '13-14.json'}
)
Expand Down
5 changes: 1 addition & 4 deletions kingfisher_scrapy/base_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,10 +274,7 @@ class MySpider(LinksSpider):
data_type = 'release_package'
def start_requests(self):
yield scrapy.Request(
url='https://example.com/api/packages.json',
meta={'kf_filename': 'page1.json'}
)
yield scrapy.Request('https://example.com/api/packages.json', meta={'kf_filename': 'page1.json'})
"""

@handle_error
Expand Down
7 changes: 2 additions & 5 deletions kingfisher_scrapy/spiders/afghanistan_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class AfghanistanRecords(BaseSpider):

def start_requests(self):
yield scrapy.Request(
url='https://ocds.ageops.net/api/ocds/records',
'https://ocds.ageops.net/api/ocds/records',
meta={'kf_filename': 'list.json'},
callback=self.parse_list
)
Expand All @@ -24,10 +24,7 @@ def parse_list(self, response):
files_urls = [files_urls[0]]

for file_url in files_urls:
yield scrapy.Request(
url=file_url,
meta={'kf_filename': file_url.split('/')[-1] + '.json'},
)
yield scrapy.Request(file_url, meta={'kf_filename': file_url.split('/')[-1] + '.json'})

@handle_error
def parse(self, response):
Expand Down
9 changes: 3 additions & 6 deletions kingfisher_scrapy/spiders/afghanistan_releases.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class AfghanistanReleases(BaseSpider):

def start_requests(self):
yield scrapy.Request(
url='https://ocds.ageops.net/api/ocds/releases/dates',
'https://ocds.ageops.net/api/ocds/releases/dates',
meta={'kf_filename': 'list.json'},
callback=self.parse_list
)
Expand All @@ -25,7 +25,7 @@ def parse_list(self, response):

for file_url in files_urls:
yield scrapy.Request(
url=file_url,
file_url,
meta={'kf_filename': file_url.split('/')[-1] + '.json'},
callback=self.parse_release_list
)
Expand All @@ -37,10 +37,7 @@ def parse_release_list(self, response):
files_urls = [files_urls[0]]

for file_url in files_urls:
yield scrapy.Request(
url=file_url,
meta={'kf_filename': file_url.split('/')[-1] + '.json'},
)
yield scrapy.Request(file_url, meta={'kf_filename': file_url.split('/')[-1] + '.json'})

@handle_error
def parse(self, response):
Expand Down
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/argentina_buenos_aires.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class ArgentinaBuenosAires(ZipSpider):

def start_requests(self):
yield scrapy.Request(
url='https://data.buenosaires.gob.ar/api/3/action/package_show?id=buenos-aires-compras',
'https://data.buenosaires.gob.ar/api/3/action/package_show?id=buenos-aires-compras',
meta={'kf_filename': 'list.json'},
callback=self.parse_list
)
Expand Down
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/argentina_vialidad.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class ArgentinaVialidad(BaseSpider):

def start_requests(self):
yield scrapy.Request(
url='https://datosabiertos.vialidad.gob.ar/api/ocds/package/all',
'https://datosabiertos.vialidad.gob.ar/api/ocds/package/all',
meta={'kf_filename': 'all.json'}
)

Expand Down
10 changes: 2 additions & 8 deletions kingfisher_scrapy/spiders/armenia.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,7 @@ class Armenia(BaseSpider):
name = 'armenia'

def start_requests(self):
yield scrapy.Request(
url='https://armeps.am/ocds/release',
meta={'kf_filename': 'page1.json'}
)
yield scrapy.Request('https://armeps.am/ocds/release', meta={'kf_filename': 'page1.json'})

@handle_error
def parse(self, response):
Expand All @@ -24,7 +21,4 @@ def parse(self, response):
if not (self.sample):
if 'next_page' in json_data and 'uri' in json_data['next_page']:
url = json_data['next_page']['uri']
yield scrapy.Request(
url=url,
meta={'kf_filename': hashlib.md5(url.encode('utf-8')).hexdigest()+'.json'}
)
yield scrapy.Request(url, meta={'kf_filename': hashlib.md5(url.encode('utf-8')).hexdigest()+'.json'})
8 changes: 4 additions & 4 deletions kingfisher_scrapy/spiders/australia.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,17 @@ class Australia(LinksSpider):
data_type = 'release_package'

def start_requests(self):
url_prefix = 'https://api.tenders.gov.au/ocds/findByDates/contractPublished/'

if self.sample:
yield scrapy.Request(
url='https://api.tenders.gov.au/ocds/findByDates/contractPublished/2018-01-01T00:00:00Z/2018-12-31T23'
':59:59Z',
url_prefix + '2018-01-01T00:00:00Z/2018-12-31T23:59:59Z',
meta={'kf_filename': 'year-2018.json'}
)
else:
current_year = datetime.datetime.now().year + 1
for year in range(2004, current_year):
yield scrapy.Request(
url='https://api.tenders.gov.au/ocds/findByDates/contractPublished/'
'{}-01-01T00:00:00Z/{}-12-31T23:59:59Z'.format(year, year),
url_prefix + '{}-01-01T00:00:00Z/{}-12-31T23:59:59Z'.format(year, year),
meta={'kf_filename': 'year-{}.json'.format(year)}
)
8 changes: 4 additions & 4 deletions kingfisher_scrapy/spiders/canada_buyandsell.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,21 @@ class CanadaBuyAndSell(BaseSpider):

def start_requests(self):
yield scrapy.Request(
url='https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-13-14.json',
'https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-13-14.json',
meta={'kf_filename': '13-14.json'}
)
if self.sample:
return
yield scrapy.Request(
url='https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-14-15.json',
'https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-14-15.json',
meta={'kf_filename': '14-15.json'}
)
yield scrapy.Request(
url='https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-15-16.json',
'https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-15-16.json',
meta={'kf_filename': '15-16.json'}
)
yield scrapy.Request(
url='https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-16-17.json',
'https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-16-17.json',
meta={'kf_filename': '16-17.json'}
)

Expand Down
4 changes: 2 additions & 2 deletions kingfisher_scrapy/spiders/canada_montreal.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class CanadaMontreal(BaseSpider):

def start_requests(self):
yield scrapy.Request(
url='https://ville.montreal.qc.ca/vuesurlescontrats/api/releases.json?limit=%d' % self.page_limit,
'https://ville.montreal.qc.ca/vuesurlescontrats/api/releases.json?limit=%d' % self.page_limit,
meta={'kf_filename': 'page0.json'}
)

Expand All @@ -30,7 +30,7 @@ def parse(self, response):
url = 'https://ville.montreal.qc.ca/vuesurlescontrats/api/releases.json?limit=%d&offset=%d' % \
(self.page_limit, offset)
yield scrapy.Request(
url=url,
url,
meta={'kf_filename': 'page' + str(offset) + '.json'}
)
offset += self.page_limit
10 changes: 5 additions & 5 deletions kingfisher_scrapy/spiders/chile_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def get_year_month_until(self):
def start_requests(self):
if self.sample:
yield scrapy.Request(
url=self.base_list_url.format(2017, 10, 0, 10),
self.base_list_url.format(2017, 10, 0, 10),
meta={'kf_filename': 'list-2017-10.json', 'year': 2017, 'month': 10},
)
return
Expand All @@ -40,7 +40,7 @@ def start_requests(self):
if (until_year - 1) == year and month > until_month:
break
yield scrapy.Request(
url=self.base_list_url.format(year, month, 0, self.limit),
self.base_list_url.format(year, month, 0, self.limit),
meta={'kf_filename': 'list-{}-{:02d}.json'.format(year, month), 'year': year, 'month': month},
)

Expand All @@ -51,7 +51,7 @@ def base_parse(self, response, package_type):
for data_item in data['data']:
if package_type == 'record':
yield_list.append(scrapy.Request(
url=self.record_url % data_item['ocid'].replace('ocds-70d2nz-', ''),
self.record_url % data_item['ocid'].replace('ocds-70d2nz-', ''),
meta={'kf_filename': 'data-%s-%s.json' % (data_item['ocid'], package_type)}
))
else:
Expand All @@ -67,15 +67,15 @@ def base_parse(self, response, package_type):
if 'url' in stage:
name = stage.replace('url', '')
yield_list.append(scrapy.Request(
url=data_item[stage],
data_item[stage],
meta={'kf_filename': 'data-%s-%s.json' % (data_item['ocid'], name)}
))
if 'pagination' in data and (data['pagination']['offset'] + self.limit) < data['pagination']['total']:
year = response.request.meta['year']
month = response.request.meta['month']
offset = data['pagination']['offset']
yield_list.append(scrapy.Request(
url=self.base_list_url.format(year, month, self.limit + offset, self.limit),
self.base_list_url.format(year, month, self.limit + offset, self.limit),
meta={'year': year, 'month': month}
))
return yield_list
Expand Down
5 changes: 1 addition & 4 deletions kingfisher_scrapy/spiders/colombia.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,7 @@ def start_requests(self):
start_page = 1
if hasattr(self, 'page'):
start_page = int(self.page)
yield scrapy.Request(
url=base_url % start_page,
meta={'kf_filename': 'page{}.json'.format(start_page)}
)
yield scrapy.Request(base_url % start_page, meta={'kf_filename': 'page{}.json'.format(start_page)})

def parse(self, response):
# In Colombia, every day at certain hour they run a process in their system that drops the database and make
Expand Down
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/colombia_bulk.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class ColombiaBulk(ZipSpider):

def start_requests(self):
yield scrapy.Request(
url='https://www.colombiacompra.gov.co/transparencia/datos-json',
'https://www.colombiacompra.gov.co/transparencia/datos-json',
meta={'kf_filename': 'list.html'},
callback=self.parse_list,
)
Expand Down
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/france.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class France(BaseSpider):

def start_requests(self):
yield scrapy.Request(
url='https://www.data.gouv.fr/api/1/datasets/?organization=534fff75a3a7292c64a77de4',
'https://www.data.gouv.fr/api/1/datasets/?organization=534fff75a3a7292c64a77de4',
meta={'kf_filename': 'list.json'},
callback=self.parse_list,
)
Expand Down
5 changes: 1 addition & 4 deletions kingfisher_scrapy/spiders/georgia_opendata.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,4 @@ class GeorgiaOpenData(ZipSpider):
download_timeout = 1200 # 20min

def start_requests(self):
yield scrapy.Request(
url='http://opendata.spa.ge/json/allTenders.zip',
meta={'kf_filename': 'all.json'}
)
yield scrapy.Request('http://opendata.spa.ge/json/allTenders.zip', meta={'kf_filename': 'all.json'})
5 changes: 1 addition & 4 deletions kingfisher_scrapy/spiders/georgia_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,4 @@ class GeorgiaRecords(LinksSpider):
data_type = 'record_package'

def start_requests(self):
yield scrapy.Request(
url='https://odapi.spa.ge/api/records.json',
meta={'kf_filename': 'page1.json'}
)
yield scrapy.Request('https://odapi.spa.ge/api/records.json', meta={'kf_filename': 'page1.json'})
5 changes: 1 addition & 4 deletions kingfisher_scrapy/spiders/georgia_releases.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,4 @@ class GeorgiaReleases(LinksSpider):
data_type = 'release_package'

def start_requests(self):
yield scrapy.Request(
url='https://odapi.spa.ge/api/releases.json',
meta={'kf_filename': 'page1.json'}
)
yield scrapy.Request('https://odapi.spa.ge/api/releases.json', meta={'kf_filename': 'page1.json'})
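4 changes: 2 additions & 2 deletions kingfisher_scrapy/spiders/mexico_administracion_publica_federal.py
Original file line number Diff line number Diff line change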
Expand Up @@ -14,7 +14,7 @@ class MexicoAdministracionPublicaFederal(BaseSpider):

def start_requests(self):
yield scrapy.Request(
url='https://api.datos.gob.mx/v1/contratacionesabiertas',
'https://api.datos.gob.mx/v1/contratacionesabiertas',
meta={'kf_filename': 'page1.json'}
)

Expand All @@ -32,7 +32,7 @@ def parse(self, response):
limit = data['pagination']['pageSize']
while ((page - 1) * limit) < total:
yield scrapy.Request(
url='https://api.datos.gob.mx/v1/contratacionesabiertas?page=%d' % page,
'https://api.datos.gob.mx/v1/contratacionesabiertas?page=%d' % page,
meta={'kf_filename': 'page' + str(page) + '.json'}
)
page += 1
4 changes: 2 additions & 2 deletions kingfisher_scrapy/spiders/mexico_cdmx.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class MexicoCDMXSource(BaseSpider):

def start_requests(self):
yield scrapy.Request(
url='http://www.contratosabiertos.cdmx.gob.mx/api/contratos/todos',
'http://www.contratosabiertos.cdmx.gob.mx/api/contratos/todos',
meta={'kf_filename': 'list.json'},
callback=self.parse_list
)
Expand All @@ -24,7 +24,7 @@ def parse_list(self, response):

for data_item in data:
yield scrapy.Request(
url=data_item['uri'],
data_item['uri'],
meta={'kf_filename': 'id%s.json' % data_item['id']},
callback=self.parse_record
)
Expand Down
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/mexico_grupo_aeroporto.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class MexicoGrupoAeroporto(BaseSpider):

def start_requests(self):
yield scrapy.Request(
url='http://gacmda.gacm.mx:8880/files/opendata/coleccion/concentrado05032019RELEASE.json',
'http://gacmda.gacm.mx:8880/files/opendata/coleccion/concentrado05032019RELEASE.json',
meta={'kf_filename': 'concentrado05032019RELEASE.json'}
)

Expand Down
6 changes: 3 additions & 3 deletions kingfisher_scrapy/spiders/mexico_inai.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class MexicoINAI(BaseSpider):

def start_requests(self):
yield scrapy.Request(
url='https://datos.gob.mx/busca/api/3/action/package_search?q=organization:inai&rows=500',
'https://datos.gob.mx/busca/api/3/action/package_search?q=organization:inai&rows=500',
meta={'kf_filename': 'list.json'},
callback=self.parse_list
)
Expand All @@ -25,7 +25,7 @@ def parse_list(self, response):
if resource['format'] == 'JSON':
kf_filename = 'redirect-' + hashlib.md5(resource['url'].encode('utf-8')).hexdigest() + '.json'
yield scrapy.Request(
url=resource['url'],
resource['url'],
meta={
'kf_filename': kf_filename,
'dont_redirect': True
Expand All @@ -37,7 +37,7 @@ def parse_redirect(self, response):
if response.status == 301:
url = response.headers['Location'].decode("utf-8").replace("open?", "uc?export=download&")
yield scrapy.Request(
url=url,
url,
meta={'kf_filename': 'data-' + hashlib.md5(url.encode('utf-8')).hexdigest() + '.json'},
callback=self.parse
)
Expand Down
6 changes: 3 additions & 3 deletions kingfisher_scrapy/spiders/mexico_jalisco.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class MexicoJalisco(BaseSpider):

def start_requests(self):
yield scrapy.Request(
url='https://contratacionesabiertas.jalisco.gob.mx/OCApi/2017/contracts',
'https://contratacionesabiertas.jalisco.gob.mx/OCApi/2017/contracts',
meta={'kf_filename': 'list.json'},
callback=self.parse_list
)
Expand All @@ -24,7 +24,7 @@ def parse_list(self, response):
datas = [datas[0]]
for data in datas:
yield scrapy.Request(
url=data['URIContract'],
data['URIContract'],
meta={'kf_filename': 'id%s.json' % data['ocid']},
callback=self.parse_record_package
)
Expand All @@ -35,7 +35,7 @@ def parse_record_package(self, response):
if 'packages' in json_data:
for url in json_data['packages']:
yield scrapy.Request(
url=url,
url,
meta={'kf_filename': 'packages-%s.json' % hashlib.md5(url.encode('utf-8')).hexdigest()},
callback=self.parse_release_package
)
Expand Down

0 comments on commit b27b868

Please sign in to comment.