Skip to content

Commit

Permalink
Add comments to each URL
Browse files Browse the repository at this point in the history
  • Loading branch information
aguilerapy committed Mar 29, 2021
1 parent a832478 commit e2ecb5b
Show file tree
Hide file tree
Showing 9 changed files with 11 additions and 1 deletion.
1 change: 1 addition & 0 deletions kingfisher_scrapy/spiders/chile_compra_bulk.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,5 @@ def build_file(self, file_name=None, url=None, data=None, **kwargs):
return super().build_file(file_name=file_name, url=url, data=data, **kwargs)

def get_formatter(self):
# URL looks like https://ocds.blob.core.windows.net/ocds/202103.zip
# NOTE(review): components(-1) presumably selects the last path component ("202103.zip" here) - confirm against the helper.
return components(-1)
1 change: 1 addition & 0 deletions kingfisher_scrapy/spiders/chile_compra_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,5 @@ class ChileCompraRecords(ChileCompraBaseSpider):

def handle_item(self, item):
url = 'https://apis.mercadopublico.cl/OCDS/data/record/' + item['ocid'].replace('ocds-70d2nz-', '')
# URL looks like https://apis.mercadopublico.cl/OCDS/data/record/<ocid without the 'ocds-70d2nz-' prefix>
yield self.build_request(url, formatter=components(-2))
1 change: 1 addition & 0 deletions kingfisher_scrapy/spiders/chile_compra_releases.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,5 @@ class ChileCompraReleases(ChileCompraBaseSpider):
def handle_item(self, item):
for key in item:
if key.startswith('url'):
# URL looks like http://api.mercadopublico.cl/APISOCDS/OCDS/listaOCDSAgnoMes/2021/03/9500/100
yield self.build_request(item[key], formatter=components(-2))
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,6 @@ def parse_list(self, response):
date = int(resource['url'][-9:-5])
if not (self.from_date.year <= date <= self.until_date.year):
continue
# URL looks like
# https://pjcrdatosabiertos.blob.core.windows.net/datosabiertos/OpenContracting/2021.json
yield self.build_request(resource['url'], formatter=components(-1))
1 change: 1 addition & 0 deletions kingfisher_scrapy/spiders/dominican_republic.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def parse_list(self, response):
for url in urls:
if '/JSON_DGCP_' in url:
if self.from_date and self.until_date:
# URL looks like https://www.dgcp.gob.do/new_dgcp/documentos/andres/JSON_DGCP_2019.rar
date = int(url[-8:-4])
if not (self.from_date.year <= date <= self.until_date.year):
continue
Expand Down
1 change: 1 addition & 0 deletions kingfisher_scrapy/spiders/indonesia_bandung.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,5 @@ def parse_list(self, response):
callback=self.parse_list)

def get_formatter(self):
# URL looks like https://birms.bandung.go.id/api/newcontract/ocds-afzrfb-b-2021-131978
# NOTE(review): components(-1) presumably selects the last path component ("ocds-afzrfb-b-2021-131978" here) - confirm.
return components(-1)
1 change: 1 addition & 0 deletions kingfisher_scrapy/spiders/nepal_portal.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@ class NepalPortal(PeriodicSpider):
pattern = 'http://ppip.gov.np/bulk-download/{}'

def get_formatter(self):
# URL looks like http://ppip.gov.np/bulk-download/2018
# NOTE(review): components(-1) presumably selects the last path component ("2018" here) - confirm against the helper.
return components(-1)
2 changes: 2 additions & 0 deletions kingfisher_scrapy/spiders/portugal.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,6 @@ def parse_list(self, response):
description = resource['description']
if description and 'ocds' in description.lower():
# Presently, only one URL matches.
# URL looks like
# https://dados.gov.pt/s/resources/ocds-portal-base-www-base-gov-pt/20201007-153042/base2-pt-ocds-202010.zip
yield self.build_request(resource['url'], formatter=components(-2))
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/zambia.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ def parse_list(self, response):
urls = response.json()['packagesPerMonth']
for url in urls:
if self.from_date and self.until_date:
# URL looks like https://www.zppa.org.zm/ocds/services/recordpackage/getrecordpackage/2016/7
year = int(url[69:73])
month = int(url[74:])
if not ((self.from_date.year <= year <= self.until_date.year)
and (self.from_date.month <= month <= self.until_date.month)):
continue
# URL looks like https://www.zppa.org.zm/ocds/services/recordpackage/getrecordpackage/2016/7
yield self.build_request(url, formatter=join(components(-2), extension='zip'))

0 comments on commit e2ecb5b

Please sign in to comment.