Skip to content

Commit

Permalink
Merge pull request #857 from open-contracting/822-italy
Browse files Browse the repository at this point in the history
update italy spider
  • Loading branch information
yolile committed Nov 5, 2021
2 parents 99de36b + f3d3dd6 commit 1500886
Showing 1 changed file with 9 additions and 11 deletions.
20 changes: 9 additions & 11 deletions kingfisher_scrapy/spiders/italy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ class Italy(SimpleSpider):
AppaltiPOP
Bulk download documentation
https://www.appaltipop.it/it/download
Swagger API documentation
https://www.appaltipop.it/api/v1/
"""
name = 'italy'

Expand All @@ -18,9 +20,8 @@ class Italy(SimpleSpider):

def start_requests(self):
yield scrapy.Request(
# From https://www.appaltipop.it/it/download
'https://www.appaltipop.it/_next/data/LxpUO4Pg-S_nnq33fzaED/it/tenders.json',
meta={'file_name': 'tenders.json'},
'https://www.appaltipop.it/api/v1/buyers',
meta={'file_name': 'buyers.json'},
callback=self.parse_list
)

Expand All @@ -29,17 +30,14 @@ def parse_list(self, response):
data = response.json()
# The data looks like:
# {
# "pageProps": {
# other fields,
# "buyers": [ ... ],
# other fields
# },
# "__N_SSG": ...
# "total": { ... },
# "max_score": ...,
# "hits": [ ... ]
# }

for buyer in data['pageProps']['buyers']:
for buyer in data['hits']:
# The first resource in the list is the OCDS JSON file; the second is an XLSX file
resource = buyer['appaltipop:releases/0/buyer/dataSource/resources'][0]
resource = buyer['_source']['appaltipop:releases/0/buyer/dataSource/resources'][0]

# The JSON file path looks like 'data/IT-CF-01232710374/ocds.json'
file_path = resource['appaltipop:releases/0/buyer/resource/url']
Expand Down

0 comments on commit 1500886

Please sign in to comment.