Skip to content

Commit

Permalink
refactor: italy anac spider
Browse files Browse the repository at this point in the history
  • Loading branch information
Ravf95 committed Nov 25, 2021
1 parent 2dbee7d commit 66a63af
Showing 1 changed file with 19 additions and 13 deletions.
32 changes: 19 additions & 13 deletions kingfisher_scrapy/spiders/italy_anac.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,32 @@
from kingfisher_scrapy.base_spider import PeriodicSpider
from kingfisher_scrapy.util import components
import scrapy

from kingfisher_scrapy.base_spider import SimpleSpider
from kingfisher_scrapy.util import components, handle_http_error

class ItalyANAC(PeriodicSpider):

class ItalyANAC(SimpleSpider):
"""
Domain
Italy ANAC
API documentation
https://dati.anticorruzione.it/opendata/about
Bulk download documentation
https://dati.anticorruzione.it/opendata/organization/anticorruzione
"""
name = 'italy_anac'
download_timeout = 99999

# BaseSpider
date_format = 'year-month'
default_from_date = '2018-01'
default_until_date = '2020-12'

# PeriodicSpider
pattern = 'https://dati.anticorruzione.it/' \
'opendata/download/dataset/ocds/filesystem/bulk/{0.year:d}/{0.month:02d}.json'
formatter = staticmethod(components(-1))

# SimpleSpider
data_type = 'release_package'

def start_requests(self):
    """
    Request the package search listing for the query ``ocds``.

    The response is passed to :meth:`parse_list`, and the request carries
    ``list.json`` as its ``file_name`` meta value.
    """
    search_url = 'https://dati.anticorruzione.it/opendata/api/3/action/package_search?q=ocds'
    request_meta = {'file_name': 'list.json'}
    yield scrapy.Request(search_url, meta=request_meta, callback=self.parse_list)

@handle_http_error
def parse_list(self, response):
    """
    Yield one request for each JSON resource in the package search response.

    The response body is read as JSON; every entry under
    ``result.results[*].resources`` whose ``format`` equals ``JSON``
    (compared case-insensitively) is turned into a request through
    ``self.build_request``.
    """
    packages = response.json()['result']['results']
    for package in packages:
        for entry in package['resources']:
            # Resources published in any other format are skipped.
            if entry['format'].upper() != 'JSON':
                continue
            # NOTE(review): components(-2) presumably names the file after the
            # second-to-last URL path component -- confirm in kingfisher_scrapy.util.
            yield self.build_request(entry['url'], formatter=components(-2))

0 comments on commit 66a63af

Please sign in to comment.