Skip to content

Commit

Permalink
feat(italy_anac): set release ids as ocid when no ocid is present
Browse files Browse the repository at this point in the history
  • Loading branch information
yolile committed Nov 13, 2023
1 parent 360c7c4 commit 43fb860
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 10 deletions.
13 changes: 12 additions & 1 deletion kingfisher_scrapy/spiders/italy_anac.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import scrapy

from kingfisher_scrapy.base_spiders import SimpleSpider
from kingfisher_scrapy.util import components, handle_http_error
from kingfisher_scrapy.util import components, handle_http_error, json_dumps


class ItalyANAC(SimpleSpider):
Expand Down Expand Up @@ -30,3 +30,14 @@ def parse_list(self, response):
for resource in result['resources']:
if resource['format'].upper() == 'JSON':
yield self.build_request(resource['url'], formatter=components(-2))

@handle_http_error
def parse(self, response):
    """
    Backfill missing ``ocid`` fields before handing the response to the parent ``parse``.

    Some releases are published without an ``ocid``, which causes Kingfisher Process to fail. As a fallback,
    the ``ocid`` is built from the first three hyphen-separated parts of the release ``id`` (for example,
    an ``id`` of ``ocds-hu01ve-7608611-01`` gives ``ocds-hu01ve-7608611``).

    The parsed JSON is serialized again and set as the response body, so that the parent implementation
    receives the patched data.
    """
    package = response.json()
    for entry in package['releases']:
        # Releases that already carry an ocid are left untouched.
        if 'ocid' in entry:
            continue
        leading_parts = entry['id'].split('-')[:3]
        entry['ocid'] = '-'.join(leading_parts)
    patched_response = response.replace(body=json_dumps(package))
    yield from super().parse(patched_response)
9 changes: 7 additions & 2 deletions kingfisher_scrapy/spiders/paraguay_dncp_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,13 @@

from kingfisher_scrapy.base_spiders import SimpleSpider
from kingfisher_scrapy.exceptions import AccessTokenError, MissingEnvVarError
from kingfisher_scrapy.util import (components, date_range_by_interval, handle_http_error, parameters,
replace_parameters)
from kingfisher_scrapy.util import (
components,
date_range_by_interval,
handle_http_error,
parameters,
replace_parameters,
)


class ParaguayDNCPBase(SimpleSpider):
Expand Down
11 changes: 9 additions & 2 deletions kingfisher_scrapy/spiders/ukraine.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
import scrapy

from kingfisher_scrapy.base_spiders import SimpleSpider
from kingfisher_scrapy.util import (append_path_components, browser_user_agent, components, handle_http_error, join,
parameters, replace_parameters)
from kingfisher_scrapy.util import (
append_path_components,
browser_user_agent,
components,
handle_http_error,
join,
parameters,
replace_parameters,
)


class Ukraine(SimpleSpider):
Expand Down
12 changes: 9 additions & 3 deletions tests/test_spidermiddlewares.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,15 @@

from kingfisher_scrapy.base_spiders import CompressedFileSpider, SimpleSpider
from kingfisher_scrapy.items import File, FileError, FileItem
from kingfisher_scrapy.spidermiddlewares import (AddPackageMiddleware, ConcatenatedJSONMiddleware,
LineDelimitedMiddleware, ReadDataMiddleware, ResizePackageMiddleware,
RetryDataErrorMiddleware, RootPathMiddleware)
from kingfisher_scrapy.spidermiddlewares import (
AddPackageMiddleware,
ConcatenatedJSONMiddleware,
LineDelimitedMiddleware,
ReadDataMiddleware,
ResizePackageMiddleware,
RetryDataErrorMiddleware,
RootPathMiddleware,
)
from tests import response_fixture, spider_with_crawler


Expand Down
11 changes: 9 additions & 2 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,15 @@
import pytest
import scrapy

from kingfisher_scrapy.util import (components, date_range_by_interval, get_parameter_value, handle_http_error, join,
parameters, replace_parameters)
from kingfisher_scrapy.util import (
components,
date_range_by_interval,
get_parameter_value,
handle_http_error,
join,
parameters,
replace_parameters,
)
from tests import spider_with_crawler


Expand Down

0 comments on commit 43fb860

Please sign in to comment.