Skip to content

Commit

Permalink
Merge pull request #1081 from open-contracting/1080-kingfisher-process-update
Browse files Browse the repository at this point in the history

feat: update kingfisher process URLs to match new structure
  • Loading branch information
yolile committed Apr 23, 2024
2 parents 064489d + b43cf91 commit 3d9fcb1
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 11 deletions.
12 changes: 7 additions & 5 deletions kingfisher_scrapy/extensions/kingfisher_process_api2.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,19 @@ def spider_opened(self, spider):
'source_id': spider.name,
'data_version': spider.get_start_time('%Y-%m-%d %H:%M:%S'),
'sample': bool(spider.sample),
'note': spider.kingfisher_process_note,
'job': getattr(spider, '_job', None),
'upgrade': spider.ocds_version == '1.0',
}

if spider.kingfisher_process_note:
data['note'] = spider.kingfisher_process_note
if hasattr(spider, '_job'):
data['job'] = spider._job

for step in spider.kingfisher_process_steps:
data[step] = True

# This request must be synchronous, to have the collection ID for the item_scraped handler.
response = self._post_synchronous(spider, 'api/v1/create_collection', data)
response = self._post_synchronous(spider, "/api/collections/", data)

if response.ok:
from twisted.internet import reactor
Expand Down Expand Up @@ -139,8 +142,7 @@ def spider_closed(self, spider, reason):
if spider.pluck or spider.kingfisher_process_keep_collection_open:
return

response = self._post_synchronous(spider, 'api/v1/close_collection', {
'collection_id': self.collection_id,
response = self._post_synchronous(spider, f'/api/collections/{self.collection_id}/close/', {
'reason': reason,
'stats': json.loads(json.dumps(self.stats.get_stats(), default=str)) # for datetime objects
})
Expand Down
16 changes: 10 additions & 6 deletions tests/extensions/test_kingfisher_process_api2.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,14 @@ def test_spider_opened(
expected = {
'source_id': 'test',
'sample': is_sample,
'note': note,
'job': job,
'upgrade': upgrade,
}

if note:
expected['note'] = note
if job:
expected['job'] = job

for step in expected_steps:
expected[step] = True

Expand All @@ -189,7 +193,7 @@ def test_spider_opened(
assert mock.call_count == call_count

data_version = calls[0].args[2].pop('data_version')
assert calls[0].args[1:] == ('api/v1/create_collection', expected)
assert calls[0].args[1:] == ('/api/collections/', expected)

if crawl_time:
assert data_version == '2020-01-01 00:00:00'
Expand All @@ -198,7 +202,7 @@ def test_spider_opened(

if call_count == 2:
calls[1].args[2].pop('stats') # pop() ensures its presence
assert calls[1].args[1:] == ('api/v1/close_collection', {'collection_id': 1, 'reason': 'finished'})
assert calls[1].args[1:] == ('/api/collections/1/close/', {'reason': 'finished'})

for levelname, message in messages:
assert any(r.name == 'test' and r.levelname == levelname and r.message == message for r in caplog.records)
Expand All @@ -208,7 +212,7 @@ def test_spider_opened(
@pytest_twisted.inlineCallbacks
def test_spider_closed_error(tmpdir, caplog):
# We can't mock disconnect_and_join(), etc. as it must run to isolate tests. That said, if the tests run, then
# we know the connection is closed and and the thread is terminated.
# we know the connection is closed and the thread is terminated.

create_response = Response(status_code=200, content={'collection_id': 1})
close_response = Response(status_code=500) # error
Expand All @@ -225,7 +229,7 @@ def test_spider_closed_error(tmpdir, caplog):
assert mock.call_count == 2

calls[1].args[2].pop('stats') # pop() ensures its presence
assert calls[1].args[1:] == ('api/v1/close_collection', {'collection_id': 1, 'reason': 'finished'})
assert calls[1].args[1:] == ('/api/collections/1/close/', {'reason': 'finished'})

assert any(
r.name == 'test' and r.levelname == 'ERROR' and r.message == 'Failed to close collection: HTTP 500 (null) ({})'
Expand Down

0 comments on commit 3d9fcb1

Please sign in to comment.