diff --git a/kingfisher_scrapy/extensions/kingfisher_process_api2.py b/kingfisher_scrapy/extensions/kingfisher_process_api2.py index b406473c..700d6c47 100644 --- a/kingfisher_scrapy/extensions/kingfisher_process_api2.py +++ b/kingfisher_scrapy/extensions/kingfisher_process_api2.py @@ -50,6 +50,8 @@ class KingfisherProcessAPI2: This extension ignores items generated by the :ref:`pluck` command. """ + base_url = "/api/collections" + def __init__(self, url, stats, rabbit_url, rabbit_exchange_name, rabbit_routing_key): self.url = url self.stats = stats @@ -93,16 +95,19 @@ def spider_opened(self, spider): 'source_id': spider.name, 'data_version': spider.get_start_time('%Y-%m-%d %H:%M:%S'), 'sample': bool(spider.sample), - 'note': spider.kingfisher_process_note, - 'job': getattr(spider, '_job', None), 'upgrade': spider.ocds_version == '1.0', } + if spider.kingfisher_process_note: + data['note'] = spider.kingfisher_process_note + if getattr(spider, '_job', None): + data['job'] = getattr(spider, '_job', None) + for step in spider.kingfisher_process_steps: data[step] = True # This request must be synchronous, to have the collection ID for the item_scraped handler. - response = self._post_synchronous(spider, 'api/v1/create_collection', data) + response = self._post_synchronous(spider, f"{self.base_url}/", data) if response.ok: from twisted.internet import reactor @@ -139,8 +144,7 @@ def spider_closed(self, spider, reason): if spider.pluck or spider.kingfisher_process_keep_collection_open: return - response = self._post_synchronous(spider, 'api/v1/close_collection', { - 'collection_id': self.collection_id, + response = self._post_synchronous(spider, f'{self.base_url}/{self.collection_id}/close/', { 'reason': reason, 'stats': json.loads(json.dumps(self.stats.get_stats(), default=str)) # for datetime objects }) diff --git a/tests/extensions/test_kingfisher_process_api2.py b/tests/extensions/test_kingfisher_process_api2.py index ff7d86d6..96f40e72 100644 --- a/tests/extensions/test_kingfisher_process_api2.py +++ b/tests/extensions/test_kingfisher_process_api2.py @@ -177,10 +177,14 @@ def test_spider_opened( expected = { 'source_id': 'test', 'sample': is_sample, - 'note': note, - 'job': job, 'upgrade': upgrade, } + + if note: + expected['note'] = note + if job: + expected['job'] = job + for step in expected_steps: expected[step] = True @@ -189,7 +193,7 @@ def test_spider_opened( assert mock.call_count == call_count data_version = calls[0].args[2].pop('data_version') - assert calls[0].args[1:] == ('api/v1/create_collection', expected) + assert calls[0].args[1:] == (f'{KingfisherProcessAPI2.base_url}/', expected) if crawl_time: assert data_version == '2020-01-01 00:00:00' @@ -198,7 +202,7 @@ def test_spider_opened( if call_count == 2: calls[1].args[2].pop('stats') # pop() ensures its presence - assert calls[1].args[1:] == ('api/v1/close_collection', {'collection_id': 1, 'reason': 'finished'}) + assert calls[1].args[1:] == (f'{KingfisherProcessAPI2.base_url}/1/close/', {'reason': 'finished'}) for levelname, message in messages: assert any(r.name == 'test' and r.levelname == levelname and r.message == message for r in caplog.records) @@ -208,7 +212,7 @@ def test_spider_opened( @pytest_twisted.inlineCallbacks def test_spider_closed_error(tmpdir, caplog): # We can't mock disconnect_and_join(), etc. as it must run to isolate tests. That said, if the tests run, then - # we know the connection is closed and and the thread is terminated. + # we know the connection is closed and the thread is terminated. create_response = Response(status_code=200, content={'collection_id': 1}) close_response = Response(status_code=500) # error @@ -225,7 +229,7 @@ def test_spider_closed_error(tmpdir, caplog): assert mock.call_count == 2 calls[1].args[2].pop('stats') # pop() ensures its presence - assert calls[1].args[1:] == ('api/v1/close_collection', {'collection_id': 1, 'reason': 'finished'}) + assert calls[1].args[1:] == (f'{KingfisherProcessAPI2.base_url}/1/close/', {'reason': 'finished'}) assert any( r.name == 'test' and r.levelname == 'ERROR' and r.message == 'Failed to close collection: HTTP 500 (null) ({})'