Skip to content

Commit

Permalink
test: Upgrade ocdskit. Test for ocid presence in merge warnings.
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Jun 13, 2023
1 parent 7fee0a9 commit 6236f5f
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 6 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Expand Up @@ -95,7 +95,7 @@ lxml==4.9.2
# scrapy
ocdsextensionregistry==0.2.2
# via ocdskit
ocdskit==1.1.6
ocdskit==1.1.7
# via -r requirements.in
ocdsmerge==0.6.4
# via
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt.sha256
@@ -1 +1 @@
a2bcc63876e1476cb6303aa1c82b57b4d54b1bcb372d025f2a01827918bb3221 requirements.txt
acf1c2a3ac73d374efb3e47f304a53944145d03b393fe32a9b4550a79d5310f2 requirements.txt
2 changes: 1 addition & 1 deletion requirements_dev.txt
Expand Up @@ -165,7 +165,7 @@ ocdsextensionregistry==0.2.2
# via
# -r requirements.txt
# ocdskit
ocdskit==1.1.6
ocdskit==1.1.7
# via -r requirements.txt
ocdsmerge==0.6.4
# via
Expand Down
54 changes: 51 additions & 3 deletions tests/extensions/test_database_store.py
Expand Up @@ -5,6 +5,7 @@

import psycopg2
import pytest
from ocdsmerge.exceptions import DuplicateIdValueWarning
from scrapy.exceptions import NotConfigured

from kingfisher_scrapy.extensions import DatabaseStore, FilesStore
Expand Down Expand Up @@ -66,7 +67,7 @@ def test_spider_opened_no_resume(cursor, caplog, tmpdir, from_date, default_from

assert table_exists == 'test'
assert spider.from_date == from_date
assert [record.message for record in caplog.records][-5:] == messages
assert [record.message for record in caplog.records] == messages


@pytest.mark.skipif(SKIP_TEST_IF, reason='KINGFISHER_COLLECT_DATABASE_URL must be set')
Expand Down Expand Up @@ -94,12 +95,59 @@ def test_spider_opened_resume(caplog, tmpdir):
extension.spider_opened(spider)

assert spider.from_date == datetime(2021, 5, 26, 0, 0)
assert [record.message for record in caplog.records][-5:] == [
assert [record.message for record in caplog.records] == [
'Getting the date from which to resume the crawl from the test table',
'Resuming the crawl from 2021-05-26',
]


@pytest.mark.skipif(SKIP_TEST_IF, reason='KINGFISHER_COLLECT_DATABASE_URL must be set')
def test_spider_closed_warnings(caplog, tmpdir):
spider = spider_with_crawler(crawl_time='2021-05-25T00:00:00',
settings={'DATABASE_URL': DATABASE_URL, 'FILES_STORE': tmpdir})
spider.data_type = 'release_package'
spider.compile_releases = True

extension = DatabaseStore.from_crawler(spider.crawler)

files_store_extension = FilesStore.from_crawler(spider.crawler)

response = Mock()
response.body = b'{"releases":[{"ocid":"x","date":"2021-05-26T10:00:00Z","parties":[{"id":"x"},{"id":"x"}]}]}'
response.request = Mock()
response.request.url = 'https://example.com/remote.json'
response.request.meta = {'file_name': 'file.json'}
item = spider.build_file_from_response(response, file_name='file-x.json', data_type='release_package')
files_store_extension.item_scraped(item, spider)

response.body = b'{"releases":[{"ocid":"y","date":"2021-05-26T10:00:00Z","parties":[{"id":"y"}]}]}'
item = spider.build_file_from_response(response, file_name='file-y.json', data_type='release_package')
files_store_extension.item_scraped(item, spider)

response.body = b'{"releases":[{"ocid":"z","date":"2021-05-26T10:00:00Z","parties":[{"id":"z"},{"id":"z"}]}]}'
item = spider.build_file_from_response(response, file_name='file-z.json', data_type='release_package')
files_store_extension.item_scraped(item, spider)

extension.spider_opened(spider)
caplog.clear()

with pytest.warns(DuplicateIdValueWarning) as records:
with caplog.at_level(logging.INFO):
extension.spider_closed(spider, 'finished')

assert spider.from_date == datetime(2021, 5, 26, 0, 0)
assert [record.message for record in caplog.records] == [
f'Reading the {tmpdir}/test/20210525_000000 crawl directory with the empty prefix',
'Creating generator of compiled releases',
f'Writing the JSON data to the {tmpdir}/test/20210525_000000/data.csv CSV file',
'Replacing the JSON data in the test table',
]
assert [record.message for record in records] == [
("x: Multiple objects have the `id` value 'x' in the `parties` array"),
("z: Multiple objects have the `id` value 'z' in the `parties` array"),
]


@pytest.mark.skipif(SKIP_TEST_IF, reason='KINGFISHER_COLLECT_DATABASE_URL must be set')
@pytest.mark.parametrize('data,data_type,sample,compile_releases', [
(b'{"releases": [{"date": "2021-05-26T10:00:00Z"}]}', 'release_package', None, False),
Expand Down Expand Up @@ -158,7 +206,7 @@ def test_spider_closed(cursor, caplog, tmpdir, data, data_type, sample, compile_
]
if compile_releases:
expected_messages.insert(1, 'Creating generator of compiled releases')
assert [record.message for record in caplog.records][-5:] == expected_messages
assert [record.message for record in caplog.records] == expected_messages


@pytest.mark.skipif(SKIP_TEST_IF, reason='KINGFISHER_COLLECT_DATABASE_URL must be set')
Expand Down

0 comments on commit 6236f5f

Please sign in to comment.