Skip to content

Commit

Permalink
feat(database_store): Use new convert_exceptions_to_warnings argument
Browse files (browse the repository at this point in the history)
  • Loading branch information
jpmckinney committed May 8, 2024
1 parent 5a99e73 commit b462246
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 6 deletions.
16 changes: 13 additions & 3 deletions kingfisher_scrapy/extensions/database_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import ijson
import psycopg2.sql
from ocdskit.combine import merge
from ocdskit.exceptions import MergeErrorWarning
from scrapy import signals
from scrapy.exceptions import NotConfigured

Expand Down Expand Up @@ -114,16 +115,25 @@ def spider_closed(self, spider, reason):
data,
force_version=spider.database_store_force_version,
ignore_version=spider.database_store_ignore_version,
convert_exceptions_to_warnings=True,
)

filename = os.path.join(crawl_directory, 'data.csv')
spider.logger.info('Writing the JSON data to the %s CSV file', filename)
count = 0
with open(filename, 'w') as f:
writer = csv.writer(f)
for item in data:
writer.writerow([util.json_dumps(item, ensure_ascii=False).replace(r'\u0000', '')])
count += 1
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always", category=MergeErrorWarning)

for item in data:
writer.writerow([util.json_dumps(item, ensure_ascii=False).replace(r'\u0000', '')])
count += 1

logger.error(
"%d OCIDs can't be merged due to structural errors",
len(list(_ for _ in w if issubclass(warning.category, MergeErrorWarning)))
)

spider.logger.info('Replacing the JSON data in the %s table (%s rows)', table_name, count)
self.connection = psycopg2.connect(self.database_url)
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ lxml==4.9.2
# scrapy
ocdsextensionregistry==0.2.2
# via ocdskit
ocdskit[perf]==1.1.12
ocdskit[perf]==1.1.13
# via -r requirements.in
ocdsmerge==0.6.6
# via ocdskit
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt.sha256
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ef31a10a64f9faaa049c269043e542fd9af14075b6e52449fcaae7781c344ff5 requirements.txt
7d5a501d8ad0320c4158ade883d999fd08c2a6a36fb32959e3bca0b7c594d83a requirements.txt
2 changes: 1 addition & 1 deletion requirements_dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ ocdsextensionregistry==0.2.2
# via
# -r requirements.txt
# ocdskit
ocdskit[perf]==1.1.12
ocdskit[perf]==1.1.13
# via -r requirements.txt
ocdsmerge==0.6.6
# via
Expand Down

0 comments on commit b462246

Please sign in to comment.