Skip to content

Commit

Permalink
Apply suggestions from code review
Browse files Browse the repository at this point in the history
Co-authored-by: James McKinney <26463+jpmckinney@users.noreply.github.com>
  • Loading branch information
yolile and jpmckinney committed May 26, 2021
1 parent 1244bd0 commit 6cd15ab
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion kingfisher_scrapy/base_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def __init__(self, sample=None, note=None, from_date=None, until_date=None, craw
self.release_pointer = release_pointer
self.truncate = int(truncate) if truncate else None

# DatabaseStore-related argument
# DatabaseStore-related argument.
self.compile = compile_releases == 'true'

self.query_string_parameters = {}
Expand Down
10 changes: 5 additions & 5 deletions kingfisher_scrapy/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,12 @@ def _write_file(self, path, data):

class DatabaseStore:
"""
If DATABASE_URL is set and crawl_time used, store the spider data in a PostgreSQL database, incrementally
If the ``DATABASE_URL`` Scrapy setting and the ``crawl_time`` spider argument are set, store the spider data in a
PostgreSQL database, incrementally.
A table with a \"data\" column is created if it doesn't exist, named after the spider. If the table isn't empty,
the crawl starts with the `from_date` spider argument set to the maximum value of the `date` field of the OCDS data
stored in the \"data\" column. If the spider returns records, each record must set the `compiledRelease` field."
A table with a "data" column is created if it doesn't exist, named after the spider. If the table isn't empty, the
crawl starts with the ``from_date`` spider argument set to the maximum value of the ``date`` field of the OCDS data
stored in the "data" column. If the spider returns records, each record must set the ``compiledRelease`` field.
"""

connection = None
Expand Down Expand Up @@ -188,7 +189,6 @@ def format_from_date(self, date, date_format, valid_formats):
return date[:4]

def spider_opened(self, spider):

if not spider.crawl_time:
self.data_use_error = 'The crawl_time argument must be set'

Expand Down

0 comments on commit 6cd15ab

Please sign in to comment.