Skip to content

Commit

Permalink
spiders: print location of stored files
Browse files Browse the repository at this point in the history
  • Loading branch information
Ravf95 committed Aug 3, 2022
1 parent fba225e commit 0e5c224
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
5 changes: 5 additions & 0 deletions kingfisher_scrapy/extensions/files_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def from_crawler(cls, crawler):

extension = cls(directory)
crawler.signals.connect(extension.item_scraped, signal=signals.item_scraped)
crawler.signals.connect(extension.spider_closed, signal=signals.spider_closed)

return extension

Expand All @@ -43,6 +44,10 @@ def spider_opened(self, spider):
path = os.path.join(self.relative_crawl_directory(spider), 'scrapyd-job.txt')
self._write_file(path, spider._job)

def spider_closed(self, spider):
    """
    Logs, at INFO level on the spider's logger, the directory in which the crawl's files are stored.

    The logged path is the extension's ``directory`` joined with the crawl's relative directory for ``spider``.
    """
    base_directory = self.directory
    crawl_directory = self.relative_crawl_directory(spider)
    path = os.path.join(base_directory, crawl_directory)
    spider.logger.info(f'The download data has been stored in this directory {path}')

def item_scraped(self, item, spider):
"""
If the item is a File or FileItem, writes its data to the filename in the crawl's directory.
Expand Down
5 changes: 4 additions & 1 deletion kingfisher_scrapy/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,9 @@
# https://docs.scrapy.org/en/latest/topics/media-pipeline.html#std:setting-FILES_STORE
# Read from the FILES_STORE environment variable; falls back to 'data' when it is unset.
FILES_STORE = os.getenv('FILES_STORE', 'data')

# https://docs.scrapy.org/en/latest/topics/stats.html#stats-collection
# Read from the KINGFISHER_STATS_DUMP environment variable; falls back to False when it is unset.
# NOTE(review): when the variable is set, os.getenv returns a string (e.g. 'False', which is
# truthy in plain Python) - presumably Scrapy coerces it when reading the setting; confirm.
STATS_DUMP = os.getenv('KINGFISHER_STATS_DUMP', False)

# https://docs.scrapy.org/en/latest/topics/spider-middleware.html#httperror-allow-all
HTTPERROR_ALLOW_ALL = True

Expand All @@ -169,4 +172,4 @@
# Scrapyd won't have (and doesn't need) access to this module.
if os.getenv('SCRAPY_PROJECT') is None:
# https://docs.scrapy.org/en/latest/topics/commands.html#commands-module
COMMANDS_MODULE = 'kingfisher_scrapy.commands'
COMMANDS_MODULE = 'kingfisher_scrapy.commands'

0 comments on commit 0e5c224

Please sign in to comment.