diff --git a/kingfisher_scrapy/extensions/files_store.py b/kingfisher_scrapy/extensions/files_store.py
index 393ad917d..908924f82 100644
--- a/kingfisher_scrapy/extensions/files_store.py
+++ b/kingfisher_scrapy/extensions/files_store.py
@@ -35,6 +35,7 @@ def from_crawler(cls, crawler):
         extension = cls(directory)
 
         crawler.signals.connect(extension.item_scraped, signal=signals.item_scraped)
+        crawler.signals.connect(extension.spider_closed, signal=signals.spider_closed)
 
         return extension
 
@@ -43,6 +44,10 @@ def spider_opened(self, spider):
             path = os.path.join(self.relative_crawl_directory(spider), 'scrapyd-job.txt')
            self._write_file(path, spider._job)
 
+    def spider_closed(self, spider):
+        path = os.path.join(self.directory, self.relative_crawl_directory(spider))
+        spider.logger.info(f'The downloaded data has been stored in {path}')
+
     def item_scraped(self, item, spider):
         """
         If the item is a File or FileItem, writes its data to the filename in the crawl's directory.
diff --git a/kingfisher_scrapy/settings.py b/kingfisher_scrapy/settings.py
index 90f700d6d..a5d894ca4 100644
--- a/kingfisher_scrapy/settings.py
+++ b/kingfisher_scrapy/settings.py
@@ -160,6 +160,9 @@
 # https://docs.scrapy.org/en/latest/topics/media-pipeline.html#std:setting-FILES_STORE
 FILES_STORE = os.getenv('FILES_STORE', 'data')
 
+# https://docs.scrapy.org/en/latest/topics/stats.html#stats-collection
+STATS_DUMP = os.getenv('KINGFISHER_STATS_DUMP', False)
+
 # https://docs.scrapy.org/en/latest/topics/spider-middleware.html#httperror-allow-all
 HTTPERROR_ALLOW_ALL = True
 
@@ -169,4 +172,4 @@
 # Scrapyd won't have (and doesn't need) access to this module.
 if os.getenv('SCRAPY_PROJECT') is None:
     # https://docs.scrapy.org/en/latest/topics/commands.html#commands-module
-    COMMANDS_MODULE = 'kingfisher_scrapy.commands'
+    COMMANDS_MODULE = 'kingfisher_scrapy.commands'
\ No newline at end of file
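
The first file's changes follow Scrapy's standard extension pattern: `from_crawler()` builds the extension and subscribes its methods to crawler signals, so `spider_closed` fires once the crawl finishes. Below is a minimal, self-contained sketch of that pattern; the class name `DirectoryLogger` and the per-spider subdirectory layout are illustrative assumptions, not the project's actual code.

```python
# Sketch of the Scrapy extension signal pattern the first hunk relies on.
# DirectoryLogger and the spider.name subdirectory are hypothetical.
import os

from scrapy import signals
from scrapy.exceptions import NotConfigured


class DirectoryLogger:
    """Logs the crawl's storage directory when the spider closes."""

    def __init__(self, directory):
        self.directory = directory

    @classmethod
    def from_crawler(cls, crawler):
        directory = crawler.settings.get('FILES_STORE')
        if not directory:
            raise NotConfigured('FILES_STORE is not set.')
        extension = cls(directory)
        # Scrapy dispatches spider_closed(spider, reason); a receiver may
        # accept a subset of the documented arguments, as here.
        crawler.signals.connect(extension.spider_closed, signal=signals.spider_closed)
        return extension

    def spider_closed(self, spider):
        # Assumed layout: one subdirectory per spider.
        path = os.path.join(self.directory, spider.name)
        spider.logger.info('The downloaded data has been stored in %s', path)
```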
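
One caveat on the settings change (an observation, not part of the patch): `os.getenv` returns a string whenever the variable is set, so `STATS_DUMP` can end up as the string `'False'` rather than a boolean. Scrapy coerces string values for boolean settings via `Settings.getbool`, so this usually behaves as intended, but an explicit parse avoids surprises for unrecognized values. A sketch, using a hypothetical `env_bool` helper:

```python
import os


def env_bool(name, default=False):
    """Parse an environment variable as a boolean, treating common
    false-y spellings ('', '0', 'false', 'no') as False."""
    value = os.getenv(name)
    if value is None:
        return default
    return value.strip().lower() not in ('', '0', 'false', 'no')


STATS_DUMP = env_bool('KINGFISHER_STATS_DUMP')
```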