diff --git a/docs/topics/logging.rst b/docs/topics/logging.rst
index 1a9e975d832..819884ac214 100644
--- a/docs/topics/logging.rst
+++ b/docs/topics/logging.rst
@@ -10,7 +10,11 @@ logging`_ but this may change in the future.
 
 .. _Twisted logging: http://twistedmatrix.com/projects/core/documentation/howto/logging.html
 
-The logging service must be explicitly started through the :func:`scrapy.log.start` function.
+The logging service must be explicitly started through the
+:func:`scrapy.log.start` function to catch top-level Scrapy log messages.
+In addition, each crawler has its own independent log observer
+(automatically attached when the crawler is created) that intercepts its
+spider's log messages.
 
 .. _topics-logging-levels:
 
@@ -55,8 +59,11 @@ scrapy.log module
 
 .. function:: start(logfile=None, loglevel=None, logstdout=None)
 
-    Start the logging facility. This must be called before actually logging any
-    messages. Otherwise, messages logged before this call will get lost.
+    Start the top-level Scrapy logger. This must be called before actually
+    logging any top-level messages (those logged using this module's
+    :func:`~scrapy.log.msg` function instead of the :meth:`Spider.log
+    <scrapy.spider.Spider.log>` method). Otherwise, messages logged before
+    this call will get lost.
 
     :param logfile: the file path to use for logging output. If omitted, the
         :setting:`LOG_FILE` setting will be used. If both are ``None``, the log
diff --git a/scrapy/crawler.py b/scrapy/crawler.py
index 56823166bda..597bb2e9d57 100644
--- a/scrapy/crawler.py
+++ b/scrapy/crawler.py
@@ -128,6 +128,7 @@ def _signal_kill(self, signum, _):
         signame = signal_names[signum]
         log.msg(format='Received %(signame)s twice, forcing unclean shutdown',
                 level=log.INFO, signame=signame)
+        self._stop_logging()
         reactor.callFromThread(self._stop_reactor)
 
     def start(self, stop_after_crawl=True):
@@ -135,6 +136,7 @@ def start(self, stop_after_crawl=True):
         self._start_reactor(stop_after_crawl)
 
     def _start_logging(self):
+        self.log_observer = log.start_from_settings(self.settings)
         log.scrapy_info(self.settings)
 
     def _start_reactor(self, stop_after_crawl=True):
@@ -149,6 +151,9 @@ def _start_reactor(self, stop_after_crawl=True):
         reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
         reactor.run(installSignalHandlers=False)  # blocking call
 
+    def _stop_logging(self):
+        self.log_observer.stop()
+
     def _stop_reactor(self, _=None):
         try:
             reactor.stop()
diff --git a/scrapy/log.py b/scrapy/log.py
index 1f32003e7e3..aa53e357487 100644
--- a/scrapy/log.py
+++ b/scrapy/log.py
@@ -35,15 +35,16 @@ class ScrapyFileLogObserver(log.FileLogObserver):
     def __init__(self, f, level=INFO, encoding='utf-8', crawler=None):
         self.level = level
         self.encoding = encoding
+        self.crawler = crawler
         if crawler:
-            self.crawler = crawler
             self.emit = self._emit_with_crawler
         else:
             self.emit = self._emit
         log.FileLogObserver.__init__(self, f)
 
     def _emit(self, eventDict):
-        ev = _adapt_eventdict(eventDict, self.level, self.encoding)
+        ev = _adapt_eventdict(eventDict, self.level, self.encoding,
+                              self.crawler)
         if ev is not None:
             log.FileLogObserver.emit(self, ev)
         return ev
@@ -55,7 +56,8 @@ def _emit_with_crawler(self, eventDict):
         sname = 'log_count/%s' % level_names.get(level, level)
         self.crawler.stats.inc_value(sname)
 
-def _adapt_eventdict(eventDict, log_level=INFO, encoding='utf-8', prepend_level=True):
+def _adapt_eventdict(eventDict, log_level=INFO, encoding='utf-8',
+                     crawler=None, prepend_level=True):
     """Adapt Twisted log eventDict making it suitable for logging with a Scrapy
     log observer. It may return None to indicate that the event should be
     ignored by a Scrapy log observer.
@@ -78,6 +80,12 @@ def _adapt_eventdict(eventDict, log_level=INFO, encoding='utf-8', prepend_level=
     spider = ev.get('spider')
     if spider:
         ev['system'] = unicode_to_str(spider.name, encoding)
+    if crawler and (not spider or spider.crawler is not crawler):
+        # ignore events not triggered by own spiders in crawlers' observers
+        return
+    if not crawler and spider:
+        # ignore spiders' events in observers without crawler
+        return
 
     lvlname = level_names.get(level, 'NOLEVEL')
     message = ev.get('message')
@@ -140,18 +148,14 @@ def start_from_settings(settings, crawler=None):
         settings['LOG_ENCODING'], crawler)
 
 def scrapy_info(settings):
-    log_observer = start_from_settings(settings)
-    if log_observer:
-        msg("Scrapy %s started (bot: %s)" % (scrapy.__version__,
-            settings['BOT_NAME']))
-
-        msg("Optional features available: %s" % ", ".join(scrapy.optional_features),
-            level=INFO)
+    msg("Scrapy %s started (bot: %s)" % (scrapy.__version__,
+        settings['BOT_NAME']))
 
-        d = dict(overridden_settings(settings))
-        msg(format="Overridden settings: %(settings)r", settings=d, level=INFO)
+    msg("Optional features available: %s" % ", ".join(scrapy.optional_features),
+        level=INFO)
 
-        log_observer.stop()
+    d = dict(overridden_settings(settings))
+    msg(format="Overridden settings: %(settings)r", settings=d, level=INFO)
 
 def start_from_crawler(crawler):
     return start_from_settings(crawler.settings, crawler)
diff --git a/tests/test_log.py b/tests/test_log.py
index 3263cb42eee..113d1000451 100644
--- a/tests/test_log.py
+++ b/tests/test_log.py
@@ -6,6 +6,7 @@
 from scrapy import log
 from scrapy.spider import Spider
 from scrapy.settings import default_settings
+from scrapy.utils.test import get_crawler
 
 
 class LogTest(unittest.TestCase):
@@ -40,10 +41,10 @@ def test_msg_basic(self):
         log.msg("Hello")
         self.assertEqual(self.logged(), "[scrapy] INFO: Hello")
 
-    def test_msg_spider(self):
+    def test_msg_ignore_spider(self):
         spider = Spider("myspider")
         log.msg("Hello", spider=spider)
-        self.assertEqual(self.logged(), "[myspider] INFO: Hello")
+        self.failIf(self.logged())
 
     def test_msg_level1(self):
         log.msg("Hello", level=log.WARNING)
@@ -57,11 +58,6 @@ def test_msg_wrong_level(self):
         log.msg("Hello", level=9999)
         self.assertEqual(self.logged(), "[scrapy] NOLEVEL: Hello")
 
-    def test_msg_level_spider(self):
-        spider = Spider("myspider")
-        log.msg("Hello", spider=spider, level=log.WARNING)
-        self.assertEqual(self.logged(), "[myspider] WARNING: Hello")
-
     def test_msg_encoding(self):
         log.msg(u"Price: \xa3100")
         self.assertEqual(self.logged(), "[scrapy] INFO: Price: \xc2\xa3100")
@@ -133,5 +129,41 @@ def test_msg_encoding(self):
 #        self.assertEqual(self.first_log_line(), "[scrapy] ERROR: \xa3")
 
 
+class CrawlerScrapyFileLogObserverTest(unittest.TestCase):
+
+    def setUp(self):
+        self.f = BytesIO()
+        self.crawler = get_crawler(Spider)
+        self.spider = self.crawler.spider = self.crawler._create_spider('test')
+        self.log_observer = log.ScrapyFileLogObserver(self.f, log.INFO,
+                                                      'utf-8', self.crawler)
+        self.log_observer.start()
+
+    def tearDown(self):
+        self.flushLoggedErrors()
+        self.log_observer.stop()
+
+    def logged(self):
+        return self.f.getvalue().strip()[25:]
+
+    def test_msg_basic(self):
+        log.msg("Hello", spider=self.spider)
+        self.assertEqual(self.logged(), "[test] INFO: Hello")
+
+    def test_msg_ignore_scrapy_channel(self):
+        log.msg("Hello")
+        self.failIf(self.logged())
+
+    def test_msg_ignore_another_crawler(self):
+        crawler = get_crawler(Spider)
+        log.msg("Hello", spider=crawler._create_spider('test'))
+        self.failIf(self.logged())
+
+    def test_msg_stats_log(self):
+        assert self.crawler.stats.get_value('log_count/INFO', 0) == 0
+        log.msg("Hello", spider=self.spider)
+        self.assertEqual(self.crawler.stats.get_value('log_count/INFO'), 1)
+
+
 if __name__ == "__main__":
     unittest.main()