Support multiple simultaneous LogObservers listening different crawlers
curita committed Aug 12, 2014
1 parent 89df18b commit 900a487
Showing 4 changed files with 71 additions and 23 deletions.
13 changes: 10 additions & 3 deletions docs/topics/logging.rst
@@ -10,7 +10,11 @@ logging`_ but this may change in the future.

.. _Twisted logging: http://twistedmatrix.com/projects/core/documentation/howto/logging.html

The logging service must be explicitly started through the :func:`scrapy.log.start` function.
The logging service must be explicitly started through the
:func:`scrapy.log.start` function to catch Scrapy's top-level log messages.
On top of that, each crawler has its own independent log observer
(automatically attached when it is created) that intercepts its spider's log
messages.
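
For illustration, a minimal sketch of how the two observer levels divide the work under this change (assuming the 0.24-era module-level ``scrapy.log`` API shown in this commit; the file name and message text are made up):

    from scrapy import log

    log.start(logfile='scrapy.log', loglevel=log.INFO)  # top-level observer
    log.msg("Engine started")  # no spider attached: caught by the top-level observer

    # Inside a spider callback, Spider.log() tags the event with the spider,
    # so only that spider's crawler observer records it:
    # self.log("Parsed a page", level=log.DEBUG)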

.. _topics-logging-levels:

@@ -55,8 +59,11 @@ scrapy.log module

.. function:: start(logfile=None, loglevel=None, logstdout=None)

Start the logging facility. This must be called before actually logging any
messages. Otherwise, messages logged before this call will get lost.
Start the top-level Scrapy logger. This must be called before logging any
top-level messages (those logged using this module's :func:`~scrapy.log.msg`
function instead of the :meth:`Spider.log <scrapy.spider.Spider.log>` method).
Otherwise, messages logged before this call will be lost.

:param logfile: the file path to use for logging output. If omitted, the
:setting:`LOG_FILE` setting will be used. If both are ``None``, the log
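A usage sketch for :func:`scrapy.log.start` (argument values are illustrative; when an argument is omitted it falls back to the corresponding ``LOG_*`` setting, as the docstring above describes for ``LOG_FILE``):

    from scrapy import log

    # Explicit arguments take precedence over LOG_FILE / LOG_LEVEL / LOG_STDOUT.
    log.start(logfile='/tmp/scrapy.log', loglevel=log.WARNING, logstdout=False)

    log.msg("This is recorded", level=log.ERROR)  # >= WARNING, written to the file
    log.msg("This is dropped by the observer")    # default INFO level < WARNING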
5 changes: 5 additions & 0 deletions scrapy/crawler.py
@@ -128,13 +128,15 @@ def _signal_kill(self, signum, _):
signame = signal_names[signum]
log.msg(format='Received %(signame)s twice, forcing unclean shutdown',
level=log.INFO, signame=signame)
self._stop_logging()
reactor.callFromThread(self._stop_reactor)

def start(self, stop_after_crawl=True):
self._start_logging()
self._start_reactor(stop_after_crawl)

def _start_logging(self):
self.log_observer = log.start_from_settings(self.settings)
log.scrapy_info(self.settings)

def _start_reactor(self, stop_after_crawl=True):
@@ -149,6 +151,9 @@ def _start_reactor(self, stop_after_crawl=True):
reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
reactor.run(installSignalHandlers=False) # blocking call

def _stop_logging(self):
self.log_observer.stop()

def _stop_reactor(self, _=None):
try:
reactor.stop()
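In effect, ``_start_logging``/``_stop_logging`` bracket the process-level observer around the reactor's lifetime, independently of the per-crawler observers. A minimal sketch of that pattern, using only the calls visible in this diff (the ``run`` callable stands in for the reactor work and is hypothetical):

    from scrapy import log

    def run_with_top_level_logging(settings, run):
        """Start the process-level observer, do the work, detach it afterwards."""
        observer = log.start_from_settings(settings)  # a ScrapyFileLogObserver
        log.scrapy_info(settings)                     # startup banner goes through it
        try:
            run()
        finally:
            observer.stop()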
30 changes: 17 additions & 13 deletions scrapy/log.py
@@ -35,15 +35,16 @@ class ScrapyFileLogObserver(log.FileLogObserver):
def __init__(self, f, level=INFO, encoding='utf-8', crawler=None):
self.level = level
self.encoding = encoding
self.crawler = crawler
if crawler:
self.crawler = crawler
self.emit = self._emit_with_crawler
else:
self.emit = self._emit
log.FileLogObserver.__init__(self, f)

def _emit(self, eventDict):
ev = _adapt_eventdict(eventDict, self.level, self.encoding)
ev = _adapt_eventdict(eventDict, self.level, self.encoding,
self.crawler)
if ev is not None:
log.FileLogObserver.emit(self, ev)
return ev
@@ -55,7 +56,8 @@ def _emit_with_crawler(self, eventDict):
sname = 'log_count/%s' % level_names.get(level, level)
self.crawler.stats.inc_value(sname)

def _adapt_eventdict(eventDict, log_level=INFO, encoding='utf-8', prepend_level=True):
def _adapt_eventdict(eventDict, log_level=INFO, encoding='utf-8',
crawler=None, prepend_level=True):
"""Adapt Twisted log eventDict making it suitable for logging with a Scrapy
log observer. It may return None to indicate that the event should be
ignored by a Scrapy log observer.
@@ -78,6 +80,12 @@ def _adapt_eventdict(eventDict, log_level=INFO, encoding='utf-8', prepend_level=
spider = ev.get('spider')
if spider:
ev['system'] = unicode_to_str(spider.name, encoding)
if crawler and (not spider or spider.crawler is not crawler):
# ignore events not triggered by own spiders in crawlers' observers
return
if not crawler and spider:
# ignore spiders' events in observers without crawler
return

lvlname = level_names.get(level, 'NOLEVEL')
message = ev.get('message')
@@ -140,18 +148,14 @@ def start_from_settings(settings, crawler=None):
settings['LOG_ENCODING'], crawler)

def scrapy_info(settings):
log_observer = start_from_settings(settings)
if log_observer:
msg("Scrapy %s started (bot: %s)" % (scrapy.__version__,
settings['BOT_NAME']))

msg("Optional features available: %s" % ", ".join(scrapy.optional_features),
level=INFO)
msg("Scrapy %s started (bot: %s)" % (scrapy.__version__,
settings['BOT_NAME']))

d = dict(overridden_settings(settings))
msg(format="Overridden settings: %(settings)r", settings=d, level=INFO)
msg("Optional features available: %s" % ", ".join(scrapy.optional_features),
level=INFO)

log_observer.stop()
d = dict(overridden_settings(settings))
msg(format="Overridden settings: %(settings)r", settings=d, level=INFO)

def start_from_crawler(crawler):
return start_from_settings(crawler.settings, crawler)
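
Taken together, the two checks added to ``_adapt_eventdict`` route each event to at most one kind of observer: a crawler-bound observer keeps only events from its own spider, while the crawler-less top-level observer ignores spider events entirely. A sketch mirroring the new tests below (buffer and spider names are made up):

    from io import BytesIO

    from scrapy import log
    from scrapy.spider import Spider
    from scrapy.utils.test import get_crawler

    crawler_a = get_crawler(Spider)
    crawler_b = get_crawler(Spider)
    spider_a = crawler_a._create_spider('spider_a')
    spider_b = crawler_b._create_spider('spider_b')

    buf = BytesIO()
    observer_a = log.ScrapyFileLogObserver(buf, log.INFO, 'utf-8', crawler_a)
    observer_a.start()

    log.msg("hello", spider=spider_a)  # kept: spider_a belongs to crawler_a
    log.msg("hello", spider=spider_b)  # dropped: spider of a different crawler
    log.msg("hello")                   # dropped: top-level message, observer is crawler-bound

    observer_a.stop()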
46 changes: 39 additions & 7 deletions tests/test_log.py
@@ -6,6 +6,7 @@
from scrapy import log
from scrapy.spider import Spider
from scrapy.settings import default_settings
from scrapy.utils.test import get_crawler

class LogTest(unittest.TestCase):

@@ -40,10 +41,10 @@ def test_msg_basic(self):
log.msg("Hello")
self.assertEqual(self.logged(), "[scrapy] INFO: Hello")

def test_msg_spider(self):
def test_msg_ignore_spider(self):
spider = Spider("myspider")
log.msg("Hello", spider=spider)
self.assertEqual(self.logged(), "[myspider] INFO: Hello")
self.failIf(self.logged())

def test_msg_level1(self):
log.msg("Hello", level=log.WARNING)
@@ -57,11 +58,6 @@ def test_msg_wrong_level(self):
log.msg("Hello", level=9999)
self.assertEqual(self.logged(), "[scrapy] NOLEVEL: Hello")

def test_msg_level_spider(self):
spider = Spider("myspider")
log.msg("Hello", spider=spider, level=log.WARNING)
self.assertEqual(self.logged(), "[myspider] WARNING: Hello")

def test_msg_encoding(self):
log.msg(u"Price: \xa3100")
self.assertEqual(self.logged(), "[scrapy] INFO: Price: \xc2\xa3100")
@@ -133,5 +129,41 @@ def test_msg_encoding(self):
# self.assertEqual(self.first_log_line(), "[scrapy] ERROR: \xa3")


class CrawlerScrapyFileLogObserverTest(unittest.TestCase):

def setUp(self):
self.f = BytesIO()
self.crawler = get_crawler(Spider)
self.spider = self.crawler.spider = self.crawler._create_spider('test')
self.log_observer = log.ScrapyFileLogObserver(self.f, log.INFO,
'utf-8', self.crawler)
self.log_observer.start()

def tearDown(self):
self.flushLoggedErrors()
self.log_observer.stop()

def logged(self):
return self.f.getvalue().strip()[25:]

def test_msg_basic(self):
log.msg("Hello", spider=self.spider)
self.assertEqual(self.logged(), "[test] INFO: Hello")

def test_msg_ignore_scrapy_channel(self):
log.msg("Hello")
self.failIf(self.logged())

def test_msg_ignore_another_crawler(self):
crawler = get_crawler(Spider)
log.msg("Hello", spider=crawler._create_spider('test'))
self.failIf(self.logged())

def test_msg_stats_log(self):
assert self.crawler.stats.get_value('log_count/INFO', 0) == 0
log.msg("Hello", spider=self.spider)
self.assertEqual(self.crawler.stats.get_value('log_count/INFO'), 1)


if __name__ == "__main__":
unittest.main()
