Respect log settings in custom_settings: fixes GH-1612
A new root log handler is installed when a crawler is created,
if one was already installed before.
This makes it possible to respect logging-related custom settings
such as LOG_LEVEL, LOG_FILE, etc.
lopuhin committed Mar 3, 2017
1 parent cac5119 commit ef04cfd
Showing 4 changed files with 74 additions and 7 deletions.
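
For context, a minimal sketch (not part of the commit; the spider name, URL, and log file path are hypothetical) of the GH-1612 scenario this fixes. With this change, LOG_LEVEL and LOG_FILE declared in a spider's custom_settings take effect when the spider is run, instead of being ignored because the root log handler had already been configured from the project settings:

    import scrapy
    from scrapy.crawler import CrawlerProcess

    class MySpider(scrapy.Spider):
        name = 'myspider'
        start_urls = ['http://example.com']
        # Before this commit these settings were ignored, because the scrapy
        # root log handler had already been installed from the project settings.
        custom_settings = {
            'LOG_LEVEL': 'INFO',
            'LOG_FILE': 'myspider.log',
        }

        def parse(self, response):
            self.logger.info('parsed %s', response.url)

    process = CrawlerProcess()
    process.crawl(MySpider)   # Crawler creation now re-installs the root handler
    process.start()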
9 changes: 7 additions & 2 deletions scrapy/crawler.py
@@ -16,7 +16,9 @@
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.utils.ossignal import install_shutdown_handlers, signal_names
from scrapy.utils.misc import load_object
from scrapy.utils.log import LogCounterHandler, configure_logging, log_scrapy_info
from scrapy.utils.log import (
    LogCounterHandler, configure_logging, log_scrapy_info,
    get_scrapy_root_handler, install_scrapy_root_handler)
from scrapy import signals

logger = logging.getLogger(__name__)
@@ -35,8 +37,11 @@ def __init__(self, spidercls, settings=None):
        self.signals = SignalManager(self)
        self.stats = load_object(self.settings['STATS_CLASS'])(self)

        handler = LogCounterHandler(self, level=settings.get('LOG_LEVEL'))
        handler = LogCounterHandler(self, level=self.settings.get('LOG_LEVEL'))
        logging.root.addHandler(handler)
        if get_scrapy_root_handler() is not None:
            # scrapy root handler already installed: update it with new settings
            install_scrapy_root_handler(self.settings)
        # lambda is assigned to Crawler attribute because this way it is not
        # garbage collected after leaving __init__ scope
        self.__remove_handler = lambda: logging.root.removeHandler(handler)
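
A minimal sketch (hypothetical spider; it mirrors the new test added further down) of what this change to Crawler.__init__ does: if configure_logging() has already installed the scrapy root handler, creating a Crawler re-installs it using the crawler's settings, with the spider's custom_settings merged in:

    import logging
    import scrapy
    from scrapy.crawler import Crawler
    from scrapy.utils.log import configure_logging, get_scrapy_root_handler

    class MySpider(scrapy.Spider):
        name = 'myspider'
        custom_settings = {'LOG_LEVEL': 'WARNING'}

    configure_logging()  # installs the scrapy root handler (default LOG_LEVEL is DEBUG)
    assert get_scrapy_root_handler().level == logging.DEBUG

    Crawler(MySpider, {})  # re-installs the handler with MySpider's settings applied
    assert get_scrapy_root_handler().level == logging.WARNING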
22 changes: 19 additions & 3 deletions scrapy/utils/log.py
@@ -96,9 +96,25 @@ def configure_logging(settings=None, install_root_handler=True):
        sys.stdout = StreamLogger(logging.getLogger('stdout'))

    if install_root_handler:
        logging.root.setLevel(logging.NOTSET)
        handler = _get_handler(settings)
        logging.root.addHandler(handler)
        install_scrapy_root_handler(settings)


def install_scrapy_root_handler(settings):
    global _scrapy_root_handler

    if (_scrapy_root_handler is not None
            and _scrapy_root_handler in logging.root.handlers):
        logging.root.removeHandler(_scrapy_root_handler)
    logging.root.setLevel(logging.NOTSET)
    _scrapy_root_handler = _get_handler(settings)
    logging.root.addHandler(_scrapy_root_handler)


def get_scrapy_root_handler():
    return _scrapy_root_handler


_scrapy_root_handler = None


def _get_handler(settings):
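
A minimal sketch (assuming a fresh interpreter) of the replace-on-reinstall behaviour of the two helpers added above: install_scrapy_root_handler() removes the previously installed scrapy root handler, if any, before adding a new one built from the given settings, and get_scrapy_root_handler() returns whichever handler is currently installed:

    import logging
    from scrapy.settings import Settings
    from scrapy.utils.log import (
        configure_logging, get_scrapy_root_handler, install_scrapy_root_handler)

    configure_logging()                 # installs the initial scrapy root handler
    first = get_scrapy_root_handler()

    install_scrapy_root_handler(Settings({'LOG_LEVEL': 'ERROR'}))
    second = get_scrapy_root_handler()

    assert first is not second                   # a new handler was built...
    assert first not in logging.root.handlers    # ...the old one was removed...
    assert second.level == logging.ERROR         # ...using the new settings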
4 changes: 2 additions & 2 deletions tests/test_crawl.py
@@ -97,8 +97,8 @@ def test_retry_dns_error(self):

    @defer.inlineCallbacks
    def test_start_requests_bug_before_yield(self):
        crawler = self.runner.create_crawler(BrokenStartRequestsSpider)
        with LogCapture('scrapy', level=logging.ERROR) as l:
            crawler = self.runner.create_crawler(BrokenStartRequestsSpider)
            yield crawler.crawl(fail_before_yield=1)

        self.assertEqual(len(l.records), 1)
@@ -108,8 +108,8 @@ def test_start_requests_bug_before_yield(self):

    @defer.inlineCallbacks
    def test_start_requests_bug_yielding(self):
        crawler = self.runner.create_crawler(BrokenStartRequestsSpider)
        with LogCapture('scrapy', level=logging.ERROR) as l:
            crawler = self.runner.create_crawler(BrokenStartRequestsSpider)
            yield crawler.crawl(fail_yielding=1)

        self.assertEqual(len(l.records), 1)
46 changes: 46 additions & 0 deletions tests/test_crawler.py
@@ -1,10 +1,14 @@
import logging
import os
import tempfile
import warnings
import unittest

import scrapy
from scrapy.crawler import Crawler, CrawlerRunner, CrawlerProcess
from scrapy.settings import Settings, default_settings
from scrapy.spiderloader import SpiderLoader
from scrapy.utils.log import configure_logging, get_scrapy_root_handler
from scrapy.utils.spider import DefaultSpider
from scrapy.utils.misc import load_object
from scrapy.extensions.throttle import AutoThrottle
@@ -74,6 +78,48 @@ class MySpider(scrapy.Spider):
        self.assertIn(AutoThrottle, enabled_exts)


class CrawlerLoggingTestCase(unittest.TestCase):
    def test_no_root_handler_installed(self):
        handler = get_scrapy_root_handler()
        if handler is not None:
            logging.root.removeHandler(handler)

        class MySpider(scrapy.Spider):
            name = 'spider'

        crawler = Crawler(MySpider, {})
        assert get_scrapy_root_handler() is None

    def test_spider_custom_settings_log_level(self):
        with tempfile.NamedTemporaryFile() as log_file:
            class MySpider(scrapy.Spider):
                name = 'spider'
                custom_settings = {
                    'LOG_LEVEL': 'INFO',
                    'LOG_FILE': log_file.name,
                }

            configure_logging()
            self.assertEqual(get_scrapy_root_handler().level, logging.DEBUG)
            crawler = Crawler(MySpider, {})
            self.assertEqual(get_scrapy_root_handler().level, logging.INFO)
            info_count = crawler.stats.get_value('log_count/INFO')
            logging.debug('debug message')
            logging.info('info message')
            logging.warning('warning message')
            logging.error('error message')
            logged = log_file.read().decode('utf8')
            self.assertNotIn('debug message', logged)
            self.assertIn('info message', logged)
            self.assertIn('warning message', logged)
            self.assertIn('error message', logged)
            self.assertEqual(crawler.stats.get_value('log_count/ERROR'), 1)
            self.assertEqual(crawler.stats.get_value('log_count/WARNING'), 1)
            self.assertEqual(
                crawler.stats.get_value('log_count/INFO') - info_count, 1)
            self.assertEqual(crawler.stats.get_value('log_count/DEBUG', 0), 0)


class SpiderLoaderWithWrongInterface(object):

    def unneeded_method(self):
