Skip to content
This repository has been archived by the owner on Nov 9, 2017. It is now read-only.

Commit

Permalink
Use WebError Reporter objects for exception logging.
Browse files Browse the repository at this point in the history
This brings log_q reporting and logging-based reporting (syslog)
together into one place as WebError error reporters.
  • Loading branch information
spladug committed Feb 14, 2013
1 parent 3657de5 commit 775c795
Show file tree
Hide file tree
Showing 5 changed files with 135 additions and 65 deletions.
32 changes: 30 additions & 2 deletions r2/example.ini
Expand Up @@ -27,8 +27,6 @@ reload_templates = true
uncompressedJS = true
# enable/disable verbose SQL printing
sqlprinting = false
# enable/disable writing errors as they occur to a rabbit-mq queue
exception_logging = false
# whether to print a "reddit app started" message at start
log_start = true
# enable/disable logging for amqp/rabbitmq
Expand All @@ -39,6 +37,11 @@ read_only_mode = false
heavy_load_mode = false
# directory to write cProfile stats dumps to (disabled if not set)
profile_directory =
# exception reporter objects to give to ErrorMiddleware (see log.py)
error_reporters =
# should we force a re-parse of this file with logging.config.fileConfig?
# older versions of uwsgi (<1.1) didn't do this step so we'd never log!
old_uwsgi_load_logging_config = false

# the site's tagline, used in the title and description
short_description = open source is awesome
Expand Down Expand Up @@ -500,6 +503,31 @@ beaker.session_secret = somesecret
# execute malicious code after an exception is raised.
#set debug = false

[loggers]
keys = root, pycountry

[logger_root]
level = WARNING
handlers = console

[logger_pycountry]
level = CRITICAL
qualname = pycountry.db
handlers =

[handlers]
keys = console

[handler_console]
class = StreamHandler
args = (sys.stdout,)

[formatters]
keys = reddit

[formatter_reddit]
format = %(message)s

# the following configuration section makes up the "live" config. if zookeeper
# is enabled, then this configuration will be found by the app in zookeeper. to
# write it to zookeeper, use the writer script: scripts/write_live_config.
Expand Down
43 changes: 34 additions & 9 deletions r2/r2/lib/app_globals.py
Expand Up @@ -32,6 +32,8 @@
import sys

from sqlalchemy import engine, event

import cssutils
import pytz

from r2.config import queues
Expand Down Expand Up @@ -143,7 +145,6 @@ class Globals(object):
'css_killswitch',
'db_create_tables',
'disallow_db_writes',
'exception_logging',
'disable_ratelimit',
'amqp_logging',
'read_only_mode',
Expand All @@ -157,6 +158,7 @@ class Globals(object):
'static_secure_pre_gzipped',
'trust_local_proxies',
'shard_link_vote_queues',
'old_uwsgi_load_logging_config',
],

ConfigValue.tuple: [
Expand Down Expand Up @@ -323,16 +325,39 @@ def setup(self):
else:
self.static_names = {}

#setup the logger
self.log = logging.getLogger('reddit')
self.log.addHandler(logging.StreamHandler())
# if we're a web app running on old uwsgi, force load the logging
# config from the file since uwsgi didn't do it for us
if not self.running_as_script and self.old_uwsgi_load_logging_config:
logging.config.fileConfig(self.config["__file__"])

# make python warnings go through the logging system
logging.captureWarnings(capture=True)

log = logging.getLogger('reddit')

# when we're a script (paster run) just set up super simple logging
if self.running_as_script:
log.setLevel(logging.INFO)
log.addHandler(logging.StreamHandler())

# if in debug mode, override the logging level to DEBUG
if self.debug:
self.log.setLevel(logging.DEBUG)
else:
self.log.setLevel(logging.INFO)
log.setLevel(logging.DEBUG)

# set log level for pycountry which is chatty
logging.getLogger('pycountry.db').setLevel(logging.CRITICAL)
# attempt to figure out which pool we're in and add that to the
# LogRecords.
try:
with open("/etc/ec2_asg", "r") as f:
pool = f.read().strip()
# clean up the pool name since we're putting stuff after "-"
pool = pool.partition("-")[0]
except IOError:
pool = "reddit-app"
self.log = logging.LoggerAdapter(log, {"pool": pool})

# make cssutils use the real logging system
csslog = logging.getLogger("cssutils")
cssutils.log.setLog(csslog)

if not self.media_domain:
self.media_domain = self.domain
Expand Down
32 changes: 2 additions & 30 deletions r2/r2/lib/base.py
Expand Up @@ -20,38 +20,20 @@
# Inc. All Rights Reserved.
###############################################################################

import _pylibmc
import pycassa.pool
import sqlalchemy.exc

from pylons import c, g, request, session, config, response
from pylons.controllers import WSGIController
from pylons.i18n import N_, _, ungettext, get_lang
from webob.exc import HTTPException, status_map
from r2.lib.filters import spaceCompress, _force_unicode
from r2.lib.template_helpers import get_domain
from utils import storify, string2js, read_http_date
from r2.lib.log import log_exception
import r2.lib.db.thing
import r2.lib.lock

import re, hashlib
from urllib import quote
import urllib2
import sys


OPERATIONAL_EXCEPTIONS = (_pylibmc.MemcachedError,
r2.lib.db.thing.NotFound,
r2.lib.lock.TimeoutExpired,
sqlalchemy.exc.OperationalError,
sqlalchemy.exc.IntegrityError,
pycassa.pool.AllServersUnavailable,
pycassa.pool.NoConnectionAvailable,
pycassa.pool.MaximumRetryException,
)


#TODO hack
import logging
from r2.lib.utils import UrlParser, query_string
Expand Down Expand Up @@ -141,18 +123,8 @@ def __call__(self, environ, start_response):

request.environ['pylons.routes_dict']['action_name'] = action
request.environ['pylons.routes_dict']['action'] = handler_name

try:
res = WSGIController.__call__(self, environ, start_response)
except Exception as e:
if g.exception_logging and not isinstance(e, OPERATIONAL_EXCEPTIONS):
try:
log_exception(e, *sys.exc_info())
except Exception as f:
print "log_exception() freaked out: %r" % f
print "sorry for breaking the stack trace:"
raise
return res

return WSGIController.__call__(self, environ, start_response)

def pre(self): pass
def post(self): pass
Expand Down
6 changes: 3 additions & 3 deletions r2/r2/lib/cloudsearch.py
Expand Up @@ -421,8 +421,8 @@ def xml_from_things(self):
if not self.use_safe_get:
raise
else:
g.log.warn("Ignoring problem on thing %r.\n\n%r",
thing, e)
g.log.warning("Ignoring problem on thing %r.\n\n%r",
thing, e)
return batch

def should_index(self, thing):
Expand Down Expand Up @@ -912,7 +912,7 @@ def _run_cached(cls, query, bq, sort="relevance", faceting=None, start=0,

warnings = response['info'].get('messages', [])
for warning in warnings:
g.log.warn("%(code)s (%(severity)s): %(message)s" % warning)
g.log.warning("%(code)s (%(severity)s): %(message)s" % warning)

hits = response['hits']['found']
docs = [doc['id'] for doc in response['hits']['hit']]
Expand Down
87 changes: 66 additions & 21 deletions r2/r2/lib/log.py
Expand Up @@ -21,11 +21,11 @@
###############################################################################

import cPickle
import traceback

from datetime import datetime

from pylons import g
from weberror.reporter import Reporter


QUEUE_NAME = 'log_q'
Expand All @@ -38,26 +38,6 @@ def _default_dict():
pid=g.reddit_pid)


# e_value and e should actually be the same thing.
# e_type is the just the type of e_value
# So e and e_traceback are the interesting ones.
def log_exception(e, e_type, e_value, e_traceback):
"""Send an exception to log_q for error reporting."""

from r2.lib import amqp

d = _default_dict()

d['type'] = 'exception'
d['traceback'] = traceback.extract_tb(e_traceback)

d['exception_type'] = e.__class__.__name__
s = str(e)
d['exception_desc'] = s[:10000]

amqp.add_item(QUEUE_NAME, cPickle.dumps(d))


def log_text(classification, text=None, level="info"):
"""Send some log text to log_q for appearance in the streamlog.
Expand All @@ -83,3 +63,68 @@ def log_text(classification, text=None, level="info"):
d['classification'] = classification

amqp.add_item(QUEUE_NAME, cPickle.dumps(d))


class LogQueueErrorReporter(Reporter):
    """ErrorMiddleware-compatible reporter that writes exceptions to log_q.

    The log_q queue processor then picks these up, updates the /admin/errors
    overview, and decides whether or not to send out emails about them.
    """

    @staticmethod
    def _operational_exceptions():
        """Return a tuple of exception types caused by transient issues.

        These errors aren't terribly useful to track in /admin/errors because
        they aren't directly bugs in the code but rather symptoms of
        operational issues.
        """

        # NOTE(review): imported inside the function rather than at module
        # level — presumably to keep module import cheap / avoid import
        # cycles; confirm before moving these to the top of the file.
        import _pylibmc
        import sqlalchemy.exc
        import pycassa.pool
        import r2.lib.db.thing
        import r2.lib.lock

        return (
            _pylibmc.MemcachedError,
            r2.lib.db.thing.NotFound,
            r2.lib.lock.TimeoutExpired,
            sqlalchemy.exc.OperationalError,
            sqlalchemy.exc.IntegrityError,
            pycassa.pool.AllServersUnavailable,
            pycassa.pool.NoConnectionAvailable,
            pycassa.pool.MaximumRetryException,
        )

    def report(self, exc_data):
        """Pickle the collected exception data and enqueue it on log_q.

        :param exc_data: a weberror exception-collector result carrying
            ``exception_type``, ``exception_value``, and ``frames``.
        """
        from r2.lib import amqp

        # transient operational errors aren't code bugs; don't clutter
        # /admin/errors with them
        if issubclass(exc_data.exception_type, self._operational_exceptions()):
            return

        d = _default_dict()
        d["type"] = "exception"
        d["exception_type"] = exc_data.exception_type.__name__
        # cap the description so a pathological exception message can't
        # produce an oversized queue item; 10000 chars matches the limit
        # enforced by the log_exception() function this reporter replaced
        d["exception_desc"] = exc_data.exception_value[:10000]
        # use the format that log_q expects; same as traceback.extract_tb
        d["traceback"] = [(f.filename, f.lineno, f.name,
                           f.get_source_line().strip())
                         for f in exc_data.frames]

        amqp.add_item(QUEUE_NAME, cPickle.dumps(d))


class LoggingErrorReporter(Reporter):
    """ErrorMiddleware-compatible reporter that writes exceptions to g.log.

    Each line of the formatted traceback is emitted as a separate WARNING
    record on the app's logger.
    """

    def report(self, exc_data):
        """Format exc_data and emit it line by line via g.log.warning."""
        formatted, extra = self.format_text(exc_data)
        # TODO: send this all in one burst so that error reports aren't
        # interleaved / individual lines aren't dropped. doing so will take
        # configuration on the syslog side and potentially in apptail as well
        report_lines = formatted.splitlines()
        for entry in report_lines:
            g.log.warning(entry)

0 comments on commit 775c795

Please sign in to comment.