Skip to content
This repository has been archived by the owner on Dec 5, 2018. It is now read-only.

Commit

Permalink
Adds server-side logging to spec (closes #35).
Browse files Browse the repository at this point in the history
  • Loading branch information
chuckharmston committed Apr 7, 2016
1 parent 977fded commit 92be246
Show file tree
Hide file tree
Showing 8 changed files with 501 additions and 0 deletions.
21 changes: 21 additions & 0 deletions recommendation/factory.py
@@ -1,8 +1,13 @@
from logging import INFO, StreamHandler
from sys import stdout

from celery import Celery
from flask import Flask

from recommendation import conf
from recommendation.cors import cors_headers
from recommendation.mozlog.formatter import MozLogFormatter
from recommendation.mozlog.middleware import request_timer, request_summary
from recommendation.views.dummy import dummy
from recommendation.views.main import main
from recommendation.views.static import static
Expand All @@ -11,14 +16,30 @@

def create_app():
app = Flask(__name__)

# Append CORS headers to each request.
app.after_request(cors_headers)

# Register views.
app.register_blueprint(main)
app.register_blueprint(static)
app.register_blueprint(status)

# Use a dummy data generator while Yahoo BOSS access is being sorted out.
app.register_blueprint(dummy)

# Log using the mozlog format to stdout.
handler = StreamHandler(stream=stdout)
handler.setFormatter(MozLogFormatter(logger_name='universalSearch'))
handler.setLevel(INFO)
app.logger_name = 'request.summary'
app.logger.addHandler(handler)
app.logger.setLevel(INFO)

# Use logging middleware.
app.before_request(request_timer)
app.after_request(request_summary)

app.config.update(
CELERY_BROKER_URL=conf.CELERY_BROKER_URL,
DEBUG=conf.DEBUG
Expand Down
Empty file.
109 changes: 109 additions & 0 deletions recommendation/mozlog/formatter.py
@@ -0,0 +1,109 @@
"""
Copied from
https://github.com/mozilla/testpilot/blob/master/testpilot/base/logging.py
"""
import logging
import json
import socket
import traceback


class MozLogFormatter(logging.Formatter):
"""Log formatter that outputs machine-readable json.
This log formatter outputs JSON format messages that are compatible with
Mozilla's standard heka-based log aggregation infrastructure.
See also:
https://mana.mozilla.org/wiki/display/CLOUDSERVICES/Logging+Standard
https://mana.mozilla.org/wiki/pages/viewpage.action?pageId=42895640
Adapted from:
https://github.com/mozilla-services/mozservices/blob/master/mozsvc/util.py#L106
"""
LOGGING_FORMAT_VERSION = "2.0"

# Map from Python logging to Syslog severity levels
SYSLOG_LEVEL_MAP = {
50: 2, # CRITICAL
40: 3, # ERROR
30: 4, # WARNING
20: 6, # INFO
10: 7, # DEBUG
}

# Syslog level to use when/if python level isn't found in map
DEFAULT_SYSLOG_LEVEL = 7

EXCLUDED_LOGRECORD_ATTRS = set((
'args', 'asctime', 'created', 'exc_info', 'exc_text', 'filename',
'funcName', 'levelname', 'levelno', 'lineno', 'module', 'msecs',
'message', 'msg', 'name', 'pathname', 'process', 'processName',
'relativeCreated', 'stack_info', 'thread', 'threadName'
))

def __init__(self, format=None, datefmt=None, style='%',
logger_name='TestPilot'):
super(MozLogFormatter, self).__init__(format, datefmt, style)
self.logger_name = logger_name
self.hostname = socket.gethostname()

def format(self, record):
"""
Map from Python LogRecord attributes to JSON log format fields
* from -
https://docs.python.org/3/library/logging.html#logrecord-attributes #
noqa
* to -
https://mana.mozilla.org/wiki/pages/viewpage.action?pageId=42895640 #
noqa
"""
out = dict(
Timestamp=int(record.created * 1e9),
Type=record.name,
Logger=self.logger_name,
Hostname=self.hostname,
EnvVersion=self.LOGGING_FORMAT_VERSION,
Severity=self.SYSLOG_LEVEL_MAP.get(record.levelno,
self.DEFAULT_SYSLOG_LEVEL),
Pid=record.process,
)

# Include any custom attributes set on the record.
# These would usually be collected metrics data.
fields = dict()
for key, value in record.__dict__.items():
if key not in self.EXCLUDED_LOGRECORD_ATTRS:
fields[key] = value

# Only include the 'message' key if it has useful content
# and is not already a JSON blob.
message = record.getMessage()
if message:
if not message.startswith("{") and not message.endswith("}"):
fields["message"] = message

# If there is an error, format it for nice output.
if record.exc_info is not None:
fields["error"] = repr(record.exc_info[1])
fields["traceback"] = safer_format_traceback(*record.exc_info)

out['Fields'] = fields

return json.dumps(out)


def safer_format_traceback(exc_typ, exc_val, exc_tb):
"""Format an exception traceback into safer string.
We don't want to let users write arbitrary data into our logfiles,
which could happen if they e.g. managed to trigger a ValueError with
a carefully-crafted payload. This function formats the traceback
using "%r" for the actual exception data, which passes it through repr()
so that any special chars are safely escaped.
"""
lines = ["Uncaught exception:\n"]
lines.extend(traceback.format_tb(exc_tb))
lines.append("%r\n" % (exc_typ,))
lines.append("%r\n" % (exc_val,))
return "".join(lines)
65 changes: 65 additions & 0 deletions recommendation/mozlog/middleware.py
@@ -0,0 +1,65 @@
import json
import re
import time

from flask import current_app, request


IS_PROTOCOL = r'^[^\s]+\:\S'
IS_HOSTNAME = r'^[^\s]+\.\S'


def request_timer():
"""
before_request middleware that attaches the processing start time to the
request object, for later performance assessment.
"""
request.start_time = time.time()


def request_summary(response):
"""
after_request middleware that generates and logs a mozlog-formatted log
about the request.
Read more:
https://github.com/mozilla/universal-search/blob/master/docs/metrics.md
https://github.com/mozilla-services/Dockerflow/blob/master/docs/mozlog.md
"""
request.finish_time = time.time()
response.direct_passthrough = False

log = {
'predicates': {}
}
query = request.args.get('q')
data = response.get_data(as_text=True)
try:
body = json.loads(data)
except json.decoder.JSONDecodeError:
body = {}

log['agent'] = request.headers.get('User-Agent')
log['errno'] = 0 if response.status_code < 400 else response.status_code
log['lang'] = request.headers.get('Accept-Language')
log['method'] = request.method
log['path'] = request.path
log['t'] = request.finish_time - request.start_time

if query:
log['predicates']['query_length'] = len(query) > 20
log['predicates']['is_protocol'] = (re.match(IS_PROTOCOL, query) is not
None)
log['predicates']['is_hostname'] = (re.match(IS_HOSTNAME, query) is not
None)

if not any(log['predicates'].values()):
log['query'] = query if query else None
log['status_code'] = response.status_code
classifiers = body.get('enhancements')
log['classifiers'] = list(classifiers.keys()) if classifiers else []

current_app.logger.info('', extra=log)

return response
1 change: 1 addition & 0 deletions recommendation/mozlog/tests/__init__.py
@@ -0,0 +1 @@
VERSION = "0.1.0"
168 changes: 168 additions & 0 deletions recommendation/mozlog/tests/test_formatter.py
@@ -0,0 +1,168 @@
"""
Copied from
https://github.com/mozilla/testpilot/blob/master/testpilot/base/tests.py
"""
import json
import logging
import os
from unittest import TestCase

from testfixtures import LogCapture
import jsonschema

from recommendation.mozlog.formatter import MozLogFormatter

logger = logging.getLogger(__name__)


class TestMozLogFormatter(TestCase):

def setUp(self):
self.handler = LogCapture()
self.logger_name = "TestingTestPilot"
self.formatter = MozLogFormatter(logger_name=self.logger_name)

def tearDown(self):
self.handler.uninstall()

def _fetchLastLog(self):
self.assertEquals(len(self.handler.records), 1)
details = json.loads(self.formatter.format(self.handler.records[0]))
jsonschema.validate(details, JSON_LOGGING_SCHEMA)
return details

def test_basic_operation(self):
"""Ensure log formatter contains all the expected fields and values"""
message_text = "simple test"
logging.debug(message_text)
details = self._fetchLastLog()

expected_present = ["Timestamp", "Hostname"]
for key in expected_present:
self.assertTrue(key in details)

expected_meta = {
"Severity": 7,
"Type": "root",
"Pid": os.getpid(),
"Logger": self.logger_name,
"EnvVersion": self.formatter.LOGGING_FORMAT_VERSION
}
for key, value in expected_meta.items():
self.assertEquals(value, details[key])

self.assertEquals(details['Fields']['message'], message_text)

def test_custom_paramters(self):
"""Ensure log formatter can handle custom parameters"""
logger = logging.getLogger("mozsvc.test.test_logging")
logger.warn("custom test %s", "one", extra={"more": "stuff"})
details = self._fetchLastLog()

self.assertEquals(details["Type"], "mozsvc.test.test_logging")
self.assertEquals(details["Severity"], 4)

fields = details['Fields']
self.assertEquals(fields["message"], "custom test one")
self.assertEquals(fields["more"], "stuff")

def test_logging_error_tracebacks(self):
"""Ensure log formatter includes exception traceback information"""
try:
raise ValueError("\n")
except Exception:
logging.exception("there was an error")
details = self._fetchLastLog()

expected_meta = {
"Severity": 3,
}
for key, value in expected_meta.items():
self.assertEquals(value, details[key])

fields = details['Fields']
expected_fields = {
'message': 'there was an error',
'error': "ValueError('\\n',)"
}
for key, value in expected_fields.items():
self.assertEquals(value, fields[key])

self.assertTrue(fields['traceback'].startswith('Uncaught exception:'))
self.assertTrue("<class 'ValueError'>" in fields['traceback'])


# https://mana.mozilla.org/wiki/pages/viewpage.action?pageId=42895640
JSON_LOGGING_SCHEMA = json.loads("""
{
"type":"object",
"required":["Timestamp"],
"properties":{
"Timestamp":{
"type":"integer",
"minimum":0
},
"Type":{
"type":"string"
},
"Logger":{
"type":"string"
},
"Hostname":{
"type":"string",
"format":"hostname"
},
"EnvVersion":{
"type":"string",
"pattern":"^\\d+(?:\\.\\d+){0,2}$"
},
"Severity":{
"type":"integer",
"minimum":0,
"maximum":7
},
"Pid":{
"type":"integer",
"minimum":0
},
"Fields":{
"type":"object",
"minProperties":1,
"additionalProperties":{
"anyOf": [
{ "$ref": "#/definitions/field_value"},
{ "$ref": "#/definitions/field_array"},
{ "$ref": "#/definitions/field_object"}
]
}
}
},
"definitions":{
"field_value":{
"type":["string", "number", "boolean"]
},
"field_array":{
"type":"array",
"minItems": 1,
"oneOf": [
{"items": {"type":"string"}},
{"items": {"type":"number"}},
{"items": {"type":"boolean"}}
]
},
"field_object":{
"type":"object",
"required":["value"],
"properties":{
"value":{
"oneOf": [
{ "$ref": "#/definitions/field_value" },
{ "$ref": "#/definitions/field_array" }
]
},
"representation":{"type":"string"}
}
}
}
}
""".replace("\\", "\\\\")) # HACK: Fix escaping for easy copy/paste

0 comments on commit 92be246

Please sign in to comment.