In [1]:
# Reload logging for the Notebook
# NOTE(review): presumably this discards any logging configuration already
# present in the kernel so that the later basicConfig() call takes effect
# (basicConfig() does nothing when the root logger already has handlers) -- confirm.

import importlib
import logging

importlib.reload(logging)

<module 'logging' from '/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/logging/__init__.py'>

# Structured logging in Python

### Rob Van Gennip

linkedin.com/in/ravangen

# Log Levels

- Debug
- Info
- Warning
- Error
- Critical

## Debug

Detailed internal state for diagnosing problems

## Info

Confirmation things are working as expected

## Warning

Something unexpected happened, or there is an indication of a problem in the near future

## Error

Something is wrong, not able to perform an operation

## Critical

Something really bad happened, may be unable to continue running

# Core Logging Classes

- Loggers
- Handlers
- Filters
- Formatters

## Logger

- The interface that application code directly uses
- Named typically as a dot-separated hierarchy like `'a'`, `'a.b'` or `'a.b.c.d'`
- Retrieve an instance with `logging.getLogger(name)`
- Retrieving a logger with the same name returns a reference to the same logger

In [2]:
# Root logger setup: emit records of every level to stdout

import logging
import sys

# Fields used in the format string below
# (https://docs.python.org/3/library/logging.html#logrecord-attributes):
#   %(asctime)s   : Human-readable time when the LogRecord was created
#   %(levelname)s : Text logging level for the message
#   %(name)s      : Name of the logger used to log the call
#   %(message)s   : The logged message
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
    level=logging.DEBUG,
    stream=sys.stdout,
)

In [3]:
# Calling getLogger() without a name returns the root logger
# (its name is rendered as 'root' by %(name)s in the output)
root_logger = logging.getLogger()  # no name provided

root_logger.info('Hello PyCon Canada!')

2017-11-11 12:08:31,659 - INFO - root - Hello PyCon Canada!


In [4]:
# Named logger: the dot-separated name passed here is what %(name)s renders in the output
foo_bar_logger = logging.getLogger('foo.bar')

foo_bar_logger.info('Hello PyCon Canada!')

2017-11-11 12:08:31,669 - INFO - foo.bar - Hello PyCon Canada!


In [5]:
# Use __name__ to get the fully-qualified name of the module
# __name__ is '__main__' in an interactive prompt, script, or standard input
# (in this notebook it resolves to '__main__', as the logged output shows)

auto_named_logger = logging.getLogger(__name__)

auto_named_logger.info('Hello PyCon Canada!')

2017-11-11 12:08:31,677 - INFO - __main__ - Hello PyCon Canada!


## Handler

- Sends the log records to a destination
- `StreamHandler`: output to streams such as `stdout`, `stderr`
- `NullHandler`: no output
- `FileHandler`: output to a disk file
- `RotatingFileHandler`: rotates at max file size
- `TimedRotatingFileHandler`: interval based rotation
- `SMTPHandler`: send an email per record

## Filter

- Fine grained mechanism for determining which log records to output

## Formatter

- Specifies how the content of log records is transformed for output

In [6]:
# Dedicated 'server' logger whose records carry a per-request id
server_logger = logging.getLogger('server')
server_logger.propagate = False  # do not propagate messages up logger hierarchy

log_format = '%(asctime)s - %(levelname)s - %(request_id)s - %(message)s'
formatter = logging.Formatter(log_format)

handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)

# getLogger('server') always returns the same logger object, so re-running this
# cell would stack another handler on it and print every record twice.
# Drop handlers left over from a previous run before attaching the new one.
for stale_handler in list(server_logger.handlers):
    server_logger.removeHandler(stale_handler)
server_logger.addHandler(handler)

data = {
    'request_id': '90fca134-4468-41dc-bb26-4d087d61857b',
    'duration': 14,  # not output, not in log_format
}

server_logger.info('User logout', extra=data)

# NOTE: request_id must always be provided to 'server' logger
# If not provided, formatting the record fails (KeyError; ValueError on
# Python 3.8+). The handler catches the error, so the call does not raise:
# a "Logging error" traceback is written to stderr and the record is lost.

2017-11-11 12:08:31,702 - INFO - 90fca134-4468-41dc-bb26-4d087d61857b - User logout


### JsonFormatter

- Output data as JSON objects
- Stop writing custom parsers for syslog type records
- Easily machine parsable 
- For each argument in format string, output as key/value in object

In [7]:
from datetime import date, datetime, time
import json
import logging
import re
import six
import traceback

from collections import OrderedDict
from inspect import istraceback


# Skip natural LogRecord attributes
# https://docs.python.org/3/library/logging.html#logrecord-attributes
# Names listed here are never copied into the JSON output as "extra" fields.
RESERVED_ATTRS = (
    'args', 'asctime', 'created', 'exc_info', 'exc_text', 'filename',
    'funcName', 'levelname', 'levelno', 'lineno', 'module',
    'msecs', 'message', 'msg', 'name', 'pathname', 'process',
    'processName', 'relativeCreated', 'stack_info', 'thread', 'threadName',
    # set on every LogRecord since Python 3.12; without this entry it would
    # show up in each JSON object as if it were a user-supplied extra
    'taskName',
)

# Same names as a mapping (name -> name), used for membership tests
RESERVED_ATTR_HASH = dict(zip(RESERVED_ATTRS, RESERVED_ATTRS))


def merge_record_extra(record, target, reserved=RESERVED_ATTR_HASH):
    """
    Copy the non-reserved attributes of a LogRecord into ``target``.

    An attribute is skipped when its name is in ``reserved`` or when the name
    is a string starting with an underscore.

    :param record: logging.LogRecord whose ``__dict__`` is scanned
    :param target: dict that receives the attributes (updated in place)
    :param reserved: dict or list of attribute names to leave out
    :return: ``target``
    """
    for attr_name, attr_value in six.iteritems(record.__dict__):
        if attr_name in reserved:
            continue
        # keys without startswith() (e.g. numeric keys) are never treated as private
        is_private = hasattr(attr_name, "startswith") and attr_name.startswith('_')
        if is_private:
            continue
        target[attr_name] = attr_value
    return target


class JsonDataFormatter(logging.Formatter):  # originally based on https://github.com/madzak/python-json-logger
    """
    Format logging records as json strings. Unsupported json types will be converted to strings.

    The format string is not rendered as text; it is only scanned for field
    names, and each of those names becomes a key of the emitted JSON object.
    """

    def __init__(self, *args, **kwargs):
        """
        Positional and remaining keyword arguments are passed on to ``logging.Formatter``.

        :param json_default: a function for encoding non-standard objects as outlined in https://docs.python.org/3/library/json.html
        :param json_encoder: optional custom encoder
        """
        self.json_default = kwargs.pop('json_default', None)
        self.json_encoder = kwargs.pop('json_encoder', None)

        super(JsonDataFormatter, self).__init__(*args, **kwargs)

        if not self.json_encoder and not self.json_default:
            # Neither hook supplied: fall back to a handler that turns
            # anything json cannot encode natively into text.
            def _default_json_handler(obj):
                if isinstance(obj, (date, datetime, time)):
                    return obj.isoformat()
                elif istraceback(obj):
                    tb = six.text_type(''.join(traceback.format_tb(obj)))
                    return tb.strip()
                elif isinstance(obj, Exception):
                    return 'Exception: %s' % six.text_type(obj)
                return six.text_type(obj)

            self.json_default = _default_json_handler

        self._required_fields = self.parse()
        # Format fields and standard LogRecord attributes are handled
        # explicitly, so they must not be copied again as "extra" attributes.
        self._skip_fields = dict(zip(self._required_fields, self._required_fields))
        self._skip_fields.update(RESERVED_ATTR_HASH)

    def parse(self):
        """
        Parses format string looking for substitutions

        This method is responsible for returning a list of fields (as strings)
        to include in all log messages.
        """
        standard_formatters = re.compile(r'\((.+?)\)', re.IGNORECASE)
        return standard_formatters.findall(self._fmt)

    def add_fields(self, log_data, record, message_dict):
        """
        Override this method to implement custom logic for adding fields.

        :param log_data: mapping being built for this record (updated in place)
        :param record: the logging.LogRecord being formatted
        :param message_dict: values computed by ``format`` (e.g. formatted exception text)
        """
        # 1. fields named in the format string, in format-string order
        for field in self._required_fields:
            log_data[field] = record.__dict__.get(field)
        # 2. values prepared by format(); these win over step 1 on a key clash
        log_data.update(message_dict)
        # 3. everything else attached to the record (the ``extra`` dict)
        merge_record_extra(record, log_data, reserved=self._skip_fields)

    def process_log_data(self, log_data):
        """
        Override this method to implement custom logic.

        :param log_data: mapping produced by ``add_fields``
        :return: the mapping to serialize (unchanged by default)
        """
        return log_data

    def jsonify_log_data(self, log_data):
        """
        Returns a json string of the log data.
        """
        return json.dumps(log_data, default=self.json_default, cls=self.json_encoder)

    def format(self, record):
        """
        Formats a log record and serializes to json.

        :param record: the logging.LogRecord to format
        :return: json string containing the format-string fields, formatted
                 exception / stack text when present, and extra attributes
        """
        message_data = {}
        record.message = record.getMessage()

        # only format time if needed
        if "asctime" in self._required_fields:
            record.asctime = self.formatTime(record, self.datefmt)

        # Store the exception as formatted text under 'exc_info'.
        if record.exc_info:
            message_data['exc_info'] = self.formatException(record.exc_info)

        # 'stack_info' is a reserved attribute, so unless it is rendered here
        # the output of logger.info(..., stack_info=True) is silently lost.
        # getattr() keeps this safe for records that lack the attribute.
        stack_info = getattr(record, 'stack_info', None)
        if stack_info:
            message_data['stack_info'] = self.formatStack(stack_info)

        log_data = OrderedDict()
        self.add_fields(log_data, record, message_data)  # collect all information together into log_data
        log_data = self.process_log_data(log_data)
        return self.jsonify_log_data(log_data)

In [8]:
# Dedicated 'api' logger that renders each record as a JSON object
api_logger = logging.getLogger('api')
api_logger.propagate = False  # do not propagate messages up logger hierarchy

# Only the field names matter here; JsonDataFormatter emits one key per field
log_format = '%(asctime)s %(levelname)s %(message)s'
formatter = JsonDataFormatter(log_format)

handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)

# getLogger('api') always returns the same logger object, so re-running this
# cell would stack another handler on it and print every record twice.
# Drop handlers left over from a previous run before attaching the new one.
for stale_handler in list(api_logger.handlers):
    api_logger.removeHandler(stale_handler)
api_logger.addHandler(handler)

data = {
    # output despite not in log_format
    'request_id': '90fca134-4468-41dc-bb26-4d087d61857b',
    'duration': 14,
}

api_logger.info('User logout', extra=data)

{"asctime": "2017-11-11 12:08:32,099", "levelname": "INFO", "message": "User logout", "request_id": "90fca134-4468-41dc-bb26-4d087d61857b", "duration": 14}


from logging import config

# Formatter entry: '()' names the factory that dictConfig calls to build it
json_formatter_config = {
    '()': JsonDataFormatter,
    # for each argument in format, include its value in the output json (does not render content formatted as a single value)
    'format': '%(levelname)s %(asctime)s %(module)s %(process)d %(thread)d %(message)s',
}

# Handler entry: a stream handler that uses the 'json' formatter
console_handler_config = {
    'class': 'logging.StreamHandler',
    'level': 'DEBUG',
    'formatter': 'json',
}

# Root logger sends INFO and above to the console handler
config.dictConfig({
    'version': 1,
    'disable_existing_loggers': True,
    'formatters': {'json': json_formatter_config},
    'handlers': {'console': console_handler_config},
    'loggers': {},
    'root': {'level': 'INFO', 'handlers': ['console']},
})

from logging import config

# Configure the 'api' logger: WARNING and above go to a console handler and
# are not passed on to ancestor loggers.
config.dictConfig({
    'version': 1,
    'formatters': {
        'defa': {
            '()': logging.Formatter,
            'format': '%(levelname)s %(asctime)s ... %(message)s',
        },
    },
    'handlers': {
        'console': {
            'level': 'DEBUG',
            'class': 'logging.StreamHandler',
            # attach the formatter declared above; without this key the
            # handler ignores it and emits the bare message only
            'formatter': 'defa',
        },
    },
    'loggers': {
        'api': {
            'level': 'WARNING',
            'handlers': ['console', ],
            'propagate': False,
        },
    },
})

api_logger = logging.getLogger('api')