Skip to content

Commit

Permalink
Add Statsd facilities for better instrumentation
Browse files Browse the repository at this point in the history
Adds a statsd_client to the Context object to increase visibility
into Thumbor internals. For now, it is mostly instrumenting the
HTTP loader, and doing some logging in there.

I also added the HTTP_LOADER_CURL_ASYNC_HTTP_CLIENT config option
because the curl async client provides a lot more timing data
that is very useful to log.
  • Loading branch information
clifff committed Dec 23, 2014
1 parent 135865a commit 6a6dff3
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 3 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def run_setup(extension_modules=[]):
"derpconf>=0.2.0",
"python-magic>=0.4.3",
"thumbor-pexif>=0.14,<1.0",
"statsd>=3.0.1",
],

extras_require={
Expand Down
11 changes: 11 additions & 0 deletions thumbor/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@
'THUMBOR_LOG_DATE_FORMAT', '%Y-%m-%d %H:%M:%S',
'Date Format to be used by thumbor when writing log messages.', 'Logging')

# Host that statsd instrumentation is sent to. Defaults to None; the
# ThumborStatsClient in thumbor/context.py treats a falsy value as
# "disabled" and then never sends any data.
Config.define(
'STATSD_HOST', None,
'Host to send statsd instrumentation to', 'Logging')

# Prefix handed to the statsd client as its `prefix` argument when the
# Context builds its statsd_client. Defaults to None.
Config.define(
'STATSD_PREFIX', None,
'Prefix for statsd', 'Logging')

Config.define('MAX_WIDTH', 0, "Max width in pixels for images read or generated by thumbor", 'Imaging')
Config.define('MAX_HEIGHT', 0, "Max height in pixels for images read or generated by thumbor", 'Imaging')
Config.define('MIN_WIDTH', 1, "Min width in pixels for images read or generated by thumbor", 'Imaging')
Expand Down Expand Up @@ -131,6 +139,9 @@
# Client SSL certificate filename; the HTTP loader ASCII-encodes this value
# and passes it as `client_cert` when building the request.
Config.define(
'HTTP_LOADER_CLIENT_CERT', None,
'The filename for client SSL certificate', 'HTTP Loader')
# When true, the HTTP loader configures tornado's AsyncHTTPClient to use
# "tornado.curl_httpclient.CurlAsyncHTTPClient". The loader also selects the
# curl client when both HTTP_LOADER_PROXY_HOST and HTTP_LOADER_PROXY_PORT
# are set, regardless of this flag.
Config.define(
'HTTP_LOADER_CURL_ASYNC_HTTP_CLIENT', False,
'If the CurlAsyncHTTPClient should be used', 'HTTP Loader')

# FILE STORAGE GENERIC OPTIONS
Config.define(
Expand Down
22 changes: 22 additions & 0 deletions thumbor/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,27 @@
from os.path import abspath, exists

from thumbor.filters import FiltersFactory
from thumbor.utils import logger
from thumbor.url import Url
import statsd

class ThumborStatsClient(statsd.StatsClient):
    """statsd client that only transmits when ``config.STATSD_HOST`` is set.

    Every payload is written to the debug log. It is forwarded to the
    parent ``statsd.StatsClient`` only when the client is enabled.
    """

    def __init__(self, config, host, port=8125, prefix=None, maxudpsize=512):
        """Initialize the client.

        :param config: thumbor configuration; ``config.STATSD_HOST`` decides
            whether the client is enabled.
        :param host: statsd host; replaced by ``'localhost'`` when disabled.
        :param port: statsd port.
        :param prefix: optional prefix forwarded to the parent client.
        :param maxudpsize: forwarded to the parent client (it was previously
            accepted here but silently dropped).
        """
        self.config = config
        self.enabled = bool(config.STATSD_HOST)
        if not self.enabled:
            # Just setting this so we can initialize the client -
            # we never send any data if enabled is false
            host = 'localhost'
        super(ThumborStatsClient, self).__init__(
            host, port, prefix=prefix, maxudpsize=maxudpsize)

    def _send(self, data):
        """Log ``data`` at debug level and send it only when enabled."""
        logger.debug("STATSD: %s", data)
        if self.enabled:
            super(ThumborStatsClient, self)._send(data)


class Context:
Expand All @@ -34,6 +54,8 @@ def __init__(self, server=None, config=None, importer=None, request_handler=None
self.modules = None
self.filters_factory = FiltersFactory(self.modules.filters if self.modules else [])
self.request_handler = request_handler
self.statsd_client = ThumborStatsClient(config, config.STATSD_HOST, 8125, prefix=config.STATSD_PREFIX)



class ServerParameters(object):
Expand Down
16 changes: 16 additions & 0 deletions thumbor/handlers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,18 @@ def execute_image_operations(self):

should_store = self.context.config.RESULT_STORAGE_STORES_UNSAFE or not self.context.request.unsafe
if self.context.modules.result_storage and should_store:
start = datetime.datetime.now()
result = self.context.modules.result_storage.get()
finish = datetime.datetime.now()
self.context.statsd_client.timing('result_storage.incoming_time', (finish - start).total_seconds() * 1000 )
if result is None:
self.context.statsd_client.incr('result_storage.miss')
else:
self.context.statsd_client.incr('result_storage.hit')
self.context.statsd_client.incr('result_storage.bytes_read', len(result))

if result is not None:

mime = BaseEngine.get_mimetype(result)
if mime == '.gif' and self.context.config.USE_GIFSICLE_ENGINE:
self.context.request.engine = GifEngine(self.context)
Expand Down Expand Up @@ -240,7 +250,11 @@ def finish_request(self, context, result=None):

if should_store:
if context.modules.result_storage and not context.request.prevent_result_storage:
start = datetime.datetime.now()
context.modules.result_storage.put(results)
finish = datetime.datetime.now()
context.statsd_client.incr('result_storage.bytes_written', len(results))
context.statsd_client.timing('result_storage.outgoing_time', (finish - start).total_seconds() * 1000 )

def optimize(self, context, image_extension, results):
for optimizer in context.modules.optimizers:
Expand Down Expand Up @@ -315,6 +329,7 @@ def _fetch(self, url, extension, callback):
buffer = storage.get(url)

if buffer is not None:
self.context.statsd_client.incr('storage.hit')
mime = BaseEngine.get_mimetype(buffer)
if mime == '.gif' and self.context.config.USE_GIFSICLE_ENGINE:
self.context.request.engine = GifEngine(self.context)
Expand All @@ -323,6 +338,7 @@ def _fetch(self, url, extension, callback):

callback(False, buffer=buffer)
else:
self.context.statsd_client.incr('storage.miss')
def handle_loader_loaded(buffer):
if buffer is None:
callback(False, None)
Expand Down
12 changes: 9 additions & 3 deletions thumbor/loaders/http_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,25 @@ def validate(context, url):
return False


def return_contents(response, url, callback):
def return_contents(response, url, callback, context):
    """Report fetch metrics to statsd and hand the image body to ``callback``.

    :param response: HTTP response object; ``code``, ``error``, ``body`` and
        ``time_info`` are read from it.
    :param url: URL that was fetched; only used in log messages.
    :param callback: called with ``None`` on error or empty body, otherwise
        with ``response.body``.
    :param context: object exposing ``statsd_client`` with ``incr`` and
        ``timing`` methods.
    """
    context.statsd_client.incr('original_image.status.' + str(response.code))
    if response.error:
        logger.warn("ERROR retrieving image {0}: {1}".format(url, str(response.error)))
        callback(None)
    elif response.body is None or len(response.body) == 0:
        logger.warn("ERROR retrieving image {0}: Empty response.".format(url))
        callback(None)
    else:
        time_info = response.time_info
        if time_info:
            for name in time_info:
                context.statsd_client.timing('original_image.time_info.' + name, time_info[name] * 1000)
            # Skip the throughput metric when 'total' is missing or zero.
            # Dividing unconditionally would raise before the callback runs
            # and leave the image request unanswered.
            total = time_info.get('total')
            if total:
                context.statsd_client.timing('original_image.time_info.bytes_per_second', len(response.body) / total)
        callback(response.body)


def load(context, url, callback):
if context.config.HTTP_LOADER_PROXY_HOST and context.config.HTTP_LOADER_PROXY_PORT:
using_proxy = context.config.HTTP_LOADER_PROXY_HOST and context.config.HTTP_LOADER_PROXY_PORT
if using_proxy or context.config.HTTP_LOADER_CURL_ASYNC_HTTP_CLIENT:
tornado.httpclient.AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
client = tornado.httpclient.AsyncHTTPClient()

Expand Down Expand Up @@ -78,7 +84,7 @@ def load(context, url, callback):
client_cert=encode(context.config.HTTP_LOADER_CLIENT_CERT)
)

client.fetch(req, callback=partial(return_contents, url=url, callback=callback))
client.fetch(req, callback=partial(return_contents, url=url, callback=callback, context=context))

def encode(string):
    """Return ``string`` encoded as ASCII, passing ``None`` through unchanged."""
    if string is None:
        return None
    return string.encode('ascii')

0 comments on commit 6a6dff3

Please sign in to comment.