From 4d5bdff2b8b1b77917860cfc9ec4590d29d03156 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Wed, 17 Sep 2008 16:19:46 +0200 Subject: [PATCH] Added experimental and undocumented bytecode cache support --HG-- branch : trunk --- jinja2/bccache.py | 156 ++++++++++++++++++++++++++++++++++++++++++ jinja2/environment.py | 15 +++- jinja2/ext.py | 5 +- jinja2/loaders.py | 15 +++- 4 files changed, 185 insertions(+), 6 deletions(-) create mode 100644 jinja2/bccache.py diff --git a/jinja2/bccache.py b/jinja2/bccache.py new file mode 100644 index 000000000..50532cdf2 --- /dev/null +++ b/jinja2/bccache.py @@ -0,0 +1,156 @@ +# -*- coding: utf-8 -*- +""" + jinja2.bccache + ~~~~~~~~~~~~~~ + + This module implements the bytecode cache system Jinja is optionally + using. This is useful if you have very complex template situations and + the compilation of all those templates slows down your application too + much. + + Situations where this is useful are often forking web applications that + are initialized on the first request. + + :copyright: Copyright 2008 by Armin Ronacher. + :license: BSD. +""" +from os import path +import marshal +import cPickle as pickle +from cStringIO import StringIO +try: + from hashlib import sha1 +except ImportError: + from sha import new as sha1 + + +bc_version = 1 +bc_magic = 'j2' + pickle.dumps(bc_version, 2) + + +class Bucket(object): + """Buckets are used to store the bytecode for one template. It's + initialized by the bytecode cache with the checksum for the code + as well as the unique key. + + The bucket then provides methods to load the bytecode from file(-like) + objects and strings or dump it again. 
+ """ + + def __init__(self, cache, environment, key, checksum): + self._cache = cache + self.environment = environment + self.key = key + self.checksum = checksum + self.reset() + + def reset(self): + """Resets the bucket (unloads the code).""" + self.code = None + + def load(self, f): + """Loads bytecode from a file or file-like object f.""" + # make sure the magic header is correct + magic = f.read(len(bc_magic)) + if magic != bc_magic: + self.reset() + return + # the source code of the file changed, we need to reload + checksum = pickle.load(f) + if self.checksum != checksum: + self.reset() + return + # now load the code. Because marshal is not able to load + # from arbitrary streams we have to work around that + if isinstance(f, file): + self.code = marshal.load(f) + else: + self.code = marshal.loads(f.read()) + + def dump(self, f): + """Dump the bytecode into f.""" + if self.code is None: + raise TypeError('can\'t write empty bucket') + f.write(bc_magic) + pickle.dump(self.checksum, f, 2) + if isinstance(f, file): + marshal.dump(self.code, f) + else: + f.write(marshal.dumps(self.code)) + + def loads(self, string): + """Load bytecode from a string.""" + self.load(StringIO(string)) + + def dumps(self): + """Return the bytecode as string.""" + out = StringIO() + self.dump(out) + return out.getvalue() + + def write_back(self): + """Write the bucket back to the cache.""" + self._cache.dump_bucket(self) + + +class BytecodeCache(object): + """To implement your own bytecode cache you have to subclass this class + and override :meth:`load_bucket` and :meth:`dump_bucket`. Both of these + methods are passed a :class:`Bucket` that they have to load or dump. + """ + + def load_bucket(self, bucket): + """Subclasses have to override this method to load bytecode + into a bucket. + """ + raise NotImplementedError() + + def dump_bucket(self, bucket): + """Subclasses have to override this method to write the + bytecode from a bucket back to the cache. 
+ """ + raise NotImplementedError() + + def get_cache_key(self, name): + """Return the unique hash key for this template name.""" + return sha1(name.encode('utf-8')).hexdigest() + + def get_source_checksum(self, source): + """Return a checksum for the source.""" + return sha1(source.encode('utf-8')).hexdigest() + + def get_bucket(self, environment, name, source): + """Return a cache bucket.""" + key = self.get_cache_key(name) + checksum = self.get_source_checksum(source) + bucket = Bucket(self, environment, key, checksum) + self.load_bucket(bucket) + return bucket + + +class FileSystemCache(BytecodeCache): + """A bytecode cache that stores bytecode on the filesystem.""" + + def __init__(self, directory, pattern='%s.jbc'): + self.directory = directory + self.pattern = pattern + + def _get_cache_filename(self, bucket): + return path.join(self.directory, self.pattern % bucket.key) + + def load_bucket(self, bucket): + filename = self._get_cache_filename(bucket) + if path.exists(filename): + f = file(filename, 'rb') + try: + bucket.load(f) + finally: + f.close() + + def dump_bucket(self, bucket): + filename = self._get_cache_filename(bucket) + f = file(filename, 'wb') + try: + bucket.dump(f) + finally: + f.close() diff --git a/jinja2/environment.py b/jinja2/environment.py index 00e3646db..5b77d45c9 100644 --- a/jinja2/environment.py +++ b/jinja2/environment.py @@ -155,6 +155,11 @@ class Environment(object): requested the loader checks if the source changed and if yes, it will reload the template. For higher performance it's possible to disable that. + + `bytecode_cache` + If set to a bytecode cache object, this object will provide a + cache for the internal Jinja bytecode so that templates don't + have to be parsed if they were not changed. """ #: if this environment is sandboxed. 
Modifying this variable won't make @@ -189,7 +194,8 @@ def __init__(self, autoescape=False, loader=None, cache_size=50, - auto_reload=True): + auto_reload=True, + bytecode_cache=None): # !!Important notice!! # The constructor accepts quite a few arguments that should be # passed by keyword rather than position. However it's important to @@ -225,7 +231,9 @@ def __init__(self, # set the loader provided self.loader = loader + self.bytecode_cache = None self.cache = create_cache(cache_size) + self.bytecode_cache = bytecode_cache self.auto_reload = auto_reload # load extensions @@ -248,7 +256,8 @@ def overlay(self, block_start_string=missing, block_end_string=missing, line_statement_prefix=missing, trim_blocks=missing, extensions=missing, optimized=missing, undefined=missing, finalize=missing, autoescape=missing, loader=missing, - cache_size=missing, auto_reload=missing): + cache_size=missing, auto_reload=missing, + bytecode_cache=missing): """Create a new overlay environment that shares all the data with the current environment except of cache and the overriden attributes. Extensions cannot be removed for a overlayed environment. A overlayed @@ -497,7 +506,7 @@ def __new__(cls, source, variable_end_string, comment_start_string, comment_end_string, line_statement_prefix, trim_blocks, newline_sequence, frozenset(extensions), optimized, undefined, finalize, - autoescape, None, 0, False) + autoescape, None, 0, False, None) return env.from_string(source, template_class=cls) @classmethod diff --git a/jinja2/ext.py b/jinja2/ext.py index a666d77d2..353f2654d 100644 --- a/jinja2/ext.py +++ b/jinja2/ext.py @@ -435,8 +435,9 @@ def babel_extract(fileobj, keywords, comment_tags, options): # fill with defaults so that environments are shared # with other spontaneus environments. 
The rest of the # arguments are optimizer, undefined, finalize, autoescape, - # loader, cache size and auto reloading setting - True, Undefined, None, False, None, 0, False + # loader, cache size, auto reloading setting and the + # bytecode cache + True, Undefined, None, False, None, 0, False, None ) source = fileobj.read().decode(options.get('encoding', 'utf-8')) diff --git a/jinja2/loaders.py b/jinja2/loaders.py index e964fdcf7..662425c17 100644 --- a/jinja2/loaders.py +++ b/jinja2/loaders.py @@ -89,7 +89,20 @@ def load(self, environment, name, globals=None): if globals is None: globals = {} source, filename, uptodate = self.get_source(environment, name) - code = environment.compile(source, name, filename) + + code = bucket = None + if environment.bytecode_cache is not None: + bucket = environment.bytecode_cache.get_bucket(environment, name, + source) + code = bucket.code + + if code is None: + code = environment.compile(source, name, filename) + + if bucket and bucket.code is None: + bucket.code = code + bucket.write_back() + return environment.template_class.from_code(environment, code, globals, uptodate)