Permalink
Browse files

Bug 747158 - kumascript: Optimize wiki.pageExists

* refresh_wiki_caches management command, which should be run from a
  crontab and will preload memcache with summary data useful for
  kumascript and possibly more in the future

* Fixes for encoding in kumascript response caching in Kuma

* Expose user agent Cache-Control header to templates, for future use in
  deciding whether or not to revalidate cached data

* kumascript JSON configs updated to use memcache
  • Loading branch information...
1 parent d7cf675 commit 807ba319cd6e2ca15e44367bd32bcd1b4fd26662 @lmorchard lmorchard committed Apr 20, 2012
No changes.
No changes.
@@ -0,0 +1,55 @@
+"""
+Refresh cached wiki data.
+
+Run this periodically; it is useful for preventing redundant traffic between
+Kuma and other services like Kumascript.
+"""
+import sys
+import time
+import datetime
+import hashlib
+import logging
+
+from optparse import make_option
+
+from django.conf import settings
+from django.core.cache import cache
+from django.contrib.auth.models import User
+from django.core.management.base import (BaseCommand, NoArgsCommand,
+ CommandError)
+
+from wiki.models import (Document, Revision)
+
+
+PAGE_EXISTS_KEY_TMPL = getattr(settings, 'wiki_page_exists_key_tmpl',
+ 'kuma:page_exists:%s')
+PAGE_EXISTS_TIMEOUT = getattr(settings, 'wiki_page_exists_timeout',
+ 86400)
+
+
+class Command(BaseCommand):
+ # Management command: writes one "page exists" cache entry per wiki Document.
+ help = "Refresh cached wiki data"
+ option_list = BaseCommand.option_list + (  # no extra command-line options are defined
+ )
+ # handle(): iterate over all Documents (ordered by '-modified') and set a cache key for each.
+ def handle(self, *args, **options):
+ self.options = options  # keep the parsed options on the instance
+ # The total is counted once up front so progress messages can show "(done / total)".
+ logging.info("Querying all Documents...")
+ doc_cnt, doc_total = 0, Document.objects.count()
+ for doc in Document.objects.order_by('-modified').iterator():
+
+ # Give some indication of progress, once every 1000 documents
+ doc_cnt += 1
+ if (doc_cnt % 1000) == 0:
+ logging.info("(%s / %s) documents processed" %
+ (doc_cnt, doc_total))
+ # The path is lowercased before hashing, so keys do not depend on the path's letter case.
+ # Get an MD5 hex digest of the lowercased, UTF-8 encoded path
+ path = doc.full_path.lower().encode('utf-8')
+ path_hash = hashlib.md5(path).hexdigest()
+ # The stored value is the constant 1; PAGE_EXISTS_TIMEOUT is passed as the cache timeout.
+ # Warm up the page_exists cache
+ key = PAGE_EXISTS_KEY_TMPL % path_hash
+ cache.set(key, 1, PAGE_EXISTS_TIMEOUT)
@@ -1,5 +1,5 @@
<div class="warning" id="kumascript-errors">
-<p>{{ _("There are scripting errors on this page:") }}</p>
+<p>{{ _("There are scripting messages on this page:") }}</p>
<ul>
{% for error in kumascript_errors %}
<li class="error error-{{ error.level }}">
View
@@ -378,6 +378,7 @@ def _perform_kumascript_request(request, response_headers, document,
slug=document.slug,
tags=[x.name for x in document.tags.all()],
modified=time.mktime(document.modified.timetuple()),
+ cache_control=cache_control,
)
# Encode the vars as kumascript headers, as base64 JSON-encoded values.
headers.update(dict(
@@ -396,7 +397,17 @@ def _perform_kumascript_request(request, response_headers, document,
resp = requests.get(url, headers=headers,
timeout=constance.config.KUMASCRIPT_TIMEOUT)
- if resp.status_code == 200:
+ if resp.status_code == 304:
+ # Conditional GET was a pass, so use the cached content.
+ c_result = cache.get_many([ck_body, ck_errors])
+ resp_body = c_result.get(ck_body, '').decode('utf-8')
+ resp_errors = c_result.get(ck_errors, None)
+
+ # Set a header so we can see what happened in caching.
+ response_headers['X-Kumascript-Caching'] = (
+ '304 Not Modified, Age: %s' % resp.headers.get('age', 0))
+
+ elif resp.status_code == 200:
# HACK: Assume we're getting UTF-8, which we should be.
# TODO: Better solution would be to upgrade the requests module
# in vendor from 0.6.1 to at least 0.10.6, and use resp.text,
@@ -448,22 +459,14 @@ def _perform_kumascript_request(request, response_headers, document,
# Cache the request for conditional GET, but use the max_age for
# the cache timeout here too.
- cache.set_many({
- ck_etag: resp.headers.get('etag'),
- ck_modified: resp.headers.get('last-modified'),
- ck_body: resp_body,
- ck_errors: resp_errors
- }, timeout=max_age)
-
- elif resp.status_code == 304:
- # Conditional GET was a pass, so use the cached content.
- c_result = cache.get_many([ck_body, ck_errors])
- resp_body = c_result.get(ck_body, None)
- resp_errors = c_result.get(ck_errors, None)
-
- # Set a header so we can see what happened in caching.
- response_headers['X-Kumascript-Caching'] = (
- '304 Not Modified, Age: %s' % resp.headers.get('age', 0))
+ cache.set(ck_etag, resp.headers.get('etag'),
+ timeout=max_age)
+ cache.set(ck_modified, resp.headers.get('last-modified'),
+ timeout=max_age)
+ cache.set(ck_body, resp_body.encode('utf-8'),
+ timeout=max_age)
+ if resp_errors:
+ cache.set(ck_errors, resp_errors, timeout=max_age)
elif resp.status_code == None:
resp_errors = [
@@ -10,6 +10,10 @@
"port": 9080,
"numWorkers": 4,
"workerTimeout": 10000,
+ "memcache": {
+ "server": {"127.0.0.1:11211": 1},
+ "options": { }
+ },
"document_url_template": "http://localhost/en-US/docs/{path}?raw=1",
"template_url_template": "http://localhost/en-US/docs/en-US/Template:{name}?raw=1",
"template_class": "EJSTemplate",
@@ -10,6 +10,10 @@
"port": 9080,
"numWorkers": 4,
"workerTimeout": 10000,
+ "memcache": {
+ "server": {"127.0.0.1:11211": 1},
+ "options": { }
+ },
"document_url_template": "http://localhost/en-US/docs/{path}?raw=1",
"template_url_template": "http://localhost/en-US/docs/en-US/Template:{name}?raw=1",
"template_class": "EJSTemplate",

0 comments on commit 807ba31

Please sign in to comment.