Permalink
Browse files

Initial Version

  • Loading branch information...
0 parents commit f5e6fba44d5c4556e2025893fedb4f7333e7321e @symkat committed Sep 27, 2010
Showing with 316 additions and 0 deletions.
  1. +2 −0 ChangeLog
  2. +13 −0 INSTALL
  3. +33 −0 LICENSE
  4. +27 −0 README
  5. +8 −0 app.yaml
  6. +39 −0 hutils.py
  7. +11 −0 index.yaml
  8. +152 −0 main.py
  9. +31 −0 models.py
@@ -0,0 +1,2 @@
+Version 0.1 / September 29, 2010
+- Initial Version
13 INSTALL
@@ -0,0 +1,13 @@
+1. Learn about Google AppEngine:
+ http://code.google.com/appengine/docs/whatisgoogleappengine.html
+
+2. Learn about Deploying an application:
+ http://code.google.com/appengine/articles/domains.html
+
+3. Change app.yaml => "application: *replace*me*"
+
+4. Change the configuration in main.py (origin = http://replace*me/)
+ This should be changed to the domain you are mirroring.
+
+5. Deploy the application
+ Then check out http://yourappname.appspot.com/
33 LICENSE
@@ -0,0 +1,33 @@
+###############################################################################
+# SymPullCDN #
+# The latest version of this can be found at: #
+# #
+###############################################################################
+# Copyright (c) 2010 SymKat (symkat.com) #
+# #
+# Redistribution and use in source and binary forms, with or without #
+# modification, are permitted provided that the following conditions are #
+# met: #
+# #
+# * Redistributions of source code must retain the above copyright #
+# notice, this list of conditions, and the following disclaimer. #
+# * Redistributions in binary form must reproduce the above copyright #
+# notice, this list of conditions and the following disclaimer in the #
+# documentation and/or other materials provided with the distribution. #
+# * The name SymKat may not be used to endorse or promote #
+# products derived from this software without specific prior written #
+# permission. #
+###############################################################################
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" #
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE #
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE #
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE #
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR #
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF #
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS #
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN #
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) #
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE #
+# POSSIBILITY OF SUCH DAMAGE. #
+###############################################################################
+
27 README
@@ -0,0 +1,27 @@
+About:
+ SymPullCDN is a Google App Engine application written
+ in Python that allows Google App Engine to function as
+ a reverse caching proxy.
+
+ Its primary purpose is to offload the serving of static
+ files to Google App Engine, saving bandwidth and
+ processing time on my own servers.
+
+ SymPullCDN makes an effort to be HTTP compliant; however,
+ some aspects of RFC 2616 have been ignored in the
+ development of it for one reason or another. If strict
+ compliance to the HTTP Specification is required, you may
+ want to take a look at Squid. On the other hand, this is
+ "Good Enough" for most situations.
+
+Known Bugs:
+ Some date formats provided by an origin HTTP server
+ may not be handled correctly. If you experience
+ this please see "Reporting Bugs".
+
+Reporting Bugs:
+ Please send stack traces, debug output, and steps
+ to reproduce the bug to me at symkat@symkat.com.
+
+ Please include the origin domain and URI.
+
@@ -0,0 +1,8 @@
+application: *replace*me*
+version: 1
+runtime: python
+api_version: 1
+
+handlers:
+- url: .*
+ script: main.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+
+import datetime
+import re
+
# Compiled regexes
find_n_max_age = re.compile( r"max-age=(\d+)", re.IGNORECASE )
find_s_max_age = re.compile( r"s-maxage=(\d+)", re.IGNORECASE )

# Format of the HTTP dates parsed from the Expires and Date headers.
_HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S GMT"

# Given a mapping of HTTP headers, find the time when the
# entity must be refreshed in the cache.
# Order of precedence:
#   1. Cache-Control: s-maxage
#   2. Cache-Control: max-age
#   3. Now + ( Expires - Date )
#   4. Set a default cache delta

def get_expires( headers ):
    """Return the local datetime at which a cached entity must be refreshed.

    headers -- mapping of header name to header value; only the
               "Cache-Control", "Expires" and "Date" keys are consulted.

    Returns a naive datetime relative to datetime.datetime.now():
      * now + s-maxage seconds, else now + max-age seconds, when
        Cache-Control carries one of those directives;
      * now + (Expires - Date) when an Expires header is present;
      * now itself (already expired) when Expires is present but either
        it or Date is missing or not in the expected date format;
      * now + 7 days when none of the above applies.
    """
    now = datetime.datetime.now()

    if "Cache-Control" in headers:
        cache_control = headers["Cache-Control"]
        # search(), not match(): the directive is rarely the first one in
        # the header (e.g. "public, max-age=3600").  s-maxage is tried
        # first so it takes precedence over max-age.
        for pattern in ( find_s_max_age, find_n_max_age ):
            found = pattern.search( cache_control )
            if found:
                # Both directives are expressed in seconds.
                return now + datetime.timedelta( seconds=int( found.group(1) ) )

    if "Expires" in headers:
        try:
            h_expires = datetime.datetime.strptime( headers["Expires"], _HTTP_DATE_FORMAT )
            h_date = datetime.datetime.strptime( headers["Date"], _HTTP_DATE_FORMAT )
        except ( KeyError, ValueError ):
            # No usable Expires/Date pair (e.g. "Expires: 0" or no Date
            # header): be conservative and treat the entity as already
            # expired so that it is revalidated on its next use.
            return now
        # Apply the origin's own freshness lifetime to the local clock;
        # this sidesteps clock skew between the origin and this host.
        return now + ( h_expires - h_date )

    return now + datetime.timedelta( days=7 )
+
def get_header( want, headers ):
    """Return the value stored under *want* in *headers*, or None if absent."""
    if want not in headers:
        return None
    return headers[want]
@@ -0,0 +1,11 @@
+indexes:
+
+# AUTOGENERATED
+
+# This index.yaml is automatically updated whenever the dev_appserver
+# detects that a new type of query is run. If you want to manage the
+# index.yaml file manually, remove the above marker line (the line
+# saying "# AUTOGENERATED"). If you want to manage some indexes
+# manually, move them above the marker line. The index.yaml file is
+# automatically uploaded to the admin console when you next deploy
+# your application using appcfg.py.
152 main.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python
+#
+
+from google.appengine.ext import db
+from google.appengine.ext import webapp
+from google.appengine.ext.webapp import util
+from google.appengine.api.urlfetch import fetch
+import datetime
+import models
+import hutils
+import re
+
+################################################################################
+# SymPullCDN Configuration #
+################################################################################
+# #
+# 1. Origin #
+# The origin server will be mirrored by this instance of SymPullCDN. #
+# Configure a full http:// URL with a FQDN, trailing slash included. #
+origin = "http://replace*me/" #
+# #
+# 2. Cachable Codes #
+# This is a list of HTTP Status Codes that will be cached when sent from #
+# the origin. By default only 200 OK codes will be cached. Edit this #
+# list only if you have a reason. #
+# #
+cache_codes = ( 200, ) #
+# #
+# #
+################################################################################
+
+
+
+
+
# Compiled regular expression: recognises the Cache-Control directives
# no-cache, no-store and private, ignoring case.
no_cache_regex = re.compile( r"(no-cache|no-store|private)", flags=re.I )
+
class Entity(db.Model):
    """One cached origin response, as stored in the datastore."""

    # Request path the response was fetched for; the handler looks
    # entities up by this value.
    uri = db.StringProperty(required=True)
    # Value of the origin's Last-Modified header, kept as the raw string.
    LastModified = db.StringProperty()
    # Response headers received from the origin, stored as a dict.
    headers = models.DictProperty()
    # Moment after which the entity has to be revalidated.
    expires = db.DateTimeProperty()
    # HTTP status code the origin answered with.
    status = db.IntegerProperty()
    # Response body received from the origin.
    content = db.BlobProperty(required=True)
+
class MainHandler(webapp.RequestHandler):
    """Answer GET requests from the datastore cache, pulling from the origin on a miss.

    Every response carries an X-SymPullCDN-Status header naming the path
    that produced it: Hit[200], Hit[304], Hit[EVALIDFAIL], Miss[NoCtrl],
    Miss[NoCode] or Miss[Cached].
    """

    def get(self):
        """Serve the requested path from the cache, or fetch it from the origin.

        Cached entities are looked up by self.request.path.  Returns True
        once a response has been written.
        """
        entity = Entity.all().filter("uri =", self.request.path).get()
        if entity:
            return self._serve_cached(entity)
        return self._serve_from_origin()

    ############################################################################################
    #                                                                                          #
    #         Getting entity from cache, Passing to the user, possibly revalidating it         #
    #                                                                                          #
    ############################################################################################

    def _serve_cached(self, entity):
        """Send a cached entity, revalidating it against the origin first if it has expired."""
        if entity.expires <= datetime.datetime.now():
            if not self._revalidate(entity):
                # Revalidation failed, send the entity stale and delete from the cache.
                self._send(entity.headers, entity.status, "Hit[EVALIDFAIL]", entity.content)
                entity.delete()
                return True

        # See if we can send a 304
        if "If-Modified-Since" in self.request.headers:
            if self.request.headers["If-Modified-Since"] == entity.LastModified:
                # A 304 carries the stored headers but no body.
                return self._send(entity.headers, 304, "Hit[304]")

        return self._send(entity.headers, entity.status, "Hit[200]", entity.content)

    def _revalidate(self, entity):
        """Refresh an expired entity from the origin.

        Returns True when the origin answered 304 or 200 and the entity was
        updated and saved, False for any other status code.
        """
        conditional = {}
        # Only make the request conditional when a validator was stored;
        # otherwise the header value would be None.
        if entity.LastModified:
            conditional["If-Modified-Since"] = entity.LastModified
        fresh = fetch( self._origin_url(), method="GET", headers=conditional )

        # If 304 JUST update the freshness information.
        if fresh.status_code == 304:
            entity.expires = hutils.get_expires( fresh.headers )
            last_modified = hutils.get_header( "Last-Modified", fresh.headers )
            # Keep the stored validator when the 304 does not repeat it.
            if last_modified:
                entity.LastModified = last_modified
            entity.save()
            return True

        # If 200, update the content too, together with the headers and
        # status that describe it.
        if fresh.status_code == 200:
            entity.headers = dict(fresh.headers)
            entity.status = fresh.status_code
            entity.expires = hutils.get_expires( fresh.headers )
            entity.LastModified = hutils.get_header( "Last-Modified", fresh.headers )
            entity.content = fresh.content
            entity.save()
            return True

        return False

    ############################################################################################
    #                                                                                          #
    #             Fetching The Entity, Passing it to the user, possibly storing it             #
    #                                                                                          #
    ############################################################################################

    def _serve_from_origin(self):
        """Fetch an uncached path from the origin, relay it, and store it when allowed."""
        fetched = fetch( self._origin_url(), method="GET", payload=None )

        # Respect no-cache, no-store and private.  search() is used because
        # the directive need not be the first one in the header.
        if "Cache-Control" in fetched.headers:
            if no_cache_regex.search( fetched.headers["Cache-Control"] ):
                return self._send(fetched.headers, fetched.status_code,
                                  "Miss[NoCtrl]", fetched.content)

        # Only Cache Specific Codes
        if fetched.status_code not in cache_codes:
            return self._send(fetched.headers, fetched.status_code,
                              "Miss[NoCode]", fetched.content)

        # Store the response for later requests.
        Entity(
            uri = self.request.path,
            headers = dict(fetched.headers),
            expires = hutils.get_expires( fetched.headers ),
            LastModified = hutils.get_header( "Last-Modified", fetched.headers ),
            status = fetched.status_code,
            content = fetched.content).save()

        return self._send(fetched.headers, fetched.status_code,
                          "Miss[Cached]", fetched.content)

    ############################################################################################
    #                                                                                          #
    #                                        Helpers                                           #
    #                                                                                          #
    ############################################################################################

    def _origin_url(self):
        """Return the origin URL for the requested path, joined with exactly one slash."""
        return origin.rstrip("/") + self.request.path

    def _send(self, headers, status, cdn_status, content=None):
        """Write one response: copied headers, status code, status marker and optional body.

        headers    -- mapping of header names to values to copy onto the response
        status     -- HTTP status code to answer with
        cdn_status -- value for the X-SymPullCDN-Status header
        content    -- response body; nothing is written when it is None

        Always returns True.
        """
        for key in headers:
            self.response.headers[key] = headers[key]
        self.response.set_status(status)
        # Set after the copy so a header of the same name among the copied
        # ones cannot mask the marker.
        self.response.headers["X-SymPullCDN-Status"] = cdn_status
        if content is not None:
            self.response.out.write(content)
        return True
+
+
def main():
    """Build the WSGI application that routes every path to MainHandler and run it."""
    routes = [('/.*', MainHandler)]
    application = webapp.WSGIApplication(routes, debug=True)
    util.run_wsgi_app(application)
+
+
+if __name__ == '__main__':
+ main()
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+from google.appengine.ext import db
+import pickle
+
+# Class taken from http://stackoverflow.com/questions/1953784
class DictProperty(db.Property):
    """Datastore property that holds a dict, persisted as a pickled blob."""

    data_type = dict

    def get_value_for_datastore(self, model_instance):
        """Pickle the property's value and wrap the result in a db.Blob."""
        raw = super(DictProperty, self).get_value_for_datastore(model_instance)
        pickled = pickle.dumps(raw)
        return db.Blob(pickled)

    def make_value_from_datastore(self, value):
        """Unpickle a stored value; a stored None becomes an empty dict."""
        if value is not None:
            # NOTE: pickle.loads runs on whatever bytes the datastore returns.
            return pickle.loads(value)
        return dict()

    def default_value(self):
        """Return a copy of the configured default, or an empty dict if there is none."""
        if self.default is not None:
            return super(DictProperty, self).default_value().copy()
        return dict()

    def validate(self, value):
        """Reject anything that is not a dict, then defer to the base validation."""
        if isinstance(value, dict):
            return super(DictProperty, self).validate(value)
        raise db.BadValueError('Property %s needs to be convertible '
                               'to a dict instance (%s) of class dict' % (self.name, value))

    def empty(self, value):
        """Report a value as empty only when it is None (an empty dict is not empty)."""
        return value is None

0 comments on commit f5e6fba

Please sign in to comment.