Permalink
Browse files

initial commit

  • Loading branch information...
0 parents commit 7f5b3717ee27d452b5ace60f9a202bd51232f1dd Aaron Straup Cope committed Aug 12, 2009
@@ -0,0 +1,129 @@
+from google.appengine.ext import webapp
+from django.utils import simplejson
+from django.utils import html
+import types
+
class APIApp (webapp.RequestHandler) :
    """Base request handler for a small XML/JSON web API.

    Every response is wrapped in an ``rsp`` root carrying a ``stat`` of
    ``ok`` or ``fail``.  The output format is the handler's default
    unless the request supplies a valid ``format`` parameter ('xml' or
    'json').
    """

    def __init__ (self, default_format='xml') :
        """Set up the handler.

        default_format -- format used when the request does not ask for
                          one of the valid formats.
        """

        webapp.RequestHandler.__init__(self)

        self.format = default_format
        self.valid_formats = ('xml', 'json')

    def ensure_args (self, required) :
        """Check that every named request parameter is present and non-empty.

        Sends an API error (code 1) for the first missing parameter and
        returns False; returns True when all parameters are present.
        """

        for name in required :
            if not self.request.get(name) :
                self.api_error(1, 'required parameter missing: %s' % name)
                return False

        return True

    def api_error (self, code=999, msg='INVISIBLE ERROR') :
        """Send a ``stat=fail`` response carrying an error code and message."""

        out = {'stat' : 'fail', 'error' : {'code' : code, 'message' : msg}}
        self.send_rsp(out)

    def api_ok (self, out=None) :
        """Send a ``stat=ok`` response.

        out -- optional dict of response data.  It is copied, so neither
               the caller's dict nor a shared default is modified.
        """

        rsp = dict(out or {})
        rsp['stat'] = 'ok'
        self.send_rsp(rsp)

    def send_rsp (self, data) :
        """Serialize ``data`` in the negotiated format and write it out."""

        requested = self.request.get('format')

        # Anything that is not a known format (including the empty
        # string) leaves the default format in place.
        if requested in self.valid_formats :
            self.format = requested

        rsp = self.serialize_rsp('rsp', data)

        if self.format == 'json' :
            return self.send_json(rsp)

        return self.send_xml(rsp)

    def send_json (self, json) :
        """Write an already-serialized JSON string to the response.

        JSON is deliberately served as text/plain so that it renders in
        a browser, unless the request carries a ``dtrt`` parameter, in
        which case application/json is used.  See also:
        http://simonwillison.net/2009/Feb/6/json/
        """

        content_type = 'text/plain'

        if self.request.get('dtrt') :
            content_type = 'application/json'

        self.response.headers["Content-Type"] = content_type
        self.response.out.write(json)

    def send_xml (self, xml) :
        """Write an XML declaration followed by the serialized XML body."""

        self.response.headers["Content-Type"] = "text/xml"
        self.response.out.write("<?xml version=\"1.0\" ?>")
        self.response.out.write(xml)

    def serialize_rsp (self, root, data) :
        """Serialize ``data`` under ``root`` using the current format."""

        if self.format == 'json' :
            return self.serialize_json(root, data)

        return self.serialize_xml(root, data)

    def serialize_json (self, root, data) :
        """Return ``{root: data}`` encoded as a JSON string."""

        return simplejson.dumps({root : data})

    def serialize_xml (self, root, data) :
        """Recursively render ``data`` as XML named ``root``.

        * dict   -> an element; scalar values become attributes, dict and
                    list values become child elements, and the special
                    ``_content`` key becomes the element's text.
        * list   -> each item is rendered in turn under the same name.
        * scalar -> an attribute fragment (`` name="value"``).

        Attributes and children are kept when ``_content`` is present,
        and ``_content`` is only treated as absent when it is None (so
        0 and the empty string are emitted rather than dropped).
        """

        if isinstance(data, list) :
            return ''.join([self.serialize_xml(root, item) for item in data])

        if not isinstance(data, dict) :
            return " %s=\"%s\"" % (self.prepare_xml_content(root), self.prepare_xml_content(data))

        tag = self.prepare_xml_content(root)

        attrs = []
        children = []
        cdata = None

        for (key, value) in data.items() :

            if key == '_content' :
                cdata = value
            elif isinstance(value, (dict, list)) :
                children.append((key, value))
            else :
                attrs.append((key, value))

        parts = ["<%s" % tag]
        parts.extend([self.serialize_xml(key, value) for (key, value) in attrs])

        # Nothing to put inside the element: emit a self-closing tag.
        if cdata is None and not children :
            parts.append(" />")
            return ''.join(parts)

        parts.append(">")

        if cdata is not None :
            parts.append(self.prepare_xml_content(cdata))

        parts.extend([self.serialize_xml(key, value) for (key, value) in children])
        parts.append("</%s>" % tag)

        return ''.join(parts)

    # NOTE: this is also applied to element and attribute names, where
    # escaping alone does not guarantee a valid XML name.

    def prepare_xml_content (self, data) :
        """Return ``data`` coerced to unicode and HTML-escaped."""

        return html.escape(unicode(data))
5 README
@@ -0,0 +1,5 @@
+This is a super-duper thin Google AppEngine API wrapper around the topia term
+extractor.
+
+The short version is, you call it by passing a 'text' parameter (GET or POST)
+and an optional 'format' parameter ('xml' or 'json').
@@ -0,0 +1,8 @@
+application: gae-termextractor
+version: 1
+runtime: python
+api_version: 1
+
+handlers:
+- url: .*
+ script: main.py
@@ -0,0 +1,11 @@
+indexes:
+
+# AUTOGENERATED
+
+# This index.yaml is automatically updated whenever the dev_appserver
+# detects that a new type of query is run. If you want to manage the
+# index.yaml file manually, remove the above marker line (the line
+# saying "# AUTOGENERATED"). If you want to manage some indexes
+# manually, move them above the marker line. The index.yaml file is
+# automatically uploaded to the admin console when you next deploy
+# your application using appcfg.py.
16 main.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+
+import wsgiref.handlers
+from google.appengine.ext import webapp
+
+import termextractor
+
def main () :
    """Build the WSGI application and serve the current CGI request.

    Defining a zero-argument main() lets the App Engine Python runtime
    cache this handler script between requests and simply call main()
    again, instead of re-loading the script every time.
    """

    handlers = [
        ('/', termextractor.Main),
        ('/terms', termextractor.Terms),
    ]

    # NOTE(review): debug=True is kept from the original; confirm whether
    # debug output should be enabled on the deployed application.
    application = webapp.WSGIApplication(handlers, debug=True)
    wsgiref.handlers.CGIHandler().run(application)

if __name__ == '__main__' :
    main()
@@ -0,0 +1,58 @@
+# $Id$
+
+# http://pypi.python.org/pypi/topia.termextract/
+
+from APIApp import APIApp
+from topia.termextract import extract
+from topia.termextract import tag
+
class termextractor (APIApp) :
    """API handler base class that owns a topia term extractor.

    The extractor is stored on ``self.ex`` for subclasses to call.
    """

    def __init__ (self) :
        """Default to XML output and build the extractor."""

        APIApp.__init__(self, 'xml')

        # The tagger has to be initialized before it is handed to the
        # extractor.
        pos_tagger = tag.Tagger()
        pos_tagger.initialize()

        extractor = extract.TermExtractor(pos_tagger)
        extractor.filter = extract.permissiveFilter

        self.ex = extractor
+
class Main (termextractor) :
    """Landing page: prints a short usage hint with example links."""

    def get (self) :
        """Write the usage line, an example /terms link and a JSON variant."""

        example = "%s/terms?text=this is the network of our disconnect" % self.request.host_url
        write = self.response.out.write

        write("Usage: <em>GET or POST</em> ")
        write("<a href=\"%s\">%s</a>" % (example, example))
        write(" (<a href=\"%s&format=json\">&format=json</a>)" % example)
+
class Terms (termextractor) :
    """The /terms endpoint: extracts terms from the ``text`` parameter."""

    def get (self) :
        """Handle GET by running the extraction."""

        return self.extract()

    def post (self) :
        """Handle POST by running the extraction."""

        return self.extract()

    def extract (self, text='') :
        """Run the term extractor and send the result as an API response.

        text -- text to analyse; when empty (the default) it is read
                from the request's ``text`` parameter.

        Sends an API error (code 1) when no text is available, otherwise
        an ok response holding the query and one entry per term.
        """

        if not text :
            text = self.request.get('text')

        if not text :
            self.api_error(1, 'Required input missing')
            return

        terms = {'query' : text, 'term' : []}

        # Each result is indexed as (term, occurrences, strength); the
        # strength must come from the third field, not the second.
        for found in self.ex(text) :
            terms['term'].append({
                'value' : found[0],
                'occurrence' : found[1],
                'strength' : found[2],
            })

        self.api_ok({'terms' : terms})
        return
@@ -0,0 +1,3 @@
+import pkg_resources
+pkg_resources.declare_namespace(__name__)
+
@@ -0,0 +1 @@
+# Make a package.
Oops, something went wrong.

0 comments on commit 7f5b371

Please sign in to comment.