Skip to content

Commit

Permalink
beginning of file upload, fit_to_area, various other tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
jessykate committed Nov 13, 2010
1 parent 0f1c9e9 commit 5b72221
Show file tree
Hide file tree
Showing 7 changed files with 200 additions and 140 deletions.
182 changes: 132 additions & 50 deletions wordapi/api/handlers.py
@@ -1,7 +1,7 @@
from piston.handler import BaseHandler
from piston.utils import rc
import nltk, urllib, urllib2
import urllib, random
import random, math
from lib import html_unescape
try:
import json
Expand Down Expand Up @@ -47,31 +47,27 @@ def read(self, request):
thisarg = urllib.unquote(request.GET.get(arg))
self.kwargs[arg] = thisarg

#print 'kwargs:'
#print self.kwargs
# each handler defines an 'execute' function that calls the
# main work function with arguments stored in the class
# attributes. might simply call some other function, or can
# contain the work itself. the former would look something
# like:
#
# def execute(self):
# work_func(self.fargs[0], ... self.fargs[n], *self.kwargs)
#
return self.execute()

def create(self, request):
''' mirrors the functionality implemented by read(), for POST
requests.'''
print 'in "create"'
print request.FILES
self.fargs = []
for arg in self.args_required:
if not request.POST.get(arg, None):
resp = rc.BAD_REQUEST
resp.write(': Missing Required Parameter "%s"' % arg)
return resp
else:
thisarg = urllib.unquote_plus(request.POST.get(arg))
# assumes only that a single file will be uploaded and that its
# field name in the form will be 'file'. XXX TODO make this
# more robust.
if arg == 'file':
thisarg = request.FILES['file']
else:
thisarg = urllib.unquote_plus(request.POST.get(arg))
self.fargs.append(thisarg)

self.kwargs = {}
Expand All @@ -80,15 +76,6 @@ def create(self, request):
thisarg = urllib.unquote(request.POST.get(arg))
self.kwargs[arg] = thisarg

#print 'kwargs'
#print self.kwargs
#print ''
#print 'fargs'
#print self.fargs

# each handler defines an 'execute' function that calls the
# main work function with arguments stored in the class
# attributes
return self.execute()


Expand Down Expand Up @@ -208,30 +195,64 @@ def color_scheme(color_a=None, color_b=None, total_steps=5):

return palette

def quadratic_equation(a,b,c):
    ''' solves a*x^2 + b*x + c = 0 for real x and returns both roots as a
    tuple (x1, x2), where x1 uses the +sqrt branch and x2 the -sqrt branch.

    a, b and c may be anything float() accepts. raises ValueError if a is
    zero (the equation is not quadratic) or if the discriminant is negative
    (no real roots; raised by math.sqrt). '''
    a = float(a)
    b = float(b)
    c = float(c)
    if a == 0.0:
        raise ValueError('coefficient "a" must be non-zero for a quadratic equation')
    root = math.sqrt(pow(b,2.0) - 4.0*a*c)
    # the whole of 2a is the denominator; writing "/2.0*a" would divide by 2
    # and then multiply by a.
    denominator = 2.0*a
    x1 = (-b + root)/denominator
    x2 = (-b - root)/denominator
    return x1,x2

def tag_cloud(dist, id_ = "", class_ = "", width=None, height=None,
max_size=70, min_size=10, max_words = None,
start_color=None, end_color=None, color_steps=None,
sort_order="random"):
''' returns a dict with style and body elements. style contains
default styling for the tag cloud, while body contains the html
markup. '''
def fit_to_area(width, height, dist, font):
    ''' returns a function mapping a word's frequency to a font size (px),
    chosen so that the words roughly fill a width x height area.

    each word is modelled as a square whose side is its font size, and the
    font size is linear in the frequency with slope 1: size = freq + b. this
    function solves for the intercept b.

    width, height -- dimensions of the target area, in px.
    dist -- the word/frequency distribution; after sorting it is iterated
        as (word, freq) pairs.
    font -- currently unused (see the note on effective font sizes below).

    raises ValueError if no real intercept exists, ie. the frequencies are
    too spread out for the requested area. '''

    # sort() returns a list of tuples in order of decreasing frequency
    # NOTE(review): tag_cloud() also sorts dist before calling this function;
    # confirm that sorting a second time here is intended.
    dist = sort(dist)

    # add a 10% buffer to the area we fill:
    w = 0.9*width
    h = 0.9*height

    # there is the mild problem that a given font's true size in pixels often
    # actually takes up more or less pixels than its 'font size'. so we need to
    # figure out an effective width and height by converting to specific font
    # units. (not implemented yet, which is why 'font' is unused.)

    # calculate the number of times a frequency occurs
    freqs = {}
    for word, freq in dist:
        freqs[freq] = freqs.get(freq, 0) + 1

    # equation of the line: y = mx + b; m=1
    # to calculate area width * height in px^2:
    #   width * height = sum over i: n_i*(f_i + b)^2
    # where we want to solve for b, the intercept of the line.
    # each i'th term simplifies to:
    #   nf^2 + 2nfb + nb^2
    # we compute the sum and collect like terms, ending with
    # a polynomial of the form:
    #   a1 + a2*b + a3*b^2 = width*height
    #   (a1 - width*height) + a2*b + a3*b^2 = 0
    # so a3 is the quadratic coefficient, a2 the linear coefficient and
    # (a1 - width*height) the constant term.
    a1 = -(w*h)
    a2 = a3 = 0
    for f, n in freqs.items():
        a1 += n*pow(f,2.0)
        a2 += 2*n*f
        a3 += n

    if not a3:
        # no words at all: there is nothing to fit, so leave sizes unshifted.
        return lambda freq: freq

    # solve a3*b^2 + a2*b + a1 = 0 for b using the quadratic formula. the
    # coefficients must go in in this order (quadratic, linear, constant).
    discriminant = pow(a2,2.0) - 4.0*a3*a1
    if discriminant < 0:
        raise ValueError('cannot fit the given frequencies into a %sx%s area'
                         % (width, height))

    # a3 > 0, so the +sqrt branch is the larger of the two roots.
    b = (-a2 + math.sqrt(discriminant))/(2.0*a3)

    font_size_fn = lambda freq: freq+b
    return font_size_fn

def min_max_extrapolate(dist, max_size, min_size):
# explicitly set the indices where the min and max values can be found in
# the dist list.
MAX = 0
MIN = -1

# truncate the list of items if max_words was specified
if max_words:
max_words = int(max_words)
dist = dist[:max_words]

# get the equation of the line between min_size and max_size. do this AFTER
# truncating to max_words and BEFORE shuffling the order around.

# y = mx+b --> max_size = m*max_freq + b, min_size = m*min_freq + b.
# max_size - min_size = m (max_freq - min_freq)
# --> m = (max_size - min_size)/(max_freq - min_freq)
Expand All @@ -240,19 +261,53 @@ def tag_cloud(dist, id_ = "", class_ = "", width=None, height=None,
max_size = float(max_size)
min_size = float(min_size)
# if they're all the same frequency, everything is the same size. use the
# mid-point between max_size and min_size (it's left as a function so we
# can use it easily in place of a dynamic value below).
# mid-point between max_size and min_size (it's still built as a function
# so we can use it easily in place of a dynamic value).
if max_freq == min_freq:
font_size = lambda freq: (max_size + min_size)/2
font_size_fn = lambda freq: (max_size + min_size)/2.0
else:
m = (max_size - min_size)/(max_freq - min_freq)
b = max_size - m*max_freq
font_size = lambda freq: m*freq+b
font_size_fn = lambda freq: m*freq+b

return font_size_fn

def tag_cloud(dist, id_ = "", class_ = "", width=None, height=None,
max_size=None, min_size=None, max_words = None,
start_color=None, end_color=None, color_steps=None,
sort_order="random"):
''' returns a dict with style and body elements. style contains
default styling for the tag cloud, while body contains the html
markup. '''

# sort() returns a list of tuples in order of decreasing frequency
dist = sort(dist)

# truncate the list of items if max_words was specified
if max_words:
max_words = int(max_words)
dist = dist[:max_words]

# get the equation of the line
max_size = 70;
min_size = 10;
if max_size and min_size:
font_size_fn = min_max_extrapolate(dist, max_size, min_size)
else:
if not (width and height):
width = 600
height = 800
font = 'times new roman'
font_size_fn = fit_to_area(width, height, dist, font)
print font_size_fn

# get the equation of the line between min_size and max_size. do this AFTER
# truncating to max_words and BEFORE shuffling the order around.

# determine the sort order. if the sort order is frequency, there's nothing
# to do since the distribution object is already sorted by frequency.
if sort_order not in ['random', 'frequency', 'alphabetical']:
print 'invalid sort orderi; using default = random'
print 'invalid sort order; using default = random'
sort_order = 'random'

if sort_order == 'random':
Expand Down Expand Up @@ -304,11 +359,11 @@ def tag_cloud(dist, id_ = "", class_ = "", width=None, height=None,
color = colors[color_index]
style += ('''
.%s {padding-left: 15px; padding-right: 15px; font-size: %s; color: %s }'''
% (freq_as_word, font_size(f), color))
% (freq_as_word, font_size_fn(f), color))
style += '''
</style>'''
#print 'style portion'
#print style
print 'style portion'
print style
resp = {'body': body, 'style': style}
return resp

Expand Down Expand Up @@ -363,6 +418,13 @@ class TagCloudBaseHandler(GeneralHandler):
def execute(self):
pass

def escape_text(self, raw_text, encoding=None):
    ''' returns raw_text after passing it through html_unescape(). '''
    # NOTE: encoding falls back to 'utf8' but is not used any further; the
    # unicode(raw_text, encoding, 'ignore') decoding step is currently
    # disabled.
    encoding = encoding or 'utf8'
    return html_unescape(raw_text)

def get_text(self):
# return the actual contents to be used in the tag cloud. implemented
# by child class, depending on the call type-- eg. in-line or url
Expand Down Expand Up @@ -415,20 +477,40 @@ class TagCloudBodyHandler(TagCloudBaseHandler):
args_required = ['body']

def get_text(self):
return self.fargs[0]
return self.escape_text(self.fargs[0])

def execute(self):
tokens = self.get_tokens()
freq = self.get_freqdist(tokens)
return self.get_cloud(freq)


class TagCloudFileHandler(TagCloudBaseHandler):
    ''' builds a tag cloud from the text of an uploaded file. '''
    # 'file' is the single required argument. create() stores the uploaded
    # file object from request.FILES['file'] as fargs[0].
    args_required = ['file']

    def get_text(self):
        ''' returns the contents of the uploaded file after running them
        through escape_text(). '''
        fp = self.fargs[0]
        # assumes file is small enough to fit into memory..
        # the chunks are joined in one pass rather than grown with +=, which
        # re-copies the accumulated string for every chunk.
        contents = ''.join(fp.chunks())
        return self.escape_text(contents)

    def execute(self):
        ''' tokenizes the file's text, counts frequencies and returns the
        resulting cloud. '''
        tokens = self.get_tokens()
        freq = self.get_freqdist(tokens)
        return self.get_cloud(freq)

class TagCloudUrlHandler(TagCloudBaseHandler):
# 'url' becomes fargs[0] in the parent class's execute() method
args_required = ['url']

def get_text(self):
# the text to be analyzed is passed in via a url, so we need to retrieve it
# the text to be analyzed is passed in via a url, so we need to
# retrieve it
url = self.fargs[0]
if not url.startswith('http://'):
url = 'http://'+url
Expand Down
4 changes: 3 additions & 1 deletion wordapi/api/urls.py
Expand Up @@ -2,7 +2,7 @@
UrlTokenHandler, RequestTokenHandler,
UrlFrequencyHandler, RequestFrequencyHandler,
TagCloudBodyHandler, TagCloudUrlHandler,
TagCloudFreqHandler,
TagCloudFreqHandler,TagCloudFileHandler
)

from django.conf.urls.defaults import patterns, url
Expand All @@ -23,5 +23,7 @@
url(r'^tagcloud/url\.(?P<emitter_format>.+)$', Resource(TagCloudUrlHandler)),
# build a tag cloud from the json-encoded dictionary of frequencies passed in
url(r'^tagcloud/freq\.(?P<emitter_format>.+)$', Resource(TagCloudFreqHandler)),
# build a tag cloud from the file uploaded
url(r'^tagcloud/file\.(?P<emitter_format>.+)$', Resource(TagCloudFileHandler)),

)
15 changes: 9 additions & 6 deletions wordfrontend/frontend/forms.py
Expand Up @@ -5,6 +5,7 @@ class TagCloudForm(forms.Form):
help_text = {
'body' : 'Paste your text inline (eg. into this box)',
'url' : 'Retrieve text from the specified url.',
'file' : 'Upload a file',
'freqs' : "Enter in a dictionary of word:frequency counts",
'strip' : 'Strip any html markup. Default is True.',
'max_size' : 'Maximum word size (px). Default 70px.',
Expand All @@ -23,7 +24,9 @@ class TagCloudForm(forms.Form):

body = forms.CharField(widget=forms.Textarea(attrs={'rows':'20', 'cols':60 }), help_text = help_text['body'], required=False)
url = forms.CharField(help_text = help_text['url'], required=False)
freqs = forms.CharField(help_text = help_text['freqs'], required=False)
file = forms.FileField(help_text = help_text['file'], required=False)
# this is an option in the API but not likely to be useful in the frontend
#freqs = forms.CharField(help_text = help_text['freqs'], required=False)
max_words = forms.IntegerField(help_text = help_text['max_words'], required=False)
start_color = forms.CharField(required=False)
end_color = forms.CharField(required=False)
Expand All @@ -44,11 +47,11 @@ def clean(self):
cleaned_data = self.cleaned_data
url = cleaned_data.get('url')
body = cleaned_data.get('body')
freqs = cleaned_data.get('freqs')
if not url and not body and not freqs:
raise forms.ValidationError('You must specify one of "url", "body" or "freqs" fields')
if (url and body) or (url and freqs) or (body and freqs):
raise forms.ValidationError('Specify only one of "url" or "body" or "freqs" fields')
file = cleaned_data.get('file')
if not url and not body and not file:
raise forms.ValidationError('You must specify one of "url", "body" or "file" fields')
if (url and body) or (url and file) or (body and file):
raise forms.ValidationError('Specify only one of "url" or "body" or "file" fields')

start_color = cleaned_data.get('start_color')
end_color = cleaned_data.get('end_color')
Expand Down

0 comments on commit 5b72221

Please sign in to comment.