Permalink
Fetching contributors…
Cannot retrieve contributors at this time
281 lines (251 sloc) 10 KB
##############################################################################
##
## minjson.py implements JSON reading and writing in python.
## Copyright (c) 2005 Jim Washington and Contributors.
##
## This library is free software; you can redistribute it and/or
## modify it under the terms of the GNU Lesser General Public
## License as published by the Free Software Foundation; either
## version 2.1 of the License, or (at your option) any later version.
##
## This library is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## Lesser General Public License for more details.
##
## You should have received a copy of the GNU Lesser General Public
## License along with this library; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##
##############################################################################
# minjson.py
# use python's parser to read minimal javascript objects.
# str's objects and fixes the text to write javascript.
# Thanks to Patrick Logan for starting the json-py project and making so many
# good test cases.
# Jim Washington 7 Aug 2005.
from re import compile, sub, search, DOTALL, escape
# Default for the stripWhiteSpace argument of write().  Set to True if
# transmission size is much more important than speed; this only affects
# writing, and makes a minimal difference in output size.
alwaysStripWhiteSpace = False
# Characters treated as forbidden math operators when reading.  Each
# character of this string is one operator; input that contains one of them
# outside a quoted string is rejected.  Add to this string if you wish to
# exclude additional math operators from reading.
badOperators = '*'
#################################
# read JSON object #
#################################
# /* ... */ comments.  DOTALL lets '.' cross newlines, and the non-greedy
# '.*?' stops at the first closing '*/'.
slashstarcomment = compile(r'/\*.*?\*/',DOTALL)
# // comments, matched from the '//' through the newline that ends the line.
# NOTE: the trailing '\n' is required, so a '//' that has no newline after it
# is left in place.  Neither pattern knows about quoted strings.
doubleslashcomment = compile(r'//.*\n')
def _Read(aString):
    """Evaluate a javascript expression as a python expression.

    eval is given a global ``__builtins__`` that holds only True, False
    and None, and locals that map javascript's null, true and false onto
    None, True and False.

    aString -- the expression text to evaluate.

    Returns the resulting python object.

    Raises ReadException when evaluation fails with a NameError (for
    example an unquoted word) or with a SyntaxError.
    """
    # NOTE(security): the text is still handed to eval.  Emptying
    # __builtins__ narrows what an expression can name, but it is not a
    # sandbox; treat untrusted input with care.
    allowedGlobals = {
        "__builtins__": {'True': True, 'False': False, 'None': None}}
    jsConstants = {'null': None, 'true': True, 'false': False}
    try:
        return eval(aString, allowedGlobals, jsConstants)
    except NameError:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ReadException(
            "Strings must be quoted. Could not read '%s'." % aString)
    except SyntaxError:
        raise ReadException(
            "Syntax error. Could not read '%s'." % aString)
# badOperators is defined at the top of the module
# generate the regexes for math detection: one compiled pattern per
# excluded operator, keyed by (operator, 'numeric operation').
regexes = {}
for operator in badOperators:
    # re.escape makes every operator character literal, so regex
    # metacharacters other than '+' and '*' (for example '|', '^' or '?')
    # can be added to badOperators as well.
    regexes[operator, 'numeric operation'] = compile(
        r"\d*\s*%(op)s|%(op)s\s*\d*" % {'op': escape(operator)})
def _getStringState(aSequence):
    """Return the quote closures still needed at the end of aSequence.

    aSequence -- a string, or a list of characters, to scan in order.

    Returns an empty list when every quote that was opened has been
    closed, otherwise a list holding the quote character that is still
    open.  Backslash-escaped quotes are not recognised.
    """
    state = []
    for k in aSequence:
        if k not in ('"', "'"):
            continue
        if not state:
            # not inside a string: this quote opens one
            state.append(k)
        elif k == state[-1]:
            # the same quote character closes the open string
            state.pop()
        # otherwise it is the other quote character inside an open
        # string (as in "it's"): plain text, so it changes nothing
    return state
def _sanityCheckMath(aString):
    """Check that every excluded math operator sits inside a quoted string.

    This is mainly to keep a client from successfully sending
    'D0S'*9**9**9**9...

    aString -- the expression text about to be evaluated.

    Returns True if OK, False otherwise.
    """
    # Integer midpoint; '//' gives the same result on Python 2 and 3.
    halfStrLen = len(aString) // 2
    for operator in badOperators:
        pattern = regexes[(operator, 'numeric operation')]
        #fortunately, matches should be rare
        for match in pattern.finditer(aString):
            exprStart, exprEnd = match.span()
            # We only need to know the match is within open quote status,
            # so scan whichever side of it is shorter.
            if exprStart <= halfStrLen:
                teststr = aString[:exprStart]
            else:
                # span() ends are exclusive, so the text after the match
                # starts at exprEnd itself; starting at exprEnd+1 would
                # drop a closing quote that directly follows the operator.
                teststr = list(aString[exprEnd:])
                teststr.reverse()
            if not _getStringState(teststr):
                return False
    return True
def safeRead(aString):
    """Turn the js into happier python and check for bad operations
    before sending it to the interpreter.

    aString -- the javascript expression text.

    Returns the python object produced by _Read.

    Raises ReadException when _sanityCheckMath rejects the text, or when
    _Read cannot evaluate it.
    """
    # get rid of trailing nulls.  Konqueror appends this, and the python
    # interpreter balks when it is there.
    aString = aString.rstrip(chr(0))
    # strip leading and trailing whitespace
    aString = aString.strip()
    # zap /* ... */ comments, then // comments.  Both substitutions are
    # purely textual, so a matching sequence inside a quoted string is
    # removed as well.
    aString = slashstarcomment.sub('', aString)
    aString = doubleslashcomment.sub('', aString)
    # here, we only check for the * operator as a DOS problem by default;
    # additional operators may be excluded by editing badOperators
    # at the top of the module
    if not _sanityCheckMath(aString):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ReadException('Unacceptable JSON expression: %s' % aString)
    return _Read(aString)


read = safeRead
#################################
# write object as JSON #
#################################
#alwaysStripWhiteSpace is defined at the top of the module
# (python spelling, javascript spelling) pairs used when writing
tfnTuple = (('True', 'true'), ('False', 'false'), ('None', 'null'))


def _replaceTrueFalseNone(aString):
    """Return aString with True, False and None rewritten as their
    javascript counterparts true, false and null."""
    for pythonName, jsName in tfnTuple:
        # replace() leaves the text alone when pythonName is absent
        aString = aString.replace(pythonName, jsName)
    return aString
def _handleCode(subStr,stripWhiteSpace):
    """Convert one non-string fragment of a python repr to javascript.

    Replaces True, False, and None with their javascript counterparts,
    removes a trailing unicode u, removes long-int L's, turns tuple
    parentheses into list brackets, and strips spaces if requested.

    subStr -- the fragment of text lying between two string literals.
    stripWhiteSpace -- when true, remove every space from the fragment.

    Returns the converted fragment.
    """
    if 'e' in subStr:
        #True, False, and None have 'e' in them. :)
        subStr = _replaceTrueFalseNone(subStr)
    if stripWhiteSpace:
        # re.sub might do a better job, but takes longer.
        # Spaces are the majority of the whitespace, anyway...
        subStr = subStr.replace(' ', '')
    # The fragment may be empty (or may just have been emptied by the
    # space removal above); indexing [-1] would then raise IndexError.
    if subStr and subStr[-1] in "uU":
        #remove the unicode u that prefixes the next string literal
        subStr = subStr[:-1]
    #remove Ls from long ints
    subStr = subStr.replace("L", '')
    #do tuples as lists
    subStr = subStr.replace("(", '[').replace(")", ']')
    return subStr
# Matches a double-quoted string that contains a single-quote but no
# double-quotes, together with the python punctuation character after it.
redoublequotedstring = compile(r'"[^"]*\'[^"]*"[,\]\}:\)]')
escapedSingleQuote = r"\'"
escapedDoubleQuote = r'\"'


def doQuotesSwapping(aString):
    """Return aString with each double-quoted string that contains a
    single quote rewritten as a single-quoted string whose inner single
    quotes are backslash-escaped."""

    def requote(match):
        matched = match.group()
        # matched is: opening ", the text, closing ", one punctuation char
        inner = matched[1:-2].replace("'", escapedSingleQuote)
        return "'" + inner + "'" + matched[-1]

    return redoublequotedstring.sub(requote, aString)
def _pickQuoteMarker(aString):
    """Return a placeholder string that does not occur in aString."""
    # Prefer a single character that is absent from the text.  A run of
    # '|' can merge with a literal '|' next to it, so that the quote is
    # later restored one position off; a lone absent character cannot.
    for code in range(1, 32):
        candidate = chr(code)
        if candidate not in aString:
            return candidate
    # Every candidate is in use: fall back to a run of '|' that is longer
    # than any run already in the text.
    marker = '|'
    while marker in aString:
        marker += '|'
    return marker


def _pyexpr2jsexpr(aString, stripWhiteSpace):
    """Turn the string representation of a python object into javascript.

    Take advantage of python's formatting of string representations of
    objects.  Python always uses "'" to delimit strings.  Except it doesn't
    when there is ' in the string.  Fix that, then, if we split on that
    delimiter, we have a list that alternates non-string text with string
    text.  Only the non-string text is passed to _handleCode; the string
    text is kept as it is.

    aString -- the python representation to convert.
    stripWhiteSpace -- passed to _handleCode; when true, spaces are
        removed from the non-string text.

    Returns the javascript text.
    """
    #python will quote with " when there is a ' in the string,
    #so fix that first
    if redoublequotedstring.search(aString):
        aString = doQuotesSwapping(aString)
    marker = None
    if escapedSingleQuote in aString:
        #hide escaped single quotes behind a marker so that the split
        #below only sees the real string delimiters
        marker = _pickQuoteMarker(aString)
        aString = aString.replace(escapedSingleQuote, marker)
    #escape double-quotes
    aString = aString.replace('"', escapedDoubleQuote)
    #split the string on the real single-quotes
    splitStr = aString.split("'")
    outList = []
    for index, subStr in enumerate(splitStr):
        #even positions are non-string text; do replacements there
        if index % 2 == 0:
            subStr = _handleCode(subStr, stripWhiteSpace)
        outList.append(subStr)
    result = '"'.join(outList)
    if marker:
        #put the escaped single-quotes back as "'"
        result = result.replace(marker, "'")
    return result
def write(obj, encoding="utf-8",stripWhiteSpace=alwaysStripWhiteSpace):
    """Return obj represented as a javascript string.

    obj -- the object to represent.
    encoding -- the encoding applied to the text.
    stripWhiteSpace -- passed on to _pyexpr2jsexpr for non-string objects.
    """
    try:
        #not really sure encode does anything here
        text = str(obj).encode(encoding)
    except UnicodeEncodeError:
        text = obj.encode(encoding)
    if not isinstance(obj, basestring):
        # anything that is not a string goes through the repr fix-ups
        return _pyexpr2jsexpr(text, stripWhiteSpace).encode(encoding)
    if '"' not in text:
        return '"%s"' % text
    # Undo any \" first, so that double-quotes which were already escaped
    # are not escaped a second time.
    unescaped = text.replace(escapedDoubleQuote, '"')
    return '"%s"' % unescaped.replace('"', escapedDoubleQuote)
class ReadException(Exception):
    """Raised when text cannot be read as a JSON expression."""
class WriteException(Exception):
    """Exception type reserved for write failures.

    Nothing in the visible part of this module raises it.
    """