Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Tree: d7a7724f21
Fetching contributors…

Cannot retrieve contributors at this time

575 lines (510 sloc) 22.648 kB
## ParseMaster, version 1.0 (pre-release) (2005/05/12) x6
## Copyright 2005, Dean Edwards
## Web: http://dean.edwards.name/
##
## This software is licensed under the CC-GNU LGPL
## Web: http://creativecommons.org/licenses/LGPL/2.1/
##
## Ported to Python by Florian Schulze
import os, re, sys
# a multi-pattern parser
class Pattern:
    """A single ParseMaster rule.

    Holds the regular-expression source text, the replacement to apply when
    it matches, and the number of groups the rule occupies in the combined
    expression.
    """

    def __init__(self, expression, replacement, length):
        # Plain attribute storage; no validation is performed.
        (self.expression,
         self.replacement,
         self.length) = expression, replacement, length

    def __str__(self):
        # Wrap the expression in a capturing group so it can be used as one
        # alternative of a larger expression.
        return "".join(("(", self.expression, ")"))
class Patterns(list):
    """A list of rules whose string form is one combined alternation."""

    def __str__(self):
        # Each element is stringified and the results are OR-ed together.
        return "|".join(map(str, self))
class ParseMaster:
    """Multi-pattern search-and-replace engine.

    Every rule registered with add() becomes one parenthesised alternative
    of a single combined regular expression.  execute() runs that expression
    over the input once and hands each match to the replacement that belongs
    to the alternative which matched.

    Public attributes:
        ignoreCase -- when true, execute() compiles the expression with re.I.
        escapeChar -- when not None, every "<escapeChar><char>" pair in the
                      input is hidden from the patterns during execute() and
                      restored afterwards.
    """

    # constants
    EXPRESSION = 0
    REPLACEMENT = 1
    LENGTH = 2

    # every "(" left after escaped characters are removed counts as a group
    GROUPS = re.compile(r"""\(""", re.M)
    # a "$<digit>" reference somewhere in a replacement string
    SUB_REPLACE = re.compile(r"""\$\d""", re.M)
    # a replacement that is nothing but one "$<number>" reference
    INDEXED = re.compile(r"""^\$\d+$""", re.M)
    TRIM = re.compile(r"""(['"])\1\+(.*)\+\1\1$""", re.M)
    # a backslash followed by any character
    ESCAPE = re.compile(r"""\\.""", re.M)
    # text wrapped in \x01 markers by DELETE()
    DELETED = re.compile("""\x01[^\x01]*\x01""", re.M)

    # Integer types accepted as an "indexed" replacement.  `long` only exists
    # on Python 2, so fall back to `int` alone elsewhere.
    try:
        _INTEGER_TYPES = (int, long)
    except NameError:
        _INTEGER_TYPES = (int,)

    def __init__(self):
        # private
        self._patterns = Patterns()  # patterns stored by index
        self._escaped = []           # characters hidden by _escape()
        self.ignoreCase = False
        self.escapeChar = None

    def DELETE(self, match, offset):
        """Default replacement: mark the matched text for deletion.

        The text is wrapped in \\x01 markers rather than dropped immediately;
        execute() strips marked spans after escaped characters are restored.
        """
        return "\x01" + match.group(offset) + "\x01"

    def _repl(self, a, o, r, i):
        """Expand "$n" references in the replacement template `r`.

        a -- the match object
        o -- group index of the pattern that matched
        r -- replacement template
        i -- number of groups belonging to the pattern

        References are expanded from the highest number down so that "$12"
        is handled before "$1".  Groups that did not participate expand to
        the empty string.
        """
        while i:
            m = a.group(o + i - 1)
            if m is None:
                m = ""
            r = r.replace("$" + str(i), m)
            i = i - 1
        return ParseMaster.TRIM.sub("$1", r)

    # public
    def add(self, expression="^$", replacement=None):
        """Register a pattern.

        expression  -- regular-expression source text
        replacement -- None (delete the match), a callable taking
                       (match, offset), or a string.  A string consisting of
                       a single "$n" selects that group; a string containing
                       "$n" references anywhere is expanded per match; any
                       other string is used literally.
        """
        if replacement is None:
            replacement = self.DELETE
        # count the number of sub-expressions
        # - add one because each pattern is itself a sub-expression
        stripped = self._internalEscape(str(expression))
        length = len(ParseMaster.GROUPS.findall(stripped)) + 1
        # does the pattern deal with sub-expressions?
        # search(), not match(): the reference may appear anywhere in the
        # template (e.g. "Hello $2 $1"), not only at its start.
        if (isinstance(replacement, str)
                and ParseMaster.SUB_REPLACE.search(replacement)):
            if ParseMaster.INDEXED.match(replacement):
                # a simple lookup (e.g. "$2"): store the index, used for
                # fast retrieval of matched strings
                replacement = int(replacement[1:]) - 1
            else:
                # a complicated lookup (e.g. "Hello $2 $1"):
                # build a function to do the lookup
                i = length
                r = replacement
                replacement = lambda a, o: self._repl(a, o, r, i)
        # pass the modified arguments
        self._patterns.append(Pattern(expression, replacement, length))

    def execute(self, string):
        """Apply every registered pattern to `string` and return the result."""
        flags = re.M
        if self.ignoreCase:
            flags = flags | re.I
        r = re.compile(str(self._patterns), flags)
        # Start each run with an empty escape store; characters left over
        # from a previous run would otherwise be restored into this one.
        self._escaped = []
        string = self._escape(string, self.escapeChar)
        string = r.sub(self._replacement, string)
        string = self._unescape(string, self.escapeChar)
        string = ParseMaster.DELETED.sub("", string)
        return string

    def reset(self):
        """Clear the pattern collection so that this object may be re-used."""
        self._patterns = Patterns()

    def _replacement(self, match):
        """Global replace callback: dispatch to the pattern that matched."""
        i = 1
        # loop through the patterns; `i` is the group index of the current
        # pattern inside the combined expression
        for pattern in self._patterns:
            if match.group(i) is None:
                i = i + pattern.length
                continue
            replacement = pattern.replacement
            if callable(replacement):
                return replacement(match, i)
            if isinstance(replacement, ParseMaster._INTEGER_TYPES):
                return match.group(replacement + i)
            return replacement

    def _escape(self, string, escapeChar=None):
        """Encode escaped characters.

        Every "<escapeChar><char>" pair is reduced to the bare escapeChar and
        the hidden character is queued in self._escaped.  "." does not match
        a newline, so an escapeChar directly before a newline is left as is.
        """
        if escapeChar is None:
            return string

        def repl(match):
            self._escaped.append(match.group(1))
            return escapeChar

        return re.compile(re.escape(escapeChar) + "(.)", re.M).sub(repl, string)

    def _unescape(self, string, escapeChar=None):
        """Decode escaped characters queued by _escape().

        Each escapeChar found gets the next queued character appended; when
        the queue is empty the escapeChar is kept on its own.
        """
        if escapeChar is None:
            return string

        def repl(match):
            try:
                return escapeChar + self._escaped.pop(0)
            except IndexError:
                return escapeChar

        return re.compile(re.escape(escapeChar), re.M).sub(repl, string)

    def _internalEscape(self, string):
        """Return `string` with every backslash-escaped character removed."""
        return ParseMaster.ESCAPE.sub("", string)
## packer, version 2.0 (2005/04/20)
## Copyright 2004-2005, Dean Edwards
## License: http://creativecommons.org/licenses/LGPL/2.1/
## Ported to Python by Florian Schulze
## http://dean.edwards.name/packer/
class JavaScriptPacker:
    """Python port of Dean Edwards' JavaScript packer.

    pack() is the entry point: it optionally strips comments and white-space,
    optionally shortens "$name"/"_name" identifiers, and optionally replaces
    every word with a short code and wraps the result in a self-decoding
    eval() boot function.
    """

    def __init__(self):
        # Both compression parsers are built once and re-used by pack().
        self._basicCompressionParseMaster = self.getCompressionParseMaster(False)
        self._specialCompressionParseMaster = self.getCompressionParseMaster(True)

    def basicCompression(self, script):
        """Strip comments and superfluous white-space from `script`."""
        return self._basicCompressionParseMaster.execute(script)

    def specialCompression(self, script):
        """Like basicCompression(), additionally removing ";;;" lines."""
        return self._specialCompressionParseMaster.execute(script)

    def getCompressionParseMaster(self, specialChars):
        """Build the ParseMaster used for white-space/comment removal.

        specialChars -- when true, lines starting with ";;;" are removed too.
        """
        IGNORE = "$1"
        parser = ParseMaster()
        parser.escapeChar = '\\'
        # protect strings
        parser.add(r"""'[^']*?'""", IGNORE)
        parser.add(r'"[^"]*?"', IGNORE)
        # remove comments
        parser.add(r"""//[^\n\r]*?[\n\r]""")
        parser.add(r"""/\*[^*]*?\*+([^/][^*]*?\*+)*?/""")
        # protect regular expressions
        parser.add(r"""\s+(\/[^\/\n\r\*][^\/\n\r]*\/g?i?)""", "$2")
        parser.add(r"""[^\w\$\/'"*)\?:]\/[^\/\n\r\*][^\/\n\r]*\/g?i?""", IGNORE)
        # remove: ;;; doSomething();
        if specialChars:
            parser.add(""";;;[^\n\r]+[\n\r]""")
        # remove redundant semi-colons
        # !!! mime change - CodeMirror uses for (;;) which breaks this
        #parser.add(r""";+\s*([};])""", "$2")
        # remove white-space
        parser.add(r"""(\b|\$)\s+(\b|\$)""", "$2 $3")
        parser.add(r"""([+\-])\s+([+\-])""", "$2 $3")
        parser.add(r"""\s+""", "")
        return parser

    def getEncoder(self, ascii):
        """Return a function mapping a word index to its encoded form.

        ascii <= 10 yields decimal, <= 36 base 36 (0-9a-z), <= 62 base 62
        (0-9a-zA-Z); anything larger yields base 95 using the characters
        with code points 161..255.
        """
        # digit value -> character, for all supported alphabets
        mapping = {}
        for first, offset, size in ((ord('0'), 0, 10),
                                    (ord('a'), 10, 26),
                                    (ord('A'), 36, 26),
                                    (161, 62, 95)):
            for i in range(size):
                mapping[i + offset] = chr(i + first)

        def encode_base(charCode, base, offset):
            # positional encoding; `offset` selects the alphabet in `mapping`
            digits = []
            remainder = charCode
            while 1:
                result, remainder = divmod(remainder, base)
                digits.append(mapping[remainder + offset])
                if not result:
                    break
                remainder = result
            digits.reverse()
            return "".join(digits)

        # zero encoding
        # characters: 0123456789
        def encode10(charCode):
            return str(charCode)

        # inherent base36 support
        # characters: 0123456789abcdefghijklmnopqrstuvwxyz
        def encode36(charCode):
            return encode_base(charCode, 36, 0)

        # hitch a ride on base36 and add the upper case alpha characters
        # characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
        def encode62(charCode):
            return encode_base(charCode, 62, 0)

        # use high-ascii values
        def encode95(charCode):
            return encode_base(charCode, 95, 62)

        if ascii <= 10:
            return encode10
        elif ascii <= 36:
            return encode36
        elif ascii <= 62:
            return encode62
        return encode95

    def escape(self, script):
        """Escape `script` for embedding in a single-quoted JavaScript string."""
        script = script.replace("\\", "\\\\")
        script = script.replace("'", "\\'")
        script = script.replace('\n', '\\n')
        return script

    def escape95(self, script):
        """Replace characters 0xA1..0xFF in `script` with "\\xNN" escapes.

        These are exactly the characters the base-95 encoder emits, so any
        already present in the script must be hidden.  The range includes
        0xA1 itself; characters above 0xFF cannot be written as "\\xNN" and
        are left untouched.
        """
        result = []
        for x in script:
            if '\xa1' <= x <= '\xff':
                x = "\\x%x" % ord(x)
            result.append(x)
        return "".join(result)

    def encodeKeywords(self, script, encoding, fastDecode):
        """Replace every word in `script` by its code and add the boot function."""
        # escape high-ascii values already in the script (i.e. in strings)
        if encoding > 62:
            script = self.escape95(script)
        # create the parser
        parser = ParseMaster()
        encode = self.getEncoder(encoding)
        # for high-ascii, don't encode single character low-ascii
        if encoding > 62:
            regexp = r"""\w\w+"""
        else:
            regexp = r"""\w+"""
        # build the word list
        keywords = self.analyze(script, regexp, encode)
        encoded = keywords['encoded']

        # encode
        def repl(match, offset):
            return encoded.get(match.group(offset), "")

        parser.add(regexp, repl)
        script = parser.execute(script)
        # wrap the encoded script in a decoding function
        script = self.bootStrap(script, keywords, encoding, fastDecode)
        return script

    def analyze(self, script, regexp, encode):
        """Collect the words of `script` and assign each one a code.

        Returns a dict with:
            'sorted'    -- list of words; position i is encoded as encode(i)
            'encoded'   -- dict word -> code
            'protected' -- dict of positions whose word equals its own code
        More frequent words receive lower positions.
        """
        # retrieve all words in the script
        words = re.compile(regexp, re.M).findall(script)
        ordered = []    # list of words sorted by frequency
        encoded = {}    # dictionary of word->encoding
        protected = {}  # instances of "protected" words
        if words:
            unsorted = []
            _protected = {}
            values = {}
            count = {}
            words.reverse()
            for word in words:
                # the "$" prefix keeps words apart from dict internals
                word = "$" + word
                if word not in count:
                    count[word] = 0
                    j = len(unsorted)
                    unsorted.append(word)
                    # make a dictionary of all of the protected words in
                    # this script; these are words that might be mistaken
                    # for encoding
                    values[j] = encode(j)
                    _protected["$" + values[j]] = j
                count[word] = count[word] + 1
            # prepare to sort the word list, first we must protect
            # words that are also used as codes. we assign them a code
            # equivalent to the word itself.
            # e.g. if "do" falls within our encoding range
            # then we store keywords["do"] = "do";
            # this avoids problems when decoding
            ordered = [None] * len(unsorted)
            for word in unsorted:
                if word in _protected and isinstance(_protected[word], int):
                    ordered[_protected[word]] = word[1:]
                    protected[_protected[word]] = True
                    count[word] = 0
            # Most frequent first.  A stable key-based sort gives the same
            # order as the comparison function count[b]-count[a] and works
            # on both Python 2 and Python 3.
            unsorted.sort(key=lambda word: -count[word])
            j = 0
            for i in range(len(ordered)):
                if ordered[i] is None:
                    ordered[i] = unsorted[j][1:]
                    j = j + 1
                encoded[ordered[i]] = values[i]
        return {'sorted': ordered, 'encoded': encoded, 'protected': protected}

    def encodePrivate(self, charCode):
        """Return the short name given to the private identifier number `charCode`."""
        return "_" + str(charCode)

    def encodeSpecialChars(self, script):
        """Shorten "$name" identifiers and rename "_name" identifiers."""
        parser = ParseMaster()

        # replace: $name -> n, $$name -> $$na
        def shorten(match, offset):
            length = len(match.group(offset + 2))
            start = length - max(length - len(match.group(offset + 3)), 0)
            return match.group(offset + 1)[start:start + length] + match.group(offset + 4)

        parser.add(r"""((\$+)([a-zA-Z\$_]+))(\d*)""", shorten)
        # replace: _name -> _0, double-underscore (__name) is ignored
        regexp = r"""\b_[A-Za-z\d]\w*"""
        # build the word list
        keywords = self.analyze(script, regexp, self.encodePrivate)
        # quick ref
        encoded = keywords['encoded']

        def rename(match, offset):
            return encoded.get(match.group(offset), "")

        parser.add(regexp, rename)
        return parser.execute(script)

    def bootStrap(self, packed, keywords, encoding, fastDecode):
        """Build the boot function used for loading and decoding.

        packed     -- the encoded script
        keywords   -- result of analyze(); its 'sorted' list is modified in
                      place (protected entries are blanked)
        encoding   -- one of 10, 36, 62 or 95
        fastDecode -- include the faster decoder snippet

        Raises NotImplementedError when fastDecode is requested for a script
        that contains no keywords.
        """
        ENCODE = re.compile(r"""\$encode\(\$count\)""")
        # $packed: the packed script
        packed = "'" + self.escape(packed) + "'"
        # $count: number of words contained in the script
        count = len(keywords['sorted'])
        # $ascii: base for encoding
        ascii = min(count, encoding) or 1
        # $keywords: list of words contained in the script
        for i in keywords['protected']:
            keywords['sorted'][i] = ""
        # convert from a string to an array
        keywords = "'" + "|".join(keywords['sorted']) + "'.split('|')"
        encoding_functions = {
            10: """ function($charCode) {
return $charCode;
}""",
            36: """ function($charCode) {
return $charCode.toString(36);
}""",
            62: """ function($charCode) {
return ($charCode < _encoding ? "" : arguments.callee(parseInt($charCode / _encoding))) +
(($charCode = $charCode % _encoding) > 35 ? String.fromCharCode($charCode + 29) : $charCode.toString(36));
}""",
            95: """ function($charCode) {
return ($charCode < _encoding ? "" : arguments.callee($charCode / _encoding)) +
String.fromCharCode($charCode % _encoding + 161);
}"""
        }
        # $encode: encoding function (used for decoding the script)
        encode = encoding_functions[encoding]
        encode = encode.replace('_encoding', "$ascii")
        encode = encode.replace('arguments.callee', "$encode")
        if ascii > 10:
            inline = "$count.toString($ascii)"
        else:
            inline = "$count"
        # $decode: code snippet to speed up decoding
        if fastDecode:
            # create the decoder
            decode = r"""// does the browser support String.replace where the
// replacement value is a function?
if (!''.replace(/^/, String)) {
// decode all the values we need
while ($count--) $decode[$encode($count)] = $keywords[$count] || $encode($count);
// global replacement function
$keywords = [function($encoded){return $decode[$encoded]}];
// generic match
$encode = function(){return'\\w+'};
// reset the loop counter - we are now doing a global replace
$count = 1;
}"""
            if encoding > 62:
                decode = decode.replace('\\\\w', "[\\xa1-\\xff]")
            else:
                # perform the encoding inline for lower ascii values
                if ascii < 36:
                    decode = ENCODE.sub(inline, decode)
            # special case: when $count==0 there are no keywords. the
            # original JavaScript keeps the basic shape of the unpacking
            # function by rewriting the decoder:
            #   $decode = $decode.replace(/(\$count)\s*=\s*1/, "$1=0");
            # that rewrite has not been ported.
            if not count:
                raise NotImplementedError(
                    "fastDecode is not supported for a script without keywords")
        # boot function
        unpack = r"""function($packed, $ascii, $count, $keywords, $encode, $decode) {
while ($count--)
if ($keywords[$count])
$packed = $packed.replace(new RegExp("\\b" + $encode($count) + "\\b", "g"), $keywords[$count]);
return $packed;
}"""
        if fastDecode:
            # insert the decoder
            unpack = unpack.replace('{', "{" + decode + ";", 1)
        if encoding > 62:  # high-ascii
            # get rid of the word-boundaries for regexp matches.  The boot
            # function above writes them with double quotes, so accept
            # either quote character.
            unpack = re.sub(r"""["']\\\\b["']\s*\+|\+\s*["']\\\\b["']""", "", unpack)
        if ascii > 36 or encoding > 62 or fastDecode:
            # insert the encode function
            unpack = unpack.replace('{', "{$encode=" + encode + ";", 1)
        else:
            # perform the encoding inline
            unpack = ENCODE.sub(inline, unpack)
        # pack the boot function too
        unpack = self.pack(unpack, 0, False, True)
        # arguments
        params = [packed, str(ascii), str(count), keywords]
        if fastDecode:
            # insert placeholders for the decoder
            params.extend(['0', "{}"])
        # the whole thing
        return "eval(" + unpack + "(" + ",".join(params) + "))\n"

    def pack(self, script, encoding=0, fastDecode=False, specialChars=False, compaction=True):
        """Pack `script` and return the result.

        encoding     -- 0 for none, otherwise 10, 36, 62 or 95
        fastDecode   -- include the fast decoder in the boot function
        specialChars -- shorten "$name"/"_name" identifiers and drop ";;;"
                        lines (implies compaction)
        compaction   -- strip comments and white-space (only consulted when
                        specialChars is false)
        """
        script = script + "\n"
        self._encoding = encoding
        self._fastDecode = fastDecode
        if specialChars:
            script = self.specialCompression(script)
            script = self.encodeSpecialChars(script)
        else:
            if compaction:
                script = self.basicCompression(script)
        if encoding:
            script = self.encodeKeywords(script, encoding, fastDecode)
        return script
def run():
    """Pack the script read from standard input and write it to standard output.

    Uses base-62 encoding with the fast decoder enabled.
    """
    p = JavaScriptPacker()
    script = sys.stdin.read()
    result = p.pack(script, encoding=62, fastDecode=True)
    # sys.stdout.write instead of the print statement so the function is
    # valid on both Python 2 and Python 3; the trailing newline matches what
    # print emitted.
    sys.stdout.write(result + "\n")
def run1():
    """Run the built-in packing examples and report every mismatch.

    Each case is (script, encoding, fastDecode, specialChars, expected).
    `script` and `expected` are either literal text or the name of a file;
    when a file of that name exists its contents are used instead.
    Results are written to standard output; nothing is returned.
    """
    def load(value):
        # Use the file's contents when `value` names an existing file,
        # otherwise treat `value` itself as the text.  The handle is always
        # closed.
        if not os.path.exists(value):
            return value
        handle = open(value)
        try:
            return handle.read()
        finally:
            handle.close()

    def say(*parts):
        # Space-separated line on stdout, like the Python 2 print statement.
        sys.stdout.write(" ".join([str(part) for part in parts]) + "\n")

    test_scripts = []
    test_scripts.append(("""// -----------------------------------------------------------------------
// public interface
// -----------------------------------------------------------------------
cssQuery.toString = function() {
return "function cssQuery() {\n [version " + version + "]\n}";
};""", 0, False, False, """cssQuery.toString=function(){return"function cssQuery() {\n [version "+version+"]\n}"};"""))
    test_scripts.append(("""function test(_localvar) {
var $name = 'foo';
var $$dummy = 2;
return $name + $$dummy;
}""", 0, False, True, """function test(_0){var n='foo';var du=2;return n+du}"""))
    test_scripts.append(("""function _test($localvar) {
var $name = 1;
var _dummy = 2;
var __foo = 3;
return $name + _dummy + $localvar + __foo;
}""", 0, False, True, """function _1(l){var n=1;var _0=2;var __foo=3;return n+_0+l+__foo}"""))
    test_scripts.append(("""function _test($localvar) {
var $name = 1;
var _dummy = 2;
var __foo = 3;
return $name + _dummy + $localvar + __foo;
}
function _bar(_ocalvar) {
var $name = 1;
var _dummy = 2;
var __foo = 3;
return $name + _dummy + $localvar + __foo;
}""", 0, False, True, """function _3(l){var n=1;var _0=2;var __foo=3;return n+_0+l+__foo}function _2(_1){var n=1;var _0=2;var __foo=3;return n+_0+l+__foo}"""))
    test_scripts.append(("cssQuery1.js", 0, False, False, "cssQuery1-p1.js"))
    test_scripts.append(("cssQuery.js", 0, False, False, "cssQuery-p1.js"))
    test_scripts.append(("pack.js", 0, False, False, "pack-p1.js"))
    test_scripts.append(("cssQuery.js", 0, False, True, "cssQuery-p2.js"))
    # the following ones are different, because javascript might use an
    # unstable sort algorithm while python uses a stable sort algorithm
    test_scripts.append(("pack.js", 0, False, True, "pack-p2.js"))
    test_scripts.append(("test.js", 0, False, True, """function _4(l){var n=1;var _0=2;var __foo=3;return n+_0+l+__foo}function _3(_1){var n=1;var _2=2;var __foo=3;return n+_2+l+__foo}"""))
    test_scripts.append(("test.js", 10, False, False, """eval(function(p,a,c,k,e,d){while(c--){if(k[c]){p=p.replace(new RegExp("\\b"+e(c)+"\\b","g"),k[c])}}return p}('8 13($6){0 $4=1;0 7=2;0 5=3;9 $4+7+$6+5}8 11(12){0 $4=1;0 10=2;0 5=3;9 $4+10+$6+5}',10,14,'var||||name|__foo|localvar|_dummy|function|return|_2|_bar|_ocalvar|_test'.split('|')))
"""))
    test_scripts.append(("test.js", 62, False, False, """eval(function(p,a,c,k,e,d){while(c--){if(k[c]){p=p.replace(new RegExp("\\b"+e(c)+"\\b","g"),k[c])}}return p}('8 d($6){0 $4=1;0 7=2;0 5=3;9 $4+7+$6+5}8 b(c){0 $4=1;0 a=2;0 5=3;9 $4+a+$6+5}',14,14,'var||||name|__foo|localvar|_dummy|function|return|_2|_bar|_ocalvar|_test'.split('|')))
"""))
    test_scripts.append(("test.js", 95, False, False, "test-p4.js"))
    test_scripts.append(("cssQuery.js", 0, False, True, "cssQuery-p3.js"))
    test_scripts.append(("cssQuery.js", 62, False, True, "cssQuery-p4.js"))

    p = JavaScriptPacker()
    for script, encoding, fastDecode, specialChars, expected in test_scripts:
        _script = load(script)
        _expected = load(expected)
        say(script[:20], encoding, fastDecode, specialChars, expected[:20])
        say("=" * 40)
        result = p.pack(_script, encoding, fastDecode, specialChars)
        say(len(result), len(_script))
        if result != _expected:
            say("ERROR!!!!!!!!!!!!!!!!")
            say(_expected)
            say(result)
# Script entry point: pack standard input when executed directly.
if __name__ == "__main__":
    run()
Jump to Line
Something went wrong with that request. Please try again.