Skip to content

Commit

Permalink
Unicode input fixes.
Browse files: browse the repository at this point in the history
  • Loading branch information
lrowe committed Oct 17, 2011
1 parent e077b96 commit d3f86c2
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions pijnu/library/pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -1140,7 +1140,8 @@ def __init__(self, format, expression=None, name=None):
self.charset = toCharset(format)
expression = "[%s]" % format
else: # from text grammar
self.charset = format
# Ensure charset is a byte string to avoid UnicodeDecodeErrors
self.charset = ''.join(chr(ord(c)) for c in format)
# We need a clean text for format (see method _cleanRepr)
expression = Klass._cleanRepr(expression)
# define common attributes
Expand All @@ -1158,7 +1159,7 @@ def _realCheck(self, source, pos):
# This way, Unicode characters, previously treated as bytes,
# will pass; this is just a basic hack; a whole class allowing
# ranges of characters would be far better.
if char in self.charset or ord(char) > 255:
if ord(char) > 255 or chr(ord(char)) in self.charset:
return Node(self, char, pos, pos+1, source)
# case failure
return MatchFailure(self, source, pos)
Expand Down Expand Up @@ -1243,7 +1244,7 @@ def _realCheck(self, source, pos):
stopPos = len(source)
# looping
charset = self.charset
while (pos < stopPos) and (source[pos] in charset):
while (pos < stopPos) and (ord(source[pos]) <= 255) and (chr(ord(source[pos])) in charset):
pos += 1
# result value:
chars = source[startPos:pos]
Expand Down

0 comments on commit d3f86c2

Please sign in to comment.