Merge branch 'master' of https://github.com/tiarno/plastex

Conflicts: plasTeX/Base/LaTeX/Index.py
plastex · Jul 17, 2014 · d296b2f
2 parents b1d76f9 + 94281cd
commit d296b2f
Show file tree

Hide file tree

Showing 8 changed files with 60 additions and 14 deletions.
diff --git a/README b/README
@@ -27,5 +27,5 @@ import commands):
     Renderer().render(TeX(file=sys.argv[1]).parse())
 
 plasTeX is really much more than just a LaTeX-to-other-format converter 
-though.  See the documentation at http://plastex.sf.net/ for a complete
+though.  See the documentation at http://tiarno.github.io/plastex/ for a complete
 view of what it is capable of.
diff --git a/plasTeX/Base/LaTeX/Index.py b/plasTeX/Base/LaTeX/Index.py
@@ -7,7 +7,7 @@
 
 import string, os
 from plasTeX.Tokenizer import Token, EscapeSequence
-from plasTeX import Command, Environment, IgnoreCommand
+from plasTeX import Command, Environment, IgnoreCommand, encoding
 from plasTeX.Logging import getLogger
 from Sectioning import SectionUtils
 
@@ -75,7 +75,7 @@ def groups(self):
         for item in self:
             try: 
                 label = title = item.sortkey[0].upper()
-                if title in string.letters:
+                if title in encoding.stringletters():
                     pass
                 elif title == '_':
                      title = '_ (Underscore)'

diff --git a/plasTeX/Renderers/ManPage/__init__.py b/plasTeX/Renderers/ManPage/__init__.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
 from plasTeX.Renderers import Renderer as BaseRenderer
+from plasTeX import encoding
 import textwrap, re, string
 
 class ManPageRenderer(BaseRenderer):
@@ -43,7 +44,7 @@ def __init__(self, *args, **kwargs):
     def default(self, node):
         """ Rendering method for all non-text nodes """
         # Handle characters like \&, \$, \%, etc.
-        if len(node.nodeName) == 1 and node.nodeName not in string.letters:
+        if len(node.nodeName) == 1 and node.nodeName not in encoding.stringletters():
             return self.textDefault(node.nodeName)
 
         # Render child nodes

diff --git a/plasTeX/Renderers/Text/__init__.py b/plasTeX/Renderers/Text/__init__.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
 from plasTeX.Renderers import Renderer as BaseRenderer
+from plasTeX import encoding
 import textwrap, re, string
 
 class TextRenderer(BaseRenderer):
@@ -50,7 +51,7 @@ def addBlock(self, s):
     def default(self, node):
         """ Rendering method for all non-text nodes """
         # Handle characters like \&, \$, \%, etc.
-        if len(node.nodeName) == 1 and node.nodeName not in string.letters:
+        if len(node.nodeName) == 1 and node.nodeName not in encoding.stringletters():
             return self.textDefault(node.nodeName)
 
         # Render child nodes

diff --git a/plasTeX/Tokenizer.py b/plasTeX/Tokenizer.py
@@ -2,6 +2,7 @@
 
 import string
 from DOM import Node, Text
+from plasTeX import encoding
 from StringIO import StringIO as UnicodeStringIO
 try: from cStringIO import StringIO
 except: from StringIO import StringIO
@@ -19,7 +20,7 @@
    '_',   # 8  - Subscript
    '\x00',# 9  - Ignored character
    ' \t\r\f', # 10 - Space
-   string.letters + '@', # - Letter
+   encoding.stringletters() + '@', # - Letter
    '',    # 12 - Other character - This isn't explicitly defined.  If it
           #                        isn't any of the other categories, then
           #                        it's an "other" character.
@@ -29,7 +30,7 @@
 ]
 
 VERBATIM_CATEGORIES = [''] * 16
-VERBATIM_CATEGORIES[11] = string.letters
+VERBATIM_CATEGORIES[11] = encoding.stringletters()
 
 class Token(Text):
     """ Base class for all TeX tokens """
@@ -441,7 +442,7 @@ def __iter__(self):
 # HACK: I couldn't get the parse() thing to work so I'm just not
 #       going to parse whitespace after EscapeSequences that end in
 #       non-letter characters as a half-assed solution.
-                        if token[-1] in string.letters:
+                        if token[-1] in encoding.stringletters():
                             # Absorb following whitespace
                             self.state = STATE_S
 

diff --git a/plasTeX/__init__.py b/plasTeX/__init__.py
@@ -5,7 +5,7 @@
 import string, re
 from DOM import Element, Text, Node, DocumentFragment, Document
 from Tokenizer import Token, BeginGroup, EndGroup, Other
-from plasTeX import Logging
+from plasTeX import Logging, encoding
 
 log = Logging.getLogger()
 status = Logging.getLogger('status')
@@ -429,7 +429,7 @@ def source(self):
         argSource = sourceArguments(self)
         if not argSource:
             argSource = ' '
-        elif argSource[0] in string.letters:
+        elif argSource[0] in encoding.stringletters():
             argSource = ' %s' % argSource
         s = '%s%s%s' % (escape, name, argSource)
 
@@ -633,7 +633,7 @@ def arguments(self):
                 pass
 
             # Argument name (and possibly type)
-            elif item[0] in string.letters:
+            elif item[0] in encoding.stringletters():
                 parts = item.split(':')
                 item = parts.pop(0)
                 # Parse for types and subtypes
@@ -1139,11 +1139,11 @@ class dimen(float):
     def __new__(cls, v):
         if isinstance(v, Macro):
             return v.__dimen__()
-        elif isinstance(v, basestring) and v[-1] in string.letters:
+        elif isinstance(v, basestring) and v[-1] in encoding.stringletters():
             # Get rid of glue components
             v = list(v.split('plus').pop(0).split('minus').pop(0).strip())
             units = []
-            while v and v[-1] in string.letters:
+            while v and v[-1] in encoding.stringletters():
                 units.insert(0, v.pop())
             v = float(''.join(v))
             units = ''.join(units) 
@@ -1503,7 +1503,7 @@ def roman(self):
 
     @property
     def Alph(self):
-        return string.letters[self.value-1].upper()
+        return encoding.stringletters()[self.value-1].upper()
 
     @property
     def alph(self):

diff --git a/plasTeX/encoding.py b/plasTeX/encoding.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+
+import locale
+import string
+
+def stringletters():
+    encoding = locale.getlocale()[1]
+    if encoding:
+        return unicode(string.letters, encoding)
+    else:
+        return unicode(string.letters)
diff --git a/unittests/Encoding.py b/unittests/Encoding.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import locale
+import unittest
+from plasTeX.TeX import TeX
+
+class Longtables(unittest.TestCase):
+
+    def runDocument(self, content):
+        """
+        Compile a document with the given content
+
+        Arguments:
+        content - string containing the content of the document
+
+        Returns: TeX document
+
+        """
+        tex = TeX()
+        tex.disableLogging()
+        tex.input(ur'''\document{article}\begin{document}%s\end{document}''' % content)
+        return tex.parse()
+
+    def testString(self):
+        # Bad character encoding
+        locale.setlocale(locale.LC_ALL, "en_GB.iso8859-1")
+        out = self.runDocument(u"é")
+
+if __name__ == '__main__':
+    unittest.main()
+