Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/tiarno/plastex
Browse files Browse the repository at this point in the history
Conflicts:
	plasTeX/Base/LaTeX/Index.py
  • Loading branch information
kesmit13 committed Jul 17, 2014
2 parents b1d76f9 + 94281cd commit d296b2f
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 14 deletions.
2 changes: 1 addition & 1 deletion README
Expand Up @@ -27,5 +27,5 @@ import commands):
Renderer().render(TeX(file=sys.argv[1]).parse())

plasTeX is really much more than just a LaTeX-to-other-format converter
though. See the documentation at http://plastex.sf.net/ for a complete
though. See the documentation at http://tiarno.github.io/plastex/ for a complete
view of what it is capable of.
4 changes: 2 additions & 2 deletions plasTeX/Base/LaTeX/Index.py
Expand Up @@ -7,7 +7,7 @@

import string, os
from plasTeX.Tokenizer import Token, EscapeSequence
from plasTeX import Command, Environment, IgnoreCommand
from plasTeX import Command, Environment, IgnoreCommand, encoding
from plasTeX.Logging import getLogger
from Sectioning import SectionUtils

Expand Down Expand Up @@ -75,7 +75,7 @@ def groups(self):
for item in self:
try:
label = title = item.sortkey[0].upper()
if title in string.letters:
if title in encoding.stringletters():
pass
elif title == '_':
title = '_ (Underscore)'
Expand Down
3 changes: 2 additions & 1 deletion plasTeX/Renderers/ManPage/__init__.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python

from plasTeX.Renderers import Renderer as BaseRenderer
from plasTeX import encoding
import textwrap, re, string

class ManPageRenderer(BaseRenderer):
Expand Down Expand Up @@ -43,7 +44,7 @@ def __init__(self, *args, **kwargs):
def default(self, node):
""" Rendering method for all non-text nodes """
# Handle characters like \&, \$, \%, etc.
if len(node.nodeName) == 1 and node.nodeName not in string.letters:
if len(node.nodeName) == 1 and node.nodeName not in encoding.stringletters():
return self.textDefault(node.nodeName)

# Render child nodes
Expand Down
3 changes: 2 additions & 1 deletion plasTeX/Renderers/Text/__init__.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python

from plasTeX.Renderers import Renderer as BaseRenderer
from plasTeX import encoding
import textwrap, re, string

class TextRenderer(BaseRenderer):
Expand Down Expand Up @@ -50,7 +51,7 @@ def addBlock(self, s):
def default(self, node):
""" Rendering method for all non-text nodes """
# Handle characters like \&, \$, \%, etc.
if len(node.nodeName) == 1 and node.nodeName not in string.letters:
if len(node.nodeName) == 1 and node.nodeName not in encoding.stringletters():
return self.textDefault(node.nodeName)

# Render child nodes
Expand Down
7 changes: 4 additions & 3 deletions plasTeX/Tokenizer.py
Expand Up @@ -2,6 +2,7 @@

import string
from DOM import Node, Text
from plasTeX import encoding
from StringIO import StringIO as UnicodeStringIO
try: from cStringIO import StringIO
except: from StringIO import StringIO
Expand All @@ -19,7 +20,7 @@
'_', # 8 - Subscript
'\x00',# 9 - Ignored character
' \t\r\f', # 10 - Space
string.letters + '@', # - Letter
encoding.stringletters() + '@', # - Letter
'', # 12 - Other character - This isn't explicitly defined. If it
# isn't any of the other categories, then
# it's an "other" character.
Expand All @@ -29,7 +30,7 @@
]

VERBATIM_CATEGORIES = [''] * 16
VERBATIM_CATEGORIES[11] = string.letters
VERBATIM_CATEGORIES[11] = encoding.stringletters()

class Token(Text):
""" Base class for all TeX tokens """
Expand Down Expand Up @@ -441,7 +442,7 @@ def __iter__(self):
# HACK: I couldn't get the parse() thing to work so I'm just not
# going to parse whitespace after EscapeSequences that end in
# non-letter characters as a half-assed solution.
if token[-1] in string.letters:
if token[-1] in encoding.stringletters():
# Absorb following whitespace
self.state = STATE_S

Expand Down
12 changes: 6 additions & 6 deletions plasTeX/__init__.py
Expand Up @@ -5,7 +5,7 @@
import string, re
from DOM import Element, Text, Node, DocumentFragment, Document
from Tokenizer import Token, BeginGroup, EndGroup, Other
from plasTeX import Logging
from plasTeX import Logging, encoding

log = Logging.getLogger()
status = Logging.getLogger('status')
Expand Down Expand Up @@ -429,7 +429,7 @@ def source(self):
argSource = sourceArguments(self)
if not argSource:
argSource = ' '
elif argSource[0] in string.letters:
elif argSource[0] in encoding.stringletters():
argSource = ' %s' % argSource
s = '%s%s%s' % (escape, name, argSource)

Expand Down Expand Up @@ -633,7 +633,7 @@ def arguments(self):
pass

# Argument name (and possibly type)
elif item[0] in string.letters:
elif item[0] in encoding.stringletters():
parts = item.split(':')
item = parts.pop(0)
# Parse for types and subtypes
Expand Down Expand Up @@ -1139,11 +1139,11 @@ class dimen(float):
def __new__(cls, v):
if isinstance(v, Macro):
return v.__dimen__()
elif isinstance(v, basestring) and v[-1] in string.letters:
elif isinstance(v, basestring) and v[-1] in encoding.stringletters():
# Get rid of glue components
v = list(v.split('plus').pop(0).split('minus').pop(0).strip())
units = []
while v and v[-1] in string.letters:
while v and v[-1] in encoding.stringletters():
units.insert(0, v.pop())
v = float(''.join(v))
units = ''.join(units)
Expand Down Expand Up @@ -1503,7 +1503,7 @@ def roman(self):

@property
def Alph(self):
return string.letters[self.value-1].upper()
return encoding.stringletters()[self.value-1].upper()

@property
def alph(self):
Expand Down
11 changes: 11 additions & 0 deletions plasTeX/encoding.py
@@ -0,0 +1,11 @@
#!/usr/bin/env python

import locale
import string

def stringletters():
    """
    Return the letters of the current locale as a text (unicode) string

    The locale-dependent byte string ``string.letters`` is decoded with
    the encoding reported by ``locale.getlocale()``.  When no encoding
    is reported, the interpreter's default encoding is used instead.

    This function is called while modules are being imported, so it
    must not raise: if the locale cannot be determined, its encoding is
    not a known codec, or the bytes cannot be decoded, the plain ASCII
    letters are returned.

    Returns: text string of letters

    """
    # ``string.letters`` only exists on Python 2; use the ASCII letters
    # wherever that attribute is missing.
    letters = getattr(string, 'letters', string.ascii_letters)
    if not isinstance(letters, bytes):
        # Already a text string, nothing to decode
        return letters

    try:
        encoding = locale.getlocale()[1]
    except ValueError:
        # getlocale() raises ValueError for locale names it cannot parse
        encoding = None

    try:
        if encoding:
            return letters.decode(encoding)
        # No locale encoding: decode with the default encoding
        return letters.decode()
    except (LookupError, UnicodeDecodeError):
        # Unknown codec name or undecodable locale letters
        return string.ascii_letters.decode('ascii')
32 changes: 32 additions & 0 deletions unittests/Encoding.py
@@ -0,0 +1,32 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import locale
import unittest
from plasTeX.TeX import TeX

class Longtables(unittest.TestCase):

def runDocument(self, content):
"""
Compile a document with the given content
Arguments:
content - string containing the content of the document
Returns: TeX document
"""
tex = TeX()
tex.disableLogging()
tex.input(ur'''\document{article}\begin{document}%s\end{document}''' % content)
return tex.parse()

def testString(self):
# Bad character encoding
locale.setlocale(locale.LC_ALL, "en_GB.iso8859-1")
out = self.runDocument(u"é")

if __name__ == '__main__':
unittest.main()

0 comments on commit d296b2f

Please sign in to comment.