Skip to content

Commit 1bb4923

Browse files
committed
support multibyte filename handling.
https://bitbucket.org/birkenfeld/sphinx/issue/703
1 parent 9af2094 commit 1bb4923

7 files changed

Lines changed: 55 additions & 18 deletions

File tree

sphinx/builders/epub.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,12 @@ def build_epub(self, outdir, outname):
662662
zipfile.ZIP_STORED)
663663
for file in projectfiles:
664664
fp = path.join(outdir, file)
665-
if isinstance(fp, unicode):
666-
fp = fp.encode(sys.getfilesystemencoding())
665+
if sys.version_info < (2, 6):
666+
# When zipfile.ZipFile.write is called with a unicode filename, ZipFile
667+
# encodes the filename to 'utf-8' (only in Python 2.6 and later).
668+
if isinstance(file, unicode):
669+
# The OEBPS Container Format (OCF) 2.0.1 specification requires
670+
# "File Names MUST be UTF-8 encoded".
671+
file = file.encode('utf-8')
667672
epub.write(fp, file, zipfile.ZIP_DEFLATED)
668673
epub.close()

sphinx/cmdline.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,17 @@
2222
from sphinx.application import Sphinx
2323
from sphinx.util import Tee, format_exception_cut_frames, save_traceback
2424
from sphinx.util.console import red, nocolor, color_terminal
25+
from sphinx.util.osutil import fs_encoding
2526
from sphinx.util.pycompat import terminal_safe, bytes
2627

2728

29+
def abspath(pathdir):
30+
pathdir = path.abspath(pathdir)
31+
if isinstance(pathdir, bytes):
32+
pathdir = pathdir.decode(fs_encoding)
33+
return pathdir
34+
35+
2836
def usage(argv, msg=None):
2937
if msg:
3038
print >>sys.stderr, msg
@@ -65,7 +73,7 @@ def main(argv):
6573
try:
6674
opts, args = getopt.getopt(argv[1:], 'ab:t:d:c:CD:A:ng:NEqQWw:P')
6775
allopts = set(opt[0] for opt in opts)
68-
srcdir = confdir = path.abspath(args[0])
76+
srcdir = confdir = abspath(args[0])
6977
if not path.isdir(srcdir):
7078
print >>sys.stderr, 'Error: Cannot find source directory `%s\'.' % (
7179
srcdir,)
@@ -75,7 +83,7 @@ def main(argv):
7583
print >>sys.stderr, ('Error: Source directory doesn\'t '
7684
'contain conf.py file.')
7785
return 1
78-
outdir = path.abspath(args[1])
86+
outdir = abspath(args[1])
7987
if not path.isdir(outdir):
8088
print >>sys.stderr, 'Making output directory...'
8189
os.makedirs(outdir)
@@ -119,9 +127,9 @@ def main(argv):
119127
elif opt == '-t':
120128
tags.append(val)
121129
elif opt == '-d':
122-
doctreedir = path.abspath(val)
130+
doctreedir = abspath(val)
123131
elif opt == '-c':
124-
confdir = path.abspath(val)
132+
confdir = abspath(val)
125133
if not path.isfile(path.join(confdir, 'conf.py')):
126134
print >>sys.stderr, ('Error: Configuration directory '
127135
'doesn\'t contain conf.py file.')

sphinx/config.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from sphinx.errors import ConfigError
1818
from sphinx.locale import l_
19-
from sphinx.util.osutil import make_filename
19+
from sphinx.util.osutil import make_filename, fs_encoding
2020
from sphinx.util.pycompat import bytes, b, convert_with_2to3
2121

2222
nonascii_re = re.compile(b(r'[\x80-\xff]'))
@@ -208,14 +208,15 @@ def __init__(self, dirname, filename, overrides, tags):
208208
f.close()
209209
try:
210210
# compile to a code object, handle syntax errors
211+
config_file_enc = config_file.encode(fs_encoding)
211212
try:
212-
code = compile(source, config_file, 'exec')
213+
code = compile(source, config_file_enc, 'exec')
213214
except SyntaxError:
214215
if convert_with_2to3:
215216
# maybe the file uses 2.x syntax; try to refactor to
216217
# 3.x syntax using 2to3
217218
source = convert_with_2to3(config_file)
218-
code = compile(source, config_file, 'exec')
219+
code = compile(source, config_file_enc, 'exec')
219220
else:
220221
raise
221222
exec code in config

sphinx/environment.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,15 @@
4141
FilenameUniqDict
4242
from sphinx.util.nodes import clean_astext, make_refnode, extract_messages, \
4343
WarningStream
44-
from sphinx.util.osutil import movefile, SEP, ustrftime, find_catalog
44+
from sphinx.util.osutil import movefile, SEP, ustrftime, find_catalog, \
45+
fs_encoding
4546
from sphinx.util.matching import compile_matchers
4647
from sphinx.util.pycompat import all, class_types
4748
from sphinx.util.websupport import is_commentable
4849
from sphinx.errors import SphinxError, ExtensionError
4950
from sphinx.locale import _, init as init_locale
5051
from sphinx.versioning import add_uids, merge_doctrees
5152

52-
fs_encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
5353

5454
orig_role_function = roles.role
5555
orig_directive_function = directives.directive
@@ -1321,7 +1321,7 @@ def _walk_depth(node, depth, maxdepth):
13211321
def _entries_from_toctree(toctreenode, parents,
13221322
separate=False, subtree=False):
13231323
"""Return TOC entries for a toctree node."""
1324-
refs = [(e[0], str(e[1])) for e in toctreenode['entries']]
1324+
refs = [(e[0], e[1]) for e in toctreenode['entries']]
13251325
entries = []
13261326
for (title, ref) in refs:
13271327
try:

sphinx/util/osutil.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,3 +148,6 @@ def find_catalog(docname, compaction):
148148
ret = docname
149149

150150
return ret
151+
152+
fs_encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
153+

tests/path.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,16 @@
1616
FILESYSTEMENCODING = sys.getfilesystemencoding() or sys.getdefaultencoding()
1717

1818

19-
class path(str):
19+
class path(unicode):
2020
"""
2121
Represents a path which behaves like a string.
2222
"""
2323
if sys.version_info < (3, 0):
2424
def __new__(cls, s, encoding=FILESYSTEMENCODING, errors='strict'):
25-
if isinstance(s, unicode):
26-
s = s.encode(encoding, errors=errors)
27-
return str.__new__(cls, s)
28-
return str.__new__(cls, s)
25+
if isinstance(s, str):
26+
s = s.decode(encoding, errors)
27+
return unicode.__new__(cls, s)
28+
return unicode.__new__(cls, s)
2929

3030
@property
3131
def parent(self):
@@ -193,4 +193,4 @@ def joinpath(self, *args):
193193
__div__ = __truediv__ = joinpath
194194

195195
def __repr__(self):
196-
return '%s(%s)' % (self.__class__.__name__, str.__repr__(self))
196+
return '%s(%s)' % (self.__class__.__name__, unicode.__repr__(self))

tests/test_build.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
"""
1111

1212
from util import *
13+
from textwrap import dedent
1314

1415

1516
def teardown_module():
@@ -61,3 +62,22 @@ def test_man(app):
6162
@with_app(buildername='singlehtml', cleanenv=True)
6263
def test_singlehtml(app):
6364
app.builder.build_all()
65+
66+
@with_app(buildername='html', srcdir='(temp)')
67+
def test_multibyte_path(app):
68+
srcdir = path(app.srcdir)
69+
mb_name = u'\u65e5\u672c\u8a9e'
70+
(srcdir / mb_name).makedirs()
71+
(srcdir / mb_name / (mb_name + '.txt')).write_text(dedent("""
72+
multi byte file name page
73+
==========================
74+
"""))
75+
76+
master_doc = srcdir / 'contents.txt'
77+
master_doc.write_bytes((master_doc.text() + dedent("""
78+
.. toctree::
79+
80+
%(mb_name)s/%(mb_name)s
81+
""" % locals())
82+
).encode('utf-8'))
83+
app.builder.build_all()

0 commit comments

Comments
 (0)