Permalink
Browse files

fix bugs handling unicode file names

Fix crashes when handling non-ASCII characters in directory names and in
track titles.  In two places filenames were inadvertently left as, or
converted to, a plain non-unicode byte string.  This caused decode
errors when we tried to concatenate them with Unicode strings, and
Python attempted to automatically decode them as ASCII.

In album_dir.py, the input path was left as a plain byte string, rather
than being decoded as UTF-8.

In rename.py the safe_filename() function was inadvertently converting
unicode input strings to plain byte strings in output, due to passing
the input through a non-unicode format string.
  • Loading branch information...
1 parent d01142e commit bcc3c8389a162fa56cdfcc51a5ffea6c63dc38e2 @simpkins committed Jul 24, 2011
Showing with 44 additions and 29 deletions.
  1. +2 −1 src/amass/archive/album_dir.py
  2. +2 −15 src/amass/archive/util.py
  3. +38 −0 src/amass/file_util.py
  4. +2 −13 src/rename.py
@@ -8,6 +8,7 @@
from .. import cdrom
from .. import cddb
+from .. import file_util
from .. import mb
from .. import metadata
from . import err
@@ -99,7 +100,7 @@ class DirLayout(object):
layouts.
"""
def __init__(self, path):
- self.path = path
+ self.path = file_util.decode_path(path)
def getMetadataDir(self):
return os.path.join(self.path, 'metadata')
View
@@ -6,25 +6,12 @@
import re
from .. import cdrom
+from .. import file_util
class FileInfo(object):
def __init__(self, path, metadata):
- # We generally encode the path names as UTF-8 when writing them.
- # When reading path names, attempt to decode them from UTF-8 and
- # convert them back to a unicode string. This way we can compare them
- # with the unicode strings that we use internally.
- if isinstance(path, str):
- try:
- self.path = path.decode('utf-8')
- except UnicodeDecodeError:
- # Not a valid UTF-8 string. Just keep using the plain
- # byte array
- self.path = path
- else:
- # Already a unicode string
- self.path = path
-
+ self.path = file_util.decode_path(path)
self.metadata = metadata
@property
View
@@ -53,3 +53,41 @@ def find_files_by_suffix(dir, suffix):
files.append(path)
return files
+
+
+def decode_path(path):
+ """
+ decode_path(str) --> unicode
+
+ Attempt to decode a byte string path name to a unicode string.
+
+ If the input is already a unicode string, it is returned as-is.
+ If the input is not valid UTF-8 and cannot be decoded, the byte string will
+ be returned as-is rather than throwing an error.
+ """
+ # We generally encode the path names as UTF-8 when writing them.
+ # When reading path names, attempt to decode them from UTF-8 and
+ # convert them back to a unicode string. This way we can compare them
+ # with the unicode strings that we use internally.
+ if isinstance(path, str):
+ try:
+ return path.decode('utf-8')
+ except UnicodeDecodeError:
+ # Not a valid UTF-8 string. Just keep using the plain
+ # byte array
+ return path
+ else:
+ # Already a unicode string
+ return path
+
+
+def safe_filename(name):
+ """
+ safe_filename(name) --> name
+
+ Make a string safe for use as a file name.
+ """
+ # TODO: It would be nice to strip out non-printable characters, or replace
+ # them with some dummy character.
+ # TODO: Support making the name safe for Windows, too.
+ return name.replace('/', '\\')
View
@@ -9,18 +9,7 @@
from amass import archive
from amass import metadata
-
-
-def safe_filename(name):
- """
- safe_filename(name) --> name
-
- Make a string safe for use as a file name.
- """
- # TODO: It would be nice to strip out non-printable characters, or replace
- # them with some dummy character.
- # TODO: Support making the name safe for Windows, too.
- return name.replace('/', '\\')
+from amass import file_util
def get_track_name(track):
@@ -30,7 +19,7 @@ def get_track_name(track):
else:
title = track.trackTitle
- return safe_filename('%02d - %s' % (track.number, title))
+ return file_util.safe_filename(u'%02d - %s' % (track.number, title))
def rename_files(info_list):

0 comments on commit bcc3c83

Please sign in to comment.