Skip to content

Commit

Permalink
fix bugs handling unicode file names
Browse files Browse the repository at this point in the history
Fix crashes when handling non-ASCII characters in directory names and in
track titles.  In two places filenames were inadvertently left as, or
converted to, a plain non-unicode byte string.  This caused decode
errors when we tried to concatenate them with Unicode strings, because
Python attempted to automatically decode them as ASCII.

In album_dir.py, the input path was left as a plain byte string, rather
than being decoded as UTF-8.

In rename.py the safe_filename() function was inadvertently converting
unicode input strings to plain byte strings in output, due to passing
the input through a non-unicode format string.
  • Loading branch information
simpkins committed Jul 24, 2011
1 parent d01142e commit bcc3c83
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 29 deletions.
3 changes: 2 additions & 1 deletion src/amass/archive/album_dir.py
Expand Up @@ -8,6 +8,7 @@

from .. import cdrom
from .. import cddb
from .. import file_util
from .. import mb
from .. import metadata
from . import err
Expand Down Expand Up @@ -99,7 +100,7 @@ class DirLayout(object):
layouts.
"""
def __init__(self, path):
    # Decode byte-string paths up front (see file_util.decode_path) so
    # that self.path is a unicode string whenever the input is valid
    # UTF-8.  Paths that cannot be decoded are kept as byte strings.
    self.path = file_util.decode_path(path)

def getMetadataDir(self):
return os.path.join(self.path, 'metadata')
Expand Down
17 changes: 2 additions & 15 deletions src/amass/archive/util.py
Expand Up @@ -6,25 +6,12 @@
import re

from .. import cdrom
from .. import file_util


class FileInfo(object):
def __init__(self, path, metadata):
    # We generally encode the path names as UTF-8 when writing them, so
    # decode them back to unicode when reading.  This way they can be
    # compared with the unicode strings that we use internally.
    # decode_path() returns undecodable byte strings unchanged.
    self.path = file_util.decode_path(path)
    self.metadata = metadata

@property
Expand Down
38 changes: 38 additions & 0 deletions src/amass/file_util.py
Expand Up @@ -53,3 +53,41 @@ def find_files_by_suffix(dir, suffix):
files.append(path)

return files


def decode_path(path):
    """
    decode_path(bytes) --> unicode

    Attempt to decode a byte string path name to a unicode string.

    If the input is already a unicode string (or any other non-bytes
    object), it is returned as-is.
    If the input is not valid UTF-8 and cannot be decoded, the byte string
    will be returned as-is rather than throwing an error.
    """
    # We generally encode the path names as UTF-8 when writing them.
    # When reading path names, attempt to decode them from UTF-8 and
    # convert them back to a unicode string.  This way we can compare them
    # with the unicode strings that we use internally.
    #
    # Test against bytes rather than str.  On Python 2.6+ both names refer
    # to the same type, so behavior there is unchanged.  On Python 3, str is
    # the text type and has no decode() method, so a str check would raise
    # AttributeError for every text path.
    if not isinstance(path, bytes):
        # Already a unicode string
        return path

    try:
        return path.decode('utf-8')
    except UnicodeDecodeError:
        # Not a valid UTF-8 string.  Just keep using the plain byte array
        return path


def safe_filename(name):
    """
    safe_filename(name) --> name

    Make a string safe for use as a file name by replacing every '/'
    with a backslash.
    """
    # TODO: It would be nice to strip out non-printable characters, or replace
    # them with some dummy character.
    # TODO: Support making the name safe for Windows, too.
    pieces = name.split('/')
    return '\\'.join(pieces)
15 changes: 2 additions & 13 deletions src/rename.py
Expand Up @@ -9,18 +9,7 @@

from amass import archive
from amass import metadata


def safe_filename(name):
    """
    safe_filename(name) --> name

    Make a string safe for use as a file name by replacing every '/'
    with a backslash.
    """
    # TODO: It would be nice to strip out non-printable characters, or replace
    # them with some dummy character.
    # TODO: Support making the name safe for Windows, too.
    pieces = name.split('/')
    return '\\'.join(pieces)
from amass import file_util


def get_track_name(track):
Expand All @@ -30,7 +19,7 @@ def get_track_name(track):
else:
title = track.trackTitle

return safe_filename('%02d - %s' % (track.number, title))
return file_util.safe_filename(u'%02d - %s' % (track.number, title))


def rename_files(info_list):
Expand Down

0 comments on commit bcc3c83

Please sign in to comment.