Skip to content

Commit

Permalink
Add platform specific unicode normalization.
Browse files: browse the repository at this point in the history
  • Loading branch information
musically-ut committed Apr 27, 2015
1 parent c4018d5 commit ee7b829
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions seqfile/seqfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import glob as _glob
import errno as _errno
import unicodedata as _u
import sys as _sys

import natsort as _natsort

Expand Down Expand Up @@ -41,15 +42,22 @@ def _findNextFile(folder, prefix, suffix, fnameGen, base, maxattempts, loop):
suffix = suffix if suffix is not None else u''

globPattern = _os.path.join(folder, prefix + u'*' + suffix)
allFiles = _glob.glob(_u.normalize('NFD', globPattern))
rawRegEx = prefix + u'([0-9]+)' + suffix + u'$'

# Mac stores Unicode filenames in decomposed (NFD) form, while Windows and
# Linux do not normalize filenames at all; NFC is assumed for them here.

This comment has been minimized.

Copy link
@remram44

remram44 Apr 30, 2015

No, Windows and Linux don't normalize at all.

if _sys.platform == 'darwin':
normalizedGlobPattern = _u.normalize('NFD', globPattern)
normalizedRegEx = _u.normalize('NFD', rawRegEx)
else:
normalizedGlobPattern = _u.normalize('NFC', globPattern)
normalizedRegEx = _u.normalize('NFC', rawRegEx)

allFiles = _glob.glob(normalizedGlobPattern)
sortedFiles = _natsort.natsorted(allFiles,
alg=_natsort.ns.INT,
reverse=True)

# Not using complete path here, since Windows paths contain
# back-slashes, which will be interpreted as escaped special regex.
rawRegEx = prefix + u'([0-9]+)' + suffix + u'$'
normalizedRegEx = _u.normalize('NFD', rawRegEx)
numFilesRegEx = _re.compile(normalizedRegEx, _re.UNICODE)
numberedFiles = (_re.search(numFilesRegEx, f) for f in sortedFiles
if _re.search(numFilesRegEx, f))
Expand Down

0 comments on commit ee7b829

Please sign in to comment.