Skip to content

Commit

Permalink
Update natsort for Python 3 support
Browse files Browse the repository at this point in the history
  • Loading branch information
dougwt committed Jun 26, 2014
1 parent 59c5ca8 commit 72cc8fc
Show file tree
Hide file tree
Showing 5 changed files with 573 additions and 256 deletions.
4 changes: 4 additions & 0 deletions natsort/__init__.py
@@ -1,4 +1,8 @@
# -*- coding: utf-8 -*-
from __future__ import print_function, division, unicode_literals, absolute_import

from .natsort import natsort_key, natsorted, index_natsorted
from ._version import __version__

__all__ = [
'natsort_key',
Expand Down
305 changes: 231 additions & 74 deletions natsort/__main__.py
@@ -1,15 +1,72 @@
from __future__ import print_function, division
# -*- coding: utf-8 -*-
from __future__ import print_function, division, unicode_literals, absolute_import

import sys
import os
import re
from natsort import natsort_key, natsorted
from _version import __version__

from .natsort import natsort_key, natsorted, int_nosign_re, int_sign_re
from .natsort import float_sign_exp_re, float_nosign_exp_re
from .natsort import float_sign_noexp_re, float_nosign_noexp_re
from .natsort import regex_and_num_function_chooser
from ._version import __version__
from .py23compat import py23_str


def main():
"""\
Performs a natural sort on pathnames given on the command-line.
Performs a natural sort on entries given on the command-line.
A natural sort sorts numerically then alphabetically, and will sort
by numbers in the middle of a pathname.
by numbers in the middle of an entry.
>>> import sys
>>> sys.argv[1:] = ['num-2', 'num-6', 'num-1']
>>> main()
num-6
num-2
num-1
>>> sys.argv[1:] = ['-r', 'num-2', 'num-6', 'num-1']
>>> main()
num-1
num-2
num-6
>>> sys.argv[1:] = ['--nosign', 'num-2', 'num-6', 'num-1']
>>> main()
num-1
num-2
num-6
>>> sys.argv[1:] = ['-t', 'digit', 'num-2', 'num-6', 'num-1']
>>> main()
num-1
num-2
num-6
>>> sys.argv[1:] = ['-t', 'int', '-e', '-1', '-e', '6',
... 'num-2', 'num-6', 'num-1']
>>> main()
num-6
num-2
>>> sys.argv[1:] = ['-t', 'digit', '-e', '1', '-e', '6',
... 'num-2', 'num-6', 'num-1']
>>> main()
num-2
>>> sys.argv[1:] = ['a1.0e3', 'a5.3', 'a453.6']
>>> main()
a5.3
a453.6
a1.0e3
>>> sys.argv[1:] = ['-f', '1', '10', 'a1.0e3', 'a5.3', 'a453.6']
>>> main()
a5.3
>>> sys.argv[1:] = ['-f', '1', '10', '-f', '400', '500', 'a1.0e3', 'a5.3', 'a453.6']
>>> main()
a5.3
a453.6
>>> sys.argv[1:] = ['--noexp', 'a1.0e3', 'a5.3', 'a453.6']
>>> main()
a1.0e3
a5.3
a453.6
"""

from argparse import ArgumentParser, RawDescriptionHelpFormatter
Expand All @@ -18,111 +75,211 @@ def main():
formatter_class=RawDescriptionHelpFormatter)
parser.add_argument('--version', action='version',
version='%(prog)s {0}'.format(__version__))
parser.add_argument('-F', '--onlyfiles', help='Only files that '
'are readable and non-empty are read in. '
'This will exculude folders from being read in.',
action='store_true', default=False)
parser.add_argument('-f', '--filter', help='Used for '
'filtering out only the files that have a number '
'keeping only the entries that have a number '
'falling in the given range.', nargs=2, type=float,
metavar=('LOW', 'HIGH'))
parser.add_argument('-e', '--exclude', help='Used to exclude a specific '
'number.')
metavar=('LOW', 'HIGH'), action='append')
parser.add_argument('-e', '--exclude', type=float, action='append',
help='Used to exclude an entry '
'that contains a specific number.')
parser.add_argument('-r', '--reverse', help='Returns in reversed order.',
action='store_true', default=False)
parser.add_argument('-R', '--recursive', help='Recursively decend the '
'directory tree.', action='store_true', default=False)
parser.add_argument('-t', '--number_type', choices=('digit', 'int', 'float'),
default='float', help='Choose the type of number '
'to search for.')
parser.add_argument('paths', help='The paths to sort.', nargs='*',
'to search for. "float" will search for floating-point '
'numbers. "int" will only search for integers. '
'"digit" is a shortcut for "int" with --nosign.')
parser.add_argument('--nosign', default=True, action='store_false',
dest='signed', help='Do not consider "+" or "-" as part '
'of a number, i.e. do not take sign into consideration.')
parser.add_argument('--noexp', default=True, action='store_false',
dest='exp', help='Do not consider an exponential as part '
'of a number, i.e. 1e4, would be considered as 1, "e", '
'and 4, not as 10000. This only effects the '
'--number_type=float.')
parser.add_argument('entries', help='The entries to sort. Taken from stdin '
'if nothing is given on the command line.', nargs='*',
default=sys.stdin)
args = parser.parse_args()

# Make sure the filter range is given properly. Does nothing if no filter
filterdata = check_filter(args.filter)

# Recursively collect paths, if necessary.
if args.recursive:
jn = os.path.join
paths = [jn(p, fn) for p, d, f in os.walk(os.curdir) for fn in f]
# Collect paths either from a pipe or the command-line arguments.
else:
paths = [f.strip() for f in args.paths]
args.filter = check_filter(args.filter)

# Split into directory path and filenames
paths = split_paths(paths, args.onlyfiles)
# Remove trailing whitespace from all the entries
entries = [e.strip() for e in args.entries]

# Sort by directory then by file within directory and print.
sort_and_print_paths(paths, filterdata, args.exclude, args.reverse, args.number_type)
sort_and_print_entries(entries, args)

def range_check(low, high):
    """\
    Verify that the given range has a low strictly lower than the high.

    Both bounds are converted with float() before they are compared, so
    anything float() accepts may be passed in.

    Returns the (low, high) pair as floats.
    Raises ValueError with the message 'low >= high' if low is not
    strictly below high.

    >>> range_check(10, 11)
    (10.0, 11.0)
    >>> range_check(6.4, 30)
    (6.4, 30.0)
    >>> try:
    ...     range_check(7, 2)
    ... except ValueError as e:
    ...     print(e)
    low >= high
    """
    low, high = float(low), float(high)
    # An empty range (low == high) is rejected as well as an inverted one.
    if low >= high:
        raise ValueError('low >= high')
    return low, high


def check_filter(filt):
    """\
    Check that the low value of each filter range is lower than the high.

    filt is a sequence of (low, high) pairs, or a falsy value (None,
    False, an empty sequence) when there is to be no filter.

    Returns a list with the result of range_check for every pair,
    or None if there is to be no filter.
    Raises ValueError, with the message prefixed by 'Error in --filter: ',
    if range_check rejects any of the pairs.

    >>> check_filter(())
    >>> check_filter(False)
    >>> check_filter(None)
    >>> check_filter([(6, 7)])
    [(6.0, 7.0)]
    >>> check_filter([(6, 7), (2, 8)])
    [(6.0, 7.0), (2.0, 8.0)]
    >>> try:
    ...     check_filter([(7, 2)])
    ... except ValueError as e:
    ...     print(e)
    Error in --filter: low >= high
    """
    # Quick return if no filter.
    if not filt:
        return None
    try:
        return [range_check(f[0], f[1]) for f in filt]
    except ValueError as a:
        # Re-raise with the option name so the user knows which
        # command-line argument was at fault.
        raise ValueError('Error in --filter: '+py23_str(a))


def keep_entry_range(entry, lows, highs, converter, regex):
    """\
    Boolean function to determine if an entry should be kept,
    based on whether any of its numbers falls in a given range.

    entry is the string to search and regex is the compiled pattern
    whose findall() yields the numbers in it. converter turns each
    match into a number. lows and highs are paired up element by
    element to form the ranges; both bounds are inclusive.

    Returns True if at least one number in the entry lies within
    at least one of the ranges, False otherwise.

    >>> import re
    >>> regex = re.compile(r'\d+')
    >>> keep_entry_range('a56b23c89', [0], [100], int, regex)
    True
    >>> keep_entry_range('a56b23c89', [1, 88], [20, 90], int, regex)
    True
    >>> keep_entry_range('a56b23c89', [1], [20], int, regex)
    False
    """
    # Pair the bounds once, and convert each match once, rather than
    # repeating that work for every (number, range) combination.
    ranges = list(zip(lows, highs))
    return any(low <= value <= high
               for value in map(converter, regex.findall(entry))
               for low, high in ranges)


def exclude_entry(entry, values, converter, regex):
"""\
Boolean function to determine if an entry should be kept,
based on it not containing any of the given numbers.
>>> import re
>>> regex = re.compile(r'\d+')
>>> exclude_entry('a56b23c89', [100], int, regex)
True
>>> exclude_entry('a56b23c89', [23], int, regex)
False
def split_paths(paths, a):
"""For each file, separate into directory and filename. Store all files
in a dir into a dict where the dir is the key and filename is the value.
"""
dirs = {}
for path in paths:
if a:
try:
with open(path) as fl:
pass
except IOError:
continue
dir, file = os.path.split(path)
try:
dirs[dir].append(file)
except KeyError:
dirs[dir] = []
dirs[dir].append(file)
return dirs

def sort_and_print_paths(dirs, filterdata, exclude, reverse, number_type):
"""Sort the paths by directoy then by file within that directory.
Print off the results.
return not any(converter(num) in values for num in regex.findall(entry))


def sort_and_print_entries(entries, args):
"""\
Sort the entries, applying the filters first if necessary.
>>> class Args:
... def __init__(self, filter, exclude, reverse):
... self.filter = filter
... self.exclude = exclude
... self.reverse = reverse
... self.number_type = 'float'
... self.signed = True
... self.exp = True
>>> entries = ['tmp/a57/path2',
... 'tmp/a23/path1',
... 'tmp/a1/path1',
... 'tmp/a130/path1',
... 'tmp/a64/path1',
... 'tmp/a64/path2']
>>> sort_and_print_entries(entries, Args(None, False, False))
tmp/a1/path1
tmp/a23/path1
tmp/a57/path2
tmp/a64/path1
tmp/a64/path2
tmp/a130/path1
>>> sort_and_print_entries(entries, Args([(20, 100)], False, False))
tmp/a23/path1
tmp/a57/path2
tmp/a64/path1
tmp/a64/path2
>>> sort_and_print_entries(entries, Args(None, [23, 130], False))
tmp/a1/path1
tmp/a57/path2
tmp/a64/path1
tmp/a64/path2
>>> sort_and_print_entries(entries, Args(None, [2], False))
tmp/a1/path1
tmp/a23/path1
tmp/a64/path1
tmp/a130/path1
>>> sort_and_print_entries(entries, Args(None, False, True))
tmp/a130/path1
tmp/a64/path2
tmp/a64/path1
tmp/a57/path2
tmp/a23/path1
tmp/a1/path1
"""
number_type = {'digit': None, 'int': int, 'float': float}[number_type]
for dir in natsorted(dirs.keys(), number_type=number_type):
dirs[dir].sort(key=lambda x: natsort_key(x, number_type=number_type))
if reverse:
dirs[dir] = reversed(dirs[dir])
for file in dirs[dir]:
if filterdata is not None:
# Find all the numbers in the filename.
nums = filterdata[2].findall(file)
# If any numbers are between the range, print.
# Otherwise, move to next file.
for num in nums:
if filterdata[0] <= float(num) <= filterdata[1]: break
else:
continue
if exclude and exclude in file: continue
print(os.path.join(dir, file))

# Extract the proper number type.
kwargs = {'number_type': {'digit': None, 'int': int, 'float': float}[args.number_type],
'signed': args.signed,
'exp': args.exp}

# Pre-remove entries that don't pass the filtering criteria
# Make sure we use the same searching algorithm for filtering as for sorting.
if args.filter is not None or args.exclude:
inp_options = (kwargs['number_type'], args.signed, args.exp)
regex, num_function = regex_and_num_function_chooser[inp_options]
if args.filter is not None:
lows, highs = [f[0] for f in args.filter], [f[1] for f in args.filter]
entries = [entry for entry in entries
if keep_entry_range(entry, lows, highs, num_function, regex)]
if args.exclude:
exclude = set(args.exclude)
entries = [entry for entry in entries
if exclude_entry(entry, exclude, num_function, regex)]

# Print off the sorted results
entries.sort(key=lambda x: natsort_key(x, **kwargs), reverse=args.reverse)
for entry in entries:
print(entry)


if __name__ == '__main__':
    try:
        main()
    except ValueError as a:
        # Hand the error text to sys.exit as the exit message; py23_str
        # is the same conversion used when the --filter error is built.
        sys.exit(py23_str(a))
    except KeyboardInterrupt:
        sys.exit(1)
# import doctest
# ret = doctest.testmod()
# if ret[0] == 0:
# print('natsort: All {0[1]} tests successful!'.format(ret))
5 changes: 4 additions & 1 deletion natsort/_version.py
@@ -1 +1,4 @@
__version__ = '3.0.1'
# -*- coding: utf-8 -*-
from __future__ import print_function, division, unicode_literals, absolute_import

__version__ = '3.2.1'

0 comments on commit 72cc8fc

Please sign in to comment.