Skip to content

Commit

Permalink
"Merge pull request #19 from ankostis/preserve_matches\n\nfeat: confi…
Browse files Browse the repository at this point in the history
…gurable norm/case-sensitive/slashes preserving matches"
  • Loading branch information
miracle2k committed May 1, 2017
2 parents bd6cf8a + b777c1f commit 1dd8f3a
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 40 deletions.
75 changes: 51 additions & 24 deletions glob2/fnmatch.py
Expand Up @@ -10,7 +10,6 @@
corresponding to PATTERN. (It does not compile it.)
"""
import os
import posixpath
import re
try:
from functools import lru_cache
Expand All @@ -19,7 +18,16 @@

__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]

def fnmatch(name, pat):

def _norm_paths(path, norm_paths, sep):
    """Normalize ``path`` according to the tri-state ``norm_paths`` flag.

    :param path:
        the path or pattern to normalize (``str`` or ``bytes``).
    :param norm_paths:
        when ``None``, every forward slash in ``path`` is replaced with
        ``sep`` (or ``os.sep`` if ``sep`` is falsy);
        when true, ``os.path.normcase()`` is applied;
        when any other false value, ``path`` is returned untouched.
    :param sep:
        separator substituted for ``/`` in the ``norm_paths is None`` case.
    :return:
        the (possibly) normalized path, same type as ``path``.
    """
    if norm_paths is None:
        new_sep = sep or os.sep
        if isinstance(path, bytes):
            slash = b'/'
            # A bytes path needs a bytes separator; same codec the module
            # uses elsewhere for bytes patterns.
            if not isinstance(new_sep, bytes):
                new_sep = new_sep.encode('ISO-8859-1')
        else:
            slash = '/'
        # Literal replace instead of re.sub(): re.sub() treats the
        # replacement as a template, so a lone backslash separator
        # (e.g. os.sep on Windows) is rejected as a bad escape.
        path = path.replace(slash, new_sep)
    elif norm_paths:
        path = os.path.normcase(path)
    return path


def fnmatch(name, pat, norm_paths=True, case_sensitive=True, sep=None):
"""Test whether FILENAME matches PATTERN.
Patterns are Unix shell style:
Expand All @@ -33,46 +41,65 @@ def fnmatch(name, pat):
Both FILENAME and PATTERN are first case-normalized
if the operating system requires it.
If you don't want this, use fnmatchcase(FILENAME, PATTERN).
:param slashes:
:param norm_paths:
A tri-state boolean:
when true, invokes `os.path.normcase()` on both paths,
when `None`, just equalize slashes/backslashes to `os.sep`,
when false, does not touch paths at all.
Note that a side-effect of `normcase()` on *Windows* is that
it converts to lower-case all matches of `?glob()` functions.
:param case_sensitive:
defines the case-sensitivity of the regex doing the matches
:param sep:
when only slashes are replaced, the separator character to substitute with;
if false, `os.sep` is used.
Notice that by default, `normcase()` causes insensitive matching
on *Windows*, regardless of the `case_sensitive` param.
Set ``norm_paths=None, case_sensitive=False`` to preserve
verbatim matches.
"""
name = os.path.normcase(name)
pat = os.path.normcase(pat)
return fnmatchcase(name, pat)
name, pat = [_norm_paths(p, norm_paths, sep)
for p in (name, pat)]

return fnmatchcase(name, pat, case_sensitive=case_sensitive)

lru_cache(maxsize=256, typed=True)
def _compile_pattern(pat):

@lru_cache(maxsize=256, typed=True)
def _compile_pattern(pat, case_sensitive):
if isinstance(pat, bytes):
pat_str = pat.decode('ISO-8859-1')
res_str = translate(pat_str)
res = res_str.encode('ISO-8859-1')
else:
res = translate(pat)
return re.compile(res).match
flags = 0 if case_sensitive else re.IGNORECASE
return re.compile(res, flags).match


def filter(names, pat):
def filter(names, pat, norm_paths=True, case_sensitive=True, sep=None):
"""Return the subset of the list NAMES that match PAT."""
result = []
pat = os.path.normcase(pat)
match = _compile_pattern(pat)
if os.path is posixpath:
# normcase on posix is NOP. Optimize it away from the loop.
for name in names:
m = match(name)
if m:
result.append((name, m.groups()))
else:
for name in names:
m = match(os.path.normcase(name))
if m:
result.append((name, m.groups()))
pat = _norm_paths(pat, norm_paths, sep)
match = _compile_pattern(pat, case_sensitive)
for name in names:
m = match(_norm_paths(name, norm_paths, sep))
if m:
result.append((name,
tuple(_norm_paths(p, norm_paths, sep) for p in m.groups())))
return result

def fnmatchcase(name, pat):

def fnmatchcase(name, pat, case_sensitive=True):
"""Test whether FILENAME matches PATTERN, including case.
This is a version of fnmatch() which doesn't case-normalize
its arguments.
"""
match = _compile_pattern(pat)
match = _compile_pattern(pat, case_sensitive)
return match(name) is not None


Expand Down
51 changes: 35 additions & 16 deletions glob2/impl.py
Expand Up @@ -5,6 +5,7 @@
import sys
import os
import re
from os.path import join
from . import fnmatch

try:
Expand All @@ -20,7 +21,7 @@ class Globber(object):
islink = staticmethod(os.path.islink)
exists = staticmethod(os.path.lexists)

def walk(self, top, followlinks=False):
def walk(self, top, followlinks=False, sep=None):
"""A simplified version of os.walk (code copied) that uses
``self.listdir``, and the other local filesystem methods.
Expand All @@ -39,12 +40,13 @@ def walk(self, top, followlinks=False):
yield top, items

for name in items:
new_path = os.path.join(top, name)
new_path = _join_paths([top, name], sep=sep)
if followlinks or not self.islink(new_path):
for x in self.walk(new_path, followlinks):
yield x

def glob(self, pathname, with_matches=False, include_hidden=False, recursive=True):
def glob(self, pathname, with_matches=False, include_hidden=False, recursive=True,
norm_paths=True, case_sensitive=True, sep=None):
"""Return a list of paths matching a pathname pattern.
The pattern may contain simple shell-style wildcards a la
Expand All @@ -55,9 +57,11 @@ def glob(self, pathname, with_matches=False, include_hidden=False, recursive=Tru
If ``include_hidden`` is True, then files and folders starting with
a dot are also returned.
"""
return list(self.iglob(pathname, with_matches, include_hidden))
return list(self.iglob(pathname, with_matches, include_hidden,
norm_paths, case_sensitive, sep))

def iglob(self, pathname, with_matches=False, include_hidden=False, recursive=True):
def iglob(self, pathname, with_matches=False, include_hidden=False, recursive=True,
norm_paths=True, case_sensitive=True, sep=None):
"""Return an iterator which yields the paths matching a pathname
pattern.
Expand All @@ -74,12 +78,14 @@ def iglob(self, pathname, with_matches=False, include_hidden=False, recursive=Tr
If ``include_hidden`` is True, then files and folders starting with
a dot are also returned.
"""
result = self._iglob(pathname, include_hidden=include_hidden)
result = self._iglob(pathname, True, include_hidden,
norm_paths, case_sensitive, sep)
if with_matches:
return result
return imap(lambda s: s[0], result)

def _iglob(self, pathname, rootcall=True, include_hidden=False):
def _iglob(self, pathname, rootcall, include_hidden,
norm_paths, case_sensitive, sep):
"""Internal implementation that backs :meth:`iglob`.
``rootcall`` is required to differentiate between the user's call to
Expand Down Expand Up @@ -111,17 +117,20 @@ def _iglob(self, pathname, rootcall=True, include_hidden=False):
# Note that this may return files, which will be ignored
# later when we try to use them as directories.
# Prefiltering them here would only require more IO ops.
dirs = self._iglob(dirname, False, include_hidden)
dirs = self._iglob(dirname, False, include_hidden,
norm_paths, case_sensitive, sep)
else:
dirs = [(dirname, ())]

# Resolve ``basename`` expr for every directory found
for dirname, dir_groups in dirs:
for name, groups in self.resolve_pattern(
dirname, basename, not rootcall, include_hidden):
yield os.path.join(dirname, name), dir_groups + groups
for name, groups in self.resolve_pattern(dirname, basename,
not rootcall, include_hidden,
norm_paths, case_sensitive, sep):
yield _join_paths([dirname, name], sep=sep), dir_groups + groups

def resolve_pattern(self, dirname, pattern, globstar_with_root, include_hidden):
def resolve_pattern(self, dirname, pattern, globstar_with_root, include_hidden,
norm_paths, case_sensitive, sep):
"""Apply ``pattern`` (contains no path elements) to the
literal directory in ``dirname``.
Expand All @@ -145,7 +154,7 @@ def resolve_pattern(self, dirname, pattern, globstar_with_root, include_hidden):
if self.isdir(dirname):
return [(pattern, ())]
else:
if self.exists(os.path.join(dirname, pattern)):
if self.exists(_join_paths([dirname, pattern], sep=sep)):
return [(pattern, ())]
return []

Expand All @@ -158,8 +167,8 @@ def resolve_pattern(self, dirname, pattern, globstar_with_root, include_hidden):
# an empty string as opposed to '.', we spare ourselves
# having to deal with os.path.normpath() later.
names = [''] if globstar_with_root else []
for top, entries in self.walk(dirname):
_mkabs = lambda s: os.path.join(top[len(dirname)+1:], s)
for top, entries in self.walk(dirname, sep=sep):
_mkabs = lambda s: _join_paths([top[len(dirname) + 1:], s], sep=sep)
names.extend(map(_mkabs, entries))
# Reset pattern so that fnmatch(), which does not understand
# ** specifically, will only return a single group match.
Expand All @@ -174,7 +183,7 @@ def resolve_pattern(self, dirname, pattern, globstar_with_root, include_hidden):
# that the empty string we may have added earlier remains.
# Do not filter out the '' that we might have added earlier
names = filter(lambda x: not x or not _ishidden(x), names)
return fnmatch.filter(names, pattern)
return fnmatch.filter(names, pattern, norm_paths, case_sensitive, sep)


default_globber = Globber()
Expand All @@ -186,12 +195,22 @@ def resolve_pattern(self, dirname, pattern, globstar_with_root, include_hidden):
# Precompiled detectors for the glob metacharacters ``*``, ``?`` and ``[``,
# one for text patterns and one for bytes patterns.
magic_check = re.compile('[*?[]')
magic_check_bytes = re.compile(b'[*?[]')


def has_magic(s):
    """Return True if ``s`` (``str`` or ``bytes``) contains ``*``, ``?`` or ``[``."""
    # Pick the detector whose pattern type matches the input type.
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None


def _ishidden(path):
    """Return True if ``path`` starts with a dot.

    Indexes ``path[0]``, so ``path`` must be non-empty. The first element
    is compared against both ``'.'`` and ``b'.'[0]`` so that text and
    bytes paths are both recognised.
    """
    leading = path[0]
    return leading == '.' or leading == b'.'[0]


def _join_paths(paths, sep=None):
    """Join ``paths`` with ``os.path.join`` and optionally force a separator.

    :param paths:
        sequence of path components (all ``str`` or all ``bytes``).
    :param sep:
        when truthy, every forward slash in the joined path is replaced
        with it; when falsy, the joined path is returned unchanged.
    :return:
        the joined path, same type as the components.
    """
    path = join(*paths)
    if sep:
        if isinstance(path, bytes):
            slash = b'/'
            # A bytes path needs a bytes separator.
            if not isinstance(sep, bytes):
                sep = sep.encode('ISO-8859-1')
        else:
            slash = '/'
        # Literal replace instead of re.sub(): re.sub() interprets the
        # replacement as a template and rejects a lone backslash
        # separator as a bad escape.
        path = path.replace(slash, sep)
    return path

0 comments on commit 1dd8f3a

Please sign in to comment.