diff --git a/rpaths.py b/rpaths.py index cd0c92f..f6862eb 100644 --- a/rpaths.py +++ b/rpaths.py @@ -585,19 +585,22 @@ def listdir(self, pattern=None): pass elif callable(pattern): files = filter(pattern, files) - elif isinstance(pattern, backend_types): - if isinstance(pattern, bytes): - pattern = pattern.decode(self._encoding, 'replace') - start, full_re, int_re = pattern2re(pattern) + else: + if isinstance(pattern, backend_types): + if isinstance(pattern, bytes): + pattern = pattern.decode(self._encoding, 'replace') + start, full_re, _int_re = pattern2re(pattern) + elif isinstance(pattern, Pattern): + start, full_re = pattern.start_dir, pattern.full_regex + else: + raise TypeError("listdir() expects pattern to be a callable, " + "a regular expression or a string pattern, " + "got %r" % type(pattern)) # If pattern contains slashes (other than first and last chars), # listdir() will never match anything if start: return [] files = [f for f in files if full_re.search(f.unicodename)] - else: - raise TypeError("listdir() expects pattern to be a callable, " - "a regular expression or a string pattern, " - "got %r" % type(pattern)) return files def recursedir(self, pattern=None, top_down=True): @@ -627,10 +630,18 @@ def recursedir(self, pattern=None, top_down=True): pattern = lambda p: True elif callable(pattern): pass - elif isinstance(pattern, backend_types): - if isinstance(pattern, bytes): - pattern = pattern.decode(self._encoding, 'replace') - start, full_re, int_re = pattern2re(pattern) + else: + if isinstance(pattern, backend_types): + if isinstance(pattern, bytes): + pattern = pattern.decode(self._encoding, 'replace') + start, full_re, int_re = pattern2re(pattern) + elif isinstance(pattern, Pattern): + start, full_re, int_re = \ + pattern.start_dir, pattern.full_regex, pattern.int_regex + else: + raise TypeError("recursedir() expects pattern to be a " + "callable, a regular expression or a string " + "pattern, got %r" % type(pattern)) if self._lib.sep != '/': 
pattern = lambda p: full_re.search( unicode(p).replace(self._lib.sep, '/')) @@ -641,10 +652,6 @@ def recursedir(self, pattern=None, top_down=True): pattern = lambda p: full_re.search(unicode(p)) if int_re is not None: int_pattern = lambda p: int_re.search(unicode(p)) - else: - raise TypeError("recursedir() expects pattern to be a callable, " - "a regular expression or a string pattern, got " - "%r" % type(pattern)) if not start: path = self else: @@ -966,6 +973,74 @@ def rewrite(self, mode='r', name=None, temp=None, tempext='~', **kwargs): pathw.rename(pathr) +class Pattern(object): + """A pattern that paths can be matched against. + + You can check if a filename matches this pattern by using `matches()`, or + pass it to the `Path.listdir` and `Path.recursedir` methods. + + `may_contain_matches()` is a special method which you can feed directories + to; if it returns False, no path under that one will match the pattern. + + >>> pattern = Pattern('/usr/l*/**.so') + >>> pattern.matches('/usr/local/irc/mod_user.so') + True + >>> pattern.matches('/usr/bin/thing.so') + False + >>> pattern.may_contain_matches('/usr') + True + >>> pattern.may_contain_matches('/usr/lib') + True + >>> pattern.may_contain_matches('/usr/bin') + False + """ + def __init__(self, pattern): + if isinstance(pattern, bytes): + pattern = pattern.decode(sys.getfilesystemencoding()) + self.start_dir, self.full_regex, self.int_regex = pattern2re(pattern) + + @staticmethod + def _prepare_path(path): + # Here we want to force the use of replacement characters. 
+ # The __unicode__ implementation might use 'surrogateescape' + replace = False + if isinstance(path, AbstractPath): + replace = path._lib.sep if path._lib.sep != '/' else None + path = path.path + else: + replace = Path._lib.sep if Path._lib.sep != '/' else None + if isinstance(path, bytes): + path = path.decode(sys.getfilesystemencoding(), 'replace') + elif not isinstance(path, unicode): + raise TypeError("Expected a path, got %r" % type(path)) + + if path.startswith('/'): + path = path[1:] + + if replace is not None: + path = path.replace(replace, '/') + + return path + + def matches(self, path): + """Tests if the given path matches the pattern. + + Note that the unicode translation of the path is matched, so + replacement characters might have been added. + """ + path = self._prepare_path(path) + return self.full_regex.search(path) is not None + + def may_contain_matches(self, path): + """Tests whether it's possible for paths under the given one to match. + + If this method returns False, no path under the given one will match the + pattern. 
+ """ + path = self._prepare_path(path) + return self.int_regex.search(path) is not None + + no_special_chars = re.compile(r'^(?:[^\\*?\[\]]|\\.)*$') @@ -1034,7 +1109,7 @@ def pattern2re(pattern): return '', re.compile(''), None elif '/' in pattern: full_regex = '^' # Start at beginning of path - int_regex = ['^'] + int_regex = [] int_regex_done = False start_dir = [] start_dir_done = False diff --git a/tests/test_concrete.py b/tests/test_concrete.py index 5b4f7bf..b261d3d 100644 --- a/tests/test_concrete.py +++ b/tests/test_concrete.py @@ -8,7 +8,7 @@ import unittest from rpaths import unicode, dict_union, Path, PosixPath, WindowsPath, \ - pattern2re + Pattern, pattern2re windows_only = unittest.skipUnless(issubclass(Path, WindowsPath), @@ -152,28 +152,38 @@ def test_recursedir(self): [b'file', b'r\xC3\xA9pertoire', b'r\xC3\xA9pertoire/file'])) - self.compare_paths(self.tmp, self.tmp.recursedir('/file'), + self.compare_paths(self.tmp, self.tmp.recursedir(Pattern('/file')), (['file'], [b'file'])) self.compare_paths(self.tmp, self.tmp.recursedir('/r\xE9pertoire/file'), (['r\xE9pertoire\\file'], [b'r\xC3\xA9pertoire/file'])) + self.compare_paths(self.tmp, + self.tmp.recursedir(Pattern('/r\xE9pertoire/file')), + (['r\xE9pertoire\\file'], + [b'r\xC3\xA9pertoire/file'])) class TestPattern2Re(unittest.TestCase): """Tests the pattern2re() function, used to recognize extended patterns. 
""" - def do_test_pattern(self, pattern, start, tests): + def do_test_pattern(self, pattern, start, tests, interm=False): s, fr, ir = pattern2re(pattern) error = '' if s != start: error += "\n%r didn't start at %r (but %r)" % (pattern, start, s) + if interm: + r = ir + suffix = " (interm=True)" + else: + r = fr + suffix = "" for path, expected in tests: - passed = fr.search(path) + passed = r.search(path) if passed and not expected: - error += "\n%r matched %r" % (pattern, path) + error += "\n%r matched %r%s" % (pattern, path, suffix) elif not passed and expected: - error += "\n%r didn't match %r" % (pattern, path) + error += "\n%r didn't match %r%s" % (pattern, path, suffix) if error: self.fail(error) @@ -281,6 +291,40 @@ def test_classes(self): ('someb]file', False), ('somebfile', False)]) + def test_iterm(self): + """Tests the int_regex return value.""" + self.do_test_pattern( + r'/usr/path/*.txt', + 'usr/path', + [('usr', True), + ('usr/path', True), + ('usr/lib', False)], + interm=True) + + def test_pattern(self): + """Tests the high-level Pattern class.""" + for pattern in ('/usr/l*/**/*.txt', b'/usr/l*/**/*.txt'): + pattern = Pattern(pattern) + self.assertTrue(pattern.matches('/usr/lib/irc/test.txt')) + self.assertTrue(pattern.matches(b'/usr/local/lib/test.txt')) + self.assertFalse(pattern.matches('/usr/bin/test.txt')) + self.assertTrue(pattern.may_contain_matches('/usr/lib')) + self.assertTrue(pattern.may_contain_matches('/usr')) + self.assertFalse(pattern.may_contain_matches(b'/usr/bin')) + + self.assertTrue(pattern.matches('usr/lib/irc/test.txt')) + self.assertFalse(pattern.matches('smthg/usr/lib/irc/test.txt')) + self.assertTrue(pattern.may_contain_matches('usr/lib')) + self.assertTrue(pattern.may_contain_matches('usr')) + + self.assertTrue(pattern.matches(WindowsPath( + 'usr\\localuser\\Binaries\\readme.txt'))) + self.assertFalse(pattern.matches(WindowsPath( + 'usr\\otheruser\\Binaries\\readme.txt'))) + + 
self.assertEqual(pattern.matches('usr\\lib\\thing\\readme.txt'), + issubclass(Path, WindowsPath)) + class TestDictUnion(unittest.TestCase): def test_union(self):