def translate_pattern(glob):
    """
    Translate a file path glob like '*.txt' into a compiled regular
    expression.

    This differs from fnmatch.translate, which allows wildcards to match
    directory separators. It also knows about '**/', which matches any number
    of directories.

    :param glob: glob pattern whose components are separated by ``os.sep``.
    :returns: a compiled pattern, anchored at the end of the string and
        compiled with MULTILINE and DOTALL; use its ``match`` method to test
        a whole path.
    """
    pat = ''

    # This will split on '/' within [character classes]. This is deliberate.
    chunks = glob.split(os.path.sep)

    sep = re.escape(os.sep)
    valid_char = '[^%s]' % (sep,)

    for c, chunk in enumerate(chunks):
        last_chunk = c == len(chunks) - 1

        # Chunks that are a literal ** are globstars. They match anything.
        if chunk == '**':
            if last_chunk:
                # Match anything if this is the last component
                pat += '.*'
            else:
                # Match '(name/)*'
                pat += '(?:%s+%s)*' % (valid_char, sep)
            continue  # The whole path component has been handled

        # Find any special characters in the remainder
        i = 0
        chunk_len = len(chunk)
        while i < chunk_len:
            char = chunk[i]
            if char == '*':
                # Match any number of name characters
                pat += valid_char + '*'
            elif char == '?':
                # Match a name character
                pat += valid_char
            elif char == '[':
                # Character class
                inner_i = i + 1
                # Skip initial !/] chars: a ']' in first position (after an
                # optional '!') is a literal member, not the closing bracket.
                if inner_i < chunk_len and chunk[inner_i] == '!':
                    inner_i = inner_i + 1
                if inner_i < chunk_len and chunk[inner_i] == ']':
                    inner_i = inner_i + 1

                # Loop till the closing ] is found
                while inner_i < chunk_len and chunk[inner_i] != ']':
                    inner_i = inner_i + 1

                if inner_i >= chunk_len:
                    # Got to the end of the string without finding a closing ]
                    # Do not treat this as a matching group, but as a literal [
                    pat += re.escape(char)
                else:
                    # Grab the insides of the [brackets]
                    inner = chunk[i + 1:inner_i]
                    char_class = ''

                    # Class negation
                    if inner[0] == '!':
                        char_class = '^'
                        inner = inner[1:]

                    char_class += re.escape(inner)
                    pat += '[%s]' % (char_class,)

                    # Skip to the end ]
                    i = inner_i
            else:
                pat += re.escape(char)
            i += 1

        # Join each chunk with the dir separator
        if not last_chunk:
            pat += sep

    # The flags are passed as an argument instead of a trailing '(?ms)':
    # inline global flags anywhere but the start of a pattern are deprecated
    # since Python 3.6 and rejected with re.error since Python 3.11.
    pat += r'\Z'
    return re.compile(pat, flags=re.MULTILINE | re.DOTALL)
'action' is always + # defined: it's the first word of the line. Which of the other + # three are defined depends on the action; it'll be either + # patterns, (dir and patterns), or (dir_pattern). + (action, patterns, dir, dir_pattern) = self._parse_template_line(line) + + # OK, now we know that the action is valid and we have the + # right number of words on the line for that action -- so we + # can proceed with minimal error-checking. + if action == 'include': + self.debug_print("include " + ' '.join(patterns)) + for pattern in patterns: + if not self.include(pattern): + log.warn("warning: no files found matching '%s'", pattern) + + elif action == 'exclude': + self.debug_print("exclude " + ' '.join(patterns)) + for pattern in patterns: + if not self.exclude(pattern): + log.warn(("warning: no previously-included files " + "found matching '%s'"), pattern) + + elif action == 'global-include': + self.debug_print("global-include " + ' '.join(patterns)) + for pattern in patterns: + if not self.global_include(pattern): + log.warn(("warning: no files found matching '%s' " + "anywhere in distribution"), pattern) + + elif action == 'global-exclude': + self.debug_print("global-exclude " + ' '.join(patterns)) + for pattern in patterns: + if not self.global_exclude(pattern): + log.warn(("warning: no previously-included files matching " + "'%s' found anywhere in distribution"), + pattern) + + elif action == 'recursive-include': + self.debug_print("recursive-include %s %s" % + (dir, ' '.join(patterns))) + for pattern in patterns: + if not self.recursive_include(dir, pattern): + log.warn(("warning: no files found matching '%s' " + "under directory '%s'"), + pattern, dir) + + elif action == 'recursive-exclude': + self.debug_print("recursive-exclude %s %s" % + (dir, ' '.join(patterns))) + for pattern in patterns: + if not self.recursive_exclude(dir, pattern): + log.warn(("warning: no previously-included files matching " + "'%s' found under directory '%s'"), + pattern, dir) + + 
elif action == 'graft': + self.debug_print("graft " + dir_pattern) + if not self.graft(dir_pattern): + log.warn("warning: no directories found matching '%s'", + dir_pattern) + + elif action == 'prune': + self.debug_print("prune " + dir_pattern) + if not self.prune(dir_pattern): + log.warn(("no previously-included directories found " + "matching '%s'"), dir_pattern) + + else: + raise DistutilsInternalError( + "this cannot happen: invalid action '%s'" % action) + + def _remove_files(self, predicate): + """ + Remove all files from the file list that match the predicate. + Return True if any matching files were removed + """ + found = False + for i in range(len(self.files) - 1, -1, -1): + if predicate(self.files[i]): + self.debug_print(" removing " + self.files[i]) + del self.files[i] + found = True + return found + + def include(self, pattern): + """Include files that match 'pattern'.""" + found = [f for f in glob(pattern) if not os.path.isdir(f)] + self.extend(found) + return bool(found) + + def exclude(self, pattern): + """Exclude files that match 'pattern'.""" + match = translate_pattern(pattern) + return self._remove_files(match.match) + + def recursive_include(self, dir, pattern): + """ + Include all files anywhere in 'dir/' that match the pattern. + """ + full_pattern = os.path.join(dir, '**', pattern) + found = [f for f in glob(full_pattern, recursive=True) + if not os.path.isdir(f)] + self.extend(found) + return bool(found) + + def recursive_exclude(self, dir, pattern): + """ + Exclude any file anywhere in 'dir/' that match the pattern. 
def graft(self, dir):
    """
    Include all files found under every directory matching 'dir'.

    'dir' is the directory pattern of a MANIFEST.in ``graft`` line, so it is
    expanded with glob before each matching directory is walked. A plain
    directory name simply matches itself.

    Return True if at least one file was added.
    """
    found = [
        item
        for match_dir in glob(dir)
        for item in distutils.filelist.findall(match_dir)
    ]
    self.extend(found)
    return bool(found)
Uses distutils.filelist.findall (which is - really the version monkeypatched in by setuptools/__init__.py) - to perform the search. - - Since findall records relative paths, prefix the returned - paths with cmd.egg_base, so add_default's include_pattern call - (which is looking for the absolute cmd.egg_info) will match - them. - """ - if cmd.egg_base == os.curdir: - # egg-info files were already added by something else - return - - discovered = distutils.filelist.findall(cmd.egg_base) - resolved = (os.path.join(cmd.egg_base, path) for path in discovered) - self.filelist.allfiles.extend(resolved) + self.filelist.graft(ei_cmd.egg_info) def prune_file_list(self): build = self.get_finalized_command('build') base_dir = self.distribution.get_fullname() - self.filelist.exclude_pattern(None, prefix=build.build_base) - self.filelist.exclude_pattern(None, prefix=base_dir) + self.filelist.prune(build.build_base) + self.filelist.prune(base_dir) sep = re.escape(os.sep) self.filelist.exclude_pattern(r'(^|' + sep + r')(RCS|CVS|\.svn)' + sep, is_regex=1) diff --git a/setuptools/glob.py b/setuptools/glob.py new file mode 100644 index 00000000000..f51b9c83639 --- /dev/null +++ b/setuptools/glob.py @@ -0,0 +1,165 @@ +""" +Filename globbing utility. Mostly a copy of `glob` from Python 3.5. + +Changes include: + * `yield from` and PEP3102 `*` removed. + * `bytes` changed to `six.binary_type`. + * Hidden files are not ignored. +""" + +import os +import re +import fnmatch +from setuptools.extern.six import binary_type + +__all__ = ["glob", "iglob", "escape"] + +def glob(pathname, recursive=False): + """Return a list of paths matching a pathname pattern. + + The pattern may contain simple shell-style wildcards a la + fnmatch. However, unlike fnmatch, filenames starting with a + dot are special cases that are not matched by '*' and '?' + patterns. + + If recursive is true, the pattern '**' will match any files and + zero or more directories and subdirectories. 
def iglob(pathname, recursive=False):
    """Return an iterator which yields the paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. Unlike the standard library's glob, filenames starting
    with a dot get no special treatment here: '*' and '?' match them
    like any other name.

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    it = _iglob(pathname, recursive)
    if recursive and _isrecursive(pathname):
        # For a bare '**' the recursive helper yields an empty string first;
        # consume it so callers only ever see real names.
        s = next(it)  # skip empty string
        assert not s
    return it
def _rlistdir(dirname):
    """
    Recursively yield the path, relative to 'dirname', of every entry found
    below it.

    An empty 'dirname' stands for the current directory; the type of the
    empty value (binary or text) selects the type of the yielded names.
    Entries that cannot be listed are yielded themselves but not descended
    into.
    """
    if not dirname:
        if isinstance(dirname, binary_type):
            # Same spelling as glob1. binary_type(os.curdir, 'ASCII') raises
            # TypeError on Python 2, where binary_type is str.
            dirname = os.curdir.encode('ASCII')
        else:
            dirname = os.curdir
    try:
        names = os.listdir(dirname)
    except OSError:
        # Not a directory, or not readable: nothing to descend into.
        return
    for x in names:
        yield x
        # dirname is never empty at this point, so the join is always valid.
        path = os.path.join(dirname, x)
        for y in _rlistdir(path):
            yield os.path.join(x, y)
def test_translated_pattern_test():
    """
    Exercise translate_pattern through the paths its compiled result accepts
    and rejects.

    The regex source text is deliberately not compared: how separators are
    escaped and where the flags are attached are implementation details that
    vary between Python versions, while the matching behaviour must not.
    """
    l = make_local_path

    def matches(glob, path):
        return translate_pattern(l(glob)).match(l(path)) is not None

    # Literal paths match only themselves
    assert matches('foo', 'foo')
    assert not matches('foo', 'foobar')
    assert matches('foo/bar', 'foo/bar')
    assert not matches('foo/bar', 'foo/bar/baz')

    # Glob matching: wildcards never cross a directory separator
    assert matches('*.txt', 'foo.txt')
    assert not matches('*.txt', 'foo/foo.txt')
    assert matches('dir/*.txt', 'dir/foo.txt')
    assert not matches('dir/*.txt', 'notdir/foo.txt')
    assert matches('*/*.py', 'bin/start.py')
    assert not matches('*/*.py', 'start.py')
    assert matches('docs/page-?.txt', 'docs/page-9.txt')
    assert not matches('docs/page-?.txt', 'docs/page-10.txt')

    # Globstars change what they mean depending upon where they are
    assert matches('foo/**/bar', 'foo/bar')
    assert matches('foo/**/bar', 'foo/a/b/bar')
    assert not matches('foo/**/bar', 'foo/abar')
    assert matches('foo/**', 'foo/bar/baz')
    assert not matches('foo/**', 'foo')
    assert matches('**', 'foo/bar/baz')

    # Character classes
    assert matches('pre[one]post', 'preopost')
    assert not matches('pre[one]post', 'prepost')
    assert not matches('pre[one]post', 'preonepost')
    assert matches('hello[!one]world', 'helloxworld')
    assert not matches('hello[!one]world', 'hellooworld')
    assert matches('[]one].txt', '].txt')
    assert not matches('[]one].txt', 'one.txt')
    assert matches('foo[!]one]bar', 'foozbar')
    assert not matches('foo[!]one]bar', 'foo]bar')
test_exclude_pattern(self): def test_include_pattern(self): # return False if no match file_list = FileList() - file_list.set_allfiles([]) + self.make_files([]) assert not file_list.include_pattern('*.py') # return True if files match file_list = FileList() - file_list.set_allfiles(['a.py', 'b.txt']) + self.make_files(['a.py', 'b.txt']) assert file_list.include_pattern('*.py') # test * matches all files file_list = FileList() - assert file_list.allfiles is None - file_list.set_allfiles(['a.py', 'b.txt']) + self.make_files(['a.py', 'b.txt']) file_list.include_pattern('*') - assert file_list.allfiles == ['a.py', 'b.txt'] + assert file_list.files == ['a.py', 'b.txt'] - def test_process_template(self): - l = make_local_path + def test_process_template_line_invalid(self): # invalid lines file_list = FileList() for action in ('include', 'exclude', 'global-include', @@ -377,9 +403,11 @@ def test_process_template(self): else: assert False, "Should have thrown an error" + def test_include(self): + l = make_local_path # include file_list = FileList() - file_list.set_allfiles(['a.py', 'b.txt', l('d/c.py')]) + self.make_files(['a.py', 'b.txt', l('d/c.py')]) file_list.process_template_line('include *.py') assert file_list.files == ['a.py'] @@ -389,6 +417,8 @@ def test_process_template(self): assert file_list.files == ['a.py'] self.assertWarnings() + def test_exclude(self): + l = make_local_path # exclude file_list = FileList() file_list.files = ['a.py', 'b.txt', l('d/c.py')] @@ -401,9 +431,11 @@ def test_process_template(self): assert file_list.files == ['b.txt', l('d/c.py')] self.assertWarnings() + def test_global_include(self): + l = make_local_path # global-include file_list = FileList() - file_list.set_allfiles(['a.py', 'b.txt', l('d/c.py')]) + self.make_files(['a.py', 'b.txt', l('d/c.py')]) file_list.process_template_line('global-include *.py') assert file_list.files == ['a.py', l('d/c.py')] @@ -413,6 +445,8 @@ def test_process_template(self): assert file_list.files == 
['a.py', l('d/c.py')] self.assertWarnings() + def test_global_exclude(self): + l = make_local_path # global-exclude file_list = FileList() file_list.files = ['a.py', 'b.txt', l('d/c.py')] @@ -425,10 +459,11 @@ def test_process_template(self): assert file_list.files == ['b.txt'] self.assertWarnings() + def test_recursive_include(self): + l = make_local_path # recursive-include file_list = FileList() - file_list.set_allfiles(['a.py', l('d/b.py'), l('d/c.txt'), - l('d/d/e.py')]) + self.make_files(['a.py', l('d/b.py'), l('d/c.txt'), l('d/d/e.py')]) file_list.process_template_line('recursive-include d *.py') assert file_list.files == [l('d/b.py'), l('d/d/e.py')] @@ -438,6 +473,8 @@ def test_process_template(self): assert file_list.files == [l('d/b.py'), l('d/d/e.py')] self.assertWarnings() + def test_recursive_exclude(self): + l = make_local_path # recursive-exclude file_list = FileList() file_list.files = ['a.py', l('d/b.py'), l('d/c.txt'), l('d/d/e.py')] @@ -450,10 +487,11 @@ def test_process_template(self): assert file_list.files == ['a.py', l('d/c.txt')] self.assertWarnings() + def test_graft(self): + l = make_local_path # graft file_list = FileList() - file_list.set_allfiles(['a.py', l('d/b.py'), l('d/d/e.py'), - l('f/f.py')]) + self.make_files(['a.py', l('d/b.py'), l('d/d/e.py'), l('f/f.py')]) file_list.process_template_line('graft d') assert file_list.files == [l('d/b.py'), l('d/d/e.py')] @@ -463,6 +501,8 @@ def test_process_template(self): assert file_list.files == [l('d/b.py'), l('d/d/e.py')] self.assertWarnings() + def test_prune(self): + l = make_local_path # prune file_list = FileList() file_list.files = ['a.py', l('d/b.py'), l('d/d/e.py'), l('f/f.py')]