-
Notifications
You must be signed in to change notification settings - Fork 0
/
smartpath.py
265 lines (242 loc) · 11.4 KB
/
smartpath.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
## {{{ http://code.activestate.com/recipes/577230/ (r4)
def _should_include_path(path, includes, excludes):
"""Return True iff the given path should be included."""
from os.path import basename
from fnmatch import fnmatch
base = basename(path)
if includes:
for include in includes:
if fnmatch(base, include):
try:
log.debug("include `%s' (matches `%s')", path, include)
except (NameError, AttributeError):
pass
break
else:
try:
log.debug("exclude `%s' (matches no includes)", path)
except (NameError, AttributeError):
pass
return False
for exclude in excludes:
if fnmatch(base, exclude):
try:
log.debug("exclude `%s' (matches `%s')", path, exclude)
except (NameError, AttributeError):
pass
return False
return True
def _walk(top, topdown=True, onerror=None, follow_links=False):
"""A version of `os.walk()` with a couple differences regarding symlinks.
1. follow_links=False (the default): A symlink to a dir is
returned as a *non*-dir. In `os.walk()`, a symlink to a dir is
returned in the *dirs* list, but it is not recursed into.
2. follow_links=True: A symlink to a dir is returned in the
*dirs* list (as with `os.walk()`) but it *is conditionally*
recursed into (unlike `os.walk()`).
A symlinked dir is only recursed into if it is to a deeper dir
within the same tree. This is my understanding of how `find -L
DIR` works.
"""
import os
from os.path import join, isdir, islink, abspath
# We may not have read permission for top, in which case we can't
# get a list of the files the directory contains. os.path.walk
# always suppressed the exception then, rather than blow up for a
# minor reason when (say) a thousand readable directories are still
# left to visit. That logic is copied here.
try:
names = os.listdir(top)
except OSError, err:
if onerror is not None:
onerror(err)
return
dirs, nondirs = [], []
if follow_links:
for name in names:
if isdir(join(top, name)):
dirs.append(name)
else:
nondirs.append(name)
else:
for name in names:
path = join(top, name)
if islink(path):
nondirs.append(name)
elif isdir(path):
dirs.append(name)
else:
nondirs.append(name)
if topdown:
yield top, dirs, nondirs
for name in dirs:
path = join(top, name)
if follow_links and islink(path):
# Only walk this path if it links deeper in the same tree.
top_abs = abspath(top)
link_abs = abspath(join(top, os.readlink(path)))
if not link_abs.startswith(top_abs + os.sep):
continue
if '.gvfs' in name: # FIXME
continue
for x in _walk(path, topdown, onerror, follow_links=follow_links):
yield x
if not topdown:
yield top, dirs, nondirs
_NOT_SPECIFIED = ("NOT", "SPECIFIED")
def _paths_from_path_patterns(path_patterns, files=True, dirs="never",
recursive=True, includes=None, excludes=None,
skip_dupe_dirs=False,
follow_links=False,
on_error=_NOT_SPECIFIED):
"""_paths_from_path_patterns([<path-patterns>, ...]) -> file paths
Generate a list of paths (files and/or dirs) represented by the given path
patterns.
"path_patterns" is a list of paths optionally using the '*', '?' and
'[seq]' glob patterns.
"files" is boolean (default True) indicating if file paths
should be yielded
"dirs" is string indicating under what conditions dirs are
yielded. It must be one of:
never (default) never yield dirs
always yield all dirs matching given patterns
if-not-recursive only yield dirs for invocations when
recursive=False
See use cases below for more details.
"recursive" is boolean (default True) indicating if paths should
be recursively yielded under given dirs.
"includes" is a list of file patterns to include in recursive
searches.
"excludes" is a list of file and dir patterns to exclude.
(Note: This is slightly different than GNU grep's --exclude
option which only excludes *files*. I.e. you cannot exclude
a ".svn" dir.)
"skip_dupe_dirs" can be set True to watch for and skip
descending into a dir that has already been yielded. Note
that this currently does not dereference symlinks.
"follow_links" is a boolean indicating whether to follow
symlinks (default False). To guard against infinite loops
with circular dir symlinks, only dir symlinks to *deeper*
dirs are followed.
"on_error" is an error callback called when a given path pattern
matches nothing:
on_error(PATH_PATTERN)
If not specified, the default is look for a "log" global and
call:
log.error("`%s': No such file or directory")
Specify None to do nothing.
Typically this is useful for a command-line tool that takes a list
of paths as arguments. (For Unix-heads: the shell on Windows does
NOT expand glob chars, that is left to the app.)
Use case #1: like `grep -r`
{files=True, dirs='never', recursive=(if '-r' in opts)}
script FILE # yield FILE, else call on_error(FILE)
script DIR # yield nothing
script PATH* # yield all files matching PATH*; if none,
# call on_error(PATH*) callback
script -r DIR # yield files (not dirs) recursively under DIR
script -r PATH* # yield files matching PATH* and files recursively
# under dirs matching PATH*; if none, call
# on_error(PATH*) callback
Use case #2: like `file -r` (if it had a recursive option)
{files=True, dirs='if-not-recursive', recursive=(if '-r' in opts)}
script FILE # yield FILE, else call on_error(FILE)
script DIR # yield DIR, else call on_error(DIR)
script PATH* # yield all files and dirs matching PATH*; if none,
# call on_error(PATH*) callback
script -r DIR # yield files (not dirs) recursively under DIR
script -r PATH* # yield files matching PATH* and files recursively
# under dirs matching PATH*; if none, call
# on_error(PATH*) callback
Use case #3: kind of like `find .`
{files=True, dirs='always', recursive=(if '-r' in opts)}
script FILE # yield FILE, else call on_error(FILE)
script DIR # yield DIR, else call on_error(DIR)
script PATH* # yield all files and dirs matching PATH*; if none,
# call on_error(PATH*) callback
script -r DIR # yield files and dirs recursively under DIR
# (including DIR)
script -r PATH* # yield files and dirs matching PATH* and recursively
# under dirs; if none, call on_error(PATH*)
# callback
"""
from os.path import basename, exists, isdir, join, normpath, abspath, \
lexists, islink, realpath
from glob import glob
assert not isinstance(path_patterns, basestring), \
"'path_patterns' must be a sequence, not a string: %r" % path_patterns
if includes is None: includes = []
if excludes is None: excludes = []
GLOB_CHARS = '*?['
if skip_dupe_dirs:
searched_dirs = set()
for path_pattern in path_patterns:
# Determine the set of paths matching this path_pattern.
for glob_char in GLOB_CHARS:
if glob_char in path_pattern:
paths = glob(path_pattern)
break
else:
if follow_links:
paths = exists(path_pattern) and [path_pattern] or []
else:
paths = lexists(path_pattern) and [path_pattern] or []
if not paths:
if on_error is None:
pass
elif on_error is _NOT_SPECIFIED:
try:
log.error("`%s': No such file or directory", path_pattern)
except (NameError, AttributeError):
pass
else:
on_error(path_pattern)
for path in paths:
if (follow_links or not islink(path)) and isdir(path):
if skip_dupe_dirs:
canon_path = normpath(abspath(path))
if follow_links:
canon_path = realpath(canon_path)
if canon_path in searched_dirs:
continue
else:
searched_dirs.add(canon_path)
# 'includes' SHOULD affect whether a dir is yielded.
if (dirs == "always"
or (dirs == "if-not-recursive" and not recursive)
) and _should_include_path(path, includes, excludes):
yield path
# However, if recursive, 'includes' should NOT affect
# whether a dir is recursed into. Otherwise you could
# not:
# script -r --include="*.py" DIR
if recursive and _should_include_path(path, [], excludes):
for dirpath, dirnames, filenames in _walk(path,
follow_links=follow_links):
dir_indeces_to_remove = []
for i, dirname in enumerate(dirnames):
d = join(dirpath, dirname)
if skip_dupe_dirs:
canon_d = normpath(abspath(d))
if follow_links:
canon_d = realpath(canon_d)
if canon_d in searched_dirs:
dir_indeces_to_remove.append(i)
continue
else:
searched_dirs.add(canon_d)
if dirs == "always" \
and _should_include_path(d, includes, excludes):
yield d
if not _should_include_path(d, [], excludes):
dir_indeces_to_remove.append(i)
for i in reversed(dir_indeces_to_remove):
del dirnames[i]
if files:
for filename in sorted(filenames):
f = join(dirpath, filename)
if _should_include_path(f, includes, excludes):
yield f
elif files and _should_include_path(path, includes, excludes):
yield path
## end of http://code.activestate.com/recipes/577230/ }}}