Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 503 lines (445 sloc) 20.248 kB
111b7a6 @mludvig * s3cmd, S3/FileLists.py: Move file/object listing functions
mludvig authored
1 ## Create and compare lists of files/objects
2 ## Author: Michal Ludvig <michal@logix.cz>
3 ## http://www.logix.cz/michal
4 ## License: GPL Version 2
5
6 from S3 import S3
7 from Config import Config
8 from S3Uri import S3Uri
9 from SortedDict import SortedDict
10 from Utils import *
16d5faf @mludvig Import S3.Exceptions.ParameterError
mludvig authored
11 from Exceptions import ParameterError
488c956 @mdomsch add local tree MD5 caching
authored
12 from HashCache import HashCache
111b7a6 @mludvig * s3cmd, S3/FileLists.py: Move file/object listing functions
mludvig authored
13
14 from logging import debug, info, warning, error
15
16 import os
17 import glob
d609665 @mdomsch Apply excludes/includes at local os.walk() time
authored
18 import copy
111b7a6 @mludvig * s3cmd, S3/FileLists.py: Move file/object listing functions
mludvig authored
19

## Public names exported by "from S3.FileLists import *"
__all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include", "parse_attrs_header"]
111b7a6 @mludvig * s3cmd, S3/FileLists.py: Move file/object listing functions
mludvig authored
21
def _fswalk_follow_symlinks(path):
    '''
    Walk filesystem, following symbolic links (but without recursion), on python2.4 and later

    Yields (dirpath, dirnames, filenames) tuples like os.walk(), with the
    --exclude/--include rules applied to directory names along the way.
    NOTE(review): no cycle detection is performed here, so a symlink loop
    will walk the same tree repeatedly.
    '''
    assert os.path.isdir(path) # only designed for directory argument
    ## First pass: collect the walk roots - the given path plus every
    ## symlinked directory found anywhere beneath it.
    roots = [path]
    for dirpath, dirnames, _unused_files in os.walk(path):
        handle_exclude_include_walk(dirpath, dirnames, [])
        for entry in dirnames:
            candidate = os.path.join(dirpath, entry)
            if os.path.islink(candidate):
                roots.append(candidate)
    ## Second pass: walk each collected root and hand the tuples on.
    for root in roots:
        for walk_tuple in os.walk(root):
            handle_exclude_include_walk(walk_tuple[0], walk_tuple[1], [])
            yield walk_tuple
111b7a6 @mludvig * s3cmd, S3/FileLists.py: Move file/object listing functions
mludvig authored
40
def _fswalk(path, follow_symlinks):
    '''
    Directory tree generator

    path (str) is the root of the directory tree to walk

    follow_symlinks (bool) indicates whether to descend into symbolically linked directories

    Yields (dirpath, dirnames, filenames) tuples, os.walk() style, with the
    --exclude/--include rules already applied to each tuple.
    '''
    if follow_symlinks:
        ## BUG FIX: the original "yield _fswalk_follow_symlinks(path)"
        ## yielded the generator object itself (a single bogus item) and
        ## then fell through to the non-following os.walk() below,
        ## walking the tree a second time without following symlinks.
        for dirpath, dirnames, filenames in _fswalk_follow_symlinks(path):
            yield (dirpath, dirnames, filenames)
        return
    for dirpath, dirnames, filenames in os.walk(path):
        handle_exclude_include_walk(dirpath, dirnames, filenames)
        yield (dirpath, dirnames, filenames)
111b7a6 @mludvig * s3cmd, S3/FileLists.py: Move file/object listing functions
mludvig authored
54
def filter_exclude_include(src_list):
    """
    Split src_list according to the --exclude/--include rules in Config.

    Entries matching an exclude pattern (and not rescued by an include
    pattern) are moved from src_list into a separate SortedDict.

    Returns (src_list, exclude_list).
    """
    info(u"Applying --exclude/--include")
    cfg = Config()
    exclude_list = SortedDict(ignore_case = False)
    for file in src_list.keys():
        debug(u"CHECK: %s" % file)
        is_excluded = False
        for pattern in cfg.exclude:
            if pattern.search(file):
                is_excluded = True
                debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[pattern]))
                break
        if is_excluded:
            ## An --include pattern may override the exclusion
            for pattern in cfg.include:
                if pattern.search(file):
                    is_excluded = False
                    debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[pattern]))
                    break
        if not is_excluded:
            debug(u"PASS: %s" % (file))
            continue
        ## Still excluded - move the entry over to exclude_list
        debug(u"EXCLUDE: %s" % file)
        exclude_list[file] = src_list[file]
        del(src_list[file])
    return src_list, exclude_list
111b7a6 @mludvig * s3cmd, S3/FileLists.py: Move file/object listing functions
mludvig authored
83
d609665 @mdomsch Apply excludes/includes at local os.walk() time
authored
def handle_exclude_include_walk(root, dirs, files):
    """
    Prune 'dirs' and 'files' (in place) according to --exclude/--include.

    root: directory currently being walked (as in os.walk())
    dirs, files: lists from os.walk(); excluded entries are removed in
        place, so os.walk() will not descend into pruned directories.

    Returns None - mutation of the passed lists is the whole effect.
    """
    cfg = Config()

    def _is_excluded(path):
        ## True if 'path' matches an --exclude pattern and is not rescued
        ## by an --include pattern.  Factored out of the two near-identical
        ## copies the original code carried for dirs and files.
        debug(u"CHECK: %s" % path)
        excluded = False
        for r in cfg.exclude:
            if r.search(path):
                excluded = True
                debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
                break
        if excluded:
            ## No need to check for --include if not excluded
            for r in cfg.include:
                if r.search(path):
                    excluded = False
                    debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
                    break
        return excluded

    # exclude dir matches in the current directory
    # this prevents us from recursing down trees we know we want to ignore
    for x in copy.copy(dirs):
        ## trailing separator so patterns like "dir/" match directories
        d = os.path.join(root, x, '')
        if _is_excluded(d):
            debug(u"EXCLUDE: %s" % d)
            dirs.remove(x)
        else:
            debug(u"PASS: %s" % (d))

    # exclude file matches in the current directory
    for x in copy.copy(files):
        f = os.path.join(root, x)
        if _is_excluded(f):
            debug(u"EXCLUDE: %s" % f)
            files.remove(x)
        else:
            debug(u"PASS: %s" % (f))
139
111b7a6 @mludvig * s3cmd, S3/FileLists.py: Move file/object listing functions
mludvig authored
def fetch_local_list(args, recursive = None):
    """
    Compile a SortedDict of local files to be transferred.

    args: a local path string or file:// S3Uri, or a list/tuple of them
    recursive: descend into directories; defaults to cfg.recursive when None

    Returns (local_list, single_file) - single_file is True only when the
    user specified exactly one URI and it named a plain file (or stdin "-").
    Raises ParameterError for non-file URIs or a directory without
    --recursive.
    """
    def _get_filelist_local(loc_list, local_uri, cache):
        ## Walk local_uri and add an entry per regular file into loc_list.
        info(u"Compiling list of local files...")

        if deunicodise(local_uri.basename()) == "-":
            ## Upload from stdin - represented by a pseudo-entry "-".
            ## BUG FIX: the original rebound a brand new SortedDict here,
            ## which the caller discarded (it ignores the returned list and
            ## uses the loc_list it passed in) - stdin uploads therefore
            ## produced an empty file list.  Mutate loc_list in place.
            loc_list["-"] = {
                'full_name_unicode' : '-',
                'full_name' : '-',
                'size' : -1,
                'mtime' : -1,
            }
            return loc_list, True
        if local_uri.isdir():
            local_base = deunicodise(local_uri.basename())
            local_path = deunicodise(local_uri.path())
            filelist = _fswalk(local_path, cfg.follow_symlinks)
            single_file = False
        else:
            local_base = ""
            local_path = deunicodise(local_uri.dirname())
            filelist = [( local_path, [], [deunicodise(local_uri.basename())] )]
            single_file = True
        for root, dirs, files in filelist:
            rel_root = root.replace(local_path, local_base, 1)
            for f in files:
                full_name = os.path.join(root, f)
                if not os.path.isfile(full_name):
                    continue
                if os.path.islink(full_name):
                    if not cfg.follow_symlinks:
                        continue
                relative_file = unicodise(os.path.join(rel_root, f))
                if os.path.sep != "/":
                    # Convert non-unix dir separators to '/'
                    relative_file = "/".join(relative_file.split(os.path.sep))
                if cfg.urlencoding_mode == "normal":
                    relative_file = replace_nonprintables(relative_file)
                if relative_file.startswith('./'):
                    relative_file = relative_file[2:]
                ## os.lstat() already returns an os.stat_result - the old
                ## os.stat_result(os.lstat(...)) wrapper was redundant.
                sr = os.lstat(full_name)
                loc_list[relative_file] = {
                    'full_name_unicode' : unicodise(full_name),
                    'full_name' : full_name,
                    'size' : sr.st_size,
                    'mtime' : sr.st_mtime,
                    'dev'   : sr.st_dev,
                    'inode' : sr.st_ino,
                    'uid' : sr.st_uid,
                    'gid' : sr.st_gid,
                    'sr': sr # save it all, may need it in preserve_attrs_list
                    ## TODO: Possibly more to save here...
                }
                if 'md5' in cfg.sync_checks:
                    ## Try the dev/inode/mtime/size cache first to avoid file I/O
                    md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
                    if md5 is None:
                        md5 = loc_list.get_md5(relative_file) # this does the file I/O
                        cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5)
                    loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5)
        return loc_list, single_file

    def _maintain_cache(cache, local_list):
        ## Drop cache entries whose files no longer exist, then persist.
        if cfg.cache_file:
            cache.mark_all_for_purge()
            for i in local_list.keys():
                cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'], local_list[i]['mtime'], local_list[i]['size'])
            cache.purge()
            cache.save(cfg.cache_file)

    cfg = Config()

    cache = HashCache()
    if cfg.cache_file:
        try:
            cache.load(cfg.cache_file)
        except IOError:
            info(u"No cache file found, creating it.")

    local_uris = []
    local_list = SortedDict(ignore_case = False)
    single_file = False

    if not isinstance(args, (list, tuple)):
        args = [args]

    if recursive is None:
        recursive = cfg.recursive

    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 'file':
            raise ParameterError("Expecting filename or directory instead of: %s" % arg)
        if uri.isdir() and not recursive:
            raise ParameterError("Use --recursive to upload a directory: %s" % arg)
        local_uris.append(uri)

    for uri in local_uris:
        ## _get_filelist_local() accumulates into local_list in place
        list_for_uri, single_file = _get_filelist_local(local_list, uri, cache)

    ## Single file is True if and only if the user
    ## specified one local URI and that URI represents
    ## a FILE. Ie it is False if the URI was of a DIR
    ## and that dir contained only one FILE. That's not
    ## a case of single_file==True.
    if len(local_list) > 1:
        single_file = False

    _maintain_cache(cache, local_list)

    return local_list, single_file
111b7a6 @mludvig * s3cmd, S3/FileLists.py: Move file/object listing functions
mludvig authored
250
def fetch_remote_list(args, require_attribs = False, recursive = None):
    """
    Compile a SortedDict of remote S3 objects to be transferred.

    args: an s3:// URI string or S3Uri, or a list/tuple of them
        (may contain '*' / '?' wildcards in the non-recursive case)
    require_attribs: when True, fetch size/md5/timestamp via object_info()
        for single-object (non-recursive, non-wildcard) entries
    recursive: defaults to cfg.recursive when None

    Returns a SortedDict keyed by relative object path.
    Raises ParameterError for non-S3 URIs or a URI with no filename when
    not recursive.
    """
    def _get_filelist_remote(remote_uri, recursive = True):
        ## If remote_uri ends with '/' then all remote files will have
        ## the remote_uri prefix removed in the relative path.
        ## If, on the other hand, the remote_uri ends with something else
        ## (probably alphanumeric symbol) we'll use the last path part
        ## in the relative path.
        ##
        ## Complicated, eh? See an example:
        ## _get_filelist_remote("s3://bckt/abc/def") may yield:
        ## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} }
        ## _get_filelist_remote("s3://bckt/abc/def/") will yield:
        ## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} }
        ## Furthermore a prefix-magic can restrict the return list:
        ## _get_filelist_remote("s3://bckt/abc/def/x") yields:
        ## { 'xyz/blah.txt' : {} }

        info(u"Retrieving list of remote files for %s ..." % remote_uri)

        s3 = S3(Config())
        response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = recursive)

        rem_base_original = rem_base = remote_uri.object()
        remote_uri_original = remote_uri
        if rem_base != '' and rem_base[-1] != '/':
            rem_base = rem_base[:rem_base.rfind('/')+1]
            remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
        rem_base_len = len(rem_base)
        rem_list = SortedDict(ignore_case = False)
        break_now = False
        for object in response['list']:
            ## BUG FIX: S3 keys always use '/' as separator - comparing
            ## against os.path.sep broke this test on Windows.
            if object['Key'] == rem_base_original and object['Key'][-1] != "/":
                ## We asked for one file and we got that file :-)
                key = os.path.basename(object['Key'])
                object_uri_str = remote_uri_original.uri()
                break_now = True
                ## BUG FIX: this must stay a SortedDict - the plain "{}"
                ## used previously has no record_md5() method and crashed
                ## a few lines below.
                rem_list = SortedDict(ignore_case = False) ## Remove whatever has already been put to rem_list
            else:
                key = object['Key'][rem_base_len:] ## Beware - this may be '' if object['Key']==rem_base !!
                object_uri_str = remote_uri.uri() + key
            rem_list[key] = {
                'size' : int(object['Size']),
                'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-(
                'md5' : object['ETag'][1:-1],
                'object_key' : object['Key'],
                'object_uri_str' : object_uri_str,
                'base_uri' : remote_uri,
                'dev' : None,
                'inode' : None,
            }
            md5 = object['ETag'][1:-1]
            rem_list.record_md5(key, md5)
            if break_now:
                break
        return rem_list

    cfg = Config()
    remote_uris = []
    remote_list = SortedDict(ignore_case = False)

    if not isinstance(args, (list, tuple)):
        args = [args]

    if recursive is None:
        recursive = cfg.recursive

    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 's3':
            raise ParameterError("Expecting S3 URI instead of '%s'" % arg)
        remote_uris.append(uri)

    if recursive:
        for uri in remote_uris:
            objectlist = _get_filelist_remote(uri)
            for key in objectlist:
                remote_list[key] = objectlist[key]
                remote_list.record_md5(key, objectlist.get_md5(key))
    else:
        for uri in remote_uris:
            uri_str = str(uri)
            ## Wildcards used in remote URI?
            ## If yes we'll need a bucket listing...
            if uri_str.find('*') > -1 or uri_str.find('?') > -1:
                first_wildcard = uri_str.find('*')
                first_questionmark = uri_str.find('?')
                if first_questionmark > -1 and first_questionmark < first_wildcard:
                    first_wildcard = first_questionmark
                prefix = uri_str[:first_wildcard]
                rest = uri_str[first_wildcard+1:]
                ## Only request recursive listing if the 'rest' of the URI,
                ## i.e. the part after first wildcard, contains '/'
                need_recursion = rest.find('/') > -1
                objectlist = _get_filelist_remote(S3Uri(prefix), recursive = need_recursion)
                for key in objectlist:
                    ## Check whether the 'key' matches the requested wildcards
                    if glob.fnmatch.fnmatch(objectlist[key]['object_uri_str'], uri_str):
                        remote_list[key] = objectlist[key]
            else:
                ## No wildcards - simply append the given URI to the list
                key = os.path.basename(uri.object())
                if not key:
                    raise ParameterError(u"Expecting S3 URI with a filename or --recursive: %s" % uri.uri())
                remote_item = {
                    'base_uri': uri,
                    'object_uri_str': unicode(uri),
                    'object_key': uri.object()
                }
                if require_attribs:
                    response = S3(cfg).object_info(uri)
                    remote_item.update({
                        'size': int(response['headers']['content-length']),
                        'md5': response['headers']['etag'].strip('"\''),
                        'timestamp' : dateRFC822toUnix(response['headers']['date'])
                    })
                    # get md5 from header if it's present. We would have set that during upload
                    if response['headers'].has_key('x-amz-meta-s3cmd-attrs'):
                        attrs = parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs'])
                        if attrs.has_key('md5'):
                            remote_item.update({'md5': attrs['md5']})

                remote_list[key] = remote_item
    return remote_list
111b7a6 @mludvig * s3cmd, S3/FileLists.py: Move file/object listing functions
mludvig authored
374
1703df7 @mdomsch Handle hardlinks and duplicate files
authored
def parse_attrs_header(attrs_header):
    """
    Parse an 'x-amz-meta-s3cmd-attrs' metadata header value.

    The header is a '/'-separated list of 'key:value' pairs, e.g.
    "uid:0/gid:0/mode:33188/md5:d41d8cd98f00b204e9800998ecf8427e".

    Returns a dict mapping each key to its (string) value.
    Raises ValueError if a field contains no ':' at all.
    """
    attrs = {}
    for attr in attrs_header.split("/"):
        ## Split on the first ':' only, so values that themselves contain
        ## a ':' are preserved instead of raising ValueError.
        key, val = attr.split(":", 1)
        attrs[key] = val
    return attrs
381
382
c3deb6a @mdomsch add --delay-updates option
authored
def compare_filelists(src_list, dst_list, src_remote, dst_remote, delay_updates = False):
    """
    Compare source and destination file lists and decide what to transfer.

    src_list, dst_list: SortedDicts as produced by fetch_local_list() /
        fetch_remote_list(); both are mutated in place.
    src_remote, dst_remote: booleans saying which side lives on S3
        (local->local sync is not supported, see the assert below).
    delay_updates: not referenced in this body - presumably consumed by
        the caller via the returned update_list; TODO confirm.

    Returns (src_list, dst_list, update_list, copy_pairs):
      - src_list:    new files to transfer
      - dst_list:    files present only on the destination (deletions)
      - update_list: changed files, to transfer after src_list
      - copy_pairs:  (dst1, relative_file) pairs to copy within the
                     destination instead of re-transferring
    """
    def __direction_str(is_remote):
        # "remote"/"local" label for the debug message below
        return is_remote and "remote" or "local"

    def _compare(src_list, dst_lst, src_remote, dst_remote, file):
        """Return True if src_list[file] matches dst_list[file], else False"""
        attribs_match = True
        if not (src_list.has_key(file) and dst_list.has_key(file)):
            info(u"file does not exist in one side or the other: src_list=%s, dst_list=%s" % (src_list.has_key(file), dst_list.has_key(file)))
            return False

        ## check size first
        if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
            debug(u"xfer: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
            attribs_match = False

        ## check md5
        compare_md5 = 'md5' in cfg.sync_checks
        # Multipart-uploaded files don't have a valid md5 sum - it ends with "...-nn"
        if compare_md5:
            if (src_remote == True and src_list[file]['md5'].find("-") >= 0) or (dst_remote == True and dst_list[file]['md5'].find("-") >= 0):
                compare_md5 = False
                info(u"disabled md5 check for %s" % file)
        if attribs_match and compare_md5:
            try:
                ## get_md5() may do file I/O on the local side and can raise
                src_md5 = src_list.get_md5(file)
                dst_md5 = dst_list.get_md5(file)
            except (IOError,OSError), e:
                # md5 sum verification failed - ignore that file altogether
                debug(u"IGNR: %s (disappeared)" % (file))
                warning(u"%s: file disappeared, ignoring." % (file))
                raise

            if src_md5 != dst_md5:
                ## checksums are different.
                attribs_match = False
                debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))

        return attribs_match

    # we don't support local->local sync, use 'rsync' or something like that instead ;-)
    assert(not(src_remote == False and dst_remote == False))

    info(u"Verifying attributes...")
    cfg = Config()
    ## Items left on src_list will be transferred
    ## Items left on update_list will be transferred after src_list
    ## Items left on copy_pairs will be copied from dst1 to dst2
    update_list = SortedDict(ignore_case = False)
    ## Items left on dst_list will be deleted
    copy_pairs = []

    debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote)))

    ## NOTE: entries are deleted from src_list/dst_list while iterating;
    ## safe here because keys() materializes the key list up front.
    for relative_file in src_list.keys():
        debug(u"CHECK: %s" % (relative_file))

        if dst_list.has_key(relative_file):
            ## Was --skip-existing requested?
            if cfg.skip_existing:
                debug(u"IGNR: %s (used --skip-existing)" % (relative_file))
                del(src_list[relative_file])
                del(dst_list[relative_file])
                continue

            try:
                compare_result = _compare(src_list, dst_list, src_remote, dst_remote, relative_file)
            except (IOError,OSError), e:
                ## file disappeared during the compare - drop it from both sides
                del(src_list[relative_file])
                del(dst_list[relative_file])
                continue

            if compare_result:
                ## both sides match - nothing to do for this file
                debug(u"IGNR: %s (transfer not needed)" % relative_file)
                del(src_list[relative_file])
                del(dst_list[relative_file])

            else:
                # look for matching file in src
                md5 = src_list.get_md5(relative_file)
                if md5 is not None and dst_list.by_md5.has_key(md5):
                    # Found one, we want to copy
                    dst1 = list(dst_list.by_md5[md5])[0]
                    debug(u"REMOTE COPY src: %s -> %s" % (dst1, relative_file))
                    copy_pairs.append((dst1, relative_file))
                    del(src_list[relative_file])
                    del(dst_list[relative_file])
                else:
                    # record that we will get this file transferred to us (before all the copies), so if we come across it later again,
                    # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
                    dst_list.record_md5(relative_file, md5)
                    update_list[relative_file] = src_list[relative_file]
                    del src_list[relative_file]
                    del dst_list[relative_file]

        else:
            # dst doesn't have this file
            # look for matching file elsewhere in dst
            md5 = src_list.get_md5(relative_file)
            dst1 = dst_list.find_md5_one(md5)
            if dst1 is not None:
                # Found one, we want to copy
                debug(u"REMOTE COPY dst: %s -> %s" % (dst1, relative_file))
                copy_pairs.append((dst1, relative_file))
                del(src_list[relative_file])
            else:
                # we don't have this file, and we don't have a copy of this file elsewhere. Get it.
                # record that we will get this file transferred to us (before all the copies), so if we come across it later again,
                # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
                dst_list.record_md5(relative_file, md5)

    for f in dst_list.keys():
        if not src_list.has_key(f) and not update_list.has_key(f):
            # leave only those not on src_list + update_list
            del dst_list[f]

    return src_list, dst_list, update_list, copy_pairs
d439efb @mludvig ATTENTION -- Mega WhiteSpace conversion !!!
mludvig authored
501
502 # vim:et:ts=4:sts=4:ai
Something went wrong with that request. Please try again.