Browse files

Merge pull request #134 from mdomsch/cleanup/filedict

Split md5/hardlink stuff out of SortedDict into FileDict
  • Loading branch information...
2 parents 12acd8a + 7800900 commit 454c20e563b8e38e5ca64b27e441b86dcc4c1396 @mludvig mludvig committed Mar 8, 2013
Showing with 63 additions and 50 deletions.
  1. +53 −0 S3/FileDict.py
  2. +7 −7 S3/FileLists.py
  3. +0 −41 S3/SortedDict.py
  4. +3 −2 s3cmd
View
53 S3/FileDict.py
@@ -0,0 +1,53 @@
+## Amazon S3 manager
+## Author: Michal Ludvig <michal@logix.cz>
+## http://www.logix.cz/michal
+## License: GPL Version 2
+
+from SortedDict import SortedDict
+import Utils
+
class FileDict(SortedDict):
    """SortedDict of per-file entries with md5 and hardlink bookkeeping.

    Values stored in the mapping are per-file entries (dict-like); the methods
    below read the entry keys 'md5', 'dev', 'inode' and 'full_name'.
    Two side indexes are kept next to the mapping itself:

      hardlinks -- { dev: { inode: {'md5': md5, 'relative_files': set()} } }
      by_md5    -- { md5: set(relative_files) }
    """

    def __init__(self, mapping = {}, ignore_case = True, **kwargs):
        """Create the mapping via SortedDict and start with empty indexes.

        The signature mirrors SortedDict.__init__. The shared default
        'mapping' is only forwarded to SortedDict.__init__; this class
        never mutates it.
        """
        SortedDict.__init__(self, mapping = mapping, ignore_case = ignore_case, **kwargs)
        self.hardlinks = dict() # { dev: { inode : {'md5':, 'relative_files':}}}
        self.by_md5 = dict() # {md5: set(relative_files)}

    def record_md5(self, relative_file, md5):
        """Add relative_file to the set of files known to have this md5."""
        self.by_md5.setdefault(md5, set()).add(relative_file)

    def find_md5_one(self, md5):
        """Return one relative file recorded under md5, or None if there is none.

        When several files share the md5, which one is returned is arbitrary
        (set iteration order).
        """
        relative_files = self.by_md5.get(md5)
        if not relative_files:
            # Unknown md5 (or an empty set): nothing to offer.
            return None
        return next(iter(relative_files))

    def get_md5(self, relative_file):
        """returns md5 if it can, or raises IOError if file is unreadable

        Lookup order: the md5 already stored on the entry, then the md5
        recorded for the entry's (dev, inode) hardlink group, and finally a
        fresh hash of entry['full_name'] via Utils.hash_file_md5.
        A newly obtained md5 is stored on the entry and indexed in by_md5.
        """
        entry = self[relative_file]
        if 'md5' in entry:
            return entry['md5']
        md5 = self.get_hardlink_md5(relative_file)
        if md5 is None:
            # No hardlinked sibling has been hashed yet - hash the file itself.
            md5 = Utils.hash_file_md5(entry['full_name'])
        self.record_md5(relative_file, md5)
        entry['md5'] = md5
        return md5

    def record_hardlink(self, relative_file, dev, inode, md5):
        """Register relative_file as a name of the file identified by (dev, inode).

        The md5 is stored only when the (dev, inode) pair is seen for the
        first time; later calls just add relative_file to the existing group.
        """
        inodes = self.hardlinks.setdefault(dev, dict())
        if inode not in inodes:
            inodes[inode] = dict(md5=md5, relative_files=set())
        inodes[inode]['relative_files'].add(relative_file)

    def get_hardlink_md5(self, relative_file):
        """Return the md5 recorded for the entry's (dev, inode), or None.

        Raises KeyError if relative_file is not in the mapping or its entry
        has no 'dev' / 'inode' key.
        """
        entry = self[relative_file]
        dev = entry['dev']
        inode = entry['inode']
        try:
            return self.hardlinks[dev][inode]['md5']
        except KeyError:
            # This (dev, inode) pair has not been recorded yet.
            return None
View
14 S3/FileLists.py
@@ -6,7 +6,7 @@
from S3 import S3
from Config import Config
from S3Uri import S3Uri
-from SortedDict import SortedDict
+from FileDict import FileDict
from Utils import *
from Exceptions import ParameterError
from HashCache import HashCache
@@ -58,7 +58,7 @@ def _fswalk_no_symlinks(path):
def filter_exclude_include(src_list):
info(u"Applying --exclude/--include")
cfg = Config()
- exclude_list = SortedDict(ignore_case = False)
+ exclude_list = FileDict(ignore_case = False)
for file in src_list.keys():
debug(u"CHECK: %s" % file)
excluded = False
@@ -224,7 +224,7 @@ def _maintain_cache(cache, local_list):
info(u"No cache file found, creating it.")
local_uris = []
- local_list = SortedDict(ignore_case = False)
+ local_list = FileDict(ignore_case = False)
single_file = False
if type(args) not in (list, tuple):
@@ -284,15 +284,15 @@ def _get_filelist_remote(remote_uri, recursive = True):
rem_base = rem_base[:rem_base.rfind('/')+1]
remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
rem_base_len = len(rem_base)
- rem_list = SortedDict(ignore_case = False)
+ rem_list = FileDict(ignore_case = False)
break_now = False
for object in response['list']:
if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep:
## We asked for one file and we got that file :-)
key = os.path.basename(object['Key'])
object_uri_str = remote_uri_original.uri()
break_now = True
- rem_list = SortedDict(ignore_case = False) ## Remove whatever has already been put to rem_list
+ rem_list = FileDict(ignore_case = False) ## Remove whatever has already been put to rem_list
else:
key = object['Key'][rem_base_len:] ## Beware - this may be '' if object['Key']==rem_base !!
object_uri_str = remote_uri.uri() + key
@@ -314,7 +314,7 @@ def _get_filelist_remote(remote_uri, recursive = True):
cfg = Config()
remote_uris = []
- remote_list = SortedDict(ignore_case = False)
+ remote_list = FileDict(ignore_case = False)
if type(args) not in (list, tuple):
args = [args]
@@ -436,7 +436,7 @@ def _compare(src_list, dst_lst, src_remote, dst_remote, file):
## Items left on src_list will be transferred
## Items left on update_list will be transferred after src_list
## Items left on copy_pairs will be copied from dst1 to dst2
- update_list = SortedDict(ignore_case = False)
+ update_list = FileDict(ignore_case = False)
## Items left on dst_list will be deleted
copy_pairs = []
View
41 S3/SortedDict.py
@@ -27,8 +27,6 @@ def __init__(self, mapping = {}, ignore_case = True, **kwargs):
"""
dict.__init__(self, mapping, **kwargs)
self.ignore_case = ignore_case
- self.hardlinks = dict() # { dev: { inode : {'md5':, 'relative_files':}}}
- self.by_md5 = dict() # {md5: set(relative_files)}
def keys(self):
keys = dict.keys(self)
@@ -49,45 +47,6 @@ def __iter__(self):
return SortedDictIterator(self, self.keys())
- def record_md5(self, relative_file, md5):
- if md5 not in self.by_md5:
- self.by_md5[md5] = set()
- self.by_md5[md5].add(relative_file)
-
- def find_md5_one(self, md5):
- try:
- return list(self.by_md5.get(md5, set()))[0]
- except:
- return None
-
- def get_md5(self, relative_file):
- """returns md5 if it can, or raises IOError if file is unreadable"""
- md5 = None
- if 'md5' in self[relative_file]:
- return self[relative_file]['md5']
- md5 = self.get_hardlink_md5(relative_file)
- if md5 is None:
- md5 = Utils.hash_file_md5(self[relative_file]['full_name'])
- self.record_md5(relative_file, md5)
- self[relative_file]['md5'] = md5
- return md5
-
- def record_hardlink(self, relative_file, dev, inode, md5):
- if dev not in self.hardlinks:
- self.hardlinks[dev] = dict()
- if inode not in self.hardlinks[dev]:
- self.hardlinks[dev][inode] = dict(md5=md5, relative_files=set())
- self.hardlinks[dev][inode]['relative_files'].add(relative_file)
-
- def get_hardlink_md5(self, relative_file):
- md5 = None
- dev = self[relative_file]['dev']
- inode = self[relative_file]['inode']
- try:
- md5 = self.hardlinks[dev][inode]['md5']
- except:
- pass
- return md5
if __name__ == "__main__":
d = { 'AWS' : 1, 'Action' : 2, 'america' : 3, 'Auckland' : 4, 'America' : 5 }
View
5 s3cmd
@@ -911,7 +911,7 @@ def local_copy(copy_pairs, destination_base):
# Do NOT hardlink local files by default, that'd be silly
# For instance all empty files would become hardlinked together!
- failed_copy_list = SortedDict()
+ failed_copy_list = FileDict()
for (src_obj, dst1, relative_file) in copy_pairs:
src_file = os.path.join(destination_base, dst1)
dst_file = os.path.join(destination_base, relative_file)
@@ -1076,7 +1076,7 @@ def cmd_sync_local2remote(args):
## Make remote_key same as local_key for comparison if we're dealing with only one file
remote_list_entry = remote_list[remote_list.keys()[0]]
# Flush remote_list, by the way
- remote_list = SortedDict()
+ remote_list = FileDict()
remote_list[local_list.keys()[0]] = remote_list_entry
local_list, remote_list, update_list, copy_pairs = compare_filelists(local_list, remote_list, src_remote = False, dst_remote = True, delay_updates = cfg.delay_updates)
@@ -2079,6 +2079,7 @@ if __name__ == '__main__':
from S3.S3 import S3
from S3.Config import Config
from S3.SortedDict import SortedDict
+ from S3.FileDict import FileDict
from S3.S3Uri import S3Uri
from S3 import Utils
from S3.Utils import *

0 comments on commit 454c20e

Please sign in to comment.