Skip to content

Loading…

Adding encryption to sync command #12

Closed
wants to merge 4 commits into from

3 participants

@firstclown

I added encryption to the sync command by storing extra metadata for encrypted files, the original file's md5 and the original file's size. There needs to be a HEAD call on every resource now for a sync, but it shouldn't happen on a straight get or put.

@mludvig

Hi, thanks for your work. I'm keen to merge such a functionality, however the overhead of calling HEAD every time seems to be too huge. Many people run s3cmd on buckets with millions of files and some others (me) run it in on a remote South Pacific island with a high latency to S3 datacentres.

I would much prefer to store the attributes locally, for example in .s3cmd.info in every directory which could be a Python Pickle file, or sqlite3 database file or something like that. Alternatively store the attributes in xattr on filesystems that support it (most current Linux/Unix filesystems do). That way s3cmd would need to make the HEAD calls only if the required attributes couldn't be found locally.

How does that sound? Are you ok to implement that?

Thanks!

@firstclown
@firstclown

I'm going to close this request and work on the new way of doing it. I also shouldn't have been working in master anyway, so I'm going to refactor my git branches so this won't cause problems in the future. I'll re-request a pull when I'm finished.

@firstclown firstclown closed this
@vsespb

I know this ticket is closed now.

But it looks like implementing it in a similar way is not a good idea: the md5 of the original, unencrypted file is stored in the x-amz-meta-s3tools-orig_md5 header, right?
IMHO it's a security problem.

1.
http://stackoverflow.com/questions/2845986/does-having-an-unencrypted-sha-224-checksum-create-a-vulnerability

2.
http://www.cs.jhu.edu/~astubble/dss/winzip.pdf
«Due to a security flaw in AE-1 (CRC of plaintext is included in unencrypted format in the output), it was replaced by AE-2 in WinZip 9.0 Beta 3.»

@vsespb vsespb referenced this pull request in vsespb/mt-aws-glacier
Open

Encryption #43

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Nov 8, 2011
  1. @firstclown

    Adding encryption to sync command

    firstclown committed
    Need to call HEAD on each resource though
  2. @firstclown
Commits on Nov 9, 2011
  1. @firstclown
  2. @firstclown
This page is out of date. Refresh to see the latest.
Showing with 24 additions and 15 deletions.
  1. +1 −0 S3/Config.py
  2. +10 −5 S3/FileLists.py
  3. +13 −10 s3cmd
View
1 S3/Config.py
@@ -39,6 +39,7 @@ class Config(object):
proxy_host = ""
proxy_port = 3128
encrypt = False
+ temp_location = "/tmp/tmpfile-"
dry_run = False
preserve_attrs = True
preserve_attrs_list = [
View
15 S3/FileLists.py
@@ -195,13 +195,17 @@ def _get_filelist_remote(remote_uri, recursive = True):
key = object['Key'][rem_base_len:] ## Beware - this may be '' if object['Key']==rem_base !!
object_uri_str = remote_uri.uri() + key
rem_list[key] = {
- 'size' : int(object['Size']),
- 'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-(
- 'md5' : object['ETag'][1:-1],
'object_key' : object['Key'],
'object_uri_str' : object_uri_str,
'base_uri' : remote_uri,
}
+ if require_attribs:
+ response = S3(cfg).object_info(S3Uri( object_uri_str ))
+ rem_list[key].update({
+ 'size' : int(response['headers']['x-amz-meta-s3tools-orig_size'].strip('"\'')) if response['headers'].has_key('x-amz-meta-s3tools-orig_size') else int(response['headers']['content-length']),
+ 'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-(
+ 'md5': response['headers']['x-amz-meta-s3tools-orig_md5'].strip('"\'') if response['headers'].has_key('x-amz-meta-s3tools-orig_md5') else response['headers']['etag'].strip('"\''),
+ })
if break_now:
break
return rem_list
@@ -260,8 +264,9 @@ def _get_filelist_remote(remote_uri, recursive = True):
if require_attribs:
response = S3(cfg).object_info(uri)
remote_item.update({
- 'size': int(response['headers']['content-length']),
- 'md5': response['headers']['etag'].strip('"\''),
+ 'size':
+ int(response['headers']['x-amz-meta-s3tools-orig_size'].strip('"\'')) if response['headers'].has_key('x-amz-meta-s3tools-orig_size') else int(response['headers']['content-length']),
+ 'md5': response['headers']['x-amz-meta-s3tools-orig_md5'].strip('"\'') if response['headers'].has_key('x-amz-meta-s3tools-orig_md5') else response['headers']['etag'].strip('"\''),
'timestamp' : dateRFC822toUnix(response['headers']['date'])
})
remote_list[key] = remote_item
View
23 s3cmd
@@ -294,7 +294,7 @@ def cmd_object_put(args):
full_name = full_name_orig
seq_label = "[%d of %d]" % (seq, local_count)
if Config().encrypt:
- exitcode, full_name, extra_headers["x-amz-meta-s3tools-gpgenc"] = gpg_encrypt(full_name_orig)
+ exitcode, full_name, extra_headers["x-amz-meta-s3tools-gpgenc"], extra_headers["x-amz-meta-s3tools-orig_md5"], extra_headers["x-amz-meta-s3tools-orig_size"] = gpg_encrypt(full_name_orig)
try:
response = s3.object_put(full_name, uri_final, extra_headers, extra_label = seq_label)
except S3UploadError, e:
@@ -746,6 +746,9 @@ def cmd_sync_remote2local(args):
dst_stream = open(dst_file, "wb")
response = s3.object_get(uri, dst_stream, extra_label = seq_label)
dst_stream.close()
+ if response["headers"].has_key("x-amz-meta-s3tools-gpgenc"):
+ gpg_decrypt(destination, response["headers"]["x-amz-meta-s3tools-gpgenc"])
+ response["size"] = os.stat(destination)[6]
if response['headers'].has_key('x-amz-meta-s3cmd-attrs') and cfg.preserve_attrs:
attrs = _parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs'])
if attrs.has_key('mode'):
@@ -837,12 +840,6 @@ def cmd_sync_local2remote(args):
s3 = S3(cfg)
- if cfg.encrypt:
- error(u"S3cmd 'sync' doesn't yet support GPG encryption, sorry.")
- error(u"Either use unconditional 's3cmd put --recursive'")
- error(u"or disable encryption with --no-encrypt parameter.")
- sys.exit(1)
-
## Normalize URI to convert s3://bkt to s3://bkt/ (trailing slash)
destination_base_uri = S3Uri(args[-1])
if destination_base_uri.type != 's3':
@@ -919,6 +916,8 @@ def cmd_sync_local2remote(args):
attr_header = _build_attr_header(src)
debug(u"attr_header: %s" % attr_header)
extra_headers.update(attr_header)
+ if cfg.encrypt:
+ exitcode, src, extra_headers["x-amz-meta-s3tools-gpgenc"], extra_headers["x-amz-meta-s3tools-orig_md5"], extra_headers["x-amz-meta-s3tools-orig_size"] = gpg_encrypt(src)
response = s3.object_put(src, uri, extra_headers, extra_label = seq_label)
except InvalidFileError, e:
warning(u"File can not be uploaded: %s" % e)
@@ -1162,7 +1161,7 @@ def gpg_command(command, passphrase = ""):
return p_exitcode
def gpg_encrypt(filename):
- tmp_filename = Utils.mktmpfile()
+ tmp_filename = Utils.mktmpfile( cfg.temp_location )
args = {
"gpg_command" : cfg.gpg_command,
"passphrase_fd" : "0",
@@ -1172,7 +1171,9 @@ def gpg_encrypt(filename):
info(u"Encrypting file %(input_file)s to %(output_file)s..." % args)
command = resolve_list(cfg.gpg_encrypt.split(" "), args)
code = gpg_command(command, cfg.gpg_passphrase)
- return (code, tmp_filename, "gpg")
+ orig_md5 = Utils.hash_file_md5(filename);
+ orig_size = os.stat_result(os.lstat(filename)).st_size;
+ return (code, tmp_filename, "gpg", orig_md5, orig_size)
def gpg_decrypt(filename, gpgenc_header = "", in_place = True):
tmp_filename = Utils.mktmpfile(filename)
@@ -1285,13 +1286,14 @@ def run_configure(config_file, args):
ret_dec = gpg_decrypt(ret_enc[1], ret_enc[2], False)
hash = [
Utils.hash_file_md5(filename),
+ ret_enc[3],
Utils.hash_file_md5(ret_enc[1]),
Utils.hash_file_md5(ret_dec[1]),
]
os.unlink(filename)
os.unlink(ret_enc[1])
os.unlink(ret_dec[1])
- if hash[0] == hash[2] and hash[0] != hash[1]:
+ if hash[0] == hash[3] and hash[0] != hash[2] and hash[0] == hash[1]:
output ("Success. Encryption and decryption worked fine :-)")
else:
raise Exception("Encryption verification error.")
@@ -1486,6 +1488,7 @@ def main():
optparser.add_option("-e", "--encrypt", dest="encrypt", action="store_true", help="Encrypt files before uploading to S3.")
optparser.add_option( "--no-encrypt", dest="encrypt", action="store_false", help="Don't encrypt files.")
+ optparser.add_option( "--temp-location", dest="temp_location", metavar="FOLDER", help="Location to store temporary files for encrypt. Add trailing / to signify directory and leave off to signify file prefix. Defaults to /tmp/tmpfile-")
optparser.add_option("-f", "--force", dest="force", action="store_true", help="Force overwrite and other dangerous operations.")
optparser.add_option( "--continue", dest="get_continue", action="store_true", help="Continue getting a partially downloaded file (only for [get] command).")
optparser.add_option( "--skip-existing", dest="skip_existing", action="store_true", help="Skip over files that exist at the destination (only for [get] and [sync] commands).")
Something went wrong with that request. Please try again.