Fix object ACLs for multipart uploads; avoid modifying XML files stored in S3 (#1814)
whummer authored Nov 28, 2019
1 parent 03c69b1 · commit 9f7df7b
Showing 3 changed files with 74 additions and 13 deletions.
28 changes: 22 additions & 6 deletions localstack/services/s3/s3_listener.py
@@ -446,6 +446,20 @@ def expand_redirect_url(starting_url, key, bucket):
    return redirect_url


def is_bucket_specified_in_domain_name(path, headers):
    host = headers.get('host', '')
    return re.match(r'.*s3(\-website)?\.([^\.]+\.)?amazonaws.com', host)


def is_object_specific_request(path, headers):
    """ Return whether the given request is specific to a certain S3 object.
        Note: the bucket name is usually specified as a path parameter,
        but may also be part of the domain name! """
    bucket_in_domain = is_bucket_specified_in_domain_name(path, headers)
    parts = len(path.split('/'))
    return parts > (1 if bucket_in_domain else 2)
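
# For context (not part of this commit): how the path-parts heuristic above
# plays out, assuming typical path-style and virtual-hosted-style requests:
#
#   is_object_specific_request('/my-bucket/my-key', {'host': 'localhost:4572'})
#       -> 3 path parts > 2  -> object-specific
#   is_object_specific_request('/my-bucket', {'host': 'localhost:4572'})
#       -> 2 path parts      -> bucket-level
#   is_object_specific_request('/my-key', {'host': 'my-bucket.s3.amazonaws.com'})
#       -> bucket is in the domain, so 2 path parts > 1  -> object-specific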


def normalize_bucket_name(bucket_name):
    bucket_name = bucket_name or ''
    # AWS appears to automatically convert upper to lower case chars in bucket names
@@ -761,17 +775,19 @@ def return_response(self, method, path, data, headers, response):
if param_name in query_map:
    response.headers[header_name] = query_map[param_name][0]

# We need to un-pretty-print the XML, otherwise we run into this issue with Spark:
# https://github.com/jserver/mock-s3/pull/9/files
# https://github.com/localstack/localstack/issues/183
# Note: we still need to make sure there is a newline after the first line: <?xml ...>\n
if response_content_str and response_content_str.startswith('<'):
    is_bytes = isinstance(response._content, six.binary_type)
    response._content = response_content_str

    append_last_modified_headers(response=response, content=response_content_str)

    # un-pretty-print the XML
    response._content = re.sub(r'([^\?])>\n\s*<', r'\1><', response_content_str, flags=re.MULTILINE)
    # We need to un-pretty-print the XML, otherwise we run into this issue with Spark:
    # https://github.com/jserver/mock-s3/pull/9/files
    # https://github.com/localstack/localstack/issues/183
    # Note: we still need to make sure there is a newline after the first line: <?xml ...>\n
    # Note: make sure to return XML docs verbatim: https://github.com/localstack/localstack/issues/1037
    if method != 'GET' or not is_object_specific_request(path, headers):
        response._content = re.sub(r'([^\?])>\n\s*<', r'\1><', response_content_str, flags=re.MULTILINE)
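    # For illustration (not part of this commit): the substitution collapses
    # pretty-printed XML onto a single line, while the ([^\?]) guard keeps the
    # newline after the '<?xml ...?>' declaration intact, e.g.:
    #
    #   >>> re.sub(r'([^\?])>\n\s*<', r'\1><', '<?xml version="1.0"?>\n<a>\n  <b>1</b>\n</a>')
    #   '<?xml version="1.0"?>\n<a><b>1</b></a>'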

    # update Location information in response payload
    response._content = self._update_location(response._content, bucket_name)
35 changes: 30 additions & 5 deletions localstack/services/s3/s3_starter.py
@@ -18,6 +18,9 @@
# max file size for S3 objects (in MB)
S3_MAX_FILE_SIZE_MB = 2048

# temporary state
TMP_STATE = {}


def check_s3(expect_shutdown=False, print_error=False):
    out = None
@@ -56,19 +59,41 @@ def init(self, name, value, storage='STANDARD', etag=None,
original_init = s3_models.FakeKey.__init__
s3_models.FakeKey.__init__ = init

def s3_update_acls(self, request, query, bucket_name, key_name):
    # fix for https://github.com/localstack/localstack/issues/1733
    # and https://github.com/localstack/localstack/issues/1170
    acl_key = 'acl|%s|%s' % (bucket_name, key_name)
    acl = self._acl_from_headers(request.headers)
    if acl:
        TMP_STATE[acl_key] = acl
    if not query.get('uploadId'):
        return
    bucket = self.backend.get_bucket(bucket_name)
    key = bucket and self.backend.get_key(bucket_name, key_name)
    if not key:
        return
    acl = acl or TMP_STATE.pop(acl_key, None) or bucket.acl
    if acl:
        key.set_acl(acl)
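    # Note (not part of this commit): the fallback chain above means a completed
    # multipart upload receives, in order of precedence, the ACL from the current
    # request headers, the ACL stashed in TMP_STATE when the upload was initiated,
    # or the bucket's default ACL.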

def s3_key_response_post(self, request, body, bucket_name, query, key_name, *args, **kwargs):
    result = s3_key_response_post_orig(request, body, bucket_name, query, key_name, *args, **kwargs)
    if query.get('uploadId'):
        # fix for https://github.com/localstack/localstack/issues/1733
        key = self.backend.get_key(bucket_name, key_name)
        acl = self._acl_from_headers(request.headers) or self.backend.get_bucket(bucket_name).acl
        key.set_acl(acl)
        s3_update_acls(self, request, query, bucket_name, key_name)
    return result

s3_key_response_post_orig = s3_responses.S3ResponseInstance._key_response_post
s3_responses.S3ResponseInstance._key_response_post = types.MethodType(
    s3_key_response_post, s3_responses.S3ResponseInstance)

def s3_key_response_put(self, request, body, bucket_name, query, key_name, headers, *args, **kwargs):
    result = s3_key_response_put_orig(request, body, bucket_name, query, key_name, headers, *args, **kwargs)
    s3_update_acls(self, request, query, bucket_name, key_name)
    return result

s3_key_response_put_orig = s3_responses.S3ResponseInstance._key_response_put
s3_responses.S3ResponseInstance._key_response_put = types.MethodType(
    s3_key_response_put, s3_responses.S3ResponseInstance)
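
# For context (not part of this commit): a minimal sketch of the monkey-patching
# pattern used above - types.MethodType binds a plain function to an existing
# instance, so the patched method receives the same 'self' as the original:
#
#   import types
#
#   class Greeter:
#       def greet(self):
#           return 'hello'
#
#   greet_orig = Greeter.greet
#
#   def greet_patched(self):
#       return greet_orig(self) + ', world'
#
#   g = Greeter()
#   g.greet = types.MethodType(greet_patched, g)
#   assert g.greet() == 'hello, world'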


def main():
    setup_logging()
24 changes: 22 additions & 2 deletions tests/integration/test_s3.py
@@ -151,6 +151,26 @@ def test_s3_multipart_upload_with_small_single_part(self):
    self.sqs_client.delete_queue(QueueUrl=queue_url)
    self._delete_bucket(TEST_BUCKET_WITH_NOTIFICATION, [key_by_path])

def test_s3_multipart_upload_acls(self):
    bucket_name = 'test-bucket-%s' % short_uid()
    self.s3_client.create_bucket(Bucket=bucket_name, ACL='public-read')

    def check_permissions(key, expected_perms):
        grants = self.s3_client.get_object_acl(Bucket=bucket_name, Key=key)['Grants']
        grants = [g for g in grants if 'AllUsers' in g.get('Grantee', {}).get('URI', '')]
        self.assertEquals(len(grants), 1)
        permissions = grants[0]['Permission']
        permissions = permissions if isinstance(permissions, list) else [permissions]
        self.assertEquals(len(permissions), expected_perms)

    # perform uploads (multipart and regular) and check ACLs
    self.s3_client.put_object(Bucket=bucket_name, Key='acl-key0', Body='something')
    check_permissions('acl-key0', 1)
    self._perform_multipart_upload(bucket=bucket_name, key='acl-key1')
    check_permissions('acl-key1', 1)
    self._perform_multipart_upload(bucket=bucket_name, key='acl-key2', acl='public-read-write')
    check_permissions('acl-key2', 2)
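
    # For reference (not part of this commit): the Grants structure that
    # check_permissions() filters, assuming the usual boto3 response shape
    # for an object with a 'public-read' ACL:
    #
    #   {'Grants': [
    #       {'Grantee': {'Type': 'Group',
    #                    'URI': 'http://acs.amazonaws.com/groups/global/AllUsers'},
    #        'Permission': 'READ'},
    #       ...],
    #    'Owner': {...}}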

def test_s3_presigned_url_upload(self):
    key_by_path = 'key-by-hostname'
    queue_url, queue_attributes = self._create_test_queue()
@@ -583,8 +603,8 @@ def _delete_bucket(self, bucket_name, keys):
    self.s3_client.delete_bucket(Bucket=bucket_name)

def _perform_multipart_upload(self, bucket, key, data=None, zip=False, acl=None):
    acl = acl or 'private'
    multipart_upload_dict = self.s3_client.create_multipart_upload(Bucket=bucket, Key=key, ACL=acl)
    kwargs = {'ACL': acl} if acl else {}
    multipart_upload_dict = self.s3_client.create_multipart_upload(Bucket=bucket, Key=key, **kwargs)
    uploadId = multipart_upload_dict['UploadId']

    # Write contents to memory rather than a file.
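
    # A minimal sketch (not part of this commit) of how the truncated remainder
    # of this helper presumably finishes the upload - upload_part() plus
    # complete_multipart_upload(), with a hypothetical single part number:
    #
    #   part = self.s3_client.upload_part(Bucket=bucket, Key=key, UploadId=uploadId,
    #                                     PartNumber=1, Body=data or b'test data')
    #   self.s3_client.complete_multipart_upload(
    #       Bucket=bucket, Key=key, UploadId=uploadId,
    #       MultipartUpload={'Parts': [{'ETag': part['ETag'], 'PartNumber': 1}]})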
