diff --git a/docs/api-guide.rst b/docs/api-guide.rst new file mode 100644 index 00000000..65951a51 --- /dev/null +++ b/docs/api-guide.rst @@ -0,0 +1,69 @@ +.. _api-guide: + +API User Guide +============== + +This document provides an overview of the APIs available in exodus-gw +along with general usage information. + +For a full reference on the available endpoints, see: +:ref:`api-reference`. + +.. warning:: + + exodus-gw is in early stages of development. All APIs are subject + to backwards-incompatible changes without warning. + + +Authentication +-------------- + +Authentication is currently unimplemented in exodus-gw; any clients with +access to the service can perform all operations. + + +Uploading blobs +--------------- + +.. seealso:: `Upload API reference `_ + +.. automodule:: exodus_gw.s3.api + +Using boto3 with the upload API +............................... + +As the upload API is partially compatible with S3, it is possible to use +the AWS SDK to perform uploads. This is the recommended method of using the API. + +Use ``endpoint_url`` when creating a boto resource or client to point at exodus-gw. +Region and credentials will be ignored. + +Note that, as the upload API provides only a subset of the S3 API, many boto methods +will not work. Uploading files is supported. + +Usage: + +.. code-block:: python + + import boto3 + from botocore.config import Config + + # Prepare S3 resource pointing at exodus-gw + s3 = boto3.resource('s3', + endpoint_url='https://exodus-gw.example.com/upload', + # If SSL needs to be configured: + verify='/path/to/bundle.pem', + config=Config(client_cert=('client.crt', 'client.key'))) + + # Basic APIs such as upload_file now work as usual + bucket = s3.Bucket('exodus-cdn-dev') + bucket.upload_file('/tmp/hello.txt', + 'aec070645fe53ee3b3763059376134f058cc337247c978add178b6ccdfb0019f') + + + +Publishing content +------------------ + +exodus-gw is expected to provide an API for making uploaded content +externally accessible on the exodus CDN. This API is not yet implemented. diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 00000000..2b286ce9 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,10 @@ +.. _api-reference: + +API Reference +============= + +This page is a placeholder, expected to be overwritten with openapi/api.html +during doc build. + +If you are seeing this page in published docs, something has gone wrong, +please report it! diff --git a/docs/conf.py b/docs/conf.py index 39032fef..647e5301 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -94,7 +94,6 @@ "github_banner": True, "description": "Publishing microservice for Red Hat's Content Delivery Network", "extra_nav_links": { - "API": "api.html", "Source": "https://github.com/release-engineering/exodus-gw", "Index": "genindex.html", }, @@ -109,6 +108,9 @@ # This dir contains the generated openapi spec and the static redoc-based # api.html for viewing. Copy it alongside the other docs. +# Note: to enable sphinx linking to api.html, we also have an api.rst +# and it's expected to be overwritten here. We depend on the implementation +# detail that html_extra_path copying happens after .rst rendering. html_extra_path = ["openapi"] # Custom sidebar templates, must be a dictionary that maps document names diff --git a/docs/index.rst b/docs/index.rst index a1382991..06a1be78 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,8 +1,22 @@ exodus-gw ========= -Publishing microservice for Red Hat's Content Delivery Network +Publishing microservice for Red Hat's Content Delivery Network. .. 
toctree:: :maxdepth: 2 :caption: Contents: + + api-guide + api + +Overview +-------- + +exodus-gw (or "exodus gateway") is a microservice for uploading and publishing content +onto Red Hat's CDN. It exposes an API for use by publishing tools. + +- For general usage information on the exodus-gw API, see :ref:`api-guide`. +- For a technical reference on the exodus-gw API, see :ref:`api-reference`. +- For more general information on the CDN architecture and how exodus-gw fits in, + see `Architecture `_. diff --git a/examples/s3-upload b/examples/s3-upload new file mode 100755 index 00000000..2e4c22e0 --- /dev/null +++ b/examples/s3-upload @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +# Upload a list of files from the command-line to s3, possibly +# using a custom endpoint. +# +# This command is used as a baseline for testing s3 uploads +# via exodus-gw or via s3 directly. It can be used to compare +# performance and compatibility. +# +# Usage: +# +# # Using default S3 +# examples/s3-upload file1 [file2 [...]] +# +# # Testing same thing via exodus-gw +# uvicorn exodus_gw.gateway:app & +# examples/s3-upload --endpoint-url http://localhost:8000/upload file1 [file2 [...]] +# +# It is recommended to test using a mixture of files both smaller +# and larger than 10MB to cover both multipart and single part +# uploads. + +import boto3 +import sys +import hashlib +import argparse + + +def get_object_key(filename): + with open(filename, "rb") as f: + hasher = hashlib.sha256() + while True: + chunk = f.read(1024 * 1024 * 10) + if not chunk: + break + hasher.update(chunk) + return hasher.hexdigest() + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--endpoint-url", default=None) + parser.add_argument("--bucket", default="exodus-cdn-dev") + parser.add_argument("files", nargs="+") + + args = parser.parse_args() + s3 = boto3.resource("s3", endpoint_url=args.endpoint_url) + bucket = s3.Bucket(args.bucket) + + print( + "Using endpoint:", + "[default]" if not args.endpoint_url else args.endpoint_url, + ) + + for filename in args.files: + bucket.upload_file(filename, get_object_key(filename)) + print("Uploaded:", filename) + + +if __name__ == "__main__": + main() diff --git a/exodus_gw/__init__.py b/exodus_gw/__init__.py index 2072b875..d81923dd 100644 --- a/exodus_gw/__init__.py +++ b/exodus_gw/__init__.py @@ -1,5 +1,5 @@ # Any modules which add endpoints onto the app must be imported here -from . import gateway +from . import gateway, s3 # Per ASGI conventions, main entry point is accessible as "exodus_gw:application" from .app import app as application diff --git a/exodus_gw/s3/__init__.py b/exodus_gw/s3/__init__.py new file mode 100644 index 00000000..4dc7607e --- /dev/null +++ b/exodus_gw/s3/__init__.py @@ -0,0 +1 @@ +from . import api diff --git a/exodus_gw/s3/api.py b/exodus_gw/s3/api.py new file mode 100644 index 00000000..babfc3b2 --- /dev/null +++ b/exodus_gw/s3/api.py @@ -0,0 +1,266 @@ +"""An API for uploading binary data. + +This API provides endpoints for uploading files into the data store +used by the Exodus CDN. Uploading files does not immediately expose +them to clients of the CDN, but is a prerequisite of publishing files, +which is achieved via other APIs. + +The upload API is a partially compatible subset of the S3 API. +It supports at least enough functionality such that the boto S3 +client and resource may be used. + +Differences from the AWS S3 API include: + +- Most optional arguments are not supported. + +- All `x-amz-*` headers are omitted from responses. 
+ +- The usual AWS authentication mechanism is unused; request signatures are ignored. + Authentication is expected to be performed by other means. + +- Object keys should always be SHA256 checksums of the objects being stored, + in lowercase hex digest form. This allows the object store to be used + as content-addressable storage. + +- When uploading content, the Content-MD5 and Content-Length headers are mandatory; + chunked encoding is not supported. + +- The API may enforce stricter limits or policies on uploads than those imposed + by the AWS API. +""" +from typing import Optional +import textwrap +import logging +import os + +import aioboto3 +from fastapi import Request, Response, Path, Query, HTTPException + +from ..app import app + +from .util import extract_mpu_parts, xml_response, RequestReader + + +LOG = logging.getLogger("s3") + +# A partial TODO list for this API: +# - format of 'key' should be enforced (sha256sum) +# - a way to check if object is already uploaded, e.g. HEAD +# - limits on chunk sizes during multipart upload should be decided and enforced +# - should support configuration of the target S3 environment rather than using +# whatever's the boto default +# - {bucket} shouldn't be a parameter, as the target bucket for each exodus env +# is predefined and not controlled by the caller. +# - requests should be authenticated + + +def s3_client(): + # Certain aspects of the boto client can be tweaked by environment variables + # for development. + # This is expected to be replaced with a proper configuration system at some point. + return aioboto3.client( + "s3", endpoint_url=os.environ.get("EXODUS_GW_S3_ENDPOINT_URL") or None + ) + + +@app.post( + "/upload/{bucket}/{key}", + tags=["upload"], + summary="Create/complete multipart upload", + response_class=Response, +) +async def multipart_upload( + request: Request, + bucket: str = Path(..., description="S3 bucket name"), + key: str = Path(..., description="S3 object key"), + uploadId: Optional[str] = Query( + None, + description=textwrap.dedent( + """ + ID of an existing multi-part upload. + + If this argument is provided, it must be the ID of a multi-part upload + created previously. The upload will be validated and completed. + + Must not be passed together with ``uploads``.""" + ), + ), + uploads: Optional[str] = Query( + None, + description=textwrap.dedent( + """ + If this argument is provided, a new multi-part upload will be created + and its ID returned. The provided value should be an empty string. + + Must not be passed together with ``uploadId``.""" + ), + ), +): + """Create or complete a multi-part upload. + + To create a multi-part upload: + - include ``uploads`` in query string, with no value (e.g. 
``POST /upload/{bucket}/{key}?uploads``) + - see also: https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html + + To complete a multi-part upload: + - include ``uploadId`` in query string + - include parts with ETags in request body + - see also: https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompleteMultipartUpload.html + """ + + if uploads == "": + # Means a new upload is requested + return await create_multipart_upload(bucket, key) + + elif uploads is None and uploadId: + # Given an existing upload to complete + return await complete_multipart_upload(bucket, key, uploadId, request) + + # Caller did something wrong + raise HTTPException( + status_code=400, + detail="Invalid uploadId=%s, uploads=%s" + % (repr(uploadId), repr(uploads)), + ) + + +@app.put( + "/upload/{bucket}/{key}", + tags=["upload"], + summary="Upload bytes", + response_class=Response, +) +async def upload( + request: Request, + bucket: str = Path(..., description="S3 bucket name"), + key: str = Path(..., description="S3 object key"), + uploadId: Optional[str] = Query( + None, description="ID of an existing multi-part upload." + ), + partNumber: Optional[int] = Query( + None, description="Part number, where multi-part upload is used." + ), +): + """Write to an object, either as a standalone operation or within a multi-part upload. + + To upload an entire object: + - include all object bytes in request body + - see also: https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html + + To upload part of an object: + - provide multipart upload ID in ``uploadId`` + - provide part number from 1 to 10,000 in ``partNumber`` + - include part of an object in request body (must be at least 5MB in size, except last part) + - retain the `ETag` from the response, as it will be required to complete the upload + - see also: https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPart.html + """ + + if uploadId is None and partNumber is None: + # Single-part upload + return await object_put(bucket, key, request) + + # Multipart upload + return await multipart_put(bucket, key, uploadId, partNumber, request) + + +async def object_put(bucket: str, key: str, request: Request): + # Single-part upload handler: entire object is written via one PUT. 
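+        # Content-MD5 and Content-Length are mandatory for this endpoint (see the module + # docstring); both are taken straight from the incoming request headers and passed + # through to S3, which can then verify the received bytes against the supplied checksum.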
+ reader = RequestReader.get_reader(request) + + async with s3_client() as s3: + response = await s3.put_object( + Bucket=bucket, + Key=key, + Body=reader, + ContentMD5=request.headers["Content-MD5"], + ContentLength=int(request.headers["Content-Length"]), + ) + + return Response(headers={"ETag": response["ETag"]}) + + +async def complete_multipart_upload( + bucket: str, key: str, uploadId: str, request: Request +): + body = await request.body() + + parts = extract_mpu_parts(body) + + LOG.debug("completing mpu for parts %s", parts) + + async with s3_client() as s3: + response = await s3.complete_multipart_upload( + Bucket=bucket, + Key=key, + UploadId=uploadId, + MultipartUpload={"Parts": parts}, + ) + + LOG.debug("Completed mpu: %s", response) + return xml_response( + "CompleteMultipartUploadOutput", + Location=response["Location"], + Bucket=response["Bucket"], + Key=response["Key"], + ETag=response["ETag"], + ) + + +async def create_multipart_upload(bucket: str, key: str): + async with s3_client() as s3: + response = await s3.create_multipart_upload(Bucket=bucket, Key=key) + + return xml_response( + "CreateMultipartUploadOutput", + Bucket=response["Bucket"], + Key=response["Key"], + UploadId=response["UploadId"], + ) + + +async def multipart_put( + bucket: str, key: str, uploadId: str, partNumber: int, request: Request +): + reader = RequestReader.get_reader(request) + + async with s3_client() as s3: + response = await s3.upload_part( + Body=reader, + Bucket=bucket, + Key=key, + PartNumber=partNumber, + UploadId=uploadId, + ContentMD5=request.headers["Content-MD5"], + ContentLength=int(request.headers["Content-Length"]), + ) + + return Response(headers={"ETag": response["ETag"]}) + + +@app.delete( + "/upload/{bucket}/{key}", + tags=["upload"], + summary="Abort multipart upload", + response_description="Empty response", + response_class=Response, +) +async def abort_multipart_upload( + bucket: str = Path(..., description="S3 bucket name"), + key: str = Path(..., description="S3 object key"), + uploadId: str = Query(..., description="ID of a multipart upload"), +): + """Abort a multipart upload. + + If an upload cannot be completed, explicitly aborting it is recommended in order + to free up resources as early as possible, although this is not mandatory. + + See also: https://docs.aws.amazon.com/AmazonS3/latest/API/API_AbortMultipartUpload.html + """ + LOG.debug("Abort %s", uploadId) + + async with s3_client() as s3: + await s3.abort_multipart_upload( + Bucket=bucket, Key=key, UploadId=uploadId + ) + + return Response() diff --git a/exodus_gw/s3/util.py b/exodus_gw/s3/util.py new file mode 100644 index 00000000..15246e5b --- /dev/null +++ b/exodus_gw/s3/util.py @@ -0,0 +1,96 @@ +import io +from xml.etree.ElementTree import ElementTree, Element, SubElement + +from fastapi import Response +from defusedxml.ElementTree import fromstring + + +def extract_mpu_parts( + body: str, xmlns: str = "http://s3.amazonaws.com/doc/2006-03-01/" +): + """Extract part data from an XML-formatted CompleteMultipartUpload request. + + This function parses the request body used by this operation: + https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompleteMultipartUpload.html + + Arguments: + body (str) + Body of incoming request; expected to be a valid XML document. + xmlns (str) + Namespace used by the XML document. + + Returns: + list[dict] + A list of dicts in the format used for ``Parts`` in the boto s3 client's + ``complete_multipart_upload`` method, e.g. 
+ + [{"PartNumber": 1, "ETag": "abc123..."}, + {"PartNumber": 2, "ETag": "xxxyyy..."}, + ...] + """ + namespaces = {"s3": xmlns} + + etree = fromstring(body) + tags = etree.findall(".//s3:ETag", namespaces) + partnums = etree.findall(".//s3:PartNumber", namespaces) + + return [ + {"ETag": tag.text, "PartNumber": int(partnum.text)} + for (tag, partnum) in zip(tags, partnums) + ] + + +def xml_response(operation: str, **kwargs) -> Response: + """Get an XML response of the style used by S3 APIs. + + Arguments: + operation (str) + The name of the top-level element + (e.g. "CompleteMultipartUploadOutput") + kwargs (dict) + keys/values to include in the document. + Each item will result in a tag within the XML document. + """ + root = Element(operation) + + for (key, value) in kwargs.items(): + child = SubElement(root, key) + child.text = str(value) + + xml = io.BytesIO() + ElementTree(root).write(xml, encoding="UTF-8", xml_declaration=True) + return Response(content=xml.getvalue(), media_type="application/xml") + + +class RequestReader: + """Tiny wrapper to help pass streaming requests into aiobotocore. + + This class is a bit of a trick to work around one point where botocore + and aiobotocore are not working well together: + + - aiobotocore uses aiohttp and it fully supports accepting an async iterable + for a request body, so request.stream() should work there. + + - but, botocore performs schema validation on incoming arguments and expects + Body to be a str, bytes or file-like object, so it refuses to accept request.stream(), + even though the underlying layer can cope with it just fine. + + This wrapper makes the request stream look like a file-like object so + that boto will accept it (though note that actually *using it* as a file + would raise an error). + """ + + def __init__(self, request): + self._req = request + + def __aiter__(self): + return self._req.stream().__aiter__() + + def read(self, *_, **__): + raise NotImplementedError() + + @classmethod + def get_reader(cls, request): + # a helper to make tests easier to write. + # tests can patch over this to effectively disable streaming. 
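+        # (for example, the upload tests in this change patch + # exodus_gw.s3.util.RequestReader.get_reader and return plain bytes, + # bypassing streaming entirely)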
+ return cls(request) diff --git a/requirements.in b/requirements.in index 4d56688f..0713c766 100644 --- a/requirements.in +++ b/requirements.in @@ -1,3 +1,5 @@ fastapi pydantic starlette +aioboto3 +defusedxml diff --git a/requirements.txt b/requirements.txt index 442eb1f5..ca6f7a43 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,10 +4,92 @@ # # pip-compile --generate-hashes requirements.in # +aioboto3==8.0.5 \ + --hash=sha256:0942ca5feff0b95b96a79ba93a70042daf9304015f15f489801709173796c486 \ + --hash=sha256:909e01ad05c43c0fc5c0d3e1e1f795377c2d0091bc37d969bf2cbc46ee4545ba \ + # via -r requirements.in +aiobotocore[boto3]==1.0.4 \ + --hash=sha256:1e89ef97c52eb77d89c7c4a9130cab162ae3b89d2709c6e45da30824163ed8d3 \ + --hash=sha256:4103d90b9e162176203dc5295124b15f56c37eee0ddbcddc6929760443714ff8 \ + # via aioboto3 +aiohttp==3.6.2 \ + --hash=sha256:1e984191d1ec186881ffaed4581092ba04f7c61582a177b187d3a2f07ed9719e \ + --hash=sha256:259ab809ff0727d0e834ac5e8a283dc5e3e0ecc30c4d80b3cd17a4139ce1f326 \ + --hash=sha256:2f4d1a4fdce595c947162333353d4a44952a724fba9ca3205a3df99a33d1307a \ + --hash=sha256:32e5f3b7e511aa850829fbe5aa32eb455e5534eaa4b1ce93231d00e2f76e5654 \ + --hash=sha256:344c780466b73095a72c616fac5ea9c4665add7fc129f285fbdbca3cccf4612a \ + --hash=sha256:460bd4237d2dbecc3b5ed57e122992f60188afe46e7319116da5eb8a9dfedba4 \ + --hash=sha256:4c6efd824d44ae697814a2a85604d8e992b875462c6655da161ff18fd4f29f17 \ + --hash=sha256:50aaad128e6ac62e7bf7bd1f0c0a24bc968a0c0590a726d5a955af193544bcec \ + --hash=sha256:6206a135d072f88da3e71cc501c59d5abffa9d0bb43269a6dcd28d66bfafdbdd \ + --hash=sha256:65f31b622af739a802ca6fd1a3076fd0ae523f8485c52924a89561ba10c49b48 \ + --hash=sha256:ae55bac364c405caa23a4f2d6cfecc6a0daada500274ffca4a9230e7129eac59 \ + --hash=sha256:b778ce0c909a2653741cb4b1ac7015b5c130ab9c897611df43ae6a58523cb965 \ + # via aiobotocore +aioitertools==0.7.0 \ + --hash=sha256:341cb05a0903177ef1b73d4cc12c92aee18e81c364e0138f4efc7ec3c47b8177 \ + --hash=sha256:e931a2f0dcabd4a8446b5cc2fc71b8bb14716e6adf37728a70869213f1f741cd \ + # via aiobotocore +async-timeout==3.0.1 \ + --hash=sha256:0c3c816a028d47f659d6ff5c745cb2acf1f966da1fe5c19c77a70282b25f4c5f \ + --hash=sha256:4291ca197d287d274d0b6cb5d6f8f8f82d434ed288f962539ff18cc9012f9ea3 \ + # via aiohttp +attrs==19.3.0 \ + --hash=sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c \ + --hash=sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72 \ + # via aiohttp +boto3==1.12.32 \ + --hash=sha256:57398de1b5e074e715c866441e69f90c9468959d5743a021d8aeed04fbaa1078 \ + --hash=sha256:60ac1124597231ed36a7320547cd0d16a001bb92333ab30ad20514f77e585225 \ + # via aiobotocore +botocore==1.15.32 \ + --hash=sha256:3ea89601ee452b65084005278bd832be854cfde5166685dcb14b6c8f19d3fc6d \ + --hash=sha256:a963af564d94107787ff3d2c534e8b7aed7f12e014cdd609f8fcb17bf9d9b19a \ + # via aiobotocore, boto3, s3transfer +chardet==3.0.4 \ + --hash=sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae \ + --hash=sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691 \ + # via aiohttp +defusedxml==0.6.0 \ + --hash=sha256:6687150770438374ab581bb7a1b327a847dd9c5749e396102de3fad4e8a3ef93 \ + --hash=sha256:f684034d135af4c6cbb949b8a4d2ed61634515257a67299e5f940fbaa34377f5 \ + # via -r requirements.in +docutils==0.15.2 \ + --hash=sha256:6c4f696463b79f1fb8ba0c594b63840ebd41f059e92b31957c46b74a4599b6d0 \ + --hash=sha256:9e4d7ecfc600058e07ba661411a2b7de2fd0fafa17d1a7f7361cd47b1175c827 \ + 
--hash=sha256:a2aeea129088da402665e92e0b25b04b073c04b2dce4ab65caaa38b7ce2e1a99 \ + # via botocore fastapi==0.60.0 \ --hash=sha256:0676191f7822da4499783c22a2355d0d25442b62a47273de518e8ea51567a12b \ --hash=sha256:89eb70159b211a4d3861acfb238d066f6660b6435ec99650bbea2f8ef02526fa \ # via -r requirements.in +idna==2.10 \ + --hash=sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6 \ + --hash=sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0 \ + # via yarl +jmespath==0.10.0 \ + --hash=sha256:b85d0567b8666149a93172712e68920734333c0ce7e89b78b3e987f71e5ed4f9 \ + --hash=sha256:cdf6525904cc597730141d61b36f2e4b8ecc257c420fa2f4549bac2c2d0cb72f \ + # via boto3, botocore +multidict==4.7.6 \ + --hash=sha256:1ece5a3369835c20ed57adadc663400b5525904e53bae59ec854a5d36b39b21a \ + --hash=sha256:275ca32383bc5d1894b6975bb4ca6a7ff16ab76fa622967625baeebcf8079000 \ + --hash=sha256:3750f2205b800aac4bb03b5ae48025a64e474d2c6cc79547988ba1d4122a09e2 \ + --hash=sha256:4538273208e7294b2659b1602490f4ed3ab1c8cf9dbdd817e0e9db8e64be2507 \ + --hash=sha256:5141c13374e6b25fe6bf092052ab55c0c03d21bd66c94a0e3ae371d3e4d865a5 \ + --hash=sha256:51a4d210404ac61d32dada00a50ea7ba412e6ea945bbe992e4d7a595276d2ec7 \ + --hash=sha256:5cf311a0f5ef80fe73e4f4c0f0998ec08f954a6ec72b746f3c179e37de1d210d \ + --hash=sha256:6513728873f4326999429a8b00fc7ceddb2509b01d5fd3f3be7881a257b8d463 \ + --hash=sha256:7388d2ef3c55a8ba80da62ecfafa06a1c097c18032a501ffd4cabbc52d7f2b19 \ + --hash=sha256:9456e90649005ad40558f4cf51dbb842e32807df75146c6d940b6f5abb4a78f3 \ + --hash=sha256:c026fe9a05130e44157b98fea3ab12969e5b60691a276150db9eda71710cd10b \ + --hash=sha256:d14842362ed4cf63751648e7672f7174c9818459d169231d03c56e84daf90b7c \ + --hash=sha256:e0d072ae0f2a179c375f67e3da300b47e1a83293c554450b29c900e50afaae87 \ + --hash=sha256:f07acae137b71af3bb548bd8da720956a3bc9f9a0b87733e0899226a2317aeb7 \ + --hash=sha256:fbb77a75e529021e7c4a8d4e823d88ef4d23674a202be4f5addffc72cbb91430 \ + --hash=sha256:fcfbb44c59af3f8ea984de67ec7c306f618a3ec771c2843804069917a8f2e255 \ + --hash=sha256:feed85993dbdb1dbc29102f50bca65bdc68f2c0c8d352468c25b54874f23c39d \ + # via aiohttp, yarl pydantic==1.6.1 \ --hash=sha256:1783c1d927f9e1366e0e0609ae324039b2479a1a282a98ed6a6836c9ed02002c \ --hash=sha256:2dc946b07cf24bee4737ced0ae77e2ea6bc97489ba5a035b603bd1b40ad81f7e \ @@ -27,7 +109,50 @@ pydantic==1.6.1 \ --hash=sha256:f769141ab0abfadf3305d4fcf36660e5cf568a666dd3efab7c3d4782f70946b1 \ --hash=sha256:f8af9b840a9074e08c0e6dc93101de84ba95df89b267bf7151d74c553d66833b \ # via -r requirements.in, fastapi +python-dateutil==2.8.1 \ + --hash=sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c \ + --hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a \ + # via botocore +s3transfer==0.3.3 \ + --hash=sha256:2482b4259524933a022d59da830f51bd746db62f047d6eb213f2f8855dcb8a13 \ + --hash=sha256:921a37e2aefc64145e7b73d50c71bb4f26f46e4c9f414dc648c6245ff92cf7db \ + # via boto3 +six==1.15.0 \ + --hash=sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259 \ + --hash=sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced \ + # via python-dateutil starlette==0.13.4 \ --hash=sha256:04fe51d86fd9a594d9b71356ed322ccde5c9b448fc716ac74155e5821a922f8d \ --hash=sha256:0fb4b38d22945b46acb880fedee7ee143fd6c0542992501be8c45c0ed737dd1a \ # via -r requirements.in, fastapi +typing-extensions==3.7.4.2 \ + --hash=sha256:6e95524d8a547a91e08f404ae485bbb71962de46967e1b71a0cb89af24e761c5 \ + 
--hash=sha256:79ee589a3caca649a9bfd2a8de4709837400dfa00b6cc81962a1e6a1815969ae \ + --hash=sha256:f8d2bd89d25bc39dabe7d23df520442fa1d8969b82544370e03d88b5a591c392 \ + # via aioitertools +urllib3==1.25.9 \ + --hash=sha256:3018294ebefce6572a474f0604c2021e33b3fd8006ecd11d62107a5d2a963527 \ + --hash=sha256:88206b0eb87e6d677d424843ac5209e3fb9d0190d0ee169599165ec25e9d9115 \ + # via botocore +wrapt==1.12.1 \ + --hash=sha256:b62ffa81fb85f4332a4f609cab4ac40709470da05643a082ec1eb88e6d9b97d7 \ + # via aiobotocore +yarl==1.4.2 \ + --hash=sha256:0c2ab325d33f1b824734b3ef51d4d54a54e0e7a23d13b86974507602334c2cce \ + --hash=sha256:0ca2f395591bbd85ddd50a82eb1fde9c1066fafe888c5c7cc1d810cf03fd3cc6 \ + --hash=sha256:2098a4b4b9d75ee352807a95cdf5f10180db903bc5b7270715c6bbe2551f64ce \ + --hash=sha256:25e66e5e2007c7a39541ca13b559cd8ebc2ad8fe00ea94a2aad28a9b1e44e5ae \ + --hash=sha256:26d7c90cb04dee1665282a5d1a998defc1a9e012fdca0f33396f81508f49696d \ + --hash=sha256:308b98b0c8cd1dfef1a0311dc5e38ae8f9b58349226aa0533f15a16717ad702f \ + --hash=sha256:3ce3d4f7c6b69c4e4f0704b32eca8123b9c58ae91af740481aa57d7857b5e41b \ + --hash=sha256:58cd9c469eced558cd81aa3f484b2924e8897049e06889e8ff2510435b7ef74b \ + --hash=sha256:5b10eb0e7f044cf0b035112446b26a3a2946bca9d7d7edb5e54a2ad2f6652abb \ + --hash=sha256:6faa19d3824c21bcbfdfce5171e193c8b4ddafdf0ac3f129ccf0cdfcb083e462 \ + --hash=sha256:944494be42fa630134bf907714d40207e646fd5a94423c90d5b514f7b0713fea \ + --hash=sha256:a161de7e50224e8e3de6e184707476b5a989037dcb24292b391a3d66ff158e70 \ + --hash=sha256:a4844ebb2be14768f7994f2017f70aca39d658a96c786211be5ddbe1c68794c1 \ + --hash=sha256:c2b509ac3d4b988ae8769901c66345425e361d518aecbe4acbfc2567e416626a \ + --hash=sha256:c9959d49a77b0e07559e579f38b2f3711c2b8716b8410b320bf9713013215a1b \ + --hash=sha256:d8cdee92bc930d8b09d8bd2043cedd544d9c8bd7436a77678dd602467a993080 \ + --hash=sha256:e15199cdb423316e15f108f51249e44eb156ae5dba232cb73be555324a1d49c2 \ + # via aiohttp diff --git a/test-requirements.txt b/test-requirements.txt index 3bd27a2f..f10f00da 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,11 +1,47 @@ -pytest==5.4.3 --hash=sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1 -importlib-metadata==1.6.1 --hash=sha256:15ec6c0fd909e893e3a08b3a7c76ecb149122fb14b7efe1199ddd4c7c57ea958 -py==1.8.1 --hash=sha256:c20fdd83a5dbc0af9efd622bee9a5564e278f6380fffcacc43ba6f43db2813b0 -pluggy==0.13.1 --hash=sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d -attrs==19.3.0 --hash=sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c -packaging==20.4 --hash=sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181 -more-itertools==8.3.0 --hash=sha256:7818f596b1e87be009031c7653d01acc46ed422e6656b394b0f765ce66ed4982 -wcwidth==0.2.4 --hash=sha256:79375666b9954d4a1a10739315816324c3e73110af9d0e102d906fdb0aec009f -zipp==3.1.0 --hash=sha256:aa36550ff0c0b7ef7fa639055d797116ee891440eac1a56f378e2d3179e0320b -pyparsing==2.4.7 --hash=sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b -six==1.15.0 --hash=sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced +# +# This file is autogenerated by pip-compile +# To update, run: +# +# pip-compile --generate-hashes test-requirements.txt +# +attrs==19.3.0 \ + --hash=sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c \ + # via -r test-requirements.txt, pytest +importlib-metadata==1.6.1 \ + --hash=sha256:15ec6c0fd909e893e3a08b3a7c76ecb149122fb14b7efe1199ddd4c7c57ea958 \ + # 
via -r test-requirements.txt, pluggy, pytest +mock==4.0.2 \ + --hash=sha256:3f9b2c0196c60d21838f307f5825a7b86b678cedc58ab9e50a8988187b4d81e0 \ + --hash=sha256:dd33eb70232b6118298d516bbcecd26704689c386594f0f3c4f13867b2c56f72 \ + # via -r test-requirements.txt +more-itertools==8.3.0 \ + --hash=sha256:7818f596b1e87be009031c7653d01acc46ed422e6656b394b0f765ce66ed4982 \ + # via -r test-requirements.txt, pytest +packaging==20.4 \ + --hash=sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181 \ + # via -r test-requirements.txt, pytest +pluggy==0.13.1 \ + --hash=sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d \ + # via -r test-requirements.txt, pytest +py==1.8.1 \ + --hash=sha256:c20fdd83a5dbc0af9efd622bee9a5564e278f6380fffcacc43ba6f43db2813b0 \ + # via -r test-requirements.txt, pytest +pyparsing==2.4.7 \ + --hash=sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b \ + # via -r test-requirements.txt, packaging +pytest-asyncio==0.14.0 \ + --hash=sha256:2eae1e34f6c68fc0a9dc12d4bea190483843ff4708d24277c41568d6b6044f1d \ + --hash=sha256:9882c0c6b24429449f5f969a5158b528f39bde47dc32e85b9f0403965017e700 \ + # via -r test-requirements.txt +pytest==5.4.3 \ + --hash=sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1 \ + # via -r test-requirements.txt, pytest-asyncio +six==1.15.0 \ + --hash=sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced \ + # via -r test-requirements.txt, packaging +wcwidth==0.2.4 \ + --hash=sha256:79375666b9954d4a1a10739315816324c3e73110af9d0e102d906fdb0aec009f \ + # via -r test-requirements.txt, pytest +zipp==3.1.0 \ + --hash=sha256:aa36550ff0c0b7ef7fa639055d797116ee891440eac1a56f378e2d3179e0320b \ + # via -r test-requirements.txt, importlib-metadata diff --git a/tests/s3/test_extract_mpu_parts.py b/tests/s3/test_extract_mpu_parts.py new file mode 100644 index 00000000..530bcdf3 --- /dev/null +++ b/tests/s3/test_extract_mpu_parts.py @@ -0,0 +1,29 @@ +from exodus_gw.s3.util import extract_mpu_parts + + +def test_typical_body(): + """extract_mpu_parts can extract data from a typical request body.""" + + body = """ + + + + someval + 123 + + + + "otherval" + 234 + + + """.strip() + + parts = extract_mpu_parts(body) + + # It should accurately parse content from the request + assert parts == [ + {"ETag": "someval", "PartNumber": 123}, + {"ETag": '"otherval"', "PartNumber": 234}, + ] diff --git a/tests/s3/test_manage_mpu.py b/tests/s3/test_manage_mpu.py new file mode 100644 index 00000000..8e4c376e --- /dev/null +++ b/tests/s3/test_manage_mpu.py @@ -0,0 +1,155 @@ +import mock +import pytest +import textwrap + +from fastapi import HTTPException + +from exodus_gw.s3.api import multipart_upload, abort_multipart_upload, upload +from exodus_gw.s3.util import xml_response + +TEST_KEY = "b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c" + + +@pytest.fixture(autouse=True) +def mock_s3_client(): + with mock.patch("aioboto3.client") as mock_client: + s3_client = mock.AsyncMock() + s3_client.__aenter__.return_value = s3_client + mock_client.return_value = s3_client + yield s3_client + + +@pytest.mark.asyncio +async def test_create_mpu(mock_s3_client): + """Creating a multipart upload is delegated correctly to S3.""" + + mock_s3_client.create_multipart_upload.return_value = { + "Bucket": "my-bucket", + "Key": TEST_KEY, + "UploadId": "my-great-upload", + } + + response = await multipart_upload( + None, bucket="my-bucket", key=TEST_KEY, uploads="", + ) + + # It should delegate request to 
real S3 + mock_s3_client.create_multipart_upload.assert_called_once_with( + Bucket="my-bucket", Key=TEST_KEY, + ) + + # It should succeed + assert response.status_code == 200 + + # It should be XML + assert response.headers["content-type"] == "application/xml" + + # It should include the appropriate data + expected = xml_response( + "CreateMultipartUploadOutput", + Bucket="my-bucket", + Key=TEST_KEY, + UploadId="my-great-upload", + ).body + assert response.body == expected + + +@pytest.mark.asyncio +async def test_complete_mpu(mock_s3_client): + """Completing a multipart upload is delegated correctly to S3.""" + + mock_s3_client.complete_multipart_upload.return_value = { + "Location": "https://example.com/some-object", + "Bucket": "my-bucket", + "Key": TEST_KEY, + "ETag": "my-better-etag", + } + + # Need some valid request body to complete an MPU + request = mock.AsyncMock() + request.body.return_value = textwrap.dedent( + """ + + + + tagA + 1 + + + tagB + 2 + + + """ + ).strip() + + response = await multipart_upload( + request=request, + bucket="my-bucket", + key=TEST_KEY, + uploadId="my-better-upload", + uploads=None, + ) + + # It should delegate request to real S3 + mock_s3_client.complete_multipart_upload.assert_called_once_with( + Bucket="my-bucket", + Key=TEST_KEY, + UploadId="my-better-upload", + MultipartUpload={ + "Parts": [ + {"ETag": "tagA", "PartNumber": 1}, + {"ETag": "tagB", "PartNumber": 2}, + ] + }, + ) + + # It should succeed + assert response.status_code == 200 + + # It should be XML + assert response.headers["content-type"] == "application/xml" + + # It should include the appropriate data + expected = xml_response( + "CompleteMultipartUploadOutput", + Location="https://example.com/some-object", + Bucket="my-bucket", + Key=TEST_KEY, + ETag="my-better-etag", + ).body + assert response.body == expected + + +@pytest.mark.asyncio +async def test_bad_mpu_call(mock_s3_client): + """Mixing uploadId and uploads arguments gives a validation error.""" + + with pytest.raises(HTTPException) as exc_info: + await multipart_upload( + request=None, + bucket="my-bucket", + key=TEST_KEY, + uploadId="oops", + uploads="not valid to mix these args", + ) + + assert exc_info.value.status_code == 400 + + +@pytest.mark.asyncio +async def test_abort_mpu(mock_s3_client): + """Aborting a multipart upload is correctly delegated to S3.""" + + response = await abort_multipart_upload( + bucket="my-bucket", key=TEST_KEY, uploadId="my-lame-upload", + ) + + # It should delegate the request to real S3 + mock_s3_client.abort_multipart_upload.assert_called_once_with( + Bucket="my-bucket", Key=TEST_KEY, UploadId="my-lame-upload", + ) + + # It should be a successful, empty response + assert response.status_code == 200 + assert response.body == b"" diff --git a/tests/s3/test_request_reader.py b/tests/s3/test_request_reader.py new file mode 100644 index 00000000..7725e907 --- /dev/null +++ b/tests/s3/test_request_reader.py @@ -0,0 +1,37 @@ +import pytest + +from exodus_gw.s3.util import RequestReader + + +class FakeRequest: + def __init__(self, chunks): + self._chunks = chunks + + async def stream(self): + for chunk in self._chunks: + yield chunk + + +@pytest.mark.asyncio +async def test_iterate_stream(): + request = FakeRequest([b"first ", b"second ", b"third"]) + + reader = RequestReader.get_reader(request) + + # Can read the request + content = b"".join([chunk async for chunk in reader]) + assert content == b"first second third" + + # Can read the request a second time + content_reread = b"".join([chunk async 
for chunk in reader]) + assert content == content_reread + + +def test_no_blocking_operations(): + """read blocking API is not available.""" + + request = FakeRequest([]) + + reader = RequestReader.get_reader(request) + with pytest.raises(NotImplementedError): + reader.read(123) diff --git a/tests/s3/test_upload.py b/tests/s3/test_upload.py new file mode 100644 index 00000000..28b012ad --- /dev/null +++ b/tests/s3/test_upload.py @@ -0,0 +1,116 @@ +import mock +import pytest +import textwrap + +from fastapi import HTTPException + +from exodus_gw.s3.api import multipart_upload, abort_multipart_upload, upload +from exodus_gw.s3.util import xml_response + +TEST_KEY = "b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c" + + +@pytest.fixture(autouse=True) +def mock_s3_client(): + with mock.patch("aioboto3.client") as mock_client: + s3_client = mock.AsyncMock() + s3_client.__aenter__.return_value = s3_client + mock_client.return_value = s3_client + yield s3_client + + +@pytest.fixture() +def mock_request_reader(): + # We don't use the real request reader for these tests as it becomes + # rather complicated to verify that boto methods were called with the + # correct expected value. The class is tested separately. + with mock.patch("exodus_gw.s3.util.RequestReader.get_reader") as m: + yield m + + +@pytest.mark.asyncio +async def test_full_upload(mock_s3_client, mock_request_reader): + """Uploading a complete object is delegated correctly to S3.""" + + mock_s3_client.put_object.return_value = { + "ETag": "a1b2c3", + } + + request = mock.Mock( + headers={ + "Content-MD5": "9d0568469d206c1aedf1b71f12f474bc", + "Content-Length": "10", + } + ) + mock_request_reader.return_value = b"some bytes" + + response = await upload( + request=request, + bucket="my-bucket", + key=TEST_KEY, + uploadId=None, + partNumber=None, + ) + + # It should delegate request to real S3 + mock_s3_client.put_object.assert_called_once_with( + Bucket="my-bucket", + Key=TEST_KEY, + Body=b"some bytes", + ContentMD5="9d0568469d206c1aedf1b71f12f474bc", + ContentLength=10, + ) + + # It should succeed + assert response.status_code == 200 + + # It should return the ETag + assert response.headers["ETag"] == "a1b2c3" + + # It should have an empty body + assert response.body == b"" + + +@pytest.mark.asyncio +async def test_part_upload(mock_s3_client, mock_request_reader): + """Uploading part of an object is delegated correctly to S3.""" + + mock_s3_client.upload_part.return_value = { + "ETag": "aabbcc", + } + + request = mock.Mock( + headers={ + "Content-MD5": "e8b7c279de413b7b15f44bf71a796f95", + "Content-Length": "10", + } + ) + mock_request_reader.return_value = b"best bytes" + + response = await upload( + request=request, + bucket="my-bucket", + key=TEST_KEY, + uploadId="my-best-upload", + partNumber=88, + ) + + # It should delegate request to real S3 + mock_s3_client.upload_part.assert_called_once_with( + Bucket="my-bucket", + Key=TEST_KEY, + Body=b"best bytes", + PartNumber=88, + UploadId="my-best-upload", + ContentMD5="e8b7c279de413b7b15f44bf71a796f95", + ContentLength=10, + ) + + # It should succeed + assert response.status_code == 200 + + # It should return the ETag + assert response.headers["ETag"] == "aabbcc" + + # It should have an empty body + assert response.body == b"" diff --git a/tests/s3/test_xml_response.py b/tests/s3/test_xml_response.py new file mode 100644 index 00000000..8200df00 --- /dev/null +++ b/tests/s3/test_xml_response.py @@ -0,0 +1,25 @@ +import textwrap + +from exodus_gw.s3.util import 
xml_response + + +def test_typical_response(): + """xml_response creates response objects with the expected content.""" + + response = xml_response("SomeOperation", Foo="Bar", Baz=123) + + # It should be successful + assert response.status_code == 200 + + # It should be XML + assert response.media_type == "application/xml" + + # It should look like this + body = response.body.decode("utf-8") + expected = textwrap.dedent( + """ + <?xml version='1.0' encoding='UTF-8'?> + <SomeOperation><Foo>Bar</Foo><Baz>123</Baz></SomeOperation> + """ + ).strip() + assert body == expected
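
As a companion to the boto3 ``upload_file`` example in ``docs/api-guide.rst``, here is a minimal sketch of driving the multipart endpoints added in ``exodus_gw/s3/api.py`` by hand with boto3's low-level client. The endpoint URL, bucket name, file path and part size are illustrative assumptions, and the sketch presumes boto3 still sends the Content-MD5 and Content-Length headers this API requires; in practice ``upload_file`` manages the whole flow automatically.

.. code-block:: python

    import hashlib

    import boto3

    # Assumed values, mirroring the examples elsewhere in this change.
    ENDPOINT = "https://exodus-gw.example.com/upload"
    BUCKET = "exodus-cdn-dev"
    PART_SIZE = 10 * 1024 * 1024  # every part except the last must be at least 5MB

    s3 = boto3.client("s3", endpoint_url=ENDPOINT)

    with open("/tmp/hello.txt", "rb") as f:
        data = f.read()

    # Object keys are the SHA256 checksum of the content, in lowercase hex.
    key = hashlib.sha256(data).hexdigest()

    # Create the multipart upload (POST /upload/{bucket}/{key}?uploads).
    upload_id = s3.create_multipart_upload(Bucket=BUCKET, Key=key)["UploadId"]

    # Upload each part (PUT ...?uploadId=...&partNumber=N), keeping the returned ETags.
    parts = []
    for number, start in enumerate(range(0, len(data), PART_SIZE), start=1):
        part = s3.upload_part(
            Bucket=BUCKET,
            Key=key,
            UploadId=upload_id,
            PartNumber=number,
            Body=data[start : start + PART_SIZE],
        )
        parts.append({"PartNumber": number, "ETag": part["ETag"]})

    # Complete the upload (POST ...?uploadId=...) with the collected parts.
    s3.complete_multipart_upload(
        Bucket=BUCKET,
        Key=key,
        UploadId=upload_id,
        MultipartUpload={"Parts": parts},
    )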