diff --git a/.travis.yml b/.travis.yml index 24c9f724..983f400f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,12 @@ os: linux language: python +addons: + apt: + packages: + # for docs + - graphviz + install: pip install tox jobs: diff --git a/docs/arch.gv b/docs/arch.gv new file mode 100644 index 00000000..498c43a5 --- /dev/null +++ b/docs/arch.gv @@ -0,0 +1,139 @@ +digraph { + ranksep="1.4"; + + # These are arranged and labelled to communicate the + # sequence of events when a request is processed. + # Try to keep them in this order. + client:sw -> controller [ + xlabel=<
1
> + ] + + controller:sw -> origin_request [ + xlabel=<
2
> + ] + + origin_request -> db [ + xlabel=<
3
>, + dir=both + ] + + origin_request -> controller:s [ + xlabel=<
4
> + ] + + controller -> S3 [ + xlabel=<
5
>, + dir=both + ] + + controller:se -> origin_response [ + xlabel=<
6
>, + dir=both + ] + + controller -> client:se [ + xlabel=<
7
> + ] + + # publishing tools are mentioned, but do not participate + # in the request processing. + # Connection order here is reversed to force the publishing tools to the bottom + # of the graph, which makes them stand out a bit more. + S3 -> publish_tools [dir="back"] + db -> publish_tools [dir="back"] + + client [label="💻 client"] + publish_tools [label="publishing tools", style="rounded", rank="max", shape="box"] + + db [ + shape=plaintext + fontsize=9 + label=< + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
☁ DynamoDB
web_uri (partition key)from_date (sort key)object_key
/content/dist/rhel/server/7/7Server/x86_64/os/Packages/t/tar-1.26-34.el7.x86_64.rpm2020-03-26T01:07:39+00:008e7750e50734f...
/content/dist/rhel/server/7/7Server/x86_64/os/Packages/z/zlib-1.2.7-18.el7.x86_64.rpm2020-03-26T01:07:39+00:00db8dd5164d117...
/content/dist/rhel/server/7/7Server/x86_64/os/repodata/repomd.xml2020-03-26T01:07:39+00:00aec070645fe53...
/content/dist/rhel/server/7/7Server/x86_64/os/repodata/repomd.xml2020-01-22T02:07:20+00:005d70f436aa013...
...
+ > + ]; + + S3 [ + shape=plaintext + fontsize=9 + label=< + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
☁ S3
keyobjectmetadata
8e7750e50734f...[blob tar-1.26-34.el7.x86_64.rpm]-
db8dd5164d117...[blob zlib-1.2.7-18.el7.x86_64.rpm]-
aec070645fe53...[blob some repomd.xml]{ContentType: application/xml}
5d70f436aa013...[blob other repomd.xml]{ContentType: application/xml}
49ae93732fcf...[blob some primary.sqlite.bz2]{ContentType: application/x-bzip2}
...
+ > + ]; + + subgraph cluster_0 { + label=< 🖧 CloudFront CDN > + style="rounded"; + controller; + subgraph cluster_1 { + label=<cdn-lambda>; + style="dashed"; + rank=same + origin_request; + origin_response; + } + } +} diff --git a/docs/arch.rst b/docs/arch.rst new file mode 100644 index 00000000..691e9c32 --- /dev/null +++ b/docs/arch.rst @@ -0,0 +1,111 @@ +Architecture +============ + + +Overview +-------- + +This diagram shows the relationship between all major components used +in the delivery of content via the CDN. + +.. graphviz:: arch.gv + +- Numbered connections represent the sequence of events when the CDN processes a request. +- For clarity, SHA256 checksums have been truncated (as in ``8e7750e50734f...``). In reality, + the system stores complete checksums. +- The CloudFront CDN shown in the above diagram may itself be hosted behind another CDN, + so client requests may pass through additional layers not expressed here. + + +Components +---------- + +client + A client requesting data from the CDN. + + This could be ``dnf``, ``yum``, Satellite, ``curl``, a web browser, etc. + +CloudFront CDN + The `Amazon CloudFront`_ content delivery network. + +controller + An abstract component representing the built-in behaviors of CloudFront, + such as: + + - basic HTTP request handling + - serving responses from cache + - invoking Lambda functions + - delegating requests to S3 + + ...and so on. + +DynamoDB + `Amazon DynamoDB`_ NoSQL database service. + + The CDN uses a single DynamoDB table which primarily contains mappings + between URIs and S3 object keys. + + For more information about the data contained here, see :ref:`schema_ref`. + +S3 + `Amazon S3`_, Simple Storage Service. + + The CDN uses S3 to store the binary objects retrievable by clients. + A single bucket is used, configured as the origin of the CloudFront CDN. 
+ + One object corresponds to one file which can be downloaded from the CDN; + this includes files considered to be content (such as RPMs) and files considered + to be metadata (such as yum repo metadata files). + + Each object's key is its own SHA256 checksum, ensuring that content accessible + via many paths on the CDN need only be stored once. + + S3 metadata is used in some cases to customize the response behavior of each object; + for example, metadata is used to adjust ``Content-Type`` headers in responses. + Publishing tools are responsible for setting this metadata accurately. + + For more information about the data contained here, see :ref:`schema_ref`. + +cdn-lambda + A project including Python-based implementations of `Lambda@Edge`_ functions for the CDN. + + You are currently reading the documentation of this project. + +origin_request + A `Lambda@Edge`_ function connected to "origin request" events in CloudFront. + + This function is primarily responsible for translating the path given in the client's + request into an S3 object key via a DynamoDB query. Assuming the client has requested + existing content, this Lambda function will rewrite the request's URI into a valid S3 + object key before returning the request to the controller. The function itself does + not request data from S3, nor generate a response directly. + + For more information about this function's behavior, see :ref:`function_ref`. + +origin_response + A `Lambda@Edge`_ function connected to "origin response" events in CloudFront. + + This function is primarily responsible for tweaking certain response headers + before allowing CloudFront to serve the response to clients. For example, + caching behavior is influenced by setting a Cache-Control header for certain + responses. + + For more information about this function's behavior, see :ref:`function_ref`. + +publishing tools + Represents the tools used by Red Hat to publish content onto the CDN. 
+ + These tools insert data into the CDN's S3 and DynamoDB services in order to publish + content. + + A further explanation of these tools is out of scope for this document; it suffices + to know that the tools are designed with an awareness of the CDN architecture + described here. + +.. _Lambda@Edge: https://aws.amazon.com/lambda/edge/ + +.. _Amazon CloudFront: https://aws.amazon.com/cloudfront/ + +.. _Amazon DynamoDB: https://aws.amazon.com/dynamodb/ + +.. _Amazon S3: https://aws.amazon.com/s3/ diff --git a/docs/conf.py b/docs/conf.py index 6a9655ea..179f8a34 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -45,6 +45,7 @@ "sphinx.ext.napoleon", "sphinx.ext.githubpages", "sphinx.ext.viewcode", + "sphinx.ext.graphviz", ] # Add any paths that contain templates here, relative to this directory. @@ -133,3 +134,4 @@ intersphinx_mapping = { "python": ("https://docs.python.org/3", None), } +graphviz_output_format = "png" diff --git a/docs/function-reference.rst b/docs/function-reference.rst index 97f97568..12366e1f 100644 --- a/docs/function-reference.rst +++ b/docs/function-reference.rst @@ -1,3 +1,5 @@ +.. _function_ref: + Function Reference ================== diff --git a/docs/index.rst b/docs/index.rst index 7f270c09..f552938b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,5 +7,6 @@ AWS Lambda functions for Red Hat's Content Delivery Network :maxdepth: 2 :caption: Contents: + arch function-reference schema-reference diff --git a/docs/schema-reference.rst b/docs/schema-reference.rst index 4552c15a..617dda6c 100644 --- a/docs/schema-reference.rst +++ b/docs/schema-reference.rst @@ -1,3 +1,5 @@ +.. _schema_ref: + Schema Reference ================