diff --git a/.travis.yml b/.travis.yml
index 24c9f724..983f400f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,6 +2,12 @@ os: linux
language: python
+addons:
+  apt:
+    packages:
+      # for docs
+      - graphviz
+
install: pip install tox
jobs:
diff --git a/docs/arch.gv b/docs/arch.gv
new file mode 100644
index 00000000..498c43a5
--- /dev/null
+++ b/docs/arch.gv
@@ -0,0 +1,139 @@
+digraph {
+ ranksep="1.4";
+
+ # These are arranged and labelled to communicate the
+ # sequence of events when a request is processed.
+ # Try to keep them in this order.
+ client:sw -> controller [
+ xlabel=< >
+ ]
+
+ controller:sw -> origin_request [
+ xlabel=< >
+ ]
+
+ origin_request -> db [
+ xlabel=< >,
+ dir=both
+ ]
+
+ origin_request -> controller:s [
+ xlabel=< >
+ ]
+
+ controller -> S3 [
+ xlabel=< >,
+ dir=both
+ ]
+
+ controller:se -> origin_response [
+ xlabel=< >,
+ dir=both
+ ]
+
+ controller -> client:se [
+ xlabel=< >
+ ]
+
+ # publishing tools are mentioned, but do not participate
+ # in the request processing.
+ # Connection order here is reversed to force the publishing tools to the bottom
+ # of the graph, which makes them stand out a bit more.
+ S3 -> publish_tools [dir="back"]
+ db -> publish_tools [dir="back"]
+
+ client [label="💻 client"]
+ publish_tools [label="publishing tools", style="rounded", rank="max", shape="box"]
+
+ db [
+ shape=plaintext
+ fontsize=9
+ label=<
+  <table>
+
+   <tr><td colspan="3">☁ DynamoDB</td></tr>
+   <tr>
+    <td>web_uri (partition key)</td>
+    <td>from_date (sort key)</td>
+    <td>object_key</td>
+   </tr>
+   <tr>
+    <td>/content/dist/rhel/server/7/7Server/x86_64/os/Packages/t/tar-1.26-34.el7.x86_64.rpm</td>
+    <td>2020-03-26T01:07:39+00:00</td>
+    <td>8e7750e50734f...</td>
+   </tr>
+   <tr>
+    <td>/content/dist/rhel/server/7/7Server/x86_64/os/Packages/z/zlib-1.2.7-18.el7.x86_64.rpm</td>
+    <td>2020-03-26T01:07:39+00:00</td>
+    <td>db8dd5164d117...</td>
+   </tr>
+   <tr>
+    <td>/content/dist/rhel/server/7/7Server/x86_64/os/repodata/repomd.xml</td>
+    <td>2020-03-26T01:07:39+00:00</td>
+    <td>aec070645fe53...</td>
+   </tr>
+   <tr>
+    <td>/content/dist/rhel/server/7/7Server/x86_64/os/repodata/repomd.xml</td>
+    <td>2020-01-22T02:07:20+00:00</td>
+    <td>5d70f436aa013...</td>
+   </tr>
+   <tr><td colspan="3">...</td></tr>
+  </table>
+ >
+ ];
+
+ S3 [
+ shape=plaintext
+ fontsize=9
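+ label=<
+  <table>
+
+   <tr><td colspan="3">☁ S3</td></tr>
+   <tr>
+    <td>key</td>
+    <td>object</td>
+    <td>metadata</td>
+   </tr>
+   <tr>
+    <td>8e7750e50734f...</td>
+    <td>[blob tar-1.26-34.el7.x86_64.rpm]</td>
+    <td>-</td>
+   </tr>
+   <tr>
+    <td>db8dd5164d117...</td>
+    <td>[blob zlib-1.2.7-18.el7.x86_64.rpm]</td>
+    <td>-</td>
+   </tr>
+   <tr>
+    <td>aec070645fe53...</td>
+    <td>[blob some repomd.xml]</td>
+    <td>{ContentType: application/xml}</td>
+   </tr>
+   <tr>
+    <td>5d70f436aa013...</td>
+    <td>[blob other repomd.xml]</td>
+    <td>{ContentType: application/xml}</td>
+   </tr>
+   <tr>
+    <td>49ae93732fcf...</td>
+    <td>[blob some primary.sqlite.bz2]</td>
+    <td>{ContentType: application/x-bzip2}</td>
+   </tr>
+   <tr><td colspan="3">...</td></tr>
+  </table>
+ >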
+ ];
+
+ subgraph cluster_0 {
+ label=< 🖧 CloudFront CDN >
+ style="rounded";
+ controller;
+ subgraph cluster_1 {
+ label=<cdn-lambda>;
+ style="dashed";
+ rank=same
+ origin_request;
+ origin_response;
+ }
+ }
+}
diff --git a/docs/arch.rst b/docs/arch.rst
new file mode 100644
index 00000000..691e9c32
--- /dev/null
+++ b/docs/arch.rst
@@ -0,0 +1,111 @@
+Architecture
+============
+
+
+Overview
+--------
+
+This diagram shows the relationship between all major components used
+in the delivery of content via the CDN.
+
+.. graphviz:: arch.gv
+
+- Numbered connections represent the sequence of events when the CDN processes a request.
+- For brevity, SHA256 checksums have been truncated (as in ``8e7750e50734f...``). In reality,
+  the system stores complete checksums.
+- The CloudFront CDN shown in the above diagram may itself be hosted behind another CDN,
+  so client requests may pass through additional layers not expressed here.
+
+
+Components
+----------
+
+client
+ A client requesting data from the CDN.
+
+ This could be ``dnf``, ``yum``, Satellite, ``curl``, a web browser, etc.
+
+CloudFront CDN
+ The `Amazon CloudFront`_ content delivery network.
+
+controller
+ An abstract component representing the built-in behaviors of CloudFront,
+ such as:
+
+ - basic HTTP request handling
+ - serving responses from cache
+ - invoking Lambda functions
+ - delegating requests to S3
+
+ ...and so on.
+
+DynamoDB
+ `Amazon DynamoDB`_ NoSQL database service.
+
+ The CDN uses a single DynamoDB table which primarily contains mappings
+ between URIs and S3 object keys.
+
+ For more information about the data contained here, see :ref:`schema_ref`.
+
+S3
+ `Amazon S3`_, Simple Storage Service.
+
+ The CDN uses S3 to store the binary objects retrievable by clients.
+ A single bucket is used, configured as the origin of the CloudFront CDN.
+
+ One object corresponds to one file which can be downloaded from the CDN;
+ this includes files considered to be content (such as RPMs) and files considered
+ to be metadata (such as yum repo metadata files).
+
+ Each object's key is its own SHA256 checksum, ensuring that content accessible
+ via many paths on the CDN need only be stored once.
+
+ S3 metadata is used in some cases to customize the response behavior of each object;
+ for example, metadata is used to adjust ``Content-Type`` headers in responses.
+ Publishing tools are responsible for setting this metadata accurately.
+
+ For more information about the data contained here, see :ref:`schema_ref`.
+
+cdn-lambda
+ A project including Python-based implementations of `Lambda@Edge`_ functions for the CDN.
+
+ You are currently reading the documentation of this project.
+
+origin_request
+ A `Lambda@Edge`_ function connected to "origin request" events in CloudFront.
+
+ This function is primarily responsible for translating the path given in the client's
+ request into an S3 object key via a DynamoDB query. Assuming the client has requested
+ existing content, this Lambda function will rewrite the request's URI into a valid S3
+ object key before returning the request to the controller. The function itself does
+ not request data from S3, nor generate a response directly.
+
+ For more information about this function's behavior, see :ref:`function_ref`.
+
+origin_response
+ A `Lambda@Edge`_ function connected to "origin response" events in CloudFront.
+
+ This function is primarily responsible for tweaking certain response headers
+ before allowing CloudFront to serve the response to clients. For example,
+ caching behavior is influenced by setting a ``Cache-Control`` header for certain
+ responses.
+
+ For more information about this function's behavior, see :ref:`function_ref`.
+
+publishing tools
+ Represents the tools used by Red Hat to publish content onto the CDN.
+
+ These tools insert data into the CDN's S3 and DynamoDB services in order to publish
+ content.
+
+ A further explanation of these tools is out of scope for this document; it suffices
+ to know that the tools are designed with an awareness of the CDN architecture
+ described here.
+
+.. _Lambda@Edge: https://aws.amazon.com/lambda/edge/
+
+.. _Amazon CloudFront: https://aws.amazon.com/cloudfront/
+
+.. _Amazon DynamoDB: https://aws.amazon.com/dynamodb/
+
+.. _Amazon S3: https://aws.amazon.com/s3/
diff --git a/docs/conf.py b/docs/conf.py
index 6a9655ea..179f8a34 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -45,6 +45,7 @@
"sphinx.ext.napoleon",
"sphinx.ext.githubpages",
"sphinx.ext.viewcode",
+ "sphinx.ext.graphviz",
]
# Add any paths that contain templates here, relative to this directory.
@@ -133,3 +134,4 @@
intersphinx_mapping = {
"python": ("https://docs.python.org/3", None),
}
+graphviz_output_format = "png"
diff --git a/docs/function-reference.rst b/docs/function-reference.rst
index 97f97568..12366e1f 100644
--- a/docs/function-reference.rst
+++ b/docs/function-reference.rst
@@ -1,3 +1,5 @@
+.. _function_ref:
+
Function Reference
==================
diff --git a/docs/index.rst b/docs/index.rst
index 7f270c09..f552938b 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -7,5 +7,6 @@ AWS Lambda functions for Red Hat's Content Delivery Network
:maxdepth: 2
:caption: Contents:
+ arch
function-reference
schema-reference
diff --git a/docs/schema-reference.rst b/docs/schema-reference.rst
index 4552c15a..617dda6c 100644
--- a/docs/schema-reference.rst
+++ b/docs/schema-reference.rst
@@ -1,3 +1,5 @@
+.. _schema_ref:
+
Schema Reference
================