Introduce fakefront: supporting dev server

Run `tox -e fakefront` to deploy lambdas via a local development server. This server emulates enough of the CloudFront Lambda@Edge behaviors for the exodus-lambda code to be usable. It is primarily intended to be used with the localstack container deployed via the exodus-gw dev env. Possible uses of this include:
- more productive development: test local changes instantly with hot reload, without needing a new lambda deployment for every change
- work with a fully functional exodus-rsync => exodus-gw => exodus-lambda chain locally without involving any remote services
- deploy a fully functional integration test environment in openshift without having to deploy any AWS resources / incur any AWS costs
release-engineering · Mar 28, 2022 · 781bb29 · 781bb29
1 parent 9f0c897
commit 781bb29
Show file tree

Hide file tree

Showing 5 changed files with 462 additions and 0 deletions.
diff --git a/support/fakefront/__init__.py b/support/fakefront/__init__.py
@@ -0,0 +1,34 @@
+"""fakefront: run exodus-lambda in a local Cloudfront-like environment.
+
+Provides a WSGI app which can be invoked by e.g. gunicorn.
+
+Primarily intended for use against a localstack environment, whose URL
+should be set in the EXODUS_AWS_ENDPOINT_URL environment variable.
+
+While it's likely possible to run this against real AWS services also,
+your S3 bucket would have to be unsecured, which is not recommended.
+"""
+from .config import ensure_config
+
+
+def new_app():
+    # Ensure various config is in place before starting the app.
+    # This will do various things including:
+    #
+    # - generate a temporary lambda_config.json
+    # - generate a public/private key pair and populate the private
+    #   key into a secret
+    # - fill in some default values of AWS env vars to make setup
+    #   a bit easier
+    #
+    ensure_config()
+
+    # Note that import of wsgi is delayed until now because some
+    # code in exodus-lambda will read config at import time, so
+    # the import must not happen until after ensure_config().
+    from .wsgi import Wsgi
+
+    return Wsgi()
+
+
+# Module-level WSGI app, built once when this package is imported; this is
+# the object a WSGI server (e.g. gunicorn) can be pointed at.
+application = new_app()
diff --git a/support/fakefront/config.py b/support/fakefront/config.py
@@ -0,0 +1,163 @@
+import atexit
+import json
+import logging
+import os
+from subprocess import check_call, check_output
+from tempfile import NamedTemporaryFile
+
+import boto3
+
+# Logger used by all of the configuration helpers in this module.
+LOG = logging.getLogger("fakefront")
+
+# Directory containing this module.
+THIS_DIR = os.path.dirname(__file__)
+# Path of the scripts/mk-config program, resolved relative to this module;
+# its output is used as the content of the generated config file.
+MK_CONFIG = os.path.join(THIS_DIR, "../../scripts/mk-config")
+# Per-user directory in which the generated key pair files are stored.
+KEY_DIR = os.path.expanduser("~/.config/exodus-fakefront")
+
+
+def ensure_config_file():
+    """Ensure that a lambda_config.json file exists and is pointed to
+    by the EXODUS_LAMBDA_CONF_FILE environment variable.
+
+    A new temporary config file will be generated. The config file's
+    contents can be controlled by the environment variables listed
+    within scripts/mk-config.
+    """
+    config_file = NamedTemporaryFile(
+        mode="wt", prefix="fakefront", delete=False
+    )
+    atexit.register(os.remove, config_file.name)
+
+    config_json = check_output([MK_CONFIG], env=os.environ, text=True)
+    config_file.write(config_json)
+    config_file.flush()
+
+    LOG.info("fakefront: using config at %s", config_file.name)
+    os.environ["EXODUS_LAMBDA_CONF_FILE"] = config_file.name
+
+
+def ensure_keypair():
+    """Ensures a public, private keypair exists which can be used for
+    signing requests.
+
+    Returns the paths to the (public, private) key files.
+    """
+    os.makedirs(KEY_DIR, exist_ok=True)
+
+    public_key = os.path.join(KEY_DIR, "pubkey.pem")
+    private_key = os.path.join(KEY_DIR, "privatekey.pem")
+
+    # openssl genrsa -out private_key.pem 2048
+    if not os.path.exists(private_key):
+        check_call(["openssl", "genrsa", "-out", private_key, "2048"])
+        LOG.info("fakefront: created private key: %s", private_key)
+
+    if not os.path.exists(public_key):
+        check_call(
+            [
+                "openssl",
+                "rsa",
+                "-pubout",
+                "-in",
+                private_key,
+                "-out",
+                public_key,
+            ]
+        )
+        LOG.info("fakefront: created public key: %s", public_key)
+
+    return (public_key, private_key)
+
+
+def ensure_aws_config():
+    """Check and/or set various environment variables influencing
+    the connections between exodus-lambda & AWS.
+    """
+
+    if os.environ.get("EXODUS_AWS_ENDPOINT_URL") and not os.environ.get(
+        "EXODUS_FAKEFRONT_BUCKET_URL"
+    ):
+        # If the user has set an AWS endpoint and they haven't set any bucket URL,
+        # we'll assume they're using a localstack in the default configuration as
+        # provisioned by exodus-gw dev env, which means the bucket URL is the endpoint
+        # plus bucket name.
+        default = os.path.join(
+            os.environ["EXODUS_AWS_ENDPOINT_URL"], "my-bucket"
+        )
+        os.environ["EXODUS_FAKEFRONT_BUCKET_URL"] = default
+
+        LOG.info("fakefront: defaulted bucket URL to %s", default)
+
+    if not os.environ.get("EXODUS_FAKEFRONT_BUCKET_URL"):
+        raise RuntimeError(
+            "Must set EXODUS_AWS_ENDPOINT_URL or EXODUS_FAKEFRONT_BUCKET_URL."
+        )
+
+    if os.environ.get("EXODUS_AWS_ENDPOINT_URL"):
+        # Just to make things a bit easier in the localstack case, we'll set
+        # credentials to dummy values automatically. This can be done since the
+        # creds aren't really used, and it avoids the requirement for the
+        # caller to have a valid ~/.aws/credentials .
+        for varname in (
+            "AWS_ACCESS_KEY_ID",
+            "AWS_SECRET_ACCESS_KEY",
+            "AWS_SESSION_TOKEN",
+        ):
+            if varname not in os.environ:
+                os.environ[varname] = "dummy"
+
+
+def ensure_secret():
+    """Ensure that a secret exists holding the private_key.pem found in this
+    directory, and ensure the secret is pointed to by the EXODUS_SECRET_ARN
+    environment variable.
+    """
+    if os.environ.get("EXODUS_SECRET_ARN"):
+        # Secret is already set explicitly, don't touch anything.
+        return
+
+    if os.environ.get("EXODUS_KEY_ID") != "FAKEFRONT":
+        # Not using the fake key, don't touch anything.
+        return
+
+    # There is currently no secret defined and we're using the fake key.
+    # To make setup easier, we support creating the secret on the fly.
+    sm_client = boto3.client(
+        "secretsmanager",
+        region_name="us-east-1",
+        endpoint_url=os.environ.get("EXODUS_AWS_ENDPOINT_URL") or None,
+    )
+
+    (_, privkey) = ensure_keypair()
+
+    secret = json.dumps({"cookie_key": open(privkey).read()})
+
+    try:
+        # create it
+        arn = sm_client.create_secret(
+            Name="fakefront-key",
+            SecretString=secret,
+        )["ARN"]
+    except sm_client.exceptions.ResourceExistsException:
+        # already existed, so update it instead
+        arn = sm_client.update_secret(
+            SecretId="fakefront-key",
+            SecretString=secret,
+        )["ARN"]
+
+    os.environ["EXODUS_SECRET_ARN"] = arn
+    LOG.info("Created/updated %s from %s", arn, privkey)
+
+
+def ensure_config():
+    """Ensures various configuration is in place.
+
+    This doesn't return anything and is called only for its side-effects.
+    It should be called once, before creation of the fakefront wsgi app."""
+
+    # Set up some basic logging just during our configuration; we expect
+    # the lambda code to reconfigure loggers later on.
+    logging.basicConfig(level=logging.INFO)
+
+    ensure_aws_config()
+    ensure_secret()
+    ensure_config_file()
diff --git a/support/fakefront/lambdaio.py b/support/fakefront/lambdaio.py
@@ -0,0 +1,142 @@
+import base64
+import uuid
+from typing import Any, Dict, Iterable, List, Optional, Tuple
+
+from requests import Response
+
+# Helpers for dealing with lambda inputs and outputs.
+
+
+class LambdaInput:
+    """Helper for generating input event(s) to a lambda."""
+
+    def __init__(
+        self,
+        wsgi_environ: Dict[str, Any],
+        request: Optional[Dict[str, Any]] = None,
+        response: Optional[Response] = None,
+    ):
+        self._wsgi_environ = wsgi_environ
+        self._request_id = str(uuid.uuid4())
+        self._response = response
+        self.request = request or self._new_request()
+
+    def config(self, event_type: str):
+        """Returns the config element of a cloudfront event."""
+        return {
+            "distributionDomainName": self._wsgi_environ["SERVER_NAME"],
+            "distributionId": "FAKEFRONT",
+            "eventType": event_type,
+            "requestId": self._request_id,
+        }
+
+    def _new_request(self):
+        headers = {}
+        for (key, value) in self._wsgi_environ.items():
+            # e.g. HTTP_USER_AGENT => user-agent
+            if key.startswith("HTTP_"):
+                key = key[len("HTTP_") :].lower()
+                key = key.replace("_", "-")
+                headers[key] = [
+                    {
+                        "key": key,
+                        "value": value,
+                    }
+                ]
+
+        return {
+            "clientIp": self._wsgi_environ["REMOTE_ADDR"],
+            "headers": headers,
+            "method": self._wsgi_environ["REQUEST_METHOD"],
+            "querystring": self._wsgi_environ["QUERY_STRING"],
+            "uri": self._wsgi_environ["PATH_INFO"],
+        }
+
+    @property
+    def response(self):
+        """Returns the response element of a cloudfront event."""
+        # not valid to call this if there's no response passed in.
+        assert self._response
+
+        headers = {}
+        for (key, val) in self._response.headers.items():
+            # TODO: should we actually copy all of the headers from
+            # origin or should it be filtered somehow? Should investigate
+            # and copy what cloudfront does.
+            headers[key] = [{"key": key, "value": val}]
+
+        return {
+            "headers": headers,
+            "status": str(self._response.status_code),
+            "statusDescription": self._response.reason,
+        }
+
+    @property
+    def origin_request(self):
+        """Returns an origin-request event corresponding to this request."""
+        cf = {
+            "config": self.config("origin-request"),
+            "request": self.request,
+        }
+
+        return {"Records": [{"cf": cf}]}
+
+    @property
+    def origin_response(self):
+        """Returns an origin-response event corresponding to this request."""
+        cf = {
+            "config": self.config("origin-response"),
+            "request": self.request,
+            "response": self.response,
+        }
+
+        return {"Records": [{"cf": cf}]}
+
+
+class LambdaOutput:
+    """Helper for handling an output event from a lambda, possibly
+    converting it to a WSGI response.
+    """
+
+    def __init__(self, raw: Dict[str, Any]):
+        self.raw = raw
+
+    @property
+    def status(self) -> Optional[str]:
+        """Status code returned by lambda (possibly None)."""
+        return self.raw.get("status")
+
+    @property
+    def wsgi_status(self) -> str:
+        """Status string as appropriate for use in WSGI output."""
+        assert self.status
+        return f"{self.status} {self.raw.get('statusDescription', '')}"
+
+    @property
+    def wsgi_headers(self) -> List[Tuple[str, str]]:
+        """Headers returned by lambda, in the structure used by WSGI."""
+        out = []
+        for headername, headerlist in (self.raw.get("headers") or {}).items():
+            for h in headerlist:
+                out.append((h.get("key", headername), h["value"]))
+
+        return out
+
+    @property
+    def wsgi_body(self) -> Iterable[bytes]:
+        if self.raw.get("body"):
+            # FIXME: this would break if our lambda ever produces a
+            # binary response. This never currently happens though.
+            return [self.raw["body"].encode("utf-8")]
+        return []
+
+    @property
+    def uri(self) -> str:
+        return self.raw["uri"]
+
+    @property
+    def querystring(self) -> Optional[str]:
+        return self.raw.get("querystring")
+
+    @property
+    def method(self) -> str:
+        return self.raw["method"]