From 5f2e1ba3c4c125c0d166ff0e8f603bed2217b1f2 Mon Sep 17 00:00:00 2001
From: Rohan McGovern <rmcgover@redhat.com>
Date: Mon, 3 Aug 2020 15:32:31 +1000
Subject: [PATCH] Introduce an authentication & authorization scheme

This commit introduces the exodus_gw.auth module containing
helpers for ensuring that the current request is authenticated
and determining the associated roles.

It is designed to integrate with an existing authentication scheme
shared by various services. In summary, it works as follows:

- exodus-gw itself does not authenticate requests

- exodus-gw should be deployed behind a reverse-proxy which performs
  authentication

- the authenticating proxy will add a json-in-base64 header to
  incoming requests, of the form (abbreviated):

  {
    "user": {
      "roles": ["some-role"],
      "authenticated": true,
      "internalUsername": "some-user"
    }
  }

- exodus-gw parses and trusts this header and uses it to decide
  whether or not the current request is authorized.

Thus, exodus-gw supports any authentication & authorization scheme
implemented by the reverse proxy. In practice, mutual TLS is
expected to be used.

This commit adds authentication functionality, but none of the
existing endpoints require authentication yet, so it's only used
from autotests.

Other concepts introduced by this commit include:

- usage of the fastapi dependency injection framework (Depends).

- usage of pydantic settings, which allows all settings to be
  overridden by environment variables (12-factor app style).

Things which could be added here but are instead planned for
later commits include:

- documentation on the authentication scheme and the settings.
  This appears to fit best in some kind of deployment guide doc,
  which doesn't exist yet.

- requiring authentication on the upload API. I think each
  target environment for uploads should have a separate role, so
  it requires implementing support for multiple environments
  first.
---
 .pylintrc                           |  5 ++
 exodus_gw/auth.py                   | 88 +++++++++++++++++++++++++++++
 exodus_gw/settings.py               | 32 +++++++++++
 tests/auth/test_call_context.py     | 86 ++++++++++++++++++++++++++++
 tests/auth/test_roles.py            | 52 +++++++++++++++++
 tests/settings/test_get_settings.py | 31 ++++++++++
 6 files changed, 294 insertions(+)
 create mode 100644 exodus_gw/auth.py
 create mode 100644 exodus_gw/settings.py
 create mode 100644 tests/auth/test_call_context.py
 create mode 100644 tests/auth/test_roles.py
 create mode 100644 tests/settings/test_get_settings.py

diff --git a/.pylintrc b/.pylintrc
index 5d6cd817..af2efbca 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -1,4 +1,9 @@
 [MESSAGES CONTROL]
+
+# For unknown reasons, pylint otherwise wrongly reports:
+# E0611: No name 'BaseModel' in module 'pydantic' (no-name-in-module)
+extension-pkg-whitelist=pydantic
+
 disable=print-statement,
         parameter-unpacking,
         unpacking-in-except,
diff --git a/exodus_gw/auth.py b/exodus_gw/auth.py
new file mode 100644
index 00000000..3c792dde
--- /dev/null
+++ b/exodus_gw/auth.py
@@ -0,0 +1,88 @@
+import base64
+import logging
+from typing import List, Set, Optional
+
+from fastapi import Request, Depends, HTTPException
+from pydantic import BaseModel
+
+from .settings import Settings, get_settings
+
+LOG = logging.getLogger("exodus-gw")
+
+
+class ClientContext(BaseModel):
+    """Call context data relating to service accounts / machine users."""
+
+    roles: List[str] = []
+    authenticated: bool = False
+    serviceAccountId: Optional[str] = None
+
+
+class UserContext(BaseModel):
+    """Call context data relating to human users."""
+
+    roles: List[str] = []
+    authenticated: bool = False
+    internalUsername: Optional[str] = None
+
+
+class CallContext(BaseModel):
+    """Represents an authenticated (or not) context for an incoming request.
+
+    Use the fields on this model to decide whether the current request belongs
+    to an authenticated user, and if so, to determine which role(s) are held
+    by the user.
+    """
+
+    client: ClientContext = ClientContext()
+    user: UserContext = UserContext()
+
+
+def call_context(
+    request: Request, settings: Settings = Depends(get_settings)
+) -> CallContext:
+    """Returns the CallContext for the current request."""
+
+    header = settings.call_context_header
+    header_value = request.headers.get(header)
+    if not header_value:
+        return CallContext()
+
+    try:
+        decoded = base64.b64decode(header_value, validate=True)
+        return CallContext.parse_raw(decoded)
+    except Exception:
+        summary = "Invalid %s header in request" % header
+        LOG.exception(summary)
+        raise HTTPException(400, detail=summary)
+
+
+def caller_roles(context: CallContext = Depends(call_context)) -> Set[str]:
+    """Returns all roles held by the caller of the current request.
+
+    This will be an empty set for unauthenticated requests.
+    """
+    return set(context.user.roles + context.client.roles)
+
+
+def needs_role(rolename):
+    """Returns a dependency on a specific named role.
+
+    This function is intended to be used with "dependencies" on endpoints in
+    order to associate them with specific roles. Requests to that endpoint will
+    fail unless the caller is authenticated as a user having that role.
+
+    For example:
+
+    > @app.post('/my-great-api/frobnitz', dependencies=[needs_role("xyz")])
+    > def do_frobnitz():
+    >    "If caller does not have role xyz, they will never get here."
+    """
+
+    def check_roles(roles: Set[str] = Depends(caller_roles)):
+        if rolename not in roles:
+            raise HTTPException(
+                403, "this operation requires role '%s'" % rolename
+            )
+
+    return Depends(check_roles)
diff --git a/exodus_gw/settings.py b/exodus_gw/settings.py
new file mode 100644
index 00000000..ee214697
--- /dev/null
+++ b/exodus_gw/settings.py
@@ -0,0 +1,32 @@
+from functools import lru_cache
+
+from pydantic import BaseSettings
+
+
+class Settings(BaseSettings):
+    """Settings for the server.
+
+    Each setting defined here can be overridden by an environment variable
+    of the same name, prefixed with "EXODUS_GW_".
+    """
+
+    call_context_header: str = "X-RhApiPlatform-CallContext"
+    """Name of the header from which to extract call context (for authentication
+    and authorization).
+    """
+
+    class Config:
+        env_prefix = "exodus_gw_"
+
+
+@lru_cache()
+def get_settings() -> Settings:
+    """Return the currently active settings for the server.
+
+    This function is intended for use with fastapi.Depends.
+
+    Settings are loaded the first time this function is called, and cached
+    afterward.
+    """
+
+    return Settings()
diff --git a/tests/auth/test_call_context.py b/tests/auth/test_call_context.py
new file mode 100644
index 00000000..82afe600
--- /dev/null
+++ b/tests/auth/test_call_context.py
@@ -0,0 +1,86 @@
+import json
+import base64
+
+import mock
+import pytest
+
+from fastapi import HTTPException
+from exodus_gw.auth import call_context
+from exodus_gw.settings import Settings
+
+
+def test_no_context():
+    """Unauthenticated requests return a default context."""
+
+    request = mock.Mock(headers={})
+    ctx = call_context(request, Settings())
+
+    # It should return a truthy object.
+    assert ctx
+
+    # It should have no roles.
+    assert not ctx.client.roles
+    assert not ctx.user.roles
+
+    # It should not be authenticated.
+    assert not ctx.client.authenticated
+    assert not ctx.user.authenticated
+
+
+def test_decode_context():
+    """A context can be decoded from a valid header."""
+
+    raw_context = {
+        "client": {
+            "roles": ["someRole", "anotherRole"],
+            "authenticated": True,
+            "serviceAccountId": "clientappname",
+        },
+        "user": {
+            "roles": ["viewer"],
+            "authenticated": True,
+            "internalUsername": "greatUser",
+        },
+    }
+    b64 = base64.b64encode(json.dumps(raw_context).encode("utf-8"))
+
+    settings = Settings(call_context_header="my-auth-header")
+    request = mock.Mock(headers={"my-auth-header": b64})
+
+    ctx = call_context(request=request, settings=settings)
+
+    # The details should match exactly the encoded data from the header.
+    assert ctx.client.roles == ["someRole", "anotherRole"]
+    assert ctx.client.authenticated
+    assert ctx.client.serviceAccountId == "clientappname"
+
+    assert ctx.user.roles == ["viewer"]
+    assert ctx.user.authenticated
+    assert ctx.user.internalUsername == "greatUser"
+
+
+@pytest.mark.parametrize(
+    "header_value",
+    [
+        # not valid base64
+        "oops not valid",
+        # valid base64, but not valid JSON
+        base64.b64encode(b"oops not JSON"),
+        # valid base64, valid JSON, but wrong structure
+        base64.b64encode(b'["oops schema mismatch]'),
+    ],
+)
+def test_bad_header(header_value):
+    """If header does not contain valid content, a meaningful error is raised."""
+
+    settings = Settings(call_context_header="my-auth-header")
+    request = mock.Mock(headers={"my-auth-header": header_value})
+
+    with pytest.raises(HTTPException) as exc_info:
+        call_context(request=request, settings=settings)
+
+    # It should give a 400 error (client error)
+    assert exc_info.value.status_code == 400
+
+    # It should give some hint as to what the problem is
+    assert exc_info.value.detail == "Invalid my-auth-header header in request"
diff --git a/tests/auth/test_roles.py b/tests/auth/test_roles.py
new file mode 100644
index 00000000..e890a32f
--- /dev/null
+++ b/tests/auth/test_roles.py
@@ -0,0 +1,52 @@
+import pytest
+
+from fastapi import HTTPException
+
+from exodus_gw.auth import (
+    CallContext,
+    ClientContext,
+    UserContext,
+    caller_roles,
+    needs_role,
+)
+
+
+def test_caller_roles_empty():
+    """caller_roles returns an empty set for a default (empty) context."""
+
+    assert caller_roles(CallContext()) == set()
+
+
+def test_caller_roles_nonempty():
+    """caller_roles returns all roles from the context when present."""
+
+    ctx = CallContext(
+        user=UserContext(roles=["role1", "role2"]),
+        client=ClientContext(roles=["role2", "role3"]),
+    )
+    assert caller_roles(ctx) == set(["role1", "role2", "role3"])
+
+
+def test_needs_role_success():
+    """needs_role succeeds when needed role is present."""
+
+    fn = needs_role("better-role").dependency
+
+    # It should do nothing, successfully
+    fn(roles=set(["better-role"]))
+
+
+def test_needs_role_fail():
+    """needs_role raises meaningful error when needed role is absent."""
+
+    fn = needs_role("best-role").dependency
+
+    # It should raise an exception.
+    with pytest.raises(HTTPException) as exc_info:
+        fn(roles=set(["abc", "xyz"]))
+
+    # It should use status 403 to tell the client they are unauthorized.
+    assert exc_info.value.status_code == 403
+
+    # It should give some hint as to the needed role.
+    assert exc_info.value.detail == "this operation requires role 'best-role'"
diff --git a/tests/settings/test_get_settings.py b/tests/settings/test_get_settings.py
new file mode 100644
index 00000000..432137c6
--- /dev/null
+++ b/tests/settings/test_get_settings.py
@@ -0,0 +1,31 @@
+from exodus_gw.settings import get_settings
+
+
+# Note: get_settings is wrapped in lru_cache.
+# During tests, we want to test the real original function
+# without caching, so we grab a reference to it here.
+get_settings = get_settings.__wrapped__
+
+
+def test_get_settings_default():
+    """get_settings returns an object with default settings present."""
+
+    settings = get_settings()
+
+    assert settings.call_context_header == "X-RhApiPlatform-CallContext"
+
+
+def test_get_settings_override(monkeypatch):
+    """get_settings values can be overridden by environment variables.
+
+    This test shows/proves that the pydantic BaseSettings environment variable
+    parsing feature is generally working. It is not necessary to add similar
+    tests for every value in settings.
+    """
+
+    monkeypatch.setenv("EXODUS_GW_CALL_CONTEXT_HEADER", "my-awesome-header")
+
+    settings = get_settings()
+
+    # It should have used the value from environment.
+    assert settings.call_context_header == "my-awesome-header"