diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 341c5eb6..29c2538c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -56,8 +56,8 @@ jobs: - name: Downgrade to oldest dependencies if: matrix.oldest_dependencies != '' - # take any dependencies in requirements.txt such as jupyterhub>=1.2 and - # transform them to jupyterhub==1.2 so we can run tests with the + # take any dependencies in requirements.txt such as jupyterhub>=2.2 and + # transform them to jupyterhub==2.2 so we can run tests with the # earliest-supported versions run: | cat requirements.txt | grep '>=' | sed -e 's@>=@==@g' > oldest-requirements.txt @@ -69,4 +69,4 @@ jobs: pytest # GitHub action reference: https://github.com/codecov/codecov-action - - uses: codecov/codecov-action@v3 + - uses: codecov/codecov-action@v4 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5706f2ed..d297dab9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,7 +34,7 @@ repos: # Autoformat: Python code - repo: https://github.com/psf/black - rev: 23.12.1 + rev: 24.1.1 hooks: - id: black @@ -64,7 +64,7 @@ repos: # Lint: Python code - repo: https://github.com/pycqa/flake8 - rev: "6.1.0" + rev: "7.0.0" hooks: - id: flake8 diff --git a/docs/source/how-to/example-oauthenticator.py b/docs/source/how-to/example-oauthenticator.py index 4fd44336..c139ee14 100644 --- a/docs/source/how-to/example-oauthenticator.py +++ b/docs/source/how-to/example-oauthenticator.py @@ -1,6 +1,7 @@ """ Example OAuthenticator to use with My Service """ + from jupyterhub.auth import LocalAuthenticator from oauthenticator.oauth2 import OAuthenticator, OAuthLoginHandler diff --git a/docs/source/index.md b/docs/source/index.md index e3663520..dbd98d2c 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -47,6 +47,7 @@ Topic guides go more in-depth on a particular topic. 
:maxdepth: 2 :caption: Topic guides +topic/allowing topic/extending ``` diff --git a/docs/source/reference/changelog.md b/docs/source/reference/changelog.md index 543db60f..8bc1527f 100644 --- a/docs/source/reference/changelog.md +++ b/docs/source/reference/changelog.md @@ -6,6 +6,48 @@ command line for details. ## [Unreleased] +### 16.3.0 - 2024-03-20 + +```{important} +This release includes a security patch for {attr}`.GoogleOAuthenticator.hosted_domain`, +see [GHSA-55m3-44xf-hg4h] for details. + +[GHSA-55m3-44xf-hg4h]: https://github.com/jupyterhub/oauthenticator/security/advisories/GHSA-55m3-44xf-hg4h +``` + +OAuthenticator now requires JupyterHub >=2.2. + +([full changelog](https://github.com/jupyterhub/oauthenticator/compare/16.2.1...16.3.0)) + +#### New features added + +- [All] Add `userdata_from_id_token` as alternative to `userdata_url` [#725](https://github.com/jupyterhub/oauthenticator/pull/725) ([@benjimin](https://github.com/benjimin), [@minrk](https://github.com/minrk), [@consideRatio](https://github.com/consideRatio), [@manics](https://github.com/manics)) +- [All] Make `username_claim` callable (except for CILogon), like it has been in Generic [#717](https://github.com/jupyterhub/oauthenticator/pull/717) ([@yuvipanda](https://github.com/yuvipanda), [@consideRatio](https://github.com/consideRatio), [@GeorgianaElena](https://github.com/GeorgianaElena), [@manics](https://github.com/manics)) +- [AzureAD] Support `manage_groups` [#710](https://github.com/jupyterhub/oauthenticator/pull/710) ([@minrk](https://github.com/minrk), [@yuvipanda](https://github.com/yuvipanda), [@GeorgianaElena](https://github.com/GeorgianaElena)) +- [Generic] Add support for `manage_groups` [#708](https://github.com/jupyterhub/oauthenticator/pull/708) ([@benjimin](https://github.com/benjimin), [@manics](https://github.com/manics), [@minrk](https://github.com/minrk), [@yuvipanda](https://github.com/yuvipanda)) +- [AzureAD] Add an implementation for 
`Authenticator.manage_groups=True` [#573](https://github.com/jupyterhub/oauthenticator/pull/573) ([@thomafred](https://github.com/thomafred), [@yuvipanda](https://github.com/yuvipanda), [@GeorgianaElena](https://github.com/GeorgianaElena)) + +#### Maintenance and upkeep improvements + +- test: simplify by removing `token_request_style` parameter for mock function [#734](https://github.com/jupyterhub/oauthenticator/pull/734) ([@consideRatio](https://github.com/consideRatio), [@manics](https://github.com/manics)) +- Require jupyterhub>=2.2 [#720](https://github.com/jupyterhub/oauthenticator/pull/720) ([@manics](https://github.com/manics), [@yuvipanda](https://github.com/yuvipanda), [@consideRatio](https://github.com/consideRatio)) +- temporary pin for pytest-asyncio [#715](https://github.com/jupyterhub/oauthenticator/pull/715) ([@minrk](https://github.com/minrk), [@consideRatio](https://github.com/consideRatio)) + +#### Documentation improvements + +- add example for deploying with mock-oauth2-server [#730](https://github.com/jupyterhub/oauthenticator/pull/730) ([@minrk](https://github.com/minrk), [@consideRatio](https://github.com/consideRatio)) +- add dedicated doc on details of allowing access [#729](https://github.com/jupyterhub/oauthenticator/pull/729) ([@minrk](https://github.com/minrk), [@GeorgianaElena](https://github.com/GeorgianaElena), [@consideRatio](https://github.com/consideRatio), [@manics](https://github.com/manics)) +- clarify what claim_groups_key is used for [#707](https://github.com/jupyterhub/oauthenticator/pull/707) ([@minrk](https://github.com/minrk), [@consideRatio](https://github.com/consideRatio)) + +#### Contributors to this release + +The following people contributed discussions, new ideas, code and documentation contributions, and review. +See [our definition of contributors](https://github-activity.readthedocs.io/en/latest/#how-does-this-tool-define-contributions-in-the-reports). 
+ +([GitHub contributors page for this release](https://github.com/jupyterhub/oauthenticator/graphs/contributors?from=2023-11-27&to=2024-03-20&type=c)) + +@benjimin ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Foauthenticator+involves%3Abenjimin+updated%3A2023-11-27..2024-03-20&type=Issues)) | @consideRatio ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Foauthenticator+involves%3AconsideRatio+updated%3A2023-11-27..2024-03-20&type=Issues)) | @GeorgianaElena ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Foauthenticator+involves%3AGeorgianaElena+updated%3A2023-11-27..2024-03-20&type=Issues)) | @krassowski ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Foauthenticator+involves%3Akrassowski+updated%3A2023-11-27..2024-03-20&type=Issues)) | @manics ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Foauthenticator+involves%3Amanics+updated%3A2023-11-27..2024-03-20&type=Issues)) | @minrk ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Foauthenticator+involves%3Aminrk+updated%3A2023-11-27..2024-03-20&type=Issues)) | @thomafred ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Foauthenticator+involves%3Athomafred+updated%3A2023-11-27..2024-03-20&type=Issues)) | @yuvipanda ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Foauthenticator+involves%3Ayuvipanda+updated%3A2023-11-27..2024-03-20&type=Issues)) + ## 16.2 ### [16.2.1] - 2023-11-27 diff --git a/docs/source/topic/allowing.md b/docs/source/topic/allowing.md new file mode 100644 index 00000000..0d88b7ce --- /dev/null +++ b/docs/source/topic/allowing.md @@ -0,0 +1,142 @@ +(allowing)= + +# Allowing access to your JupyterHub + +OAuthenticator is about deferring **authentication** to an external source, +assuming your users all have accounts _somewhere_. +But many of these sources (e.g. Google, GitHub) have _lots_ of users, and you don't want _all_ of them to be able to use your hub. +This is where **authorization** comes in. 
+ +In OAuthenticator, authorization is represented via configuration options that start with `allow` or `block`. + +There are also lots of OAuth providers, and as a result, lots of ways to tell OAuthenticator who should be allowed to access your hub. + +## Default behavior: nobody is allowed! + +The default behavior of OAuthenticator (starting with version 16) is to block all users unless explicitly authorized via _some_ `allow` configuration. +If you want anyone to be able to use your hub, you must specify at least one `allow` configuration. + +```{versionchanged} 16 +Prior to OAuthenticator 16, `allow_all` was _implied_ if no other `allow` configuration was specified. +Starting from 16, `allow_all` can only be enabled explicitly. +``` + +## Allowing access + +There are several `allow_` configuration options, to grant access to users according to different rules. + +When you have only one `allow` configuration, the behavior is generally unambiguous: anyone allowed by the rule can login to the Hub, while anyone not explicitly allowed cannot login. +However, once you start adding additional `allow` configuration, there is some ambiguity in how multiple rules are combined. + +```{important} +Additional allow rules **can only grant access**, meaning they only _expand_ who has access to your hub. +Adding an `allow` rule cannot prevent access granted by another `allow` rule. +To block access, use `block` configuration. +``` + +That is, if a user is granted access by _any_ `allow` configuration, they are allowed. +An allow rule cannot _exclude_ access granted by another `allow` rule. + +An example: + +```python +c.GitHubOAuthenticator.allowed_users = {"mensah", "art"} +c.GitHubOAuthenticator.allowed_organizations = {"preservation"} +``` + +means that the users `mensah` and `art` are allowed, _and_ any member of the `preservation` organization is allowed. +Any user that doesn't meet any of the allow rules will not be allowed. 
+ +| user | allowed | reason | +| ----- | ------- | ------------------------------------------------------- | +| art | True | in `allowed_users` | +| amena | True | member of `preservation` | +| tlacy | False | not in `allowed_users` and not member of `preservation` | + +### `allow_all` + +The first and simplest way to allow access is to any user who can successfully authenticate: + +```python +c.OAuthenticator.allow_all = True +``` + +This is appropriate when you use an authentication provider (e.g. an institutional single-sign-on provider), where everyone who has an account in the provider should have access to your Hub. +It may also be appropriate for unadvertised short-lived hubs, e.g. dedicated hubs for workshops that will be shutdown after a day, where you may decide it is acceptable to allow anyone who finds your hub to login. + +If `allow_all` is enabled, no other `allow` configuration will have any effect. + +```{seealso} +Configuration documentation for {attr}`.OAuthenticator.allow_all` +``` + +### `allowed_users` + +This is top-level JupyterHub configuration, shared by all Authenticators. +This specifies a list of users that are allowed by name. +This is the simplest authorization mechanism when you have a small group of users whose usernames you know: + +```python +c.OAuthenticator.allowed_users = {"mensah", "ratthi"} +``` + +If this is your only configuration, only these users will be allowed, no others. + +Note that any additional usernames in the deprecated `admin_users` configuration will also be allowed to login. + +```{seealso} +Configuration documentation for {attr}`.OAuthenticator.allowed_users` +``` + +### `allow_existing_users` + +JupyterHub can allow you to add and remove users while the Hub is running via the admin page. +If you add or remove users this way, they will be added to the JupyterHub database, but they will not be able to login unless they are also granted access via an `allow` rule. 
+ +To enable managing users via the admin panel, set + +```python +c.OAuthenticator.allow_existing_users = True +``` + +```{warning} +Enabling `allow_existing_users` means that _removing_ users from any explicit allow mechanisms will no longer revoke their access. +Once the user has been added to the database, the only way to revoke their access to the hub is to remove the user from JupyterHub entirely, via the admin page. +``` + +```{seealso} +Configuration documentation for {attr}`.OAuthenticator.allow_existing_users` +``` + +### Provider-specific rules + +Each OAuthenticator provider may have its own provider-specific rules to allow groups of users access, such as: + +- {attr}`.GitHubOAuthenticator.allowed_organizations` +- {attr}`.GitLabOAuthenticator.allowed_gitlab_groups` +- {attr}`.GlobusOAuthenticator.allowed_globus_groups` +- {attr}`.GoogleOAuthenticator.allowed_google_groups` + +## Blocking Access + +It's possible that you want to limit who has access to your Hub to less than all of the users granted access by your `allow` configuration. +`block` configuration always has higher priority than `allow` configuration, so if a user is both allowed _and_ blocked, they will not be able to login. + +The only `block` configuration is the base Authenticator's `blocked_users`, +a set of usernames that will not be allowed to login. + +### Revoking previously-allowed access + +Any users who have logged in previously will be present in the JupyterHub database. +Removing a user's login permissions (e.g. removing them from a GitLab project when using {attr}`.GitLabOAuthenticator.project_ids`) only prevents future logins; +it does not remove the user from the JupyterHub database. +This means that: + +1. any API tokens that the user still has access to will continue to be valid, and can continue to be used +2. any still-valid browser sessions will continue to be logged in. 
+ +```{important} +To fully remove a user's access to JupyterHub, +their login permission must be revoked _and_ their user fully deleted from the Hub, +e.g. via the admin page. +``` diff --git a/examples/auth_state/jupyterhub_config.py b/examples/auth_state/jupyterhub_config.py index dbb143d3..2852361f 100644 --- a/examples/auth_state/jupyterhub_config.py +++ b/examples/auth_state/jupyterhub_config.py @@ -5,6 +5,7 @@ 2. pass select auth_state to Spawner via environment variables 3. enable auth_state via `JUPYTERHUB_CRYPT_KEY` and `enable_auth_state = True` """ + import os import pprint import warnings diff --git a/examples/mock-provider/README.md b/examples/mock-provider/README.md new file mode 100644 index 00000000..8e022e19 --- /dev/null +++ b/examples/mock-provider/README.md @@ -0,0 +1,21 @@ +# Generic OAuth with mock provider + +This example uses [mock-oauth2-server] to launch a standalone local OAuth2 provider and configures GenericOAuthenticator to use it. + +mock-oauth2-server implements OpenID Connect (OIDC), and can be used to test GenericOAuthenticator configurations for use with OIDC providers without needing to register your application with a real OAuth provider. + +[mock-oauth2-server]: https://github.com/navikt/mock-oauth2-server + +To launch the oauth provider in a container: + +``` +docker run --rm -it -p 127.0.0.1:8080:8080 ghcr.io/navikt/mock-oauth2-server:2.1.1 +``` + +Then launch JupyterHub: + +``` +jupyterhub +``` + +When you login, you will be presented with a form allowing you to specify the username, and (optionally) any additional fields that should be present in the `userinfo` response. 
diff --git a/examples/mock-provider/jupyterhub_config.py b/examples/mock-provider/jupyterhub_config.py new file mode 100644 index 00000000..60c9f09a --- /dev/null +++ b/examples/mock-provider/jupyterhub_config.py @@ -0,0 +1,27 @@ +c = get_config() # noqa + +c.JupyterHub.authenticator_class = "generic-oauth" + +# assumes oauth provider run with: +# docker run --rm -it -p 127.0.0.1:8080:8080 ghcr.io/navikt/mock-oauth2-server:2.1.1 + +provider = "http://127.0.0.1:8080/default" +c.GenericOAuthenticator.authorize_url = f"{provider}/authorize" +c.GenericOAuthenticator.token_url = f"{provider}/token" +c.GenericOAuthenticator.userdata_url = f"{provider}/userinfo" +c.GenericOAuthenticator.scope = ["openid", "somescope", "otherscope"] + +# these are the defaults. They can be configured at http://localhost:8080/default/debugger +c.GenericOAuthenticator.client_id = "debugger" +c.GenericOAuthenticator.client_secret = "someSecret" + +# 'sub' is the first field in the login form +c.GenericOAuthenticator.username_claim = "sub" + +c.GenericOAuthenticator.allow_all = True +c.GenericOAuthenticator.admin_users = {"admin"} + +# demo boilerplate +c.JupyterHub.default_url = "/hub/home" +c.JupyterHub.spawner_class = "simple" +c.JupyterHub.ip = "127.0.0.1" diff --git a/oauthenticator/_version.py b/oauthenticator/_version.py index 01d20b3a..4716f12f 100644 --- a/oauthenticator/_version.py +++ b/oauthenticator/_version.py @@ -1,7 +1,7 @@ # __version__ should be updated using tbump, based on configuration in # pyproject.toml, according to instructions in RELEASE.md. 
# -__version__ = "16.2.2.dev" +__version__ = "16.3.1.dev" # version_info looks like (1, 2, 3, "dev") if __version__ is 1.2.3.dev version_info = tuple(int(p) if p.isdigit() else p for p in __version__.split(".")) diff --git a/oauthenticator/auth0.py b/oauthenticator/auth0.py index 31b2fa27..b9fa2157 100644 --- a/oauthenticator/auth0.py +++ b/oauthenticator/auth0.py @@ -1,6 +1,7 @@ """ A JupyterHub authenticator class for use with Auth0 as an identity provider. """ + import os from jupyterhub.auth import LocalAuthenticator diff --git a/oauthenticator/azuread.py b/oauthenticator/azuread.py index 48359864..e4e6682b 100644 --- a/oauthenticator/azuread.py +++ b/oauthenticator/azuread.py @@ -1,6 +1,7 @@ """ A JupyterHub authenticator class for use with Azure AD as an identity provider. """ + import os import jwt diff --git a/oauthenticator/bitbucket.py b/oauthenticator/bitbucket.py index 8221bbfa..6a836f2e 100644 --- a/oauthenticator/bitbucket.py +++ b/oauthenticator/bitbucket.py @@ -1,6 +1,7 @@ """ A JupyterHub authenticator class for use with Bitbucket as an identity provider. """ + import os from jupyterhub.auth import LocalAuthenticator diff --git a/oauthenticator/cilogon.py b/oauthenticator/cilogon.py index b11739fc..bf08f14e 100644 --- a/oauthenticator/cilogon.py +++ b/oauthenticator/cilogon.py @@ -1,6 +1,7 @@ """ A JupyterHub authenticator class for use with CILogon as an identity provider. """ + import os from fnmatch import fnmatch from urllib.parse import urlparse diff --git a/oauthenticator/generic.py b/oauthenticator/generic.py index 31945d6a..2fd07be7 100644 --- a/oauthenticator/generic.py +++ b/oauthenticator/generic.py @@ -1,6 +1,7 @@ """ A JupyterHub authenticator class for use with any OAuth2 based identity provider. """ + import os from functools import reduce @@ -27,8 +28,8 @@ def _login_service_default(self): that accepts the returned json (as a dict) and returns the groups list. 
This configures how group membership in the upstream provider is determined - for use by `allowed_groups`, `admin_groups`, etc. - It has no effect on its own, and is not related to users' _JupyterHub_ group membership. + for use by `allowed_groups`, `admin_groups`, etc. If `manage_groups` is True, + this will also determine users' _JupyterHub_ group membership. """, ) @@ -60,20 +61,6 @@ def _login_service_default(self): """, ) - username_claim = Union( - [Unicode(os.environ.get('OAUTH2_USERNAME_KEY', 'username')), Callable()], - config=True, - help=""" - When `userdata_url` returns a json response, the username will be taken - from this key. - - Can be a string key name or a callable that accepts the returned - userdata json (as a dict) and returns the username. The callable is - useful e.g. for extracting the username from a nested object in the - response. - """, - ) - @default("http_client") def _default_http_client(self): return AsyncHTTPClient( @@ -113,17 +100,6 @@ def _default_http_client(self): """, ) - def user_info_to_username(self, user_info): - """ - Overrides OAuthenticator.user_info_to_username to support the - GenericOAuthenticator unique feature of allowing username_claim to be a - callable function. - """ - if callable(self.username_claim): - return self.username_claim(user_info) - else: - return super().user_info_to_username(user_info) - def get_user_groups(self, user_info): """ Returns a set of groups the user belongs to based on claim_groups_key @@ -154,15 +130,22 @@ async def update_auth_model(self, auth_model): the user isn't part of `admin_users` or `admin_groups`. Note that leaving it at None makes users able to retain an admin status while setting it to False makes it be revoked. + + Also populates groups if `manage_groups` is set. 
""" + if self.manage_groups or self.admin_groups: + user_info = auth_model["auth_state"][self.user_auth_state_key] + user_groups = self.get_user_groups(user_info) + + if self.manage_groups: + auth_model["groups"] = sorted(user_groups) + if auth_model["admin"]: # auth_model["admin"] being True means the user was in admin_users return auth_model if self.admin_groups: # admin status should in this case be True or False, not None - user_info = auth_model["auth_state"][self.user_auth_state_key] - user_groups = self.get_user_groups(user_info) auth_model["admin"] = bool(user_groups & self.admin_groups) return auth_model diff --git a/oauthenticator/github.py b/oauthenticator/github.py index 29535e81..fcc63b3b 100644 --- a/oauthenticator/github.py +++ b/oauthenticator/github.py @@ -1,6 +1,7 @@ """ A JupyterHub authenticator class for use with GitHub as an identity provider. """ + import json import os import warnings diff --git a/oauthenticator/gitlab.py b/oauthenticator/gitlab.py index 1d32fe34..dde71b15 100644 --- a/oauthenticator/gitlab.py +++ b/oauthenticator/gitlab.py @@ -1,6 +1,7 @@ """ A JupyterHub authenticator class for use with GitLab as an identity provider. """ + import os import warnings from urllib.parse import quote diff --git a/oauthenticator/globus.py b/oauthenticator/globus.py index 1e19a80d..09066e85 100644 --- a/oauthenticator/globus.py +++ b/oauthenticator/globus.py @@ -1,6 +1,7 @@ """ A JupyterHub authenticator class for use with Globus as an identity provider. """ + import base64 import os import pickle diff --git a/oauthenticator/google.py b/oauthenticator/google.py index 49f506d0..5ed613d9 100644 --- a/oauthenticator/google.py +++ b/oauthenticator/google.py @@ -1,6 +1,7 @@ """ A JupyterHub authenticator class for use with Google as an identity provider. """ + import os from jupyterhub.auth import LocalAuthenticator @@ -110,19 +111,26 @@ def _userdata_url_default(self): help=""" This config has two functions. - 1. 
Restrict sign-in to a list of email domain names, such as - `["mycollege.edu"]` or `["college1.edu", "college2.edu"]`. - 2. If a single domain is specified, the username will be stripped to exclude the `@domain` part. + 1. Restrict sign-in to users part of Google organizations/workspaces + managing domains, such as `["mycollege.edu"]` or `["college1.edu", + "college2.edu"]`. + 2. If a single domain is specified, usernames with that domain will be + stripped to exclude the `@domain` part. - Note that users with email domains in this list must still be allowed - via another config, such as `allow_all`, `allowed_users`, or - `allowed_google_groups`. + Users not restricted by this configuration must still be explicitly + allowed by a configuration intended to allow users, like `allow_all`, + `allowed_users`, or `allowed_google_groups`. + + .. warning:: + + Changing this config either to or from having a single entry is a + disruptive change as the same Google user will get a new username, + either without or with a domain name included. + + .. versionchanged:: 16.1 - ```{warning} Disruptive config changes - Changing this config either to or from having a single entry is a - disruptive change as the same Google user will get a new username, - either without or with a domain name included. - ``` + Now restricts sign-in based on the hd claim, not the domain in the + user's email. """, ) @@ -173,8 +181,19 @@ def user_info_to_username(self, user_info): user_email = user_info["email"] user_domain = user_info["domain"] = user_email.split("@")[1].lower() - if len(self.hosted_domain) == 1 and self.hosted_domain[0] == user_domain: - # unambiguous domain, use only base name + # NOTE: This is not an authorization check, it just about username + # derivation. Decoupling hosted_domain from this is considered in + # https://github.com/jupyterhub/oauthenticator/issues/733. 
+ # + # NOTE: This code is written without knowing for sure if the user + # email's domain could be different from the domain in hd, so we + # assume it could be even though it seems like it can't be. If a + # Google organization/workspace manages users in a "primary + # domain" and a "secondary domain", users with respective email + # domain have their hd field set respectively. + # + if len(self.hosted_domain) == 1 and user_domain == self.hosted_domain[0]: + # strip the domain in this situation username = username.split("@")[0] return username @@ -213,6 +232,28 @@ async def update_auth_model(self, auth_model): return auth_model + def check_blocked_users(self, username, auth_model): + """ + Overrides `Authenticator.check_blocked_users` to not only block users in + `Authenticator.blocked_users`, but to also enforce + `GoogleOAuthenticator.hosted_domain` if it's configured. + + When hosted_domain is configured, users are required to be part of + listed Google organizations/workspaces. + + Returns False if the user is blocked, otherwise True. + """ + user_info = auth_model["auth_state"][self.user_auth_state_key] + + # hd ref: https://developers.google.com/identity/openid-connect/openid-connect#id_token-hd + hd = user_info.get("hd", "") + + if self.hosted_domain and hd not in self.hosted_domain: + self.log.warning(f"Blocked {username} with 'hd={hd}' not in hosted_domain") + return False + + return super().check_blocked_users(username, auth_model) + async def check_allowed(self, username, auth_model): """ Overrides the OAuthenticator.check_allowed to also allow users part of @@ -235,19 +276,6 @@ async def check_allowed(self, username, auth_model): self.log.warning(message) raise HTTPError(403, message) - # NOTE: If hosted_domain is configured as ["a.com", "b.com"], and - # allowed_google_groups is declared as {"a.com": {"a-group"}}, a - # "b.com" user won't be authorized unless allowed in another way. 
- # - # This means that its not possible to allow all users of a given - # domain if one wants to restrict another. - # - if self.hosted_domain: - if user_domain not in self.hosted_domain: - message = f"Login with domain @{user_domain} is not allowed" - self.log.warning(message) - raise HTTPError(403, message) - if await super().check_allowed(username, auth_model): return True diff --git a/oauthenticator/mediawiki.py b/oauthenticator/mediawiki.py index ee3c46bd..04677f83 100644 --- a/oauthenticator/mediawiki.py +++ b/oauthenticator/mediawiki.py @@ -1,6 +1,7 @@ """ A JupyterHub authenticator class for use with MediaWiki as an identity provider. """ + import json import os from asyncio import wrap_future diff --git a/oauthenticator/oauth2.py b/oauthenticator/oauth2.py index 5d2ce41a..f3f55551 100644 --- a/oauthenticator/oauth2.py +++ b/oauthenticator/oauth2.py @@ -3,12 +3,14 @@ Founded based on work by Kyle Kelley (@rgbkrk) """ + import base64 import json import os import uuid from urllib.parse import quote, urlencode, urlparse, urlunparse +import jwt from jupyterhub.auth import Authenticator from jupyterhub.crypto import EncryptionUnavailable, InvalidToken, decrypt from jupyterhub.handlers import BaseHandler, LogoutHandler @@ -18,7 +20,7 @@ from tornado.httpclient import AsyncHTTPClient, HTTPClientError, HTTPRequest from tornado.httputil import url_concat from tornado.log import app_log -from traitlets import Any, Bool, Dict, List, Unicode, default, validate +from traitlets import Any, Bool, Callable, Dict, List, Unicode, Union, default, validate def guess_callback_uri(protocol, host, hub_server_url): @@ -268,6 +270,8 @@ class OAuthenticator(Authenticator): help=""" Allow all authenticated users to login. + Overrides all other `allow` configuration. + .. versionadded:: 16.0 """, ) @@ -278,37 +282,29 @@ class OAuthenticator(Authenticator): help=""" Allow existing users to login. 
- An existing user is a user in JupyterHub's database of users, and it - includes all users that has previously logged in. + Enable this if you want to manage user access via the JupyterHub admin page (/hub/admin). + + With this enabled, all users present in the JupyterHub database are allowed to login. + This has the effect of any user who has _previously_ been allowed to login + via any means will continue to be allowed until the user is deleted via the /hub/admin page + or REST API. .. warning:: Before enabling this you should review the existing users in the JupyterHub admin panel at `/hub/admin`. You may find users existing - there because they have once been declared in config such as - `allowed_users` or once been allowed to sign in. + there because they have previously been declared in config such as + `allowed_users` or allowed to sign in. .. warning:: - When this is enabled and you are to remove access for one or more - users allowed via other config options, you must make sure that they - are not part of the database of users still. This can be tricky to do + When this is enabled and you wish to remove access for one or more + users previously allowed, you must make sure that they + are removed from the jupyterhub database. This can be tricky to do if you stop allowing a group of externally managed users for example. With this enabled, JupyterHub admin users can visit `/hub/admin` or use - JupyterHub's REST API to add and remove users as a way to allow them - access. - - The username for existing users must match the normalized username - returned by the authenticator. When creating users, only lowercase - letters should be used unless `MWOAuthenticator` is used. - - .. note:: - - Allowing existing users is done by adding existing users on startup - and newly created users to the `allowed_users` set. Due to that, you - can't rely on this config to independently allow existing users if - you for example would reset `allowed_users` after startup. 
+ JupyterHub's REST API to add and remove users to manage who can login. .. versionadded:: 16.0 @@ -359,6 +355,24 @@ def _authorize_url_default(self): def _token_url_default(self): return os.environ.get("OAUTH2_TOKEN_URL", "") + userdata_from_id_token = Bool( + False, + config=True, + help=""" + Extract user details from an id token received via a request to + :attr:`token_url`, rather than making a follow-up request to the + userinfo endpoint :attr:`userdata_url`. + + Should only be used if :attr:`token_url` uses HTTPS, to ensure + token authenticity. + + For more context, see `Authentication using the Authorization + Code Flow + `_ + in the OIDC Core standard document. + """, + ) + userdata_url = Unicode( config=True, help=""" @@ -369,6 +383,8 @@ def _token_url_default(self): For more context, see the `Protocol Flow section `_ in the OAuth2 standard document, specifically steps E-F. + + Incompatible with :attr:`userdata_from_id_token`. """, ) @@ -376,14 +392,25 @@ def _token_url_default(self): def _userdata_url_default(self): return os.environ.get("OAUTH2_USERDATA_URL", "") - username_claim = Unicode( - "username", + @validate("userdata_url") + def _validate_userdata_url(self, proposal): + if proposal.value and self.userdata_from_id_token: + raise ValueError( + "Cannot specify both authenticator.userdata_url and authenticator.userdata_from_id_token." + ) + return proposal.value + + username_claim = Union( + [Unicode(os.environ.get('OAUTH2_USERNAME_KEY', 'username')), Callable()], config=True, help=""" - The key to get the JupyterHub username from in the data response to the - request made to :attr:`userdata_url`. + When `userdata_url` returns a json response, the username will be taken + from this key. - Examples include: email, username, nickname + Can be a string key name or a callable that accepts the returned + userdata json (as a dict) and returns the username. The callable is + useful e.g. 
for extracting the username from a nested object in the + response or doing other post processing. What keys are available will depend on the scopes requested and the authenticator used. @@ -798,9 +825,15 @@ def user_info_to_username(self, user_info): Called by the :meth:`oauthenticator.OAuthenticator.authenticate` """ - username = user_info.get(self.username_claim, None) + + if callable(self.username_claim): + username = self.username_claim(user_info) + else: + username = user_info.get(self.username_claim, None) if not username: - message = (f"No {self.username_claim} found in {user_info}",) + message = ( + f"No {self.username_claim} found in {user_info}. Maybe the hub needs to be configured to request more scopes?", + ) self.log.error(message) raise ValueError(message) @@ -893,6 +926,9 @@ async def token_to_user(self, token_info): Determines who the logged-in user by sending a "GET" request to :data:`oauthenticator.OAuthenticator.userdata_url` using the `access_token`. + If :data:`oauthenticator.OAuthenticator.userdata_from_id_token` is set then + extracts the corresponding info from an `id_token` instead. + Args: token_info: the dictionary returned by the token request (exchanging the OAuth code for an Access Token) @@ -901,6 +937,32 @@ async def token_to_user(self, token_info): Called by the :meth:`oauthenticator.OAuthenticator.authenticate` """ + if self.userdata_from_id_token: + # Use id token instead of exchanging access token with userinfo endpoint. + id_token = token_info.get("id_token", None) + if not id_token: + raise web.HTTPError( + 500, + f"An id token was not returned: {token_info}\nPlease configure authenticator.userdata_url", + ) + try: + # Here we parse the id token. Note that per OIDC spec (core v1.0 sect. 3.1.3.7.6) we can skip + # signature validation as the hub has obtained the tokens from the id provider directly (using + # https). Google suggests all token validation may be skipped assuming the provider is trusted. 
+ # https://openid.net/specs/openid-connect-core-1_0.html#IDTokenValidation + # https://developers.google.com/identity/openid-connect/openid-connect#obtainuserinfo + return jwt.decode( + id_token, + audience=self.client_id, + options=dict( + verify_signature=False, verify_aud=True, verify_exp=True + ), + ) + except Exception as err: + raise web.HTTPError( + 500, f"Unable to decode id token: {id_token}\n{err}" + ) + access_token = token_info["access_token"] token_type = token_info["token_type"] @@ -1119,3 +1181,18 @@ def __init__(self, **kwargs): self._deprecated_oauth_trait, names=list(self._deprecated_oauth_aliases) ) super().__init__(**kwargs) + + +# patch allowed_users help string to match our definition +# base Authenticator class help string gives the wrong impression +# when combined with other allow options +OAuthenticator.class_traits()[ + "allowed_users" +].help = """ +Set of usernames that should be allowed to login. + +If unspecified, grants no access. You must set at least one other `allow` configuration +if any users are to have permission to access the Hub. + +Any usernames in `admin_users` will also be allowed to login. +""" diff --git a/oauthenticator/openshift.py b/oauthenticator/openshift.py index 599f989f..6b016b0d 100644 --- a/oauthenticator/openshift.py +++ b/oauthenticator/openshift.py @@ -1,6 +1,7 @@ """ A JupyterHub authenticator class for use with OpenShift as an identity provider. 
""" + import concurrent.futures import json import os diff --git a/oauthenticator/tests/conftest.py b/oauthenticator/tests/conftest.py index 30fe93e0..3ffbdf8c 100644 --- a/oauthenticator/tests/conftest.py +++ b/oauthenticator/tests/conftest.py @@ -1,4 +1,5 @@ """Py.Test fixtures""" + from pytest import fixture from tornado.httpclient import AsyncHTTPClient diff --git a/oauthenticator/tests/mocks.py b/oauthenticator/tests/mocks.py index cd8a0b05..efeac575 100644 --- a/oauthenticator/tests/mocks.py +++ b/oauthenticator/tests/mocks.py @@ -1,4 +1,5 @@ """Mocking utilities for testing""" + import json import os import re @@ -129,9 +130,6 @@ def setup_oauth_mock( scope (str): The scope field returned by the provider """ - if user_path is None and token_request_style != "jwt": - raise TypeError("user_path is required unless token_request_style is jwt") - client.oauth_codes = oauth_codes = {} client.access_tokens = access_tokens = {} @@ -168,7 +166,7 @@ def access_token(request): } if scope: model['scope'] = scope - if token_request_style == 'jwt': + if 'id_token' in user: model['id_token'] = user['id_token'] return model diff --git a/oauthenticator/tests/test_azuread.py b/oauthenticator/tests/test_azuread.py index b3537dd5..2bf7606c 100644 --- a/oauthenticator/tests/test_azuread.py +++ b/oauthenticator/tests/test_azuread.py @@ -1,4 +1,5 @@ """test azure ad""" + import json import os import re @@ -21,7 +22,6 @@ def azure_client(client): client, host=['login.microsoftonline.com'], access_token_path=re.compile('^/[^/]+/oauth2/token$'), - token_request_style='jwt', ) return client diff --git a/oauthenticator/tests/test_generic.py b/oauthenticator/tests/test_generic.py index c1dd1d68..07e6752a 100644 --- a/oauthenticator/tests/test_generic.py +++ b/oauthenticator/tests/test_generic.py @@ -2,23 +2,33 @@ import re from functools import partial +import jwt from pytest import fixture, mark, raises from traitlets.config import Config from ..generic import GenericOAuthenticator 
from .mocks import setup_oauth_mock +client_id = "jupyterhub-oauth-client" + def user_model(username, **kwargs): """Return a user model""" return { "username": username, + "aud": client_id, + "sub": "oauth2|cilogon|http://cilogon.org/servera/users/43431", "scope": "basic", "groups": ["group1"], **kwargs, } +@fixture(params=["id_token", "userdata_url"]) +def userdata_from_id_token(request): + return request.param == "id_token" + + @fixture def generic_client(client): setup_oauth_mock( @@ -31,10 +41,31 @@ def generic_client(client): return client +@fixture +def generic_client_variant(client, userdata_from_id_token): + setup_oauth_mock( + client, + host='generic.horse', + access_token_path='/oauth/access_token', + user_path='/oauth/userinfo', + ) + return client + + def _get_authenticator(**kwargs): return GenericOAuthenticator( token_url='https://generic.horse/oauth/access_token', userdata_url='https://generic.horse/oauth/userinfo', + client_id=client_id, + **kwargs, + ) + + +def _get_authenticator_for_id_token(**kwargs): + return GenericOAuthenticator( + token_url='https://generic.horse/oauth/access_token', + userdata_from_id_token=True, + client_id=client_id, **kwargs, ) @@ -47,6 +78,21 @@ def get_authenticator(generic_client): return partial(_get_authenticator, http_client=generic_client) +@fixture +def get_authenticator_variant(generic_client, userdata_from_id_token): + """ + http_client can't be configured, only passed as argument to the constructor. 
+ """ + return partial( + ( + _get_authenticator_for_id_token + if userdata_from_id_token + else _get_authenticator + ), + http_client=generic_client, + ) + + @mark.parametrize( "test_variation_id,class_config,expect_allowed,expect_admin", [ @@ -153,29 +199,47 @@ def get_authenticator(generic_client): False, False, ), + ( + "20", + { + "manage_groups": True, + "allow_all": True, + }, + True, + None, + ), ], ) async def test_generic( - get_authenticator, - generic_client, + get_authenticator_variant, + generic_client_variant, test_variation_id, class_config, expect_allowed, expect_admin, + userdata_from_id_token, ): print(f"Running test variation id {test_variation_id}") c = Config() c.GenericOAuthenticator = Config(class_config) c.GenericOAuthenticator.username_claim = "username" - authenticator = get_authenticator(config=c) + authenticator = get_authenticator_variant(config=c) + manage_groups = False + if "manage_groups" in class_config: + manage_groups = authenticator.manage_groups handled_user_model = user_model("user1") - handler = generic_client.handler_for_user(handled_user_model) + if userdata_from_id_token: + handled_user_model = dict(id_token=jwt.encode(handled_user_model, key="foo")) + handler = generic_client_variant.handler_for_user(handled_user_model) auth_model = await authenticator.get_authenticated_user(handler, None) if expect_allowed: assert auth_model - assert set(auth_model) == {"name", "admin", "auth_state"} + expected_keys = {"name", "admin", "auth_state"} + if manage_groups: + expected_keys.add("groups") + assert set(auth_model) == expected_keys assert auth_model["admin"] == expect_admin auth_state = auth_model["auth_state"] assert json.dumps(auth_state) @@ -185,10 +249,39 @@ async def test_generic( assert "scope" in auth_state user_info = auth_state[authenticator.user_auth_state_key] assert auth_model["name"] == user_info[authenticator.username_claim] + if manage_groups: + assert auth_model["groups"] == 
user_info[authenticator.claim_groups_key] + else: assert auth_model == None +async def test_username_claim_callable( + get_authenticator, + generic_client, +): + c = Config() + c.GenericOAuthenticator = Config() + + def username_claim(user_info): + username = user_info["sub"] + if username.startswith("oauth2|cilogon"): + cilogon_sub = username.rsplit("|", 1)[-1] + cilogon_sub_parts = cilogon_sub.split("/") + username = f"oauth2|cilogon|{cilogon_sub_parts[3]}|{cilogon_sub_parts[5]}" + return username + + c.GenericOAuthenticator.username_claim = username_claim + c.GenericOAuthenticator.allow_all = True + authenticator = get_authenticator(config=c) + + handled_user_model = user_model("user1") + handler = generic_client.handler_for_user(handled_user_model) + auth_model = await authenticator.get_authenticated_user(handler, None) + + assert auth_model["name"] == "oauth2|cilogon|servera|43431" + + async def test_generic_data(get_authenticator, generic_client): c = Config() c.GenericOAuthenticator.allow_all = True diff --git a/oauthenticator/tests/test_google.py b/oauthenticator/tests/test_google.py index 28adeb0c..3aeb652d 100644 --- a/oauthenticator/tests/test_google.py +++ b/oauthenticator/tests/test_google.py @@ -5,22 +5,23 @@ from unittest import mock from pytest import fixture, mark, raises -from tornado.web import HTTPError from traitlets.config import Config from ..google import GoogleOAuthenticator from .mocks import setup_oauth_mock -def user_model(email, username="user1"): +def user_model(email, username="user1", hd=None): """Return a user model""" - return { + model = { 'sub': hashlib.md5(email.encode()).hexdigest(), 'email': email, 'custom': username, - 'hd': email.split('@')[1], 'verified_email': True, } + if hd: + model['hd'] = hd + return model @fixture @@ -187,37 +188,49 @@ async def test_google( assert auth_model == None -async def test_hosted_domain_single_entry(google_client): +@mark.parametrize( + 
"test_variation_id,user_email,user_hd,expect_username,expect_allowed,expect_admin", + [ + ("01", "user1@ok-hd.orG", "ok-hd.org", "user1", True, True), + ("02", "user2@ok-hd.orG", "ok-hd.org", "user2", True, None), + ("03", "blocked@ok-hd.org", "ok-hd.org", None, False, None), + ("04", "user2@ok-hd.org", "", None, False, None), + ("05", "user1@not-ok.org", "", None, False, None), + # Test variation 06 below isn't believed to be possible, but since we + # aren't sure this test clarifies what we expect to happen. + ("06", "user1@other.org", "ok-hd.org", "user1@other.org", True, None), + ], +) +async def test_hosted_domain_single_entry( + google_client, + test_variation_id, + user_email, + user_hd, + expect_username, + expect_allowed, + expect_admin, +): """ Tests that sign in is restricted to the listed domain and that the username represents the part before the `@domain.com` as expected when hosted_domain contains a single entry. """ c = Config() - c.GoogleOAuthenticator.hosted_domain = ["In-Hosted-Domain.com"] + c.GoogleOAuthenticator.hosted_domain = ["ok-hd.org"] c.GoogleOAuthenticator.admin_users = {"user1"} - c.GoogleOAuthenticator.allowed_users = {"user2"} + c.GoogleOAuthenticator.allowed_users = {"user2", "blocked", "user1@other.org"} + c.GoogleOAuthenticator.blocked_users = {"blocked"} authenticator = GoogleOAuthenticator(config=c) - handled_user_model = user_model("user1@iN-hosteD-domaiN.com") + handled_user_model = user_model(user_email, hd=user_hd) handler = google_client.handler_for_user(handled_user_model) auth_model = await authenticator.get_authenticated_user(handler, None) - assert auth_model - assert auth_model["name"] == "user1" - assert auth_model["admin"] == True - - handled_user_model = user_model("user2@iN-hosteD-domaiN.com") - handler = google_client.handler_for_user(handled_user_model) - auth_model = await authenticator.get_authenticated_user(handler, None) - assert auth_model - assert auth_model["name"] == "user2" - assert auth_model["admin"] 
== None - - handled_user_model = user_model("user1@not-in-hosted-domain.com") - handler = google_client.handler_for_user(handled_user_model) - with raises(HTTPError) as exc: - await authenticator.get_authenticated_user(handler, None) - assert exc.value.status_code == 403 + if expect_allowed: + assert auth_model + assert auth_model["name"] == expect_username + assert auth_model["admin"] == expect_admin + else: + assert auth_model == None @mark.parametrize( @@ -235,7 +248,27 @@ async def test_check_allowed_no_auth_state(google_client, name, allowed): assert await authenticator.check_allowed(name, None) -async def test_hosted_domain_multiple_entries(google_client): +@mark.parametrize( + "test_variation_id,user_email,user_hd,expect_username,expect_allowed", + [ + ("01", "user1@ok-hd1.orG", "ok-hd1.org", "user1@ok-hd1.org", True), + ("02", "user2@ok-hd2.orG", "ok-hd2.org", "user2@ok-hd2.org", True), + ("03", "blocked@ok-hd1.org", "ok-hd1.org", None, False), + ("04", "user3@ok-hd1.org", "", None, False), + ("05", "user1@not-ok.org", "", None, False), + # Test variation 06 below isn't believed to be possible, but since we + # aren't sure this test clarifies what we expect to happen. 
+ ("06", "user1@other.org", "ok-hd1.org", "user1@other.org", True), + ], +) +async def test_hosted_domain_multiple_entries( + google_client, + test_variation_id, + user_email, + user_hd, + expect_username, + expect_allowed, +): """ Tests that sign in is restricted to the listed domains and that the username represents the full email as expected when hosted_domain contains multiple @@ -243,29 +276,21 @@ async def test_hosted_domain_multiple_entries(google_client): """ c = Config() c.GoogleOAuthenticator.hosted_domain = [ - "In-Hosted-Domain1.com", - "In-Hosted-Domain2.com", + "ok-hd1.org", + "ok-hd2.ORG", ] + c.GoogleOAuthenticator.blocked_users = ["blocked@ok-hd1.org"] c.GoogleOAuthenticator.allow_all = True authenticator = GoogleOAuthenticator(config=c) - handled_user_model = user_model("user1@iN-hosteD-domaiN1.com") + handled_user_model = user_model(user_email, hd=user_hd) handler = google_client.handler_for_user(handled_user_model) auth_model = await authenticator.get_authenticated_user(handler, None) - assert auth_model - assert auth_model["name"] == "user1@in-hosted-domain1.com" - - handled_user_model = user_model("user2@iN-hosteD-domaiN2.com") - handler = google_client.handler_for_user(handled_user_model) - auth_model = await authenticator.get_authenticated_user(handler, None) - assert auth_model - assert auth_model["name"] == "user2@in-hosted-domain2.com" - - handled_user_model = user_model("user1@not-in-hosted-domain.com") - handler = google_client.handler_for_user(handled_user_model) - with raises(HTTPError) as exc: - await authenticator.get_authenticated_user(handler, None) - assert exc.value.status_code == 403 + if expect_allowed: + assert auth_model + assert auth_model["name"] == expect_username + else: + assert auth_model == None @mark.parametrize( diff --git a/pyproject.toml b/pyproject.toml index ec038069..58806384 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,7 +64,7 @@ omit = [ github_url = "https://github.com/jupyterhub/oauthenticator" 
[tool.tbump.version] -current = "16.2.2.dev" +current = "16.3.1.dev" regex = ''' (?P\d+) \. diff --git a/requirements.txt b/requirements.txt index 74438405..53cc9bc4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,9 @@ # jsonschema is used for validating authenticator configurations jsonschema -jupyterhub>=1.2 +jupyterhub>=2.2 +# PyJWT is used for parsing id tokens +# and azuread +pyjwt>=2 # requests is already required by JupyterHub, but explicitly ask for it since we use it requests # ruamel.yaml is used to read and write .yaml files. diff --git a/setup.py b/setup.py index 61fe2fab..2e5fb18a 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ def run(self): setup_args = dict( name='oauthenticator', packages=find_packages(), - version="16.2.2.dev", + version="16.3.1.dev", description="OAuthenticator: Authenticate JupyterHub users with common OAuth providers", long_description=open("README.md").read(), long_description_content_type="text/markdown", @@ -88,8 +88,6 @@ def run(self): setup_args['extras_require'] = { - # azuread is required for use of AzureADOAuthenticator - 'azuread': ['pyjwt>=2'], # googlegroups is required for use of GoogleOAuthenticator configured with # either admin_google_groups and/or allowed_google_groups. 'googlegroups': [ @@ -106,8 +104,6 @@ def run(self): 'pytest-asyncio>=0.17,<0.23', 'pytest-cov', 'requests-mock', - # dependencies from azuread: - 'pyjwt>=2', # dependencies from googlegroups: 'google-api-python-client', 'google-auth-oauthlib',