diff --git a/airflow/www/views.py b/airflow/www/views.py index 04671e6f5338d..cc08609096add 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -153,6 +153,27 @@ def sanitize_args(args: dict[str, str]) -> dict[str, str]: return {key: value for key, value in args.items() if not key.startswith("_")} +# Following the release of https://github.com/python/cpython/issues/102153 in Python 3.8.17 and 3.9.17 on +# June 6, 2023, we are adding extra sanitization of the URLs passed to the get_safe_url method to make it work +# the same way regardless of whether the user runs the latest Python patch-level versions. This also follows +# the solution recommended by the Python core team. +# +# From: https://github.com/python/cpython/commit/d28bafa2d3e424b6fdcfd7ae7cde8e71d7177369 +# +# We recommend that users of these APIs where the values may be used anywhere +# with security implications code defensively. Do some verification within your +# code before trusting a returned component part. Does that ``scheme`` make +# sense? Is that a sensible ``path``? Is there anything strange about that +# ``hostname``? etc. +# +# C0 control and space to be stripped per WHATWG spec. 
+# == "".join([chr(i) for i in range(0, 0x20 + 1)]) +_WHATWG_C0_CONTROL_OR_SPACE = ( + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c" + "\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f " +) + + def get_safe_url(url): """Given a user-supplied URL, ensure it points to our web server.""" if not url: @@ -163,6 +184,8 @@ def get_safe_url(url): if ";" in unquote(url): return url_for("Airflow.index") + url = url.lstrip(_WHATWG_C0_CONTROL_OR_SPACE) + host_url = urlsplit(request.host_url) redirect_url = urlsplit(urljoin(request.host_url, url)) if not (redirect_url.scheme in ("http", "https") and host_url.netloc == redirect_url.netloc): diff --git a/tests/www/views/test_views.py b/tests/www/views/test_views.py index 0636ba16c7aae..e266b9b8c1a13 100644 --- a/tests/www/views/test_views.py +++ b/tests/www/views/test_views.py @@ -193,7 +193,7 @@ def test_task_dag_id_equals_filter(admin_client, url, content): [ ("", "/home"), ("javascript:alert(1)", "/home"), - (" javascript:alert(1)", "http://localhost:8080/ javascript:alert(1)"), + (" javascript:alert(1)", "/home"), ("http://google.com", "/home"), ("google.com", "http://localhost:8080/google.com"), ("\\/google.com", "http://localhost:8080/\\/google.com"),