diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1c27d4e846..539b0f293e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   ([#1199](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1199))
 - Add metric instrumentation in Pyramid
   ([#1242](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1242))
+- `opentelemetry-util-http` Add support for sanitizing HTTP header values.
+  ([#1253](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1253))
 
 ### Fixed
 
diff --git a/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py b/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py
index 9222b9c727..22ff9dda9a 100644
--- a/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py
+++ b/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 from os import environ
+from re import IGNORECASE as RE_IGNORECASE
 from re import compile as re_compile
 from re import search
 from typing import Iterable, List
@@ -20,6 +21,9 @@
 
 from opentelemetry.semconv.trace import SpanAttributes
 
+OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS = (
+    "OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS"
+)
 OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST = (
     "OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST"
 )
@@ -60,6 +64,22 @@ def url_disabled(self, url: str) -> bool:
         return bool(self._excluded_urls and search(self._regex, url))
 
 
+class SanitizeValue:
+    """Class to sanitize (remove sensitive data from) certain headers (given as a list of regexes)"""
+
+    def __init__(self, sanitized_fields: Iterable[str]):
+        self._sanitized_fields = sanitized_fields
+        if self._sanitized_fields:
+            self._regex = re_compile("|".join(sanitized_fields), RE_IGNORECASE)
+
+    def sanitize_header_value(self, header: str, value: str) -> str:
+        return (
+            "[REDACTED]"
+            if (self._sanitized_fields and search(self._regex, header))
+            else value
+        )
+
+
 _root = r"OTEL_PYTHON_{}"
 
 
@@ -90,7 +110,7 @@ def get_excluded_urls(instrumentation: str) -> ExcludeList:
 
 def parse_excluded_urls(excluded_urls: str) -> ExcludeList:
     """
-    Small helper to put an arbitrary url list inside of ExcludeList
+    Small helper to put an arbitrary url list inside an ExcludeList
     """
     if excluded_urls:
         excluded_url_list = [
diff --git a/util/opentelemetry-util-http/tests/test_capture_custom_headers.py b/util/opentelemetry-util-http/tests/test_capture_custom_headers.py
index e6e1583ffb..24f0ee0ada 100644
--- a/util/opentelemetry-util-http/tests/test_capture_custom_headers.py
+++ b/util/opentelemetry-util-http/tests/test_capture_custom_headers.py
@@ -16,8 +16,10 @@
 from unittest.mock import patch
 
 from opentelemetry.util.http import (
+    OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS,
     OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST,
     OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE,
+    SanitizeValue,
     get_custom_headers,
     normalise_request_header_name,
     normalise_response_header_name,
@@ -58,6 +60,48 @@ def test_get_custom_response_header(self):
             ],
         )
 
+    @patch.dict(
+        "os.environ",
+        {
+            OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS: "My-Secret-Header,My-Secret-Header-2"
+        },
+    )
+    def test_get_custom_sanitize_header(self):
+        sanitized_fields = get_custom_headers(
+            OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS
+        )
+        self.assertEqual(
+            sanitized_fields,
+            ["My-Secret-Header", "My-Secret-Header-2"],
+        )
+
+    @patch.dict(
+        "os.environ",
+        {
+            OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS: "My-Secret-Header,My-Secret-Header-2"
+        },
+    )
+    def test_sanitize(self):
+        sanitized_fields = get_custom_headers(
+            OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS
+        )
+
+        sanitize = SanitizeValue(sanitized_fields)
+
+        self.assertEqual(
+            sanitize.sanitize_header_value(
+                header="My-Secret-Header", value="My-Secret-Value"
+            ),
+            "[REDACTED]",
+        )
+
+        self.assertEqual(
+            sanitize.sanitize_header_value(
+                header="My-Not-Secret-Header", value="My-Not-Secret-Value"
+            ),
+            "My-Not-Secret-Value",
+        )
+
     def test_normalise_request_header_name(self):
         key = normalise_request_header_name("Test-Header")
         self.assertEqual(key, "http.request.header.test_header")