diff --git a/CHANGELOG.md b/CHANGELOG.md index c5a8de2443..ed90e63bc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#1818](https://github.com/open-telemetry/opentelemetry-python/pull/1818)) - Update transient errors retry timeout and retryable status codes ([#1842](https://github.com/open-telemetry/opentelemetry-python/pull/1842)) +- Apply validation of attributes to `Resource`, move attribute related logic to separate package. + ([#1834](https://github.com/open-telemetry/opentelemetry-python/pull/1834)) - Fix start span behavior when excess links and attributes are included ([#1856](https://github.com/open-telemetry/opentelemetry-python/pull/1856)) diff --git a/opentelemetry-api/src/opentelemetry/attributes/__init__.py b/opentelemetry-api/src/opentelemetry/attributes/__init__.py new file mode 100644 index 0000000000..6875f56631 --- /dev/null +++ b/opentelemetry-api/src/opentelemetry/attributes/__init__.py @@ -0,0 +1,110 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# type: ignore + +import logging +from types import MappingProxyType +from typing import MutableSequence, Sequence + +from opentelemetry.util import types + +_VALID_ATTR_VALUE_TYPES = (bool, str, int, float) + + +_logger = logging.getLogger(__name__) + + +def _is_valid_attribute_value(value: types.AttributeValue) -> bool: + """Checks if attribute value is valid. + + An attribute value is valid if it is either: + - A primitive type: string, boolean, double precision floating + point (IEEE 754-1985) or integer. + - An array of primitive type values. The array MUST be homogeneous, + i.e. it MUST NOT contain values of different types. + """ + + if isinstance(value, Sequence): + if len(value) == 0: + return True + + sequence_first_valid_type = None + for element in value: + if element is None: + continue + element_type = type(element) + if element_type not in _VALID_ATTR_VALUE_TYPES: + _logger.warning( + "Invalid type %s in attribute value sequence. Expected one of " + "%s or None", + element_type.__name__, + [ + valid_type.__name__ + for valid_type in _VALID_ATTR_VALUE_TYPES + ], + ) + return False + # The type of the sequence must be homogeneous. The first non-None + # element determines the type of the sequence + if sequence_first_valid_type is None: + sequence_first_valid_type = element_type + elif not isinstance(element, sequence_first_valid_type): + _logger.warning( + "Mixed types %s and %s in attribute value sequence", + sequence_first_valid_type.__name__, + type(element).__name__, + ) + return False + + elif not isinstance(value, _VALID_ATTR_VALUE_TYPES): + _logger.warning( + "Invalid type %s for attribute value. Expected one of %s or a " + "sequence of those types", + type(value).__name__, + [valid_type.__name__ for valid_type in _VALID_ATTR_VALUE_TYPES], + ) + return False + return True + + +def _filter_attributes(attributes: types.Attributes) -> None: + """Applies attribute validation rules and drops (key, value) pairs + that doesn't adhere to attributes specification. + + https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/common/common.md#attributes. + """ + if attributes: + for attr_key, attr_value in list(attributes.items()): + if not attr_key: + _logger.warning("invalid key `%s` (empty or null)", attr_key) + attributes.pop(attr_key) + continue + + if _is_valid_attribute_value(attr_value): + if isinstance(attr_value, MutableSequence): + attributes[attr_key] = tuple(attr_value) + if isinstance(attr_value, bytes): + try: + attributes[attr_key] = attr_value.decode() + except ValueError: + attributes.pop(attr_key) + _logger.warning("Byte attribute could not be decoded.") + else: + attributes.pop(attr_key) + + +def _create_immutable_attributes( + attributes: types.Attributes, +) -> types.Attributes: + return MappingProxyType(attributes.copy() if attributes else {}) diff --git a/opentelemetry-api/tests/attributes/test_attributes.py b/opentelemetry-api/tests/attributes/test_attributes.py new file mode 100644 index 0000000000..2a391f78af --- /dev/null +++ b/opentelemetry-api/tests/attributes/test_attributes.py @@ -0,0 +1,87 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# type: ignore + +import unittest + +from opentelemetry.attributes import ( + _create_immutable_attributes, + _filter_attributes, + _is_valid_attribute_value, +) + + +class TestAttributes(unittest.TestCase): + def test_is_valid_attribute_value(self): + self.assertFalse(_is_valid_attribute_value([1, 2, 3.4, "ss", 4])) + self.assertFalse(_is_valid_attribute_value([dict(), 1, 2, 3.4, 4])) + self.assertFalse(_is_valid_attribute_value(["sw", "lf", 3.4, "ss"])) + self.assertFalse(_is_valid_attribute_value([1, 2, 3.4, 5])) + self.assertFalse(_is_valid_attribute_value(dict())) + self.assertTrue(_is_valid_attribute_value(True)) + self.assertTrue(_is_valid_attribute_value("hi")) + self.assertTrue(_is_valid_attribute_value(3.4)) + self.assertTrue(_is_valid_attribute_value(15)) + self.assertTrue(_is_valid_attribute_value([1, 2, 3, 5])) + self.assertTrue(_is_valid_attribute_value([1.2, 2.3, 3.4, 4.5])) + self.assertTrue(_is_valid_attribute_value([True, False])) + self.assertTrue(_is_valid_attribute_value(["ss", "dw", "fw"])) + self.assertTrue(_is_valid_attribute_value([])) + # None in sequences are valid + self.assertTrue(_is_valid_attribute_value(["A", None, None])) + self.assertTrue(_is_valid_attribute_value(["A", None, None, "B"])) + self.assertTrue(_is_valid_attribute_value([None, None])) + self.assertFalse(_is_valid_attribute_value(["A", None, 1])) + self.assertFalse(_is_valid_attribute_value([None, "A", None, 1])) + + def test_filter_attributes(self): + attrs_with_invalid_keys = { + "": "empty-key", + None: "None-value", + "attr-key": "attr-value", + } + _filter_attributes(attrs_with_invalid_keys) + self.assertTrue(len(attrs_with_invalid_keys), 1) + self.assertEqual(attrs_with_invalid_keys, {"attr-key": "attr-value"}) + + attrs_with_invalid_values = { + "nonhomogeneous": [1, 2, 3.4, "ss", 4], + "nonprimitive": dict(), + "mixed": [1, 2.4, "st", dict()], + "validkey1": "validvalue1", + "intkey": 5, + "floatkey": 3.14, + "boolkey": True, + "valid-byte-string": b"hello-otel", + } + _filter_attributes(attrs_with_invalid_values) + self.assertEqual(len(attrs_with_invalid_values), 5) + self.assertEqual( + attrs_with_invalid_values, + { + "validkey1": "validvalue1", + "intkey": 5, + "floatkey": 3.14, + "boolkey": True, + "valid-byte-string": "hello-otel", + }, + ) + + def test_create_immutable_attributes(self): + attrs = {"key": "value", "pi": 3.14} + immutable = _create_immutable_attributes(attrs) + # TypeError: 'mappingproxy' object does not support item assignment + with self.assertRaises(TypeError): + immutable["pi"] = 1.34 diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py index 77240b8ef8..8755b2d1f9 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py @@ -64,6 +64,7 @@ import pkg_resources +from opentelemetry.attributes import _filter_attributes from opentelemetry.sdk.environment_variables import ( OTEL_RESOURCE_ATTRIBUTES, OTEL_SERVICE_NAME, @@ -141,6 +142,7 @@ class Resource: """A Resource is an immutable representation of the entity producing telemetry as Attributes.""" def __init__(self, attributes: Attributes): + _filter_attributes(attributes) self._attributes = attributes.copy() @staticmethod diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py index ad1d8284cd..cc0114a22d 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py @@ -39,6 +39,11 @@ from opentelemetry import context as context_api from opentelemetry import trace as trace_api +from opentelemetry.attributes import ( + _create_immutable_attributes, + _filter_attributes, + _is_valid_attribute_value, +) from opentelemetry.sdk import util from opentelemetry.sdk.environment_variables import ( OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT, @@ -64,7 +69,6 @@ _SPAN_EVENT_COUNT_LIMIT = int(environ.get(OTEL_SPAN_EVENT_COUNT_LIMIT, 128)) _SPAN_LINK_COUNT_LIMIT = int(environ.get(OTEL_SPAN_LINK_COUNT_LIMIT, 128)) -_VALID_ATTR_VALUE_TYPES = (bool, str, int, float) # pylint: disable=protected-access _TRACE_SAMPLER = sampling._get_from_env_or_default() @@ -315,72 +319,6 @@ def attributes(self) -> types.Attributes: return self._attributes -def _is_valid_attribute_value(value: types.AttributeValue) -> bool: - """Checks if attribute value is valid. - - An attribute value is valid if it is one of the valid types. - If the value is a sequence, it is only valid if all items in the sequence: - - are of the same valid type or None - - are not a sequence - """ - - if isinstance(value, Sequence): - if len(value) == 0: - return True - - sequence_first_valid_type = None - for element in value: - if element is None: - continue - element_type = type(element) - if element_type not in _VALID_ATTR_VALUE_TYPES: - logger.warning( - "Invalid type %s in attribute value sequence. Expected one of " - "%s or None", - element_type.__name__, - [ - valid_type.__name__ - for valid_type in _VALID_ATTR_VALUE_TYPES - ], - ) - return False - # The type of the sequence must be homogeneous. The first non-None - # element determines the type of the sequence - if sequence_first_valid_type is None: - sequence_first_valid_type = element_type - elif not isinstance(element, sequence_first_valid_type): - logger.warning( - "Mixed types %s and %s in attribute value sequence", - sequence_first_valid_type.__name__, - type(element).__name__, - ) - return False - - elif not isinstance(value, _VALID_ATTR_VALUE_TYPES): - logger.warning( - "Invalid type %s for attribute value. Expected one of %s or a " - "sequence of those types", - type(value).__name__, - [valid_type.__name__ for valid_type in _VALID_ATTR_VALUE_TYPES], - ) - return False - return True - - -def _filter_attribute_values(attributes: types.Attributes): - if attributes: - for attr_key, attr_value in list(attributes.items()): - if _is_valid_attribute_value(attr_value): - if isinstance(attr_value, MutableSequence): - attributes[attr_key] = tuple(attr_value) - else: - attributes.pop(attr_key) - - -def _create_immutable_attributes(attributes): - return MappingProxyType(attributes.copy() if attributes else {}) - - def _check_span_ended(func): def wrapper(self, *args, **kwargs): already_ended = False @@ -623,7 +561,7 @@ def __init__( self._span_processor = span_processor self._lock = threading.Lock() - _filter_attribute_values(attributes) + _filter_attributes(attributes) if not attributes: self._attributes = self._new_attributes() else: @@ -634,7 +572,7 @@ def __init__( self._events = self._new_events() if events: for event in events: - _filter_attribute_values(event.attributes) + _filter_attributes(event.attributes) # pylint: disable=protected-access event._attributes = _create_immutable_attributes( event.attributes @@ -709,7 +647,7 @@ def add_event( attributes: types.Attributes = None, timestamp: Optional[int] = None, ) -> None: - _filter_attribute_values(attributes) + _filter_attributes(attributes) attributes = _create_immutable_attributes(attributes) self._add_event( Event( diff --git a/opentelemetry-sdk/tests/resources/test_resources.py b/opentelemetry-sdk/tests/resources/test_resources.py index 211187f1eb..87b4006ce7 100644 --- a/opentelemetry-sdk/tests/resources/test_resources.py +++ b/opentelemetry-sdk/tests/resources/test_resources.py @@ -16,6 +16,7 @@ import os import unittest +import uuid from unittest import mock from opentelemetry.sdk import resources @@ -138,6 +139,25 @@ def test_service_name_using_process_name(self): "unknown_service:test", ) + def test_invalid_resource_attribute_values(self): + resource = resources.Resource( + { + resources.SERVICE_NAME: "test", + "non-primitive-data-type": dict(), + "invalid-byte-type-attribute": b"\xd8\xe1\xb7\xeb\xa8\xe5 \xd2\xb7\xe1", + "": "empty-key-value", + None: "null-key-value", + "another-non-primitive": uuid.uuid4(), + } + ) + self.assertEqual( + resource.attributes, + { + resources.SERVICE_NAME: "test", + }, + ) + self.assertEqual(len(resource.attributes), 1) + def test_aggregated_resources_no_detectors(self): aggregated_resources = resources.get_aggregated_resources([]) self.assertEqual(aggregated_resources, resources.Resource.get_empty()) diff --git a/opentelemetry-sdk/tests/trace/test_trace.py b/opentelemetry-sdk/tests/trace/test_trace.py index 5a713c4cfa..a8b8330fe8 100644 --- a/opentelemetry-sdk/tests/trace/test_trace.py +++ b/opentelemetry-sdk/tests/trace/test_trace.py @@ -671,35 +671,6 @@ def test_byte_type_attribute_value(self): isinstance(root.attributes["valid-byte-type-attribute"], str) ) - def test_check_attribute_helper(self): - # pylint: disable=protected-access - self.assertFalse(trace._is_valid_attribute_value([1, 2, 3.4, "ss", 4])) - self.assertFalse( - trace._is_valid_attribute_value([dict(), 1, 2, 3.4, 4]) - ) - self.assertFalse( - trace._is_valid_attribute_value(["sw", "lf", 3.4, "ss"]) - ) - self.assertFalse(trace._is_valid_attribute_value([1, 2, 3.4, 5])) - self.assertTrue(trace._is_valid_attribute_value([1, 2, 3, 5])) - self.assertTrue(trace._is_valid_attribute_value([1.2, 2.3, 3.4, 4.5])) - self.assertTrue(trace._is_valid_attribute_value([True, False])) - self.assertTrue(trace._is_valid_attribute_value(["ss", "dw", "fw"])) - self.assertTrue(trace._is_valid_attribute_value([])) - self.assertFalse(trace._is_valid_attribute_value(dict())) - self.assertTrue(trace._is_valid_attribute_value(True)) - self.assertTrue(trace._is_valid_attribute_value("hi")) - self.assertTrue(trace._is_valid_attribute_value(3.4)) - self.assertTrue(trace._is_valid_attribute_value(15)) - # None in sequences are valid - self.assertTrue(trace._is_valid_attribute_value(["A", None, None])) - self.assertTrue( - trace._is_valid_attribute_value(["A", None, None, "B"]) - ) - self.assertTrue(trace._is_valid_attribute_value([None, None])) - self.assertFalse(trace._is_valid_attribute_value(["A", None, 1])) - self.assertFalse(trace._is_valid_attribute_value([None, "A", None, 1])) - def test_sampling_attributes(self): sampling_attributes = { "sampler-attr": "sample-val",