Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Apply validation of attributes to Resource, move attribute related logic to util package #1834

Merged
merged 20 commits into from
May 24, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@
import pkg_resources

from opentelemetry.sdk.environment_variables import OTEL_RESOURCE_ATTRIBUTES
from opentelemetry.sdk.util import (
_filter_attributes,
)
from opentelemetry.semconv.resource import ResourceAttributes

LabelValue = typing.Union[str, bool, int, float]
Expand Down Expand Up @@ -138,6 +141,7 @@ class Resource:
"""A Resource is an immutable representation of the entity producing telemetry as Attributes."""

def __init__(self, attributes: Attributes):
_filter_attributes(attributes)
self._attributes = attributes.copy()

@staticmethod
Expand Down
81 changes: 10 additions & 71 deletions opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,13 @@
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import sampling
from opentelemetry.sdk.trace.id_generator import IdGenerator, RandomIdGenerator
from opentelemetry.sdk.util import BoundedDict, BoundedList
from opentelemetry.sdk.util import (
BoundedDict,
BoundedList,
_create_immutable_attributes,
_filter_attributes,
_is_valid_attribute_value,
)
from opentelemetry.sdk.util.instrumentation import InstrumentationInfo
from opentelemetry.trace import SpanContext
from opentelemetry.trace.propagation import SPAN_KEY
Expand All @@ -64,7 +70,6 @@

_SPAN_EVENT_COUNT_LIMIT = int(environ.get(OTEL_SPAN_EVENT_COUNT_LIMIT, 128))
_SPAN_LINK_COUNT_LIMIT = int(environ.get(OTEL_SPAN_LINK_COUNT_LIMIT, 128))
_VALID_ATTR_VALUE_TYPES = (bool, str, int, float)
# pylint: disable=protected-access
_TRACE_SAMPLER = sampling._get_from_env_or_default()

Expand Down Expand Up @@ -315,72 +320,6 @@ def attributes(self) -> types.Attributes:
return self._attributes


def _is_valid_attribute_value(value: types.AttributeValue) -> bool:
"""Checks if attribute value is valid.

An attribute value is valid if it is one of the valid types.
If the value is a sequence, it is only valid if all items in the sequence:
- are of the same valid type or None
- are not a sequence
"""

if isinstance(value, Sequence):
if len(value) == 0:
return True

sequence_first_valid_type = None
for element in value:
if element is None:
continue
element_type = type(element)
if element_type not in _VALID_ATTR_VALUE_TYPES:
logger.warning(
"Invalid type %s in attribute value sequence. Expected one of "
"%s or None",
element_type.__name__,
[
valid_type.__name__
for valid_type in _VALID_ATTR_VALUE_TYPES
],
)
return False
# The type of the sequence must be homogeneous. The first non-None
# element determines the type of the sequence
if sequence_first_valid_type is None:
sequence_first_valid_type = element_type
elif not isinstance(element, sequence_first_valid_type):
logger.warning(
"Mixed types %s and %s in attribute value sequence",
sequence_first_valid_type.__name__,
type(element).__name__,
)
return False

elif not isinstance(value, _VALID_ATTR_VALUE_TYPES):
logger.warning(
"Invalid type %s for attribute value. Expected one of %s or a "
"sequence of those types",
type(value).__name__,
[valid_type.__name__ for valid_type in _VALID_ATTR_VALUE_TYPES],
)
return False
return True


def _filter_attribute_values(attributes: types.Attributes):
if attributes:
for attr_key, attr_value in list(attributes.items()):
if _is_valid_attribute_value(attr_value):
if isinstance(attr_value, MutableSequence):
attributes[attr_key] = tuple(attr_value)
else:
attributes.pop(attr_key)


def _create_immutable_attributes(attributes):
return MappingProxyType(attributes.copy() if attributes else {})


def _check_span_ended(func):
def wrapper(self, *args, **kwargs):
already_ended = False
Expand Down Expand Up @@ -619,7 +558,7 @@ def __init__(
self._span_processor = span_processor
self._lock = threading.Lock()

_filter_attribute_values(attributes)
_filter_attributes(attributes)
if not attributes:
self._attributes = self._new_attributes()
else:
Expand All @@ -630,7 +569,7 @@ def __init__(
self._events = self._new_events()
if events:
for event in events:
_filter_attribute_values(event.attributes)
_filter_attributes(event.attributes)
# pylint: disable=protected-access
event._attributes = _create_immutable_attributes(
event.attributes
Expand Down Expand Up @@ -702,7 +641,7 @@ def add_event(
attributes: types.Attributes = None,
timestamp: Optional[int] = None,
) -> None:
_filter_attribute_values(attributes)
_filter_attributes(attributes)
attributes = _create_immutable_attributes(attributes)
self._add_event(
Event(
Expand Down
87 changes: 87 additions & 0 deletions opentelemetry-sdk/src/opentelemetry/sdk/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,96 @@
# limitations under the License.

import datetime
import logging
import threading
from collections import OrderedDict, deque
from collections.abc import MutableMapping, Sequence
from types import MappingProxyType
from typing import MutableSequence

from opentelemetry.util import types

_VALID_ATTR_VALUE_TYPES = (bool, str, int, float)


_logger = logging.getLogger(__name__)


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should all of this logic go into the api?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe if we want to provide this as part of public api package

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤔 I guess other SDKs could use a method like this to ensure their attributes are all validated and users could potentially use this to validate/filter their attributes before setting them? not sure how likely these scenarios are though. I could go either ways with this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was more referring to the fact that Attributes are an API concept. I don't think they need to be part of the PUBLIC api, simply private methods that exist in the api package.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are Attributess an API concept? I am inclined towards keeping them in sdk because all operations are no-op in api package so we will probably never use them there and if we don't want offer them as a part of public api to user/application owners I don't see any benefit in moving to api package?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes Attribute s are defined in the API. Hmm now thinking about it, this is probably valuable and common enough to be exposed in the public api. It is also defined in the specs so I don't mind increasing our api surface for this. For example, this would be useful for instrumentations to leverage (which only have dependencies on the api).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can move this to api util package. I just want to make sure we are okay with making these public because of all of util is pretty much private and internal.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can be the first :)

def _is_valid_attribute_value(value: types.AttributeValue) -> bool:
"""Checks if attribute value is valid.

An attribute value is valid if it is one of the valid types.
If the value is a sequence, it is only valid if all items in the sequence:
- are of the same valid type or None
- are not a sequence
"""

if isinstance(value, Sequence):
if len(value) == 0:
return True

sequence_first_valid_type = None
for element in value:
if element is None:
continue
element_type = type(element)
if element_type not in _VALID_ATTR_VALUE_TYPES:
_logger.warning(
"Invalid type %s in attribute value sequence. Expected one of "
"%s or None",
element_type.__name__,
[
valid_type.__name__
for valid_type in _VALID_ATTR_VALUE_TYPES
],
)
return False
# The type of the sequence must be homogeneous. The first non-None
# element determines the type of the sequence
if sequence_first_valid_type is None:
sequence_first_valid_type = element_type
elif not isinstance(element, sequence_first_valid_type):
_logger.warning(
"Mixed types %s and %s in attribute value sequence",
sequence_first_valid_type.__name__,
type(element).__name__,
)
return False

elif not isinstance(value, _VALID_ATTR_VALUE_TYPES):
_logger.warning(
"Invalid type %s for attribute value. Expected one of %s or a "
"sequence of those types",
type(value).__name__,
[valid_type.__name__ for valid_type in _VALID_ATTR_VALUE_TYPES],
)
return False
return True


def _filter_attributes(attributes: types.Attributes):
if attributes:
for attr_key, attr_value in list(attributes.items()):
if not attr_key:
_logger.warning("invalid key `%s` (empty or null)", attr_key)
attributes.pop(attr_key)
continue

if _is_valid_attribute_value(attr_value):
if isinstance(attr_value, MutableSequence):
attributes[attr_key] = tuple(attr_value)
if isinstance(attr_value, bytes):
try:
attributes[attr_key] = attr_value.decode()
except ValueError:
attributes.pop(attr_key)
_logger.warning("Byte attribute could not be decoded.")
else:
attributes.pop(attr_key)


def _create_immutable_attributes(attributes):
return MappingProxyType(attributes.copy() if attributes else {})


def ns_to_iso_str(nanoseconds):
Expand Down
20 changes: 20 additions & 0 deletions opentelemetry-sdk/tests/resources/test_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import os
import unittest
import uuid
from unittest import mock

from opentelemetry.sdk import resources
Expand Down Expand Up @@ -138,6 +139,25 @@ def test_service_name_using_process_name(self):
"unknown_service:test",
)

def test_invalid_resource_attribute_values(self):
resource = resources.Resource(
{
resources.SERVICE_NAME: "test",
"non-primitive-data-type": dict(),
"invalid-byte-type-attribute": b"\xd8\xe1\xb7\xeb\xa8\xe5 \xd2\xb7\xe1",
"": "empty-key-value",
None: "null-key-value",
"another-non-primitive": uuid.uuid4(),
}
)
self.assertEqual(
resource.attributes,
{
resources.SERVICE_NAME: "test",
},
)
self.assertEqual(len(resource.attributes), 1)

def test_aggregated_resources_no_detectors(self):
aggregated_resources = resources.get_aggregated_resources([])
self.assertEqual(aggregated_resources, resources.Resource.get_empty())
Expand Down
46 changes: 20 additions & 26 deletions opentelemetry-sdk/tests/trace/test_trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
)
from opentelemetry.sdk.trace import Resource, sampling
from opentelemetry.sdk.trace.id_generator import RandomIdGenerator
from opentelemetry.sdk.util import ns_to_iso_str
from opentelemetry.sdk.util import _is_valid_attribute_value, ns_to_iso_str
from opentelemetry.sdk.util.instrumentation import InstrumentationInfo
from opentelemetry.trace import StatusCode
from opentelemetry.util._time import _time_ns
Expand Down Expand Up @@ -647,32 +647,26 @@ def test_byte_type_attribute_value(self):

def test_check_attribute_helper(self):
# pylint: disable=protected-access
self.assertFalse(trace._is_valid_attribute_value([1, 2, 3.4, "ss", 4]))
self.assertFalse(
trace._is_valid_attribute_value([dict(), 1, 2, 3.4, 4])
)
self.assertFalse(
trace._is_valid_attribute_value(["sw", "lf", 3.4, "ss"])
)
self.assertFalse(trace._is_valid_attribute_value([1, 2, 3.4, 5]))
self.assertTrue(trace._is_valid_attribute_value([1, 2, 3, 5]))
self.assertTrue(trace._is_valid_attribute_value([1.2, 2.3, 3.4, 4.5]))
self.assertTrue(trace._is_valid_attribute_value([True, False]))
self.assertTrue(trace._is_valid_attribute_value(["ss", "dw", "fw"]))
self.assertTrue(trace._is_valid_attribute_value([]))
self.assertFalse(trace._is_valid_attribute_value(dict()))
self.assertTrue(trace._is_valid_attribute_value(True))
self.assertTrue(trace._is_valid_attribute_value("hi"))
self.assertTrue(trace._is_valid_attribute_value(3.4))
self.assertTrue(trace._is_valid_attribute_value(15))
self.assertFalse(_is_valid_attribute_value([1, 2, 3.4, "ss", 4]))
self.assertFalse(_is_valid_attribute_value([dict(), 1, 2, 3.4, 4]))
self.assertFalse(_is_valid_attribute_value(["sw", "lf", 3.4, "ss"]))
self.assertFalse(_is_valid_attribute_value([1, 2, 3.4, 5]))
self.assertTrue(_is_valid_attribute_value([1, 2, 3, 5]))
self.assertTrue(_is_valid_attribute_value([1.2, 2.3, 3.4, 4.5]))
self.assertTrue(_is_valid_attribute_value([True, False]))
self.assertTrue(_is_valid_attribute_value(["ss", "dw", "fw"]))
self.assertTrue(_is_valid_attribute_value([]))
self.assertFalse(_is_valid_attribute_value(dict()))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: please group all the true/false together. makes it easier to read :)

self.assertTrue(_is_valid_attribute_value(True))
self.assertTrue(_is_valid_attribute_value("hi"))
self.assertTrue(_is_valid_attribute_value(3.4))
self.assertTrue(_is_valid_attribute_value(15))
# None in sequences are valid
self.assertTrue(trace._is_valid_attribute_value(["A", None, None]))
self.assertTrue(
trace._is_valid_attribute_value(["A", None, None, "B"])
)
self.assertTrue(trace._is_valid_attribute_value([None, None]))
self.assertFalse(trace._is_valid_attribute_value(["A", None, 1]))
self.assertFalse(trace._is_valid_attribute_value([None, "A", None, 1]))
self.assertTrue(_is_valid_attribute_value(["A", None, None]))
self.assertTrue(_is_valid_attribute_value(["A", None, None, "B"]))
self.assertTrue(_is_valid_attribute_value([None, None]))
self.assertFalse(_is_valid_attribute_value(["A", None, 1]))
self.assertFalse(_is_valid_attribute_value([None, "A", None, 1]))

def test_sampling_attributes(self):
sampling_attributes = {
Expand Down