Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions designs/serialization.md
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,9 @@ class ShapeSerializer(Protocol):
def write_document(self, schema: "Schema", value: "Document") -> None:
...

def write_data_stream(self, schema: "Schema", value: StreamingBlob) -> None:
    """Write a data stream; this default does not implement it.

    :param schema: The shape's schema.
    :param value: The streaming value to write.
    :raises NotImplementedError: Always, in this default.
    """
    raise NotImplementedError


@runtime_checkable
class MapSerializer(Protocol):
Expand Down Expand Up @@ -531,6 +534,9 @@ class ShapeDeserializer(Protocol):
def read_timestamp(self, schema: "Schema") -> datetime.datetime:
...

def read_data_stream(self, schema: "Schema") -> StreamingBlob:
    """Read a data stream; this default does not implement it.

    :param schema: The shape's schema.
    :raises NotImplementedError: Always, in this default.
    """
    raise NotImplementedError


@runtime_checkable
class DeserializeableShape(Protocol):
Expand Down
22 changes: 21 additions & 1 deletion packages/smithy-core/src/smithy_core/deserializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
from decimal import Decimal
from typing import TYPE_CHECKING, Never, Protocol, Self, runtime_checkable

from .exceptions import SmithyException
from .exceptions import SmithyException, UnsupportedStreamException

if TYPE_CHECKING:
from .documents import Document
from .schemas import Schema
from .aio.interfaces import StreamingBlob as _Stream


@runtime_checkable
Expand Down Expand Up @@ -171,6 +172,22 @@ def read_timestamp(self, schema: "Schema") -> datetime.datetime:
"""
...

def read_data_stream(self, schema: "Schema") -> "_Stream":
    """Return a data stream backed by the underlying data.

    Implementations MUST NOT consume the data in this method; the returned value
    is meant to be read later by the consumer. An HTTP implementation, for
    example, would hand back the HTTP body stream directly. The stream MAY be
    wrapped to offer a more consistent interface or to hide implementation
    details.

    Data streams are only supported at the top level of operation input and
    output.

    :param schema: The shape's schema.
    :returns: A data stream derived from the underlying data.
    :raises UnsupportedStreamException: Always, in this default implementation.
    """
    raise UnsupportedStreamException


class SpecificShapeDeserializer(ShapeDeserializer):
"""Expects to deserialize a specific kind of shape, failing if other shapes are
Expand Down Expand Up @@ -247,6 +264,9 @@ def read_document(self, schema: "Schema") -> "Document":
def read_timestamp(self, schema: "Schema") -> datetime.datetime:
self._invalid_state(schema)

def read_data_stream(self, schema: "Schema") -> "_Stream":
    """Reject the read by handing the schema to ``_invalid_state``.

    :param schema: The shape's schema.
    """
    reject = self._invalid_state
    reject(schema)


@runtime_checkable
class DeserializeableShape(Protocol):
Expand Down
5 changes: 5 additions & 0 deletions packages/smithy-core/src/smithy_core/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,8 @@ class MissingDependencyException(SmithyException):
class AsyncBodyException(SmithyException):
"""Exception indicating that a request with an async body type was created in a sync
context."""


class UnsupportedStreamException(SmithyException):
    """Raised when the stream method of a serializer or deserializer is called even
    though data streams are not supported."""
24 changes: 23 additions & 1 deletion packages/smithy-core/src/smithy_core/interfaces/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
from typing import Protocol, runtime_checkable
from asyncio import iscoroutinefunction
from typing import Protocol, runtime_checkable, Any, TypeGuard


class URI(Protocol):
Expand Down Expand Up @@ -58,6 +59,27 @@ class BytesReader(Protocol):
def read(self, size: int = -1, /) -> bytes: ...


def is_bytes_reader(obj: Any) -> TypeGuard[BytesReader]:
    """Check whether an object is a synchronous ``BytesReader``.

    A plain ``isinstance`` check is not enough because ``runtime_checkable``
    protocols cannot tell a sync ``read`` from an async one, so objects whose
    ``read`` is a coroutine function are rejected explicitly.

    :param obj: The object to inspect.
    :returns: True if the object matches the protocol and its ``read`` is not a
        coroutine function.
    """
    if not isinstance(obj, BytesReader):
        return False
    # The protocol check above guarantees that ``read`` exists.
    return not iscoroutinefunction(obj.read)


# A union of all acceptable streaming blob types. Deserialized payloads will
# always return a ByteStream, or AsyncByteStream if async is enabled.
type StreamingBlob = BytesReader | bytes | bytearray


def is_streaming_blob(obj: Any) -> TypeGuard[StreamingBlob]:
    """Check whether an object is one of the ``StreamingBlob`` types.

    :param obj: The object to inspect.
    :returns: True for ``bytes``, ``bytearray``, or anything accepted by
        ``is_bytes_reader``.
    """
    if isinstance(obj, (bytes, bytearray)):
        return True
    return is_bytes_reader(obj)
28 changes: 27 additions & 1 deletion packages/smithy-core/src/smithy_core/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
from decimal import Decimal
from typing import TYPE_CHECKING, Never, Protocol, runtime_checkable

from .exceptions import SmithyException
from .exceptions import SmithyException, UnsupportedStreamException

if TYPE_CHECKING:
from .documents import Document
from .schemas import Schema
from .aio.interfaces import StreamingBlob as _Stream


@runtime_checkable
Expand Down Expand Up @@ -198,6 +199,24 @@ def write_document(self, schema: "Schema", value: "Document") -> None:
"""
...

def write_data_stream(self, schema: "Schema", value: "_Stream") -> None:
"""Write a data stream to the output.

If the value is a stream (i.e. not bytes or bytearray) it MUST NOT be read
directly by this method. Such values are intended to only be read as needed when
sending a message, and so should be bound directly to the request / response
type and then read by the transport.

Data streams are only supported at the top-level input and output for
operations.

:param schema: The shape's schema.
:param value: The streaming value to write.
"""
if isinstance(value, bytes | bytearray):
self.write_blob(schema, bytes(value))
raise UnsupportedStreamException()

def flush(self) -> None:
"""Flush the underlying data."""

Expand Down Expand Up @@ -324,6 +343,10 @@ def write_document(self, schema: "Schema", value: "Document") -> None:
self.before(schema).write_document(schema, value)
self.after(schema)

def write_data_stream(self, schema: "Schema", value: "_Stream") -> None:
    """Write the stream via the serializer returned by ``before``, then call
    ``after``.

    :param schema: The shape's schema.
    :param value: The streaming value to write.
    """
    delegate = self.before(schema)
    delegate.write_data_stream(schema, value)
    self.after(schema)


class SpecificShapeSerializer(ShapeSerializer):
"""Expects to serialize a specific kind of shape, failing if other shapes are
Expand Down Expand Up @@ -393,6 +416,9 @@ def write_timestamp(self, schema: "Schema", value: datetime.datetime) -> None:
def write_document(self, schema: "Schema", value: "Document") -> None:
self._invalid_state(schema)

def write_data_stream(self, schema: "Schema", value: "_Stream") -> None:
    """Reject the write by handing the schema to ``_invalid_state``.

    :param schema: The shape's schema.
    :param value: The streaming value; it is not used.
    """
    reject = self._invalid_state
    reject(schema)


@runtime_checkable
class SerializeableShape(Protocol):
Expand Down
120 changes: 117 additions & 3 deletions packages/smithy-core/src/smithy_core/traits.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
# they're correct regardless, so it's okay if the checks are stripped out.
# ruff: noqa: S101

from dataclasses import dataclass
from dataclasses import dataclass, field
from enum import Enum
from typing import TYPE_CHECKING, ClassVar
from typing import TYPE_CHECKING, ClassVar, Mapping

from .types import TimestampFormat
from .types import TimestampFormat, PathPattern
from .shapes import ShapeID

if TYPE_CHECKING:
Expand Down Expand Up @@ -193,3 +193,117 @@ def __post_init__(self):
@property
def value(self) -> str:
return self.document_value # type: ignore


# TODO: Get all this moved over to the http package
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it need to be? I don't think we need to put traits in their "respective" packages

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd tend to agree with this; I think in the other PR we moved everything into the centralized location. I like that unless we have a technical limitation on keeping them together. Is the concern that we may include traits that aren't needed if the smithy-http package isn't present, or are we hitting typing problems?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's more about fighting against package size inflation, though practically for HTTP it's not much of an issue.

@dataclass(init=False, frozen=True)
class HTTPTrait(Trait, id=ShapeID("smithy.api#http")):
    """Trait for ``smithy.api#http``.

    The document value is a mapping with a string ``method``, a string ``uri``
    and an optional integer ``code``. The ``uri`` is split at its first ``?``
    into a path pattern and an optional query string.
    """

    # Derived from the ``uri`` entry: everything before the first "?".
    path: PathPattern = field(repr=False, hash=False, compare=False)
    # The ``code`` entry of the document value, or 200 when it is absent.
    code: int = field(repr=False, hash=False, compare=False)
    # Derived from the ``uri`` entry: everything after the first "?", if any.
    query: str | None = field(default=None, repr=False, hash=False, compare=False)

    def __init__(self, value: "DocumentValue | DynamicTrait" = None):
        super().__init__(value)
        document = self.document_value
        assert isinstance(document, Mapping)
        assert isinstance(document["method"], str)

        # The dataclass is frozen, so attributes are set through ``object``.
        status = document.get("code", 200)
        assert isinstance(status, int)
        object.__setattr__(self, "code", status)

        uri = document["uri"]
        assert isinstance(uri, str)
        raw_path, separator, raw_query = uri.partition("?")

        object.__setattr__(self, "path", PathPattern(raw_path))
        # An empty query after a trailing "?" is kept as "", not None.
        object.__setattr__(self, "query", raw_query if separator else None)

    @property
    def method(self) -> str:
        """The ``method`` entry of the trait's document value."""
        return self.document_value["method"]  # type: ignore


@dataclass(init=False, frozen=True)
class HTTPErrorTrait(Trait, id=ShapeID("smithy.api#httpError")):
    """Trait for ``smithy.api#httpError``; its document value is an integer."""

    def __post_init__(self):
        value = self.document_value
        assert isinstance(value, int)

    @property
    def code(self) -> int:
        """The trait's integer document value."""
        return self.document_value  # type: ignore


@dataclass(init=False, frozen=True)
class HTTPHeaderTrait(Trait, id=ShapeID("smithy.api#httpHeader")):
    """Trait for ``smithy.api#httpHeader``; its document value is a string."""

    def __post_init__(self):
        value = self.document_value
        assert isinstance(value, str)

    @property
    def key(self) -> str:
        """The trait's string document value."""
        return self.document_value  # type: ignore


@dataclass(init=False, frozen=True)
class HTTPLabelTrait(Trait, id=ShapeID("smithy.api#httpLabel")):
    """Trait for ``smithy.api#httpLabel``; it carries no document value."""

    def __post_init__(self):
        value = self.document_value
        assert value is None


@dataclass(init=False, frozen=True)
class HTTPPayloadTrait(Trait, id=ShapeID("smithy.api#httpPayload")):
    """Trait for ``smithy.api#httpPayload``; it carries no document value."""

    def __post_init__(self):
        value = self.document_value
        assert value is None


@dataclass(init=False, frozen=True)
class HTTPPrefixHeadersTrait(Trait, id=ShapeID("smithy.api#httpPrefixHeaders")):
    """Trait for ``smithy.api#httpPrefixHeaders``; its document value is a string."""

    def __post_init__(self):
        value = self.document_value
        assert isinstance(value, str)

    @property
    def prefix(self) -> str:
        """The trait's string document value."""
        return self.document_value  # type: ignore


@dataclass(init=False, frozen=True)
class HTTPQueryTrait(Trait, id=ShapeID("smithy.api#httpQuery")):
    """Trait for ``smithy.api#httpQuery``; its document value is a string."""

    def __post_init__(self):
        value = self.document_value
        assert isinstance(value, str)

    @property
    def key(self) -> str:
        """The trait's string document value."""
        return self.document_value  # type: ignore


@dataclass(init=False, frozen=True)
class HTTPQueryParamsTrait(Trait, id=ShapeID("smithy.api#httpQueryParams")):
    """Trait for ``smithy.api#httpQueryParams``; it carries no document value."""

    def __post_init__(self):
        value = self.document_value
        assert value is None


@dataclass(init=False, frozen=True)
class HTTPResponseCodeTrait(Trait, id=ShapeID("smithy.api#httpResponseCode")):
    """Trait for ``smithy.api#httpResponseCode``; it carries no document value."""

    def __post_init__(self):
        value = self.document_value
        assert value is None


@dataclass(init=False, frozen=True)
class HTTPChecksumRequiredTrait(Trait, id=ShapeID("smithy.api#httpChecksumRequired")):
    """Trait for ``smithy.api#httpChecksumRequired``; it carries no document value."""

    def __post_init__(self):
        value = self.document_value
        assert value is None


@dataclass(init=False, frozen=True)
class EndpointTrait(Trait, id=ShapeID("smithy.api#endpoint")):
    """Trait for ``smithy.api#endpoint``.

    The document value is a mapping whose ``hostPrefix`` entry is a string.
    """

    def __post_init__(self):
        # ``host_prefix`` looks the value up by key, so the document value must
        # be a mapping rather than a bare string.
        assert isinstance(self.document_value, Mapping)
        assert isinstance(self.document_value["hostPrefix"], str)

    @property
    def host_prefix(self) -> str:
        """The ``hostPrefix`` entry of the trait's document value."""
        return self.document_value["hostPrefix"]  # type: ignore


@dataclass(init=False, frozen=True)
class HostLabelTrait(Trait, id=ShapeID("smithy.api#hostLabel")):
    """Trait for ``smithy.api#hostLabel``; it carries no document value."""

    def __post_init__(self):
        value = self.document_value
        assert value is None
42 changes: 42 additions & 0 deletions packages/smithy-core/src/smithy_core/types.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
import json
import re
from collections.abc import Mapping, Sequence
from datetime import datetime
from email.utils import format_datetime, parsedate_to_datetime
from enum import Enum
from typing import Any
from dataclasses import dataclass

from .exceptions import ExpectationNotMetException
from .utils import (
Expand All @@ -16,6 +18,8 @@
serialize_rfc3339,
)

_GREEDY_LABEL_RE = re.compile(r"\{(\w+)\+\}")

type Document = (
Mapping[str, "Document"] | Sequence["Document"] | str | int | float | bool | None
)
Expand Down Expand Up @@ -111,3 +115,41 @@ def deserialize(self, value: str | float) -> datetime:
return ensure_utc(parsedate_to_datetime(expect_type(str, value)))
case TimestampFormat.DATE_TIME:
return ensure_utc(datetime.fromisoformat(expect_type(str, value)))


@dataclass(init=False, frozen=True)
class PathPattern:
    """A URI path pattern that can be formatted with label values.

    Labels in the pattern are either normal or greedy. A greedy label is written
    ``{name+}`` and may be filled with a value containing path separators; a
    normal label may not.
    """

    pattern: str
    """The path component of the URI which is a formattable string."""

    greedy_labels: set[str]
    """The pattern labels whose values may contain path separators."""

    def __init__(self, pattern: str) -> None:
        # The dataclass is frozen, so attributes are set through ``object``.
        object.__setattr__(self, "pattern", pattern)
        greedy = set(_GREEDY_LABEL_RE.findall(pattern))
        object.__setattr__(self, "greedy_labels", greedy)

    def format(self, *args: object, **kwargs: str) -> str:
        """Fill the pattern's labels with the given keyword values.

        :param args: Must be empty; positional values are not accepted.
        :param kwargs: Label values keyed by label name.
        :returns: The formatted path.
        :raises ValueError: If positional arguments are given, if a non-greedy
            label value contains "/", or if the result contains "//".
        """
        if args:
            raise ValueError("PathPattern formatting requires only keyword arguments.")

        has_illegal_separator = any(
            "/" in value and key not in self.greedy_labels
            for key, value in kwargs.items()
        )
        if has_illegal_separator:
            raise ValueError(
                'Non-greedy labels must not contain path separators ("/").'
            )

        # Drop the greedy marker so "{name+}" becomes a plain "{name}" field.
        template = self.pattern.replace("+}", "}")
        result = template.format(**kwargs)
        if "//" in result:
            raise ValueError(
                f'Path must not contain empty segments, but was "{result}".'
            )
        return result
Loading
Loading