Skip to content

Commit 065b551

Browse files
refactor: align S3 store with sanitization strategy pattern
- Add S3KeySanitizationStrategy and S3CollectionSanitizationStrategy classes
- Remove automatic sanitization from _get_s3_key()
- Add collection_sanitization_strategy and key_sanitization_strategy parameters to S3Store
- Update S3Store to use BaseStore's _sanitize_collection_and_key() method
- By default, collections/keys are not sanitized (matches new main pattern)
- Update tests to use sanitization strategies
- Update documentation to explain when/why to use strategies

This aligns with the main branch refactor where stores no longer sanitize collections and keys by default, making sanitization opt-in via strategy parameters.

Co-authored-by: William Easton <strawgate@users.noreply.github.com>
1 parent e0f4529 commit 065b551

File tree

3 files changed

+109
-11
lines changed

3 files changed

+109
-11
lines changed
Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
"""AWS S3-based key-value store."""
22

3-
from key_value.aio.stores.s3.store import S3Store
3+
from key_value.aio.stores.s3.store import (
4+
S3CollectionSanitizationStrategy,
5+
S3KeySanitizationStrategy,
6+
S3Store,
7+
)
48

5-
__all__ = ["S3Store"]
9+
__all__ = [
10+
"S3CollectionSanitizationStrategy",
11+
"S3KeySanitizationStrategy",
12+
"S3Store",
13+
]

key-value/key-value-aio/src/key_value/aio/stores/s3/store.py

Lines changed: 94 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from typing import TYPE_CHECKING, Any, overload
33

44
from key_value.shared.utils.managed_entry import ManagedEntry
5+
from key_value.shared.utils.sanitization import SanitizationStrategy
56
from key_value.shared.utils.sanitize import hash_excess_length
67
from typing_extensions import Self, override
78

@@ -33,6 +34,55 @@
3334
from aiobotocore.client import AioBaseClient as S3Client
3435

3536

37+
class S3KeySanitizationStrategy(SanitizationStrategy):
38+
"""Sanitization strategy for S3 keys with byte-aware length limits.
39+
40+
S3 has a maximum key length of 1024 bytes (UTF-8 encoded). This strategy
41+
hashes keys that exceed the specified byte limit to ensure compliance.
42+
43+
Args:
44+
max_bytes: Maximum key length in bytes. Defaults to MAX_KEY_LENGTH (500).
45+
"""
46+
47+
def __init__(self, max_bytes: int = MAX_KEY_LENGTH) -> None:
48+
"""Initialize the S3 key sanitization strategy.
49+
50+
Args:
51+
max_bytes: Maximum key length in bytes.
52+
"""
53+
self.max_bytes = max_bytes
54+
55+
def sanitize(self, value: str) -> str:
56+
"""Hash the value if it exceeds max_bytes when UTF-8 encoded.
57+
58+
Args:
59+
value: The key to sanitize.
60+
61+
Returns:
62+
The original value if within limit, or truncated+hashed if too long.
63+
"""
64+
return hash_excess_length(value, self.max_bytes, length_is_bytes=True)
65+
66+
def validate(self, value: str) -> None:
67+
"""No validation needed for S3 keys."""
68+
69+
70+
class S3CollectionSanitizationStrategy(S3KeySanitizationStrategy):
71+
"""Sanitization strategy for S3 collection names with byte-aware length limits.
72+
73+
This behaves exactly like S3KeySanitizationStrategy, except that max_bytes defaults to
74+
MAX_COLLECTION_LENGTH (500 bytes), the budget for the collection part of the S3 key format {collection}/{key}.
75+
"""
76+
77+
def __init__(self, max_bytes: int = MAX_COLLECTION_LENGTH) -> None:
78+
"""Initialize the S3 collection sanitization strategy.
79+
80+
Args:
81+
max_bytes: Maximum collection name length in bytes.
82+
"""
83+
super().__init__(max_bytes=max_bytes)
84+
85+
3686
class S3Store(BaseContextManagerStore, BaseStore):
3787
"""AWS S3-based key-value store.
3888
@@ -42,13 +92,28 @@ class S3Store(BaseContextManagerStore, BaseStore):
4292
S3 object metadata and checked client-side during retrieval (S3 lifecycle policies
4393
can be configured separately for background cleanup, but don't provide atomic TTL+retrieval).
4494
95+
By default, collections and keys are not sanitized. This means you must ensure that
96+
the combined "{collection}/{key}" path does not exceed S3's 1024-byte limit when UTF-8 encoded.
97+
98+
To handle long collection or key names, pass S3CollectionSanitizationStrategy and
99+
S3KeySanitizationStrategy, which hash any value that exceeds its byte limit.
100+
45101
Example:
46102
Basic usage with automatic AWS credentials:
47103
48104
>>> async with S3Store(bucket_name="my-kv-store") as store:
49105
... await store.put(key="user:123", value={"name": "Alice"}, ttl=3600)
50106
... user = await store.get(key="user:123")
51107
108+
With sanitization for long keys/collections:
109+
110+
>>> async with S3Store(
111+
... bucket_name="my-kv-store",
112+
... collection_sanitization_strategy=S3CollectionSanitizationStrategy(),
113+
... key_sanitization_strategy=S3KeySanitizationStrategy(),
114+
... ) as store:
115+
... await store.put(key="very_long_key" * 100, value={"data": "test"})
116+
52117
With custom AWS credentials:
53118
54119
>>> async with S3Store(
@@ -74,13 +139,23 @@ class S3Store(BaseContextManagerStore, BaseStore):
74139
_client: S3Client | None
75140

76141
@overload
77-
def __init__(self, *, client: S3Client, bucket_name: str, default_collection: str | None = None) -> None:
142+
def __init__(
143+
self,
144+
*,
145+
client: S3Client,
146+
bucket_name: str,
147+
default_collection: str | None = None,
148+
collection_sanitization_strategy: SanitizationStrategy | None = None,
149+
key_sanitization_strategy: SanitizationStrategy | None = None,
150+
) -> None:
78151
"""Initialize the S3 store with a pre-configured client.
79152
80153
Args:
81154
client: The S3 client to use. You must have entered the context manager before passing this in.
82155
bucket_name: The name of the S3 bucket to use.
83156
default_collection: The default collection to use if no collection is provided.
157+
collection_sanitization_strategy: Strategy for sanitizing collection names. Defaults to None (no sanitization).
158+
key_sanitization_strategy: Strategy for sanitizing keys. Defaults to None (no sanitization).
84159
"""
85160

86161
@overload
@@ -94,6 +169,8 @@ def __init__(
94169
aws_secret_access_key: str | None = None,
95170
aws_session_token: str | None = None,
96171
default_collection: str | None = None,
172+
collection_sanitization_strategy: SanitizationStrategy | None = None,
173+
key_sanitization_strategy: SanitizationStrategy | None = None,
97174
) -> None:
98175
"""Initialize the S3 store with AWS credentials.
99176
@@ -105,6 +182,8 @@ def __init__(
105182
aws_secret_access_key: AWS secret access key. Defaults to None (uses AWS default credentials).
106183
aws_session_token: AWS session token. Defaults to None (uses AWS default credentials).
107184
default_collection: The default collection to use if no collection is provided.
185+
collection_sanitization_strategy: Strategy for sanitizing collection names. Defaults to None (no sanitization).
186+
key_sanitization_strategy: Strategy for sanitizing keys. Defaults to None (no sanitization).
108187
"""
109188

110189
def __init__(
@@ -118,6 +197,8 @@ def __init__(
118197
aws_secret_access_key: str | None = None,
119198
aws_session_token: str | None = None,
120199
default_collection: str | None = None,
200+
collection_sanitization_strategy: SanitizationStrategy | None = None,
201+
key_sanitization_strategy: SanitizationStrategy | None = None,
121202
) -> None:
122203
"""Initialize the S3 store.
123204
@@ -130,6 +211,8 @@ def __init__(
130211
aws_secret_access_key: AWS secret access key. Defaults to None (uses AWS default credentials).
131212
aws_session_token: AWS session token. Defaults to None (uses AWS default credentials).
132213
default_collection: The default collection to use if no collection is provided.
214+
collection_sanitization_strategy: Strategy for sanitizing collection names. Defaults to None (no sanitization).
215+
key_sanitization_strategy: Strategy for sanitizing keys. Defaults to None (no sanitization).
133216
"""
134217
self._bucket_name = bucket_name
135218
self._endpoint_url = endpoint_url
@@ -148,7 +231,11 @@ def __init__(
148231
self._raw_client = session.client(service_name="s3", endpoint_url=endpoint_url) # pyright: ignore[reportUnknownMemberType]
149232
self._client = None
150233

151-
super().__init__(default_collection=default_collection)
234+
super().__init__(
235+
default_collection=default_collection,
236+
collection_sanitization_strategy=collection_sanitization_strategy,
237+
key_sanitization_strategy=key_sanitization_strategy,
238+
)
152239

153240
async def _connect(self) -> None:
154241
if self._client is None and self._raw_client:
@@ -230,8 +317,8 @@ async def _setup(self) -> None:
230317
def _get_s3_key(self, *, collection: str, key: str) -> str:
231318
"""Generate the S3 object key for a given collection and key.
232319
233-
S3 has a maximum key length of 1024 bytes. To ensure compliance, we hash
234-
long collection or key names to stay within limits while maintaining uniqueness.
320+
The collection and key are sanitized using the configured sanitization strategies
321+
before being combined into the S3 object key format: {collection}/{key}.
235322
236323
Args:
237324
collection: The collection name.
@@ -240,11 +327,9 @@ def _get_s3_key(self, *, collection: str, key: str) -> str:
240327
Returns:
241328
The S3 object key in format: {collection}/{key}
242329
"""
243-
# Hash collection and key if they exceed their max byte lengths
244-
# This ensures the combined S3 key stays under 1024 bytes
245-
safe_collection = hash_excess_length(collection, MAX_COLLECTION_LENGTH, length_is_bytes=True)
246-
safe_key = hash_excess_length(key, MAX_KEY_LENGTH, length_is_bytes=True)
247-
return f"{safe_collection}/{safe_key}"
330+
# Use the sanitization strategies from BaseStore
331+
sanitized_collection, sanitized_key = self._sanitize_collection_and_key(collection=collection, key=key)
332+
return f"{sanitized_collection}/{sanitized_key}"
248333

249334
@override
250335
async def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry | None:

key-value/key-value-aio/tests/stores/s3/test_s3.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,17 @@ async def setup_s3(self, request: pytest.FixtureRequest) -> AsyncGenerator[None,
7070
@override
7171
@pytest.fixture
7272
async def store(self, setup_s3: None) -> S3Store:
73+
from key_value.aio.stores.s3 import S3CollectionSanitizationStrategy, S3KeySanitizationStrategy
74+
7375
store = S3Store(
7476
bucket_name=S3_TEST_BUCKET,
7577
endpoint_url=S3_ENDPOINT,
7678
aws_access_key_id="test",
7779
aws_secret_access_key="test",
7880
region_name="us-east-1",
81+
# Use sanitization strategies for tests to handle long collection/key names
82+
collection_sanitization_strategy=S3CollectionSanitizationStrategy(),
83+
key_sanitization_strategy=S3KeySanitizationStrategy(),
7984
)
8085

8186
# Clean up test bucket if it exists

0 commit comments

Comments
 (0)