22from typing import TYPE_CHECKING , Any , overload
33
44from key_value .shared .utils .managed_entry import ManagedEntry
5+ from key_value .shared .utils .sanitization import SanitizationStrategy
56from key_value .shared .utils .sanitize import hash_excess_length
67from typing_extensions import Self , override
78
3334 from aiobotocore .client import AioBaseClient as S3Client
3435
3536
class S3KeySanitizationStrategy(SanitizationStrategy):
    """Byte-aware sanitization strategy for S3 object keys.

    S3 limits object keys to 1024 bytes once UTF-8 encoded. Values whose
    encoded size exceeds ``max_bytes`` are handed to ``hash_excess_length``
    so the resulting key stays within the configured budget.

    Args:
        max_bytes: Maximum key length in bytes. Defaults to ``MAX_KEY_LENGTH``.
    """

    def __init__(self, max_bytes: int = MAX_KEY_LENGTH) -> None:
        """Store the byte budget applied to every key.

        Args:
            max_bytes: Maximum key length in bytes.
        """
        self.max_bytes = max_bytes

    def sanitize(self, value: str) -> str:
        """Return ``value`` constrained to ``max_bytes`` of UTF-8.

        Args:
            value: The key to sanitize.

        Returns:
            The result of ``hash_excess_length`` with byte-based measuring:
            the original value if within limit, or truncated+hashed if too long.
        """
        byte_limit = self.max_bytes
        # length_is_bytes=True measures the encoded size, not the character count.
        return hash_excess_length(value, byte_limit, length_is_bytes=True)

    def validate(self, value: str) -> None:
        """Accept every value; S3 keys need no validation here."""
        return None
68+
69+
class S3CollectionSanitizationStrategy(S3KeySanitizationStrategy):
    """Byte-aware sanitization strategy for S3 collection names.

    Behaves exactly like ``S3KeySanitizationStrategy``; the only difference is
    that the default byte budget comes from ``MAX_COLLECTION_LENGTH``, which
    applies to the collection part of the ``{collection}/{key}`` object key.
    """

    def __init__(self, max_bytes: int = MAX_COLLECTION_LENGTH) -> None:
        """Store the byte budget applied to every collection name.

        Args:
            max_bytes: Maximum collection name length in bytes.
        """
        # All behaviour lives in the parent; only the default limit differs.
        super().__init__(max_bytes=max_bytes)
84+
85+
3686class S3Store (BaseContextManagerStore , BaseStore ):
3787 """AWS S3-based key-value store.
3888
@@ -42,13 +92,28 @@ class S3Store(BaseContextManagerStore, BaseStore):
4292 S3 object metadata and checked client-side during retrieval (S3 lifecycle policies
4393 can be configured separately for background cleanup, but don't provide atomic TTL+retrieval).
4494
95+ By default, collections and keys are not sanitized. This means you must ensure that
96+ the combined "{collection}/{key}" path does not exceed S3's 1024-byte limit when UTF-8 encoded.
97+
98+ To handle long collection or key names, use the S3CollectionSanitizationStrategy and
99+ S3KeySanitizationStrategy which will hash values exceeding the byte limit.
100+
45101 Example:
46102 Basic usage with automatic AWS credentials:
47103
48104 >>> async with S3Store(bucket_name="my-kv-store") as store:
49105 ... await store.put(key="user:123", value={"name": "Alice"}, ttl=3600)
50106 ... user = await store.get(key="user:123")
51107
108+ With sanitization for long keys/collections:
109+
110+ >>> async with S3Store(
111+ ... bucket_name="my-kv-store",
112+ ... collection_sanitization_strategy=S3CollectionSanitizationStrategy(),
113+ ... key_sanitization_strategy=S3KeySanitizationStrategy(),
114+ ... ) as store:
115+ ... await store.put(key="very_long_key" * 100, value={"data": "test"})
116+
52117 With custom AWS credentials:
53118
54119 >>> async with S3Store(
@@ -74,13 +139,23 @@ class S3Store(BaseContextManagerStore, BaseStore):
74139 _client : S3Client | None
75140
@overload
def __init__(
    self,
    *,
    client: S3Client,
    bucket_name: str,
    default_collection: str | None = None,
    collection_sanitization_strategy: SanitizationStrategy | None = None,
    key_sanitization_strategy: SanitizationStrategy | None = None,
) -> None:
    """Create an S3 store around a pre-configured client.

    Args:
        client: The S3 client to use. You must have entered the context manager before passing this in.
        bucket_name: The name of the S3 bucket to use.
        default_collection: The default collection to use if no collection is provided.
        collection_sanitization_strategy: Strategy for sanitizing collection names.
            Defaults to None (no sanitization).
        key_sanitization_strategy: Strategy for sanitizing keys.
            Defaults to None (no sanitization).
    """
    ...
85160
86161 @overload
@@ -94,6 +169,8 @@ def __init__(
94169 aws_secret_access_key : str | None = None ,
95170 aws_session_token : str | None = None ,
96171 default_collection : str | None = None ,
172+ collection_sanitization_strategy : SanitizationStrategy | None = None ,
173+ key_sanitization_strategy : SanitizationStrategy | None = None ,
97174 ) -> None :
98175 """Initialize the S3 store with AWS credentials.
99176
@@ -105,6 +182,8 @@ def __init__(
105182 aws_secret_access_key: AWS secret access key. Defaults to None (uses AWS default credentials).
106183 aws_session_token: AWS session token. Defaults to None (uses AWS default credentials).
107184 default_collection: The default collection to use if no collection is provided.
185+ collection_sanitization_strategy: Strategy for sanitizing collection names. Defaults to None (no sanitization).
186+ key_sanitization_strategy: Strategy for sanitizing keys. Defaults to None (no sanitization).
108187 """
109188
110189 def __init__ (
@@ -118,6 +197,8 @@ def __init__(
118197 aws_secret_access_key : str | None = None ,
119198 aws_session_token : str | None = None ,
120199 default_collection : str | None = None ,
200+ collection_sanitization_strategy : SanitizationStrategy | None = None ,
201+ key_sanitization_strategy : SanitizationStrategy | None = None ,
121202 ) -> None :
122203 """Initialize the S3 store.
123204
@@ -130,6 +211,8 @@ def __init__(
130211 aws_secret_access_key: AWS secret access key. Defaults to None (uses AWS default credentials).
131212 aws_session_token: AWS session token. Defaults to None (uses AWS default credentials).
132213 default_collection: The default collection to use if no collection is provided.
214+ collection_sanitization_strategy: Strategy for sanitizing collection names. Defaults to None (no sanitization).
215+ key_sanitization_strategy: Strategy for sanitizing keys. Defaults to None (no sanitization).
133216 """
134217 self ._bucket_name = bucket_name
135218 self ._endpoint_url = endpoint_url
@@ -148,7 +231,11 @@ def __init__(
148231 self ._raw_client = session .client (service_name = "s3" , endpoint_url = endpoint_url ) # pyright: ignore[reportUnknownMemberType]
149232 self ._client = None
150233
151- super ().__init__ (default_collection = default_collection )
234+ super ().__init__ (
235+ default_collection = default_collection ,
236+ collection_sanitization_strategy = collection_sanitization_strategy ,
237+ key_sanitization_strategy = key_sanitization_strategy ,
238+ )
152239
153240 async def _connect (self ) -> None :
154241 if self ._client is None and self ._raw_client :
@@ -230,8 +317,8 @@ async def _setup(self) -> None:
def _get_s3_key(self, *, collection: str, key: str) -> str:
    """Build the S3 object key that addresses an entry.

    Both parts are first passed through the store's configured sanitization
    strategies and are then combined into the S3 object key format:
    {collection}/{key}.

    Args:
        collection: The collection name.
        key: The key within the collection.

    Returns:
        The S3 object key in format: {collection}/{key}
    """
    # Sanitization is delegated to the helper provided by BaseStore.
    sanitized_parts = self._sanitize_collection_and_key(collection=collection, key=key)
    safe_collection, safe_key = sanitized_parts
    return f"{safe_collection}/{safe_key}"
248333
249334 @override
250335 async def _get_managed_entry (self , * , key : str , collection : str ) -> ManagedEntry | None :
0 commit comments