diff --git a/key-value/key-value-aio/pyproject.toml b/key-value/key-value-aio/pyproject.toml index 61c591e4..8f41827c 100644 --- a/key-value/key-value-aio/pyproject.toml +++ b/key-value/key-value-aio/pyproject.toml @@ -49,6 +49,7 @@ rocksdb = [ "rocksdict>=0.3.24 ; python_version >= '3.12'", # RocksDB 0.3.24 is the first version to support Python 3.13 "rocksdict>=0.3.2 ; python_version < '3.12'" ] +duckdb = ["duckdb>=1.1.1", "pytz>=2025.2"] wrappers-encryption = ["cryptography>=45.0.0"] [tool.pytest.ini_options] @@ -68,7 +69,7 @@ env_files = [".env"] [dependency-groups] dev = [ - "py-key-value-aio[memory,disk,filetree,redis,elasticsearch,memcached,mongodb,vault,dynamodb,rocksdb]", + "py-key-value-aio[memory,disk,filetree,redis,elasticsearch,memcached,mongodb,vault,dynamodb,rocksdb,duckdb]", "py-key-value-aio[valkey]; platform_system != 'Windows'", "py-key-value-aio[keyring]", "py-key-value-aio[pydantic]", diff --git a/key-value/key-value-aio/src/key_value/aio/stores/duckdb/__init__.py b/key-value/key-value-aio/src/key_value/aio/stores/duckdb/__init__.py new file mode 100644 index 00000000..797a7fb4 --- /dev/null +++ b/key-value/key-value-aio/src/key_value/aio/stores/duckdb/__init__.py @@ -0,0 +1,3 @@ +from key_value.aio.stores.duckdb.store import DuckDBStore + +__all__ = ["DuckDBStore"] diff --git a/key-value/key-value-aio/src/key_value/aio/stores/duckdb/store.py b/key-value/key-value-aio/src/key_value/aio/stores/duckdb/store.py new file mode 100644 index 00000000..bb4ff890 --- /dev/null +++ b/key-value/key-value-aio/src/key_value/aio/stores/duckdb/store.py @@ -0,0 +1,380 @@ +import re +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, overload + +from key_value.shared.errors import DeserializationError +from key_value.shared.utils.managed_entry import ManagedEntry +from key_value.shared.utils.serialization import SerializationAdapter +from typing_extensions import override + +from key_value.aio.stores.base import SEED_DATA_TYPE, BaseContextManagerStore, BaseStore + +try: + import duckdb +except ImportError as e: + msg = "DuckDBStore requires the duckdb extra from py-key-value-aio or py-key-value-sync" + raise ImportError(msg) from e + + +class DuckDBSerializationAdapter(SerializationAdapter): + """Adapter for DuckDB with native JSON storage.""" + + def __init__(self) -> None: + """Initialize the DuckDB adapter.""" + super().__init__() + + self._date_format = "datetime" + + @override + def prepare_dump(self, data: dict[str, Any]) -> dict[str, Any]: + """Prepare data for dumping to DuckDB.""" + return data + + @override + def prepare_load(self, data: dict[str, Any]) -> dict[str, Any]: + """Prepare data loaded from DuckDB for conversion to ManagedEntry. + + Handles timezone conversion for DuckDB's naive timestamps. 
+ """ + # DuckDB always returns naive timestamps, but ManagedEntry expects timezone-aware ones + self._convert_timestamps_to_utc(data) + + return data + + def _convert_timestamps_to_utc(self, data: dict[str, Any]) -> None: + """Convert naive timestamps to UTC timezone-aware timestamps.""" + created_at = data.get("created_at") + if created_at is not None and isinstance(created_at, datetime): + if created_at.tzinfo is None: + data["created_at"] = created_at.replace(tzinfo=timezone.utc) + else: + data["created_at"] = created_at.astimezone(tz=timezone.utc) + + expires_at = data.get("expires_at") + if expires_at is not None and isinstance(expires_at, datetime): + if expires_at.tzinfo is None: + data["expires_at"] = expires_at.replace(tzinfo=timezone.utc) + else: + data["expires_at"] = expires_at.astimezone(tz=timezone.utc) + + +class DuckDBStore(BaseContextManagerStore, BaseStore): + """A DuckDB-based key-value store supporting both in-memory and persistent storage. + + DuckDB is an in-process SQL OLAP database that provides excellent performance + for analytical workloads while supporting standard SQL operations. This store + can operate in memory-only mode or persist data to disk. + + The store uses native DuckDB types (JSON, TIMESTAMP) to enable efficient SQL queries + on stored data. Users can query the database directly for analytics or data exploration. + + Values are stored in a JSON column as native dicts, allowing direct SQL queries + on the stored data for analytics and reporting. + + Note on connection ownership: When you provide an existing connection, the store + will take ownership and close it when the store is closed or garbage collected. + If you need to reuse a connection, create separate DuckDB connections for each store. + """ + + _connection: duckdb.DuckDBPyConnection + _is_closed: bool + _owns_connection: bool + _adapter: SerializationAdapter + _table_name: str + + @overload + def __init__( + self, + *, + connection: duckdb.DuckDBPyConnection, + table_name: str = "kv_entries", + default_collection: str | None = None, + seed: SEED_DATA_TYPE | None = None, + ) -> None: + """Initialize the DuckDB store with an existing connection. + + Warning: The store will take ownership of the connection and close it + when the store is closed or garbage collected. If you need to reuse + a connection, create separate DuckDB connections for each store. + + Args: + connection: An existing DuckDB connection to use. + table_name: Name of the table to store key-value entries. Defaults to "kv_entries". + default_collection: The default collection to use if no collection is provided. + seed: Optional seed data to pre-populate the store. + """ + + @overload + def __init__( + self, + *, + database_path: Path | str | None = None, + table_name: str = "kv_entries", + default_collection: str | None = None, + seed: SEED_DATA_TYPE | None = None, + ) -> None: + """Initialize the DuckDB store with a database path. + + Args: + database_path: Path to the database file. If None or ':memory:', uses in-memory database. + table_name: Name of the table to store key-value entries. Defaults to "kv_entries". + default_collection: The default collection to use if no collection is provided. + seed: Optional seed data to pre-populate the store. 
+ """ + + def __init__( + self, + *, + connection: duckdb.DuckDBPyConnection | None = None, + database_path: Path | str | None = None, + table_name: str = "kv_entries", + default_collection: str | None = None, + seed: SEED_DATA_TYPE | None = None, + ) -> None: + """Initialize the DuckDB store. + + Args: + connection: An existing DuckDB connection to use. + database_path: Path to the database file. If None or ':memory:', uses in-memory database. + table_name: Name of the table to store key-value entries. Defaults to "kv_entries". + default_collection: The default collection to use if no collection is provided. + seed: Optional seed data to pre-populate the store. + """ + if connection is not None and database_path is not None: + msg = "Provide only one of connection or database_path" + raise ValueError(msg) + + if connection is not None: + self._connection = connection + self._owns_connection = True # We take ownership even of provided connections + else: + # Convert Path to string if needed + if isinstance(database_path, Path): + database_path = str(database_path) + + # Use in-memory database if no path specified + if database_path is None or database_path == ":memory:": + self._connection = duckdb.connect(":memory:") + else: + self._connection = duckdb.connect(database=database_path) + self._owns_connection = True + + self._is_closed = False + self._adapter = DuckDBSerializationAdapter() + + # Validate table name to prevent SQL injection + if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", table_name): + msg = "Table name must start with a letter or underscore and contain only letters, digits, or underscores" + raise ValueError(msg) + self._table_name = table_name + self._stable_api = False + + super().__init__(default_collection=default_collection, seed=seed) + + def _get_create_table_sql(self) -> str: + """Generate SQL for creating the key-value entries table. + + Returns: + SQL CREATE TABLE statement. + """ + return f""" + CREATE TABLE IF NOT EXISTS {self._table_name} ( + collection VARCHAR NOT NULL, + key VARCHAR NOT NULL, + value JSON NOT NULL, + created_at TIMESTAMPTZ, + expires_at TIMESTAMPTZ, + version INT NOT NULL, + PRIMARY KEY (collection, key) + ) + """ + + def _get_create_collection_index_sql(self) -> str: + """Generate SQL for creating index on collection column. + + Returns: + SQL CREATE INDEX statement. + """ + return f""" + CREATE INDEX IF NOT EXISTS idx_{self._table_name}_collection + ON {self._table_name}(collection) + """ + + def _get_create_expires_index_sql(self) -> str: + """Generate SQL for creating index on expires_at column. + + Returns: + SQL CREATE INDEX statement. + """ + return f""" + CREATE INDEX IF NOT EXISTS idx_{self._table_name}_expires_at + ON {self._table_name}(expires_at) + """ + + def _get_select_sql(self) -> str: + """Generate SQL for selecting an entry by collection and key. + + Returns: + SQL SELECT statement with placeholders. + """ + return f""" + SELECT value, created_at, expires_at, version + FROM {self._table_name} + WHERE collection = ? AND key = ? + """ # noqa: S608 + + def _get_insert_sql(self) -> str: + """Generate SQL for inserting or replacing an entry. + + Returns: + SQL INSERT OR REPLACE statement with placeholders. + """ + return f""" + INSERT OR REPLACE INTO {self._table_name} + (collection, key, value, created_at, expires_at, version) + VALUES (?, ?, ?, ?, ?, ?) + """ # noqa: S608 + + def _get_delete_sql(self) -> str: + """Generate SQL for deleting an entry by collection and key. 
+
+        Returns:
+            SQL DELETE statement with RETURNING clause.
+        """
+        return f"""
+            DELETE FROM {self._table_name}
+            WHERE collection = ? AND key = ?
+            RETURNING key
+        """  # noqa: S608
+
+    @override
+    async def _setup(self) -> None:
+        """Initialize the database schema for key-value storage.
+
+        The schema uses native DuckDB types for efficient querying:
+        - value: JSON column storing native dicts for queryability
+        - created_at: TIMESTAMPTZ for native datetime operations
+        - expires_at: TIMESTAMPTZ for native expiration queries
+
+        This design enables:
+        - Direct SQL queries on the database for analytics
+        - Efficient expiration cleanup: DELETE FROM table WHERE expires_at < now()
+        - Metadata queries without JSON deserialization
+        - Native JSON column support for rich querying capabilities
+        """
+        # Create the main table for storing key-value entries
+        self._connection.execute(self._get_create_table_sql())
+
+        # Create index for efficient collection queries
+        self._connection.execute(self._get_create_collection_index_sql())
+
+        # Create index for expiration-based queries
+        self._connection.execute(self._get_create_expires_index_sql())
+
+    @override
+    async def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry | None:
+        """Retrieve a managed entry by key from the specified collection.
+
+        Reconstructs the ManagedEntry from value column and metadata columns
+        using the serialization adapter.
+        """
+        if self._is_closed:
+            msg = "Cannot operate on closed DuckDBStore"
+            raise RuntimeError(msg)
+
+        result = self._connection.execute(
+            self._get_select_sql(),
+            [collection, key],
+        ).fetchone()
+
+        if result is None:
+            return None
+
+        value, created_at, expires_at, version = result
+
+        # Build document dict for the adapter
+        document: dict[str, Any] = {
+            "value": value,
+            "created_at": created_at,
+            "expires_at": expires_at,
+            "version": version,
+        }
+
+        document = {k: v for k, v in document.items() if v is not None}
+
+        try:
+            managed_entry = self._adapter.load_dict(data=document)
+        except DeserializationError:
+            return None
+
+        return managed_entry
+
+    @override
+    async def _put_managed_entry(
+        self,
+        *,
+        key: str,
+        collection: str,
+        managed_entry: ManagedEntry,
+    ) -> None:
+        """Store a managed entry by key in the specified collection.
+
+        Uses the serialization adapter to convert the ManagedEntry to the
+        appropriate storage format.
+ """ + if self._is_closed: + msg = "Cannot operate on closed DuckDBStore" + raise RuntimeError(msg) + + # Ensure that the value is serializable to JSON + _ = managed_entry.value_as_json + + # Use adapter to dump the managed entry to a dict with key and collection + document = self._adapter.dump_dict(entry=managed_entry, key=key, collection=collection) + + # Insert or replace the entry with metadata in separate columns + self._connection.execute( + self._get_insert_sql(), + [ + collection, + key, + document["value"], + document.get("created_at"), + document.get("expires_at"), + document.get("version"), + ], + ) + + @override + async def _delete_managed_entry(self, *, key: str, collection: str) -> bool: + """Delete a managed entry by key from the specified collection.""" + if self._is_closed: + msg = "Cannot operate on closed DuckDBStore" + raise RuntimeError(msg) + + result = self._connection.execute( + self._get_delete_sql(), + [collection, key], + ) + + # Check if any rows were deleted by counting returned rows + deleted_rows = result.fetchall() + return len(deleted_rows) > 0 + + @override + async def _close(self) -> None: + """Close the DuckDB connection.""" + if not self._is_closed and self._owns_connection: + self._connection.close() + self._is_closed = True + + def __del__(self) -> None: + """Clean up the DuckDB connection on deletion.""" + try: + if not self._is_closed and self._owns_connection and hasattr(self, "_connection"): + self._connection.close() + self._is_closed = True + except Exception: # noqa: S110 + # Suppress errors during cleanup to avoid issues during interpreter shutdown + pass diff --git a/key-value/key-value-aio/tests/stores/duckdb/__init__.py b/key-value/key-value-aio/tests/stores/duckdb/__init__.py new file mode 100644 index 00000000..d735ddb5 --- /dev/null +++ b/key-value/key-value-aio/tests/stores/duckdb/__init__.py @@ -0,0 +1 @@ +# DuckDB store tests diff --git a/key-value/key-value-aio/tests/stores/duckdb/test_duckdb.py b/key-value/key-value-aio/tests/stores/duckdb/test_duckdb.py new file mode 100644 index 00000000..9b890e8d --- /dev/null +++ b/key-value/key-value-aio/tests/stores/duckdb/test_duckdb.py @@ -0,0 +1,217 @@ +from collections.abc import AsyncGenerator +from pathlib import Path +from tempfile import TemporaryDirectory + +import pytest +from duckdb import CatalogException, DuckDBPyConnection +from inline_snapshot import snapshot +from typing_extensions import override + +from key_value.aio.stores.base import BaseStore +from key_value.aio.stores.duckdb import DuckDBStore +from tests.stores.base import BaseStoreTests, ContextManagerStoreTestMixin + + +def get_client_from_store(store: DuckDBStore) -> DuckDBPyConnection: + return store._connection # pyright: ignore[reportPrivateUsage] + + +@pytest.mark.filterwarnings("ignore:A configured store is unstable and may change in a backwards incompatible way. Use at your own risk.") +class TestDuckDBStore(ContextManagerStoreTestMixin, BaseStoreTests): + @override + @pytest.fixture + async def store(self) -> AsyncGenerator[DuckDBStore, None]: + """Test with in-memory DuckDB database.""" + duckdb_store = DuckDBStore() + yield duckdb_store + await duckdb_store.close() + + @pytest.mark.skip(reason="Local disk stores are unbounded") + async def test_not_unbounded(self, store: BaseStore): ... + + +@pytest.mark.filterwarnings("ignore:A configured store is unstable and may change in a backwards incompatible way. 
Use at your own risk.") +class TestDuckDBStorePersistent(ContextManagerStoreTestMixin, BaseStoreTests): + @override + @pytest.fixture + async def store(self) -> AsyncGenerator[DuckDBStore, None]: + """Test with persistent DuckDB database file.""" + with TemporaryDirectory() as temp_dir: + db_path = Path(temp_dir) / "test.db" + duckdb_store = DuckDBStore(database_path=db_path) + yield duckdb_store + await duckdb_store.close() + + @pytest.mark.skip(reason="Local disk stores are unbounded") + async def test_not_unbounded(self, store: BaseStore): ... + + +@pytest.mark.filterwarnings("ignore:A configured store is unstable and may change in a backwards incompatible way. Use at your own risk.") +class TestDuckDBStoreSpecific: + """Test DuckDB-specific functionality.""" + + @pytest.fixture + async def store(self) -> AsyncGenerator[DuckDBStore, None]: + """Provide DuckDB store instance.""" + duckdb_store = DuckDBStore() + yield duckdb_store + await duckdb_store.close() + + async def test_native_sql_queryability(self): + """Test that users can query the database directly with SQL.""" + store = DuckDBStore() + + # Store some test data with known metadata + await store.put(collection="products", key="item1", value={"name": "Widget", "price": 10.99}, ttl=3600) + await store.put(collection="products", key="item2", value={"name": "Gadget", "price": 25.50}, ttl=7200) + await store.put(collection="orders", key="order1", value={"total": 100.00, "items": 3}) + + # Query directly via SQL to verify native storage + # Check that value is stored as JSON (can extract fields) + result = ( + get_client_from_store(store) + .execute(""" + SELECT key, value->'name' as name, value->'price' as price + FROM kv_entries + WHERE collection = 'products' + ORDER BY key + """) + .fetchall() + ) # pyright: ignore[reportPrivateUsage] + + assert len(result) == 2 + assert result[0][0] == "item1" + assert result[0][1] == '"Widget"' # JSON strings are quoted + assert result[1][0] == "item2" + + # Query by expiration timestamp + count_result = ( + get_client_from_store(store) + .execute(""" + SELECT COUNT(*) + FROM kv_entries + WHERE expires_at > now() OR expires_at IS NULL + """) + .fetchone() + ) # pyright: ignore[reportPrivateUsage] + + assert count_result is not None + assert count_result[0] == 3 # All 3 entries should not be expired + + await store.close() + + async def test_database_path_initialization(self): + """Test that store can be initialized with different database path options.""" + # In-memory (default) + store1 = DuckDBStore() + await store1.put(collection="test", key="key1", value={"test": "value1"}) + result1 = await store1.get(collection="test", key="key1") + assert result1 == {"test": "value1"} + await store1.close() + + # Explicit in-memory + store2 = DuckDBStore(database_path=":memory:") + await store2.put(collection="test", key="key2", value={"test": "value2"}) + result2 = await store2.get(collection="test", key="key2") + assert result2 == {"test": "value2"} + await store2.close() + + async def test_persistent_database(self): + """Test that data persists across store instances when using file database.""" + with TemporaryDirectory() as temp_dir: + db_path = Path(temp_dir) / "persist_test.db" + + # Store data in first instance + store1 = DuckDBStore(database_path=db_path) + await store1.put(collection="test", key="persist_key", value={"data": "persistent"}) + await store1.close() + + # Create second instance with same database file + store2 = DuckDBStore(database_path=db_path) + result = await 
store2.get(collection="test", key="persist_key") + await store2.close() + + assert result == {"data": "persistent"} + + async def test_sql_injection_protection(self, store: DuckDBStore): + """Test that the store is protected against SQL injection attacks.""" + malicious_collection = "test'; DROP TABLE kv_entries; --" + malicious_key = "key'; DELETE FROM kv_entries; --" + + # These operations should not cause SQL injection + await store.put(collection=malicious_collection, key=malicious_key, value={"safe": "data"}) + result = await store.get(collection=malicious_collection, key=malicious_key) + assert result == {"safe": "data"} + + # Verify the table still exists and other data is safe + await store.put(collection="normal", key="normal_key", value={"normal": "data"}) + normal_result = await store.get(collection="normal", key="normal_key") + assert normal_result == {"normal": "data"} + + async def test_large_data_storage(self, store: DuckDBStore): + """Test storing and retrieving large data values.""" + # Create a large value (1MB of data) + large_value = {"large_data": "x" * (1024 * 1024)} + + await store.put(collection="test", key="large_key", value=large_value) + result = await store.get(collection="test", key="large_key") + + assert result == large_value + + async def test_unicode_support(self, store: DuckDBStore): + """Test that the store properly handles Unicode characters.""" + unicode_data = { + "english": "Hello World", + "chinese": "你好世界", + "japanese": "こんにちは世界", + "arabic": "مرحبا بالعالم", + "emoji": "🌍🚀💻", + "special": "Special chars: !@#$%^&*()_+-={}[]|\\:;\"'<>?,./", + } + + await store.put(collection="unicode_test", key="unicode_key", value=unicode_data) + result = await store.get(collection="unicode_test", key="unicode_key") + + assert result == unicode_data + + async def test_connection_initialization(self): + """Test that store can be initialized with existing DuckDB connection.""" + import duckdb + + conn = duckdb.connect(":memory:") + store = DuckDBStore(connection=conn) + + await store.put(collection="test", key="conn_test", value={"test": "value"}) + result = await store.get(collection="test", key="conn_test") + assert result == {"test": "value"} + + await store.close() + + async def test_custom_table_name(self): + """Test that store can use custom table name.""" + custom_table = "my_custom_kv_table" + store = DuckDBStore(table_name=custom_table) + + # Store some data + await store.put(collection="test", key="key1", value={"data": "value"}) + + # Verify the custom table exists and contains the data + tables = ( + get_client_from_store(store) + .table(custom_table) + .filter(filter_expr="key = 'key1'") + .select("key", "collection") + .execute() + .fetchone() + ) + + assert tables == snapshot(("key1", "test")) + + # Verify default table doesn't exist + with pytest.raises(CatalogException): + get_client_from_store(store).table("kv_entries") + + await store.close() + + @pytest.mark.skip(reason="Local disk stores are unbounded") + async def test_not_unbounded(self, store: BaseStore): ... 
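
A minimal usage sketch of the new async store, for orientation while reviewing (not part of the patch): it assumes the duckdb extra is installed, uses the in-memory default, and sticks to the put/get/close calls exercised by the tests above; the collection, key, value, and ttl are illustrative.

    # Async usage sketch for DuckDBStore (py-key-value-aio)
    import asyncio

    from key_value.aio.stores.duckdb import DuckDBStore


    async def main() -> None:
        # Default is an in-memory database; pass database_path="app.db" to persist
        store = DuckDBStore()

        # put/get round-trip with an optional TTL, as in the tests above
        await store.put(collection="products", key="item1", value={"name": "Widget", "price": 10.99}, ttl=3600)
        print(await store.get(collection="products", key="item1"))  # {'name': 'Widget', 'price': 10.99}

        # The store owns its connection and must be closed when done
        await store.close()


    asyncio.run(main())
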
diff --git a/key-value/key-value-sync/pyproject.toml b/key-value/key-value-sync/pyproject.toml index c268ed04..50770808 100644 --- a/key-value/key-value-sync/pyproject.toml +++ b/key-value/key-value-sync/pyproject.toml @@ -34,6 +34,7 @@ module-name = "key_value.sync" [project.optional-dependencies] memory = ["cachetools>=5.0.0"] disk = ["diskcache>=5.0.0", "pathvalidate>=3.3.1",] +filetree = ["anyio>=4.4.0"] redis = ["redis>=4.3.0"] mongodb = ["pymongo>=4.0.0"] valkey = ["valkey-glide-sync>=2.1.0"] @@ -47,6 +48,7 @@ rocksdb = [ "rocksdict>=0.3.24 ; python_version >= '3.12'", # RocksDB 0.3.24 is the first version to support Python 3.13 "rocksdict>=0.3.2 ; python_version < '3.12'" ] +duckdb = ["duckdb>=1.1.1", "pytz>=2025.2"] wrappers-encryption = ["cryptography>=45.0.0"] [tool.pytest.ini_options] @@ -66,7 +68,7 @@ env_files = [".env"] [dependency-groups] dev = [ - "py-key-value-sync[memory,disk,redis,elasticsearch,memcached,mongodb,vault,rocksdb]", + "py-key-value-sync[memory,disk,filetree,redis,elasticsearch,memcached,mongodb,vault,rocksdb,duckdb]", "py-key-value-sync[valkey]; platform_system != 'Windows'", "py-key-value-sync[pydantic]", "py-key-value-sync[keyring]", diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/duckdb/__init__.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/duckdb/__init__.py new file mode 100644 index 00000000..7cde61a6 --- /dev/null +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/duckdb/__init__.py @@ -0,0 +1,6 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file '__init__.py' +# DO NOT CHANGE! Change the original file instead. +from key_value.sync.code_gen.stores.duckdb.store import DuckDBStore + +__all__ = ["DuckDBStore"] diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/duckdb/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/duckdb/store.py new file mode 100644 index 00000000..15c7713f --- /dev/null +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/duckdb/store.py @@ -0,0 +1,330 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file 'store.py' +# DO NOT CHANGE! Change the original file instead. +import re +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, overload + +from key_value.shared.errors import DeserializationError +from key_value.shared.utils.managed_entry import ManagedEntry +from key_value.shared.utils.serialization import SerializationAdapter +from typing_extensions import override + +from key_value.sync.code_gen.stores.base import SEED_DATA_TYPE, BaseContextManagerStore, BaseStore + +try: + import duckdb +except ImportError as e: + msg = "DuckDBStore requires the duckdb extra from py-key-value-aio or py-key-value-sync" + raise ImportError(msg) from e + + +class DuckDBSerializationAdapter(SerializationAdapter): + """Adapter for DuckDB with native JSON storage.""" + + def __init__(self) -> None: + """Initialize the DuckDB adapter.""" + super().__init__() + + self._date_format = "datetime" + + @override + def prepare_dump(self, data: dict[str, Any]) -> dict[str, Any]: + """Prepare data for dumping to DuckDB.""" + return data + + @override + def prepare_load(self, data: dict[str, Any]) -> dict[str, Any]: + """Prepare data loaded from DuckDB for conversion to ManagedEntry. + + Handles timezone conversion for DuckDB's naive timestamps. 
+ """ + # DuckDB always returns naive timestamps, but ManagedEntry expects timezone-aware ones + self._convert_timestamps_to_utc(data) + + return data + + def _convert_timestamps_to_utc(self, data: dict[str, Any]) -> None: + """Convert naive timestamps to UTC timezone-aware timestamps.""" + created_at = data.get("created_at") + if created_at is not None and isinstance(created_at, datetime): + if created_at.tzinfo is None: + data["created_at"] = created_at.replace(tzinfo=timezone.utc) + else: + data["created_at"] = created_at.astimezone(tz=timezone.utc) + + expires_at = data.get("expires_at") + if expires_at is not None and isinstance(expires_at, datetime): + if expires_at.tzinfo is None: + data["expires_at"] = expires_at.replace(tzinfo=timezone.utc) + else: + data["expires_at"] = expires_at.astimezone(tz=timezone.utc) + + +class DuckDBStore(BaseContextManagerStore, BaseStore): + """A DuckDB-based key-value store supporting both in-memory and persistent storage. + + DuckDB is an in-process SQL OLAP database that provides excellent performance + for analytical workloads while supporting standard SQL operations. This store + can operate in memory-only mode or persist data to disk. + + The store uses native DuckDB types (JSON, TIMESTAMP) to enable efficient SQL queries + on stored data. Users can query the database directly for analytics or data exploration. + + Values are stored in a JSON column as native dicts, allowing direct SQL queries + on the stored data for analytics and reporting. + + Note on connection ownership: When you provide an existing connection, the store + will take ownership and close it when the store is closed or garbage collected. + If you need to reuse a connection, create separate DuckDB connections for each store. + """ + + _connection: duckdb.DuckDBPyConnection + _is_closed: bool + _owns_connection: bool + _adapter: SerializationAdapter + _table_name: str + + @overload + def __init__( + self, + *, + connection: duckdb.DuckDBPyConnection, + table_name: str = "kv_entries", + default_collection: str | None = None, + seed: SEED_DATA_TYPE | None = None, + ) -> None: + """Initialize the DuckDB store with an existing connection. + + Warning: The store will take ownership of the connection and close it + when the store is closed or garbage collected. If you need to reuse + a connection, create separate DuckDB connections for each store. + + Args: + connection: An existing DuckDB connection to use. + table_name: Name of the table to store key-value entries. Defaults to "kv_entries". + default_collection: The default collection to use if no collection is provided. + seed: Optional seed data to pre-populate the store. + """ + + @overload + def __init__( + self, + *, + database_path: Path | str | None = None, + table_name: str = "kv_entries", + default_collection: str | None = None, + seed: SEED_DATA_TYPE | None = None, + ) -> None: + """Initialize the DuckDB store with a database path. + + Args: + database_path: Path to the database file. If None or ':memory:', uses in-memory database. + table_name: Name of the table to store key-value entries. Defaults to "kv_entries". + default_collection: The default collection to use if no collection is provided. + seed: Optional seed data to pre-populate the store. 
+ """ + + def __init__( + self, + *, + connection: duckdb.DuckDBPyConnection | None = None, + database_path: Path | str | None = None, + table_name: str = "kv_entries", + default_collection: str | None = None, + seed: SEED_DATA_TYPE | None = None, + ) -> None: + """Initialize the DuckDB store. + + Args: + connection: An existing DuckDB connection to use. + database_path: Path to the database file. If None or ':memory:', uses in-memory database. + table_name: Name of the table to store key-value entries. Defaults to "kv_entries". + default_collection: The default collection to use if no collection is provided. + seed: Optional seed data to pre-populate the store. + """ + if connection is not None and database_path is not None: + msg = "Provide only one of connection or database_path" + raise ValueError(msg) + + if connection is not None: + self._connection = connection + self._owns_connection = True # We take ownership even of provided connections + else: + # Convert Path to string if needed + if isinstance(database_path, Path): + database_path = str(database_path) + # Use in-memory database if no path specified + if database_path is None or database_path == ":memory:": + self._connection = duckdb.connect(":memory:") + else: + self._connection = duckdb.connect(database=database_path) + self._owns_connection = True + + self._is_closed = False + self._adapter = DuckDBSerializationAdapter() + + # Validate table name to prevent SQL injection + if not re.fullmatch("[A-Za-z_][A-Za-z0-9_]*", table_name): + msg = "Table name must start with a letter or underscore and contain only letters, digits, or underscores" + raise ValueError(msg) + self._table_name = table_name + self._stable_api = False + + super().__init__(default_collection=default_collection, seed=seed) + + def _get_create_table_sql(self) -> str: + """Generate SQL for creating the key-value entries table. + + Returns: + SQL CREATE TABLE statement. + """ + return f"\n CREATE TABLE IF NOT EXISTS {self._table_name} (\n collection VARCHAR NOT NULL,\n key VARCHAR NOT NULL,\n value JSON NOT NULL,\n created_at TIMESTAMPTZ,\n expires_at TIMESTAMPTZ,\n version INT NOT NULL,\n PRIMARY KEY (collection, key)\n )\n " + + def _get_create_collection_index_sql(self) -> str: + """Generate SQL for creating index on collection column. + + Returns: + SQL CREATE INDEX statement. + """ + return f"\n CREATE INDEX IF NOT EXISTS idx_{self._table_name}_collection\n ON {self._table_name}(collection)\n " + + def _get_create_expires_index_sql(self) -> str: + """Generate SQL for creating index on expires_at column. + + Returns: + SQL CREATE INDEX statement. + """ + return f"\n CREATE INDEX IF NOT EXISTS idx_{self._table_name}_expires_at\n ON {self._table_name}(expires_at)\n " + + def _get_select_sql(self) -> str: + """Generate SQL for selecting an entry by collection and key. + + Returns: + SQL SELECT statement with placeholders. + """ + return f"\n SELECT value, created_at, expires_at, version\n FROM {self._table_name}\n WHERE collection = ? AND key = ?\n " # noqa: S608 + + def _get_insert_sql(self) -> str: + """Generate SQL for inserting or replacing an entry. + + Returns: + SQL INSERT OR REPLACE statement with placeholders. + """ + return f"\n INSERT OR REPLACE INTO {self._table_name}\n (collection, key, value, created_at, expires_at, version)\n VALUES (?, ?, ?, ?, ?, ?)\n " # noqa: S608 + + def _get_delete_sql(self) -> str: + """Generate SQL for deleting an entry by collection and key. + + Returns: + SQL DELETE statement with RETURNING clause. 
+ """ + return f"\n DELETE FROM {self._table_name}\n WHERE collection = ? AND key = ?\n RETURNING key\n " # noqa: S608 + + @override + def _setup(self) -> None: + """Initialize the database schema for key-value storage. + + The schema uses native DuckDB types for efficient querying: + - value: JSON column storing native dicts for queryability + - created_at: TIMESTAMP for native datetime operations + - expires_at: TIMESTAMP for native expiration queries + + This design enables: + - Direct SQL queries on the database for analytics + - Efficient expiration cleanup: DELETE FROM table WHERE expires_at < now() + - Metadata queries without JSON deserialization + - Native JSON column support for rich querying capabilities + """ + # Create the main table for storing key-value entries + self._connection.execute(self._get_create_table_sql()) + + # Create index for efficient collection queries + self._connection.execute(self._get_create_collection_index_sql()) + + # Create index for expiration-based queries + self._connection.execute(self._get_create_expires_index_sql()) + + @override + def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry | None: + """Retrieve a managed entry by key from the specified collection. + + Reconstructs the ManagedEntry from value column and metadata columns + using the serialization adapter. + """ + if self._is_closed: + msg = "Cannot operate on closed DuckDBStore" + raise RuntimeError(msg) + + result = self._connection.execute(self._get_select_sql(), [collection, key]).fetchone() + + if result is None: + return None + + (value, created_at, expires_at, version) = result + + # Build document dict for the adapter + document: dict[str, Any] = {"value": value, "created_at": created_at, "expires_at": expires_at, "version": version} + + document = {k: v for (k, v) in document.items() if v is not None} + + try: + managed_entry = self._adapter.load_dict(data=document) + except DeserializationError: + return None + + return managed_entry + + @override + def _put_managed_entry(self, *, key: str, collection: str, managed_entry: ManagedEntry) -> None: + """Store a managed entry by key in the specified collection. + + Uses the serialization adapter to convert the ManagedEntry to the + appropriate storage format. 
+ """ + if self._is_closed: + msg = "Cannot operate on closed DuckDBStore" + raise RuntimeError(msg) + + # Ensure that the value is serializable to JSON + _ = managed_entry.value_as_json + + # Use adapter to dump the managed entry to a dict with key and collection + document = self._adapter.dump_dict(entry=managed_entry, key=key, collection=collection) + + # Insert or replace the entry with metadata in separate columns + self._connection.execute( + self._get_insert_sql(), + [collection, key, document["value"], document.get("created_at"), document.get("expires_at"), document.get("version")], + ) + + @override + def _delete_managed_entry(self, *, key: str, collection: str) -> bool: + """Delete a managed entry by key from the specified collection.""" + if self._is_closed: + msg = "Cannot operate on closed DuckDBStore" + raise RuntimeError(msg) + + result = self._connection.execute(self._get_delete_sql(), [collection, key]) + + # Check if any rows were deleted by counting returned rows + deleted_rows = result.fetchall() + return len(deleted_rows) > 0 + + @override + def _close(self) -> None: + """Close the DuckDB connection.""" + if not self._is_closed and self._owns_connection: + self._connection.close() + self._is_closed = True + + def __del__(self) -> None: + """Clean up the DuckDB connection on deletion.""" + try: + if not self._is_closed and self._owns_connection and hasattr(self, "_connection"): + self._connection.close() + self._is_closed = True + except Exception: # noqa: S110 + # Suppress errors during cleanup to avoid issues during interpreter shutdown + pass diff --git a/key-value/key-value-sync/src/key_value/sync/stores/duckdb/__init__.py b/key-value/key-value-sync/src/key_value/sync/stores/duckdb/__init__.py new file mode 100644 index 00000000..7cde61a6 --- /dev/null +++ b/key-value/key-value-sync/src/key_value/sync/stores/duckdb/__init__.py @@ -0,0 +1,6 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file '__init__.py' +# DO NOT CHANGE! Change the original file instead. +from key_value.sync.code_gen.stores.duckdb.store import DuckDBStore + +__all__ = ["DuckDBStore"] diff --git a/key-value/key-value-sync/tests/code_gen/stores/duckdb/__init__.py b/key-value/key-value-sync/tests/code_gen/stores/duckdb/__init__.py new file mode 100644 index 00000000..0b0927be --- /dev/null +++ b/key-value/key-value-sync/tests/code_gen/stores/duckdb/__init__.py @@ -0,0 +1,4 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file '__init__.py' +# DO NOT CHANGE! Change the original file instead. +# DuckDB store tests diff --git a/key-value/key-value-sync/tests/code_gen/stores/duckdb/test_duckdb.py b/key-value/key-value-sync/tests/code_gen/stores/duckdb/test_duckdb.py new file mode 100644 index 00000000..ed88d3ec --- /dev/null +++ b/key-value/key-value-sync/tests/code_gen/stores/duckdb/test_duckdb.py @@ -0,0 +1,220 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file 'test_duckdb.py' +# DO NOT CHANGE! Change the original file instead. 
+from collections.abc import Generator +from pathlib import Path +from tempfile import TemporaryDirectory + +import pytest +from duckdb import CatalogException, DuckDBPyConnection +from inline_snapshot import snapshot +from typing_extensions import override + +from key_value.sync.code_gen.stores.base import BaseStore +from key_value.sync.code_gen.stores.duckdb import DuckDBStore +from tests.code_gen.stores.base import BaseStoreTests, ContextManagerStoreTestMixin + + +def get_client_from_store(store: DuckDBStore) -> DuckDBPyConnection: + return store._connection # pyright: ignore[reportPrivateUsage] + + +@pytest.mark.filterwarnings("ignore:A configured store is unstable and may change in a backwards incompatible way. Use at your own risk.") +class TestDuckDBStore(ContextManagerStoreTestMixin, BaseStoreTests): + @override + @pytest.fixture + def store(self) -> Generator[DuckDBStore, None, None]: + """Test with in-memory DuckDB database.""" + duckdb_store = DuckDBStore() + yield duckdb_store + duckdb_store.close() + + @pytest.mark.skip(reason="Local disk stores are unbounded") + def test_not_unbounded(self, store: BaseStore): ... + + +@pytest.mark.filterwarnings("ignore:A configured store is unstable and may change in a backwards incompatible way. Use at your own risk.") +class TestDuckDBStorePersistent(ContextManagerStoreTestMixin, BaseStoreTests): + @override + @pytest.fixture + def store(self) -> Generator[DuckDBStore, None, None]: + """Test with persistent DuckDB database file.""" + with TemporaryDirectory() as temp_dir: + db_path = Path(temp_dir) / "test.db" + duckdb_store = DuckDBStore(database_path=db_path) + yield duckdb_store + duckdb_store.close() + + @pytest.mark.skip(reason="Local disk stores are unbounded") + def test_not_unbounded(self, store: BaseStore): ... + + +@pytest.mark.filterwarnings("ignore:A configured store is unstable and may change in a backwards incompatible way. 
Use at your own risk.") +class TestDuckDBStoreSpecific: + """Test DuckDB-specific functionality.""" + + @pytest.fixture + def store(self) -> Generator[DuckDBStore, None, None]: + """Provide DuckDB store instance.""" + duckdb_store = DuckDBStore() + yield duckdb_store + duckdb_store.close() + + def test_native_sql_queryability(self): + """Test that users can query the database directly with SQL.""" + store = DuckDBStore() + + # Store some test data with known metadata + store.put(collection="products", key="item1", value={"name": "Widget", "price": 10.99}, ttl=3600) + store.put(collection="products", key="item2", value={"name": "Gadget", "price": 25.5}, ttl=7200) + store.put(collection="orders", key="order1", value={"total": 100.0, "items": 3}) + + # Query directly via SQL to verify native storage + # Check that value is stored as JSON (can extract fields) + result = ( + get_client_from_store(store) + .execute(""" + SELECT key, value->'name' as name, value->'price' as price + FROM kv_entries + WHERE collection = 'products' + ORDER BY key + """) + .fetchall() + ) # pyright: ignore[reportPrivateUsage] + + assert len(result) == 2 + assert result[0][0] == "item1" + assert result[0][1] == '"Widget"' # JSON strings are quoted + assert result[1][0] == "item2" + + # Query by expiration timestamp + count_result = ( + get_client_from_store(store) + .execute(""" + SELECT COUNT(*) + FROM kv_entries + WHERE expires_at > now() OR expires_at IS NULL + """) + .fetchone() + ) # pyright: ignore[reportPrivateUsage] + + assert count_result is not None + assert count_result[0] == 3 # All 3 entries should not be expired + + store.close() + + def test_database_path_initialization(self): + """Test that store can be initialized with different database path options.""" + # In-memory (default) + store1 = DuckDBStore() + store1.put(collection="test", key="key1", value={"test": "value1"}) + result1 = store1.get(collection="test", key="key1") + assert result1 == {"test": "value1"} + store1.close() + + # Explicit in-memory + store2 = DuckDBStore(database_path=":memory:") + store2.put(collection="test", key="key2", value={"test": "value2"}) + result2 = store2.get(collection="test", key="key2") + assert result2 == {"test": "value2"} + store2.close() + + def test_persistent_database(self): + """Test that data persists across store instances when using file database.""" + with TemporaryDirectory() as temp_dir: + db_path = Path(temp_dir) / "persist_test.db" + + # Store data in first instance + store1 = DuckDBStore(database_path=db_path) + store1.put(collection="test", key="persist_key", value={"data": "persistent"}) + store1.close() + + # Create second instance with same database file + store2 = DuckDBStore(database_path=db_path) + result = store2.get(collection="test", key="persist_key") + store2.close() + + assert result == {"data": "persistent"} + + def test_sql_injection_protection(self, store: DuckDBStore): + """Test that the store is protected against SQL injection attacks.""" + malicious_collection = "test'; DROP TABLE kv_entries; --" + malicious_key = "key'; DELETE FROM kv_entries; --" + + # These operations should not cause SQL injection + store.put(collection=malicious_collection, key=malicious_key, value={"safe": "data"}) + result = store.get(collection=malicious_collection, key=malicious_key) + assert result == {"safe": "data"} + + # Verify the table still exists and other data is safe + store.put(collection="normal", key="normal_key", value={"normal": "data"}) + normal_result = store.get(collection="normal", 
key="normal_key") + assert normal_result == {"normal": "data"} + + def test_large_data_storage(self, store: DuckDBStore): + """Test storing and retrieving large data values.""" + # Create a large value (1MB of data) + large_value = {"large_data": "x" * (1024 * 1024)} + + store.put(collection="test", key="large_key", value=large_value) + result = store.get(collection="test", key="large_key") + + assert result == large_value + + def test_unicode_support(self, store: DuckDBStore): + """Test that the store properly handles Unicode characters.""" + unicode_data = { + "english": "Hello World", + "chinese": "你好世界", + "japanese": "こんにちは世界", + "arabic": "مرحبا بالعالم", + "emoji": "🌍🚀💻", + "special": "Special chars: !@#$%^&*()_+-={}[]|\\:;\"'<>?,./", + } + + store.put(collection="unicode_test", key="unicode_key", value=unicode_data) + result = store.get(collection="unicode_test", key="unicode_key") + + assert result == unicode_data + + def test_connection_initialization(self): + """Test that store can be initialized with existing DuckDB connection.""" + import duckdb + + conn = duckdb.connect(":memory:") + store = DuckDBStore(connection=conn) + + store.put(collection="test", key="conn_test", value={"test": "value"}) + result = store.get(collection="test", key="conn_test") + assert result == {"test": "value"} + + store.close() + + def test_custom_table_name(self): + """Test that store can use custom table name.""" + custom_table = "my_custom_kv_table" + store = DuckDBStore(table_name=custom_table) + + # Store some data + store.put(collection="test", key="key1", value={"data": "value"}) + + # Verify the custom table exists and contains the data + tables = ( + get_client_from_store(store) + .table(custom_table) + .filter(filter_expr="key = 'key1'") + .select("key", "collection") + .execute() + .fetchone() + ) + + assert tables == snapshot(("key1", "test")) + + # Verify default table doesn't exist + with pytest.raises(CatalogException): + get_client_from_store(store).table("kv_entries") + + store.close() + + @pytest.mark.skip(reason="Local disk stores are unbounded") + def test_not_unbounded(self, store: BaseStore): ... 
diff --git a/pyproject.toml b/pyproject.toml index ed23a84a..8ea8b0a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,6 +73,7 @@ line-length = 140 "**/code_gen/**/*.py" = [ "ARG001", # Unused argument, Pyright captures this for us "ARG002", # Unused argument, Pyright captures this for us + "E501", # Ignore long lines ] [project.optional-dependencies] diff --git a/uv.lock b/uv.lock index ecc91b5e..5eff3617 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.12' and sys_platform != 'win32'", @@ -741,6 +741,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e3/26/57c6fb270950d476074c087527a558ccb6f4436657314bfb6cdf484114c4/docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0", size = 147774, upload-time = "2024-05-23T11:13:55.01Z" }, ] +[[package]] +name = "duckdb" +version = "1.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ea/e7/21cf50a3d52ffceee1f0bcc3997fa96a5062e6bab705baee4f6c4e33cce5/duckdb-1.4.1.tar.gz", hash = "sha256:f903882f045d057ebccad12ac69975952832edfe133697694854bb784b8d6c76", size = 18461687, upload-time = "2025-10-07T10:37:28.605Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/68/cc/00a07de0e33d16763edd4132d7c8a2f9efd57a2f296a25a948f239a1fadf/duckdb-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:296b4fff3908fb4c47b0aa1d77bd1933375e75401009d2dc81af8e7a0b8a05b4", size = 29062814, upload-time = "2025-10-07T10:36:14.261Z" }, + { url = "https://files.pythonhosted.org/packages/17/ea/fb0fda8886d1928f1b2a53a1163ef94f6f4b41f6d8b29eee457acfc2fa67/duckdb-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0b4182800092115feee5d71a8691efb283d3c9f5eb0b36362b308ef007a12222", size = 16161652, upload-time = "2025-10-07T10:36:17.358Z" }, + { url = "https://files.pythonhosted.org/packages/b4/5f/052e6436a71f461e61cd3a982954c029145a84b58cefa1dfb3eb2d96e4fc/duckdb-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:67cc3b6c7f7ba07a69e9331b8ccea7a60cbcd4204bb473e5da9b71588bd2eca9", size = 13753030, upload-time = "2025-10-07T10:36:19.782Z" }, + { url = "https://files.pythonhosted.org/packages/c2/fd/3ae3c89d0f6ad54c0be4430e572306fbfc9f173c97b23c5025a540449325/duckdb-1.4.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0cef0cee7030b561640cb9af718f8841b19cdd2aa020d53561057b5743bea90b", size = 18487683, upload-time = "2025-10-07T10:36:22.375Z" }, + { url = "https://files.pythonhosted.org/packages/d4/3c/eef454cd7c3880c2d55b50e18a9c7a213bf91ded79efcfb573d8d6dd8a47/duckdb-1.4.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2bf93347f37a46bacce6ac859d651dbf5731e2c94a64ab358300425b09e3de23", size = 20487080, upload-time = "2025-10-07T10:36:24.692Z" }, + { url = "https://files.pythonhosted.org/packages/bb/5b/b619f4c986a1cb0b06315239da9ce5fd94a20c07a344d03e2635d56a6967/duckdb-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:2e60d2361f978908a3d96eebaf1f4b346f283afcc467351aae50ea45ca293a2b", size = 12324436, upload-time = "2025-10-07T10:36:27.458Z" }, + { url = "https://files.pythonhosted.org/packages/d9/52/606f13fa9669a24166d2fe523e28982d8ef9039874b4de774255c7806d1f/duckdb-1.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:605d563c1d5203ca992497cd33fb386ac3d533deca970f9dcf539f62a34e22a9", size = 29065894, upload-time = 
"2025-10-07T10:36:29.837Z" }, + { url = "https://files.pythonhosted.org/packages/84/57/138241952ece868b9577e607858466315bed1739e1fbb47205df4dfdfd88/duckdb-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d3305c7c4b70336171de7adfdb50431f23671c000f11839b580c4201d9ce6ef5", size = 16163720, upload-time = "2025-10-07T10:36:32.241Z" }, + { url = "https://files.pythonhosted.org/packages/a3/81/afa3a0a78498a6f4acfea75c48a70c5082032d9ac87822713d7c2d164af1/duckdb-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a063d6febbe34b32f1ad2e68822db4d0e4b1102036f49aaeeb22b844427a75df", size = 13756223, upload-time = "2025-10-07T10:36:34.673Z" }, + { url = "https://files.pythonhosted.org/packages/47/dd/5f6064fbd9248e37a3e806a244f81e0390ab8f989d231b584fb954f257fc/duckdb-1.4.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1ffcaaf74f7d1df3684b54685cbf8d3ce732781c541def8e1ced304859733ae", size = 18487022, upload-time = "2025-10-07T10:36:36.759Z" }, + { url = "https://files.pythonhosted.org/packages/a1/10/b54969a1c42fd9344ad39228d671faceb8aa9f144b67cd9531a63551757f/duckdb-1.4.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:685d3d1599dc08160e0fa0cf09e93ac4ff8b8ed399cb69f8b5391cd46b5b207c", size = 20491004, upload-time = "2025-10-07T10:36:39.318Z" }, + { url = "https://files.pythonhosted.org/packages/ed/d5/7332ae8f804869a4e895937821b776199a283f8d9fc775fd3ae5a0558099/duckdb-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:78f1d28a15ae73bd449c43f80233732adffa49be1840a32de8f1a6bb5b286764", size = 12327619, upload-time = "2025-10-07T10:36:41.509Z" }, + { url = "https://files.pythonhosted.org/packages/0e/6c/906a3fe41cd247b5638866fc1245226b528de196588802d4df4df1e6e819/duckdb-1.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:cd1765a7d180b7482874586859fc23bc9969d7d6c96ced83b245e6c6f49cde7f", size = 29076820, upload-time = "2025-10-07T10:36:43.782Z" }, + { url = "https://files.pythonhosted.org/packages/66/c7/01dd33083f01f618c2a29f6dd068baf16945b8cbdb132929d3766610bbbb/duckdb-1.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8ed7a86725185470953410823762956606693c0813bb64e09c7d44dbd9253a64", size = 16167558, upload-time = "2025-10-07T10:36:46.003Z" }, + { url = "https://files.pythonhosted.org/packages/81/e2/f983b4b7ae1dfbdd2792dd31dee9a0d35f88554452cbfc6c9d65e22fdfa9/duckdb-1.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8a189bdfc64cfb9cc1adfbe4f2dcfde0a4992ec08505ad8ce33c886e4813f0bf", size = 13762226, upload-time = "2025-10-07T10:36:48.55Z" }, + { url = "https://files.pythonhosted.org/packages/ed/34/fb69a7be19b90f573b3cc890961be7b11870b77514769655657514f10a98/duckdb-1.4.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a9090089b6486f7319c92acdeed8acda022d4374032d78a465956f50fc52fabf", size = 18500901, upload-time = "2025-10-07T10:36:52.445Z" }, + { url = "https://files.pythonhosted.org/packages/e4/a5/1395d7b49d5589e85da9a9d7ffd8b50364c9d159c2807bef72d547f0ad1e/duckdb-1.4.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:142552ea3e768048e0e8c832077a545ca07792631c59edaee925e3e67401c2a0", size = 20514177, upload-time = "2025-10-07T10:36:55.358Z" }, + { url = "https://files.pythonhosted.org/packages/c0/21/08f10706d30252753349ec545833fc0cea67c11abd0b5223acf2827f1056/duckdb-1.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:567f3b3a785a9e8650612461893c49ca799661d2345a6024dda48324ece89ded", size = 12336422, upload-time = "2025-10-07T10:36:57.521Z" }, + { url = 
"https://files.pythonhosted.org/packages/d7/08/705988c33e38665c969f7876b3ca4328be578554aa7e3dc0f34158da3e64/duckdb-1.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:46496a2518752ae0c6c5d75d4cdecf56ea23dd098746391176dd8e42cf157791", size = 29077070, upload-time = "2025-10-07T10:36:59.83Z" }, + { url = "https://files.pythonhosted.org/packages/99/c5/7c9165f1e6b9069441bcda4da1e19382d4a2357783d37ff9ae238c5c41ac/duckdb-1.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1c65ae7e9b541cea07d8075343bcfebdecc29a3c0481aa6078ee63d51951cfcd", size = 16167506, upload-time = "2025-10-07T10:37:02.24Z" }, + { url = "https://files.pythonhosted.org/packages/38/46/267f4a570a0ee3ae6871ddc03435f9942884284e22a7ba9b7cb252ee69b6/duckdb-1.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:598d1a314e34b65d9399ddd066ccce1eeab6a60a2ef5885a84ce5ed62dbaf729", size = 13762330, upload-time = "2025-10-07T10:37:04.581Z" }, + { url = "https://files.pythonhosted.org/packages/15/7b/c4f272a40c36d82df20937d93a1780eb39ab0107fe42b62cba889151eab9/duckdb-1.4.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e2f16b8def782d484a9f035fc422bb6f06941ed0054b4511ddcdc514a7fb6a75", size = 18504687, upload-time = "2025-10-07T10:37:06.991Z" }, + { url = "https://files.pythonhosted.org/packages/17/fc/9b958751f0116d7b0406406b07fa6f5a10c22d699be27826d0b896f9bf51/duckdb-1.4.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5a7d0aed068a5c33622a8848857947cab5cfb3f2a315b1251849bac2c74c492", size = 20513823, upload-time = "2025-10-07T10:37:09.349Z" }, + { url = "https://files.pythonhosted.org/packages/30/79/4f544d73fcc0513b71296cb3ebb28a227d22e80dec27204977039b9fa875/duckdb-1.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:280fd663dacdd12bb3c3bf41f3e5b2e5b95e00b88120afabb8b8befa5f335c6f", size = 12336460, upload-time = "2025-10-07T10:37:12.154Z" }, +] + [[package]] name = "elastic-transport" version = "9.2.0" @@ -1742,6 +1774,10 @@ disk = [ { name = "diskcache" }, { name = "pathvalidate" }, ] +duckdb = [ + { name = "duckdb" }, + { name = "pytz" }, +] dynamodb = [ { name = "aioboto3" }, { name = "types-aiobotocore-dynamodb" }, @@ -1793,7 +1829,7 @@ wrappers-encryption = [ [package.dev-dependencies] dev = [ { name = "py-key-value", extra = ["dev"] }, - { name = "py-key-value-aio", extra = ["disk", "dynamodb", "elasticsearch", "filetree", "keyring", "memcached", "memory", "mongodb", "pydantic", "redis", "rocksdb", "vault", "wrappers-encryption"] }, + { name = "py-key-value-aio", extra = ["disk", "duckdb", "dynamodb", "elasticsearch", "filetree", "keyring", "memcached", "memory", "mongodb", "pydantic", "redis", "rocksdb", "vault", "wrappers-encryption"] }, { name = "py-key-value-aio", extra = ["valkey"], marker = "sys_platform != 'win32'" }, ] @@ -1809,6 +1845,7 @@ requires-dist = [ { name = "cryptography", marker = "extra == 'wrappers-encryption'", specifier = ">=45.0.0" }, { name = "dbus-python", marker = "extra == 'keyring-linux'", specifier = ">=1.4.0" }, { name = "diskcache", marker = "extra == 'disk'", specifier = ">=5.0.0" }, + { name = "duckdb", marker = "extra == 'duckdb'", specifier = ">=1.1.1" }, { name = "elasticsearch", marker = "extra == 'elasticsearch'", specifier = ">=8.0.0" }, { name = "hvac", marker = "extra == 'vault'", specifier = ">=2.3.0" }, { name = "keyring", marker = "extra == 'keyring'", specifier = ">=25.6.0" }, @@ -1817,6 +1854,7 @@ requires-dist = [ { name = "py-key-value-shared", editable = "key-value/key-value-shared" }, { name = "pydantic", 
marker = "extra == 'pydantic'", specifier = ">=2.11.9" }, { name = "pymongo", marker = "extra == 'mongodb'", specifier = ">=4.0.0" }, + { name = "pytz", marker = "extra == 'duckdb'", specifier = ">=2025.2" }, { name = "redis", marker = "extra == 'redis'", specifier = ">=4.3.0" }, { name = "rocksdict", marker = "python_full_version >= '3.12' and extra == 'rocksdb'", specifier = ">=0.3.24" }, { name = "rocksdict", marker = "python_full_version < '3.12' and extra == 'rocksdb'", specifier = ">=0.3.2" }, @@ -1824,13 +1862,13 @@ requires-dist = [ { name = "types-hvac", marker = "extra == 'vault'", specifier = ">=2.3.0" }, { name = "valkey-glide", marker = "extra == 'valkey'", specifier = ">=2.1.0" }, ] -provides-extras = ["memory", "disk", "filetree", "redis", "mongodb", "valkey", "vault", "memcached", "elasticsearch", "dynamodb", "keyring", "keyring-linux", "pydantic", "rocksdb", "wrappers-encryption"] +provides-extras = ["memory", "disk", "filetree", "redis", "mongodb", "valkey", "vault", "memcached", "elasticsearch", "dynamodb", "keyring", "keyring-linux", "pydantic", "rocksdb", "duckdb", "wrappers-encryption"] [package.metadata.requires-dev] dev = [ { name = "py-key-value", extras = ["dev"], editable = "." }, { name = "py-key-value-aio", extras = ["keyring"] }, - { name = "py-key-value-aio", extras = ["memory", "disk", "filetree", "redis", "elasticsearch", "memcached", "mongodb", "vault", "dynamodb", "rocksdb"] }, + { name = "py-key-value-aio", extras = ["memory", "disk", "filetree", "redis", "elasticsearch", "memcached", "mongodb", "vault", "dynamodb", "rocksdb", "duckdb"] }, { name = "py-key-value-aio", extras = ["pydantic"] }, { name = "py-key-value-aio", extras = ["valkey"], marker = "sys_platform != 'win32'" }, { name = "py-key-value-aio", extras = ["wrappers-encryption"] }, @@ -1910,10 +1948,17 @@ disk = [ { name = "diskcache" }, { name = "pathvalidate" }, ] +duckdb = [ + { name = "duckdb" }, + { name = "pytz" }, +] elasticsearch = [ { name = "aiohttp" }, { name = "elasticsearch" }, ] +filetree = [ + { name = "anyio" }, +] keyring = [ { name = "keyring" }, ] @@ -1953,7 +1998,7 @@ wrappers-encryption = [ [package.dev-dependencies] dev = [ { name = "py-key-value", extra = ["dev"] }, - { name = "py-key-value-sync", extra = ["disk", "elasticsearch", "keyring", "memcached", "memory", "mongodb", "pydantic", "redis", "rocksdb", "vault", "wrappers-encryption"] }, + { name = "py-key-value-sync", extra = ["disk", "duckdb", "elasticsearch", "filetree", "keyring", "memcached", "memory", "mongodb", "pydantic", "redis", "rocksdb", "vault", "wrappers-encryption"] }, { name = "py-key-value-sync", extra = ["valkey"], marker = "sys_platform != 'win32'" }, ] @@ -1961,11 +2006,13 @@ dev = [ requires-dist = [ { name = "aiohttp", marker = "extra == 'elasticsearch'", specifier = ">=3.12" }, { name = "aiomcache", marker = "extra == 'memcached'", specifier = ">=0.8.0" }, + { name = "anyio", marker = "extra == 'filetree'", specifier = ">=4.4.0" }, { name = "beartype", specifier = ">=0.20.0" }, { name = "cachetools", marker = "extra == 'memory'", specifier = ">=5.0.0" }, { name = "cryptography", marker = "extra == 'wrappers-encryption'", specifier = ">=45.0.0" }, { name = "dbus-python", marker = "extra == 'keyring-linux'", specifier = ">=1.4.0" }, { name = "diskcache", marker = "extra == 'disk'", specifier = ">=5.0.0" }, + { name = "duckdb", marker = "extra == 'duckdb'", specifier = ">=1.1.1" }, { name = "elasticsearch", marker = "extra == 'elasticsearch'", specifier = ">=8.0.0" }, { name = "hvac", marker = 
"extra == 'vault'", specifier = ">=2.3.0" }, { name = "keyring", marker = "extra == 'keyring'", specifier = ">=25.6.0" }, @@ -1974,19 +2021,20 @@ requires-dist = [ { name = "py-key-value-shared", editable = "key-value/key-value-shared" }, { name = "pydantic", marker = "extra == 'pydantic'", specifier = ">=2.11.9" }, { name = "pymongo", marker = "extra == 'mongodb'", specifier = ">=4.0.0" }, + { name = "pytz", marker = "extra == 'duckdb'", specifier = ">=2025.2" }, { name = "redis", marker = "extra == 'redis'", specifier = ">=4.3.0" }, { name = "rocksdict", marker = "python_full_version >= '3.12' and extra == 'rocksdb'", specifier = ">=0.3.24" }, { name = "rocksdict", marker = "python_full_version < '3.12' and extra == 'rocksdb'", specifier = ">=0.3.2" }, { name = "types-hvac", marker = "extra == 'vault'", specifier = ">=2.3.0" }, { name = "valkey-glide-sync", marker = "extra == 'valkey'", specifier = ">=2.1.0" }, ] -provides-extras = ["memory", "disk", "redis", "mongodb", "valkey", "vault", "memcached", "elasticsearch", "pydantic", "keyring", "keyring-linux", "rocksdb", "wrappers-encryption"] +provides-extras = ["memory", "disk", "filetree", "redis", "mongodb", "valkey", "vault", "memcached", "elasticsearch", "pydantic", "keyring", "keyring-linux", "rocksdb", "duckdb", "wrappers-encryption"] [package.metadata.requires-dev] dev = [ { name = "py-key-value", extras = ["dev"], editable = "." }, { name = "py-key-value-sync", extras = ["keyring"] }, - { name = "py-key-value-sync", extras = ["memory", "disk", "redis", "elasticsearch", "memcached", "mongodb", "vault", "rocksdb"] }, + { name = "py-key-value-sync", extras = ["memory", "disk", "filetree", "redis", "elasticsearch", "memcached", "mongodb", "vault", "rocksdb", "duckdb"] }, { name = "py-key-value-sync", extras = ["pydantic"] }, { name = "py-key-value-sync", extras = ["valkey"], marker = "sys_platform != 'win32'" }, { name = "py-key-value-sync", extras = ["wrappers-encryption"] }, @@ -2326,6 +2374,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" }, ] +[[package]] +name = "pytz" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, +] + [[package]] name = "pywin32" version = "311"