diff --git a/.evergreen/resync-specs.sh b/.evergreen/resync-specs.sh index a98b091d59..1177ebb04a 100755 --- a/.evergreen/resync-specs.sh +++ b/.evergreen/resync-specs.sh @@ -93,6 +93,7 @@ do cpjson client-side-encryption/corpus/ client-side-encryption/corpus cpjson client-side-encryption/external/ client-side-encryption/external cpjson client-side-encryption/limits/ client-side-encryption/limits + cpjson client-side-encryption/etc/data client-side-encryption/etc/data ;; cmap|CMAP|connection-monitoring-and-pooling) cpjson connection-monitoring-and-pooling/tests cmap diff --git a/.evergreen/run-tests.sh b/.evergreen/run-tests.sh index 5f5bda7dc1..4367bad246 100755 --- a/.evergreen/run-tests.sh +++ b/.evergreen/run-tests.sh @@ -139,7 +139,7 @@ if [ -n "$TEST_ENCRYPTION" ]; then export PYMONGOCRYPT_LIB # TODO: Test with 'pip install pymongocrypt' - git clone --branch master https://github.com/mongodb/libmongocrypt.git libmongocrypt_git + git clone https://github.com/mongodb/libmongocrypt.git libmongocrypt_git python -m pip install --prefer-binary -r .evergreen/test-encryption-requirements.txt python -m pip install ./libmongocrypt_git/bindings/python python -c "import pymongocrypt; print('pymongocrypt version: '+pymongocrypt.__version__)" diff --git a/pymongo/encryption.py b/pymongo/encryption.py index 40f7d20f23..71642aaa2a 100644 --- a/pymongo/encryption.py +++ b/pymongo/encryption.py @@ -15,6 +15,7 @@ """Support for explicit client-side field level encryption.""" import contextlib +import enum import uuid import weakref from typing import Any, Mapping, Optional, Sequence @@ -303,6 +304,7 @@ def _get_internal_client(encrypter, mongo_client): crypt_shared_lib_path=opts._crypt_shared_lib_path, crypt_shared_lib_required=opts._crypt_shared_lib_required, bypass_encryption=opts._bypass_auto_encryption, + bypass_query_analysis=opts._bypass_query_analysis, ), ) self._closed = False @@ -352,11 +354,33 @@ def close(self): self._internal_client = None -class Algorithm(object): 
+class Algorithm(str, enum.Enum): """An enum that defines the supported encryption algorithms.""" AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic = "AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic" + """AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic.""" AEAD_AES_256_CBC_HMAC_SHA_512_Random = "AEAD_AES_256_CBC_HMAC_SHA_512-Random" + """AEAD_AES_256_CBC_HMAC_SHA_512_Random.""" + INDEXED = "Indexed" + """Indexed. + + .. versionadded:: 4.2 + """ + UNINDEXED = "Unindexed" + """Unindexed. + + .. versionadded:: 4.2 + """ + + +class QueryType(enum.IntEnum): + """An enum that defines the supported values for explicit encryption query_type. + + .. versionadded:: 4.2 + """ + + EQUALITY = 1 + """Used to encrypt a value for an equality query.""" class ClientEncryption(object): @@ -550,6 +574,9 @@ def encrypt( algorithm: str, key_id: Optional[Binary] = None, key_alt_name: Optional[str] = None, + index_key_id: Optional[Binary] = None, + query_type: Optional[int] = None, + contention_factor: Optional[int] = None, ) -> Binary: """Encrypt a BSON value with a given key and algorithm. @@ -564,20 +591,38 @@ def encrypt( :class:`~bson.binary.Binary` with subtype 4 ( :attr:`~bson.binary.UUID_SUBTYPE`). - `key_alt_name`: Identifies a key vault document by 'keyAltName'. + - `index_key_id` (bytes): the index key id to use for Queryable Encryption. + - `query_type` (int): The query type to execute. See + :class:`QueryType` for valid options. + - `contention_factor` (int): The contention factor to use + when the algorithm is "Indexed". :Returns: The encrypted value, a :class:`~bson.binary.Binary` with subtype 6. + + .. versionchanged:: 4.2 + Added the `index_key_id`, `query_type`, and `contention_factor` parameters. 
""" self._check_closed() if key_id is not None and not ( isinstance(key_id, Binary) and key_id.subtype == UUID_SUBTYPE ): raise TypeError("key_id must be a bson.binary.Binary with subtype 4") + if index_key_id is not None and not ( + isinstance(index_key_id, Binary) and index_key_id.subtype == UUID_SUBTYPE + ): + raise TypeError("index_key_id must be a bson.binary.Binary with subtype 4") doc = encode({"v": value}, codec_options=self._codec_options) with _wrap_encryption_errors(): encrypted_doc = self._encryption.encrypt( - doc, algorithm, key_id=key_id, key_alt_name=key_alt_name + doc, + algorithm, + key_id=key_id, + key_alt_name=key_alt_name, + index_key_id=index_key_id, + query_type=query_type, + contention_factor=contention_factor, ) return decode(encrypted_doc)["v"] # type: ignore[index] diff --git a/pymongo/encryption_options.py b/pymongo/encryption_options.py index cdb77c9707..5acc55042a 100644 --- a/pymongo/encryption_options.py +++ b/pymongo/encryption_options.py @@ -47,6 +47,7 @@ def __init__( kms_tls_options: Optional[Mapping[str, Any]] = None, crypt_shared_lib_path: Optional[str] = None, crypt_shared_lib_required: bool = False, + bypass_query_analysis: bool = False, ) -> None: """Options to configure automatic client-side field level encryption. @@ -145,9 +146,14 @@ def __init__( - `crypt_shared_lib_path` (optional): Override the path to load the crypt_shared library. - `crypt_shared_lib_required` (optional): If True, raise an error if libmongocrypt is unable to load the crypt_shared library. + - `bypass_query_analysis` (optional): If ``True``, disable automatic analysis of + outgoing commands. Set `bypass_query_analysis` to use explicit + encryption on indexed fields without the MongoDB Enterprise Advanced + licensed crypt_shared library. .. versionchanged:: 4.2 - Added `crypt_shared_lib_path` and `crypt_shared_lib_required` parameters + Added `crypt_shared_lib_path`, `crypt_shared_lib_required`, and `bypass_query_analysis` + parameters. .. 
versionchanged:: 4.0 Added the `kms_tls_options` parameter and the "kmip" KMS provider. @@ -179,3 +185,4 @@ def __init__( self._mongocryptd_spawn_args.append("--idleShutdownTimeoutSecs=60") # Maps KMS provider name to a SSLContext. self._kms_ssl_contexts = _parse_kms_tls_options(kms_tls_options) + self._bypass_query_analysis = bypass_query_analysis diff --git a/test/client-side-encryption/etc/data/encryptedFields.json b/test/client-side-encryption/etc/data/encryptedFields.json new file mode 100644 index 0000000000..2364590e4c --- /dev/null +++ b/test/client-side-encryption/etc/data/encryptedFields.json @@ -0,0 +1,33 @@ +{ + "escCollection": "enxcol_.default.esc", + "eccCollection": "enxcol_.default.ecc", + "ecocCollection": "enxcol_.default.ecoc", + "fields": [ + { + "keyId": { + "$binary": { + "base64": "EjRWeBI0mHYSNBI0VniQEg==", + "subType": "04" + } + }, + "path": "encryptedIndexed", + "bsonType": "string", + "queries": { + "queryType": "equality", + "contention": { + "$numberLong": "0" + } + } + }, + { + "keyId": { + "$binary": { + "base64": "q83vqxI0mHYSNBI0VniQEg==", + "subType": "04" + } + }, + "path": "encryptedUnindexed", + "bsonType": "string" + } + ] +} diff --git a/test/client-side-encryption/etc/data/keys/key1-document.json b/test/client-side-encryption/etc/data/keys/key1-document.json new file mode 100644 index 0000000000..566b56c354 --- /dev/null +++ b/test/client-side-encryption/etc/data/keys/key1-document.json @@ -0,0 +1,30 @@ +{ + "_id": { + "$binary": { + "base64": "EjRWeBI0mHYSNBI0VniQEg==", + "subType": "04" + } + }, + "keyMaterial": { + "$binary": { + "base64": "sHe0kz57YW7v8g9VP9sf/+K1ex4JqKc5rf/URX3n3p8XdZ6+15uXPaSayC6adWbNxkFskuMCOifDoTT+rkqMtFkDclOy884RuGGtUysq3X7zkAWYTKi8QAfKkajvVbZl2y23UqgVasdQu3OVBQCrH/xY00nNAs/52e958nVjBuzQkSb1T8pKJAyjZsHJ60+FtnfafDZSTAIBJYn7UWBCwQ==", + "subType": "00" + } + }, + "creationDate": { + "$date": { + "$numberLong": "1648914851981" + } + }, + "updateDate": { + "$date": { + "$numberLong": "1648914851981" + 
} + }, + "status": { + "$numberInt": "0" + }, + "masterKey": { + "provider": "local" + } +} diff --git a/test/client-side-encryption/etc/data/keys/key1-id.json b/test/client-side-encryption/etc/data/keys/key1-id.json new file mode 100644 index 0000000000..7d18f52ebb --- /dev/null +++ b/test/client-side-encryption/etc/data/keys/key1-id.json @@ -0,0 +1,6 @@ +{ + "$binary": { + "base64": "EjRWeBI0mHYSNBI0VniQEg==", + "subType": "04" + } +} diff --git a/test/client-side-encryption/etc/data/keys/key2-document.json b/test/client-side-encryption/etc/data/keys/key2-document.json new file mode 100644 index 0000000000..a654d980ba --- /dev/null +++ b/test/client-side-encryption/etc/data/keys/key2-document.json @@ -0,0 +1,30 @@ +{ + "_id": { + "$binary": { + "base64": "q83vqxI0mHYSNBI0VniQEg==", + "subType": "04" + } + }, + "keyMaterial": { + "$binary": { + "base64": "HBk9BWihXExNDvTp1lUxOuxuZK2Pe2ZdVdlsxPEBkiO1bS4mG5NNDsQ7zVxJAH8BtdOYp72Ku4Y3nwc0BUpIKsvAKX4eYXtlhv5zUQxWdeNFhg9qK7qb8nqhnnLeT0f25jFSqzWJoT379hfwDeu0bebJHr35QrJ8myZdPMTEDYF08QYQ48ShRBli0S+QzBHHAQiM2iJNr4svg2WR8JSeWQ==", + "subType": "00" + } + }, + "creationDate": { + "$date": { + "$numberLong": "1648914851981" + } + }, + "updateDate": { + "$date": { + "$numberLong": "1648914851981" + } + }, + "status": { + "$numberInt": "0" + }, + "masterKey": { + "provider": "local" + } +} diff --git a/test/client-side-encryption/etc/data/keys/key2-id.json b/test/client-side-encryption/etc/data/keys/key2-id.json new file mode 100644 index 0000000000..6e9b87bbc2 --- /dev/null +++ b/test/client-side-encryption/etc/data/keys/key2-id.json @@ -0,0 +1,6 @@ +{ + "$binary": { + "base64": "q83vqxI0mHYSNBI0VniQEg==", + "subType": "04" + } +} diff --git a/test/test_encryption.py b/test/test_encryption.py index 500c95af04..288c137c7e 100644 --- a/test/test_encryption.py +++ b/test/test_encryption.py @@ -51,14 +51,14 @@ from test.utils_spec_runner import SpecRunner from bson import encode, json_util -from bson.binary import JAVA_LEGACY, 
STANDARD, UUID_SUBTYPE, Binary, UuidRepresentation +from bson.binary import UUID_SUBTYPE, Binary, UuidRepresentation from bson.codec_options import CodecOptions from bson.errors import BSONError from bson.json_util import JSONOptions from bson.son import SON from pymongo import encryption from pymongo.cursor import CursorType -from pymongo.encryption import Algorithm, ClientEncryption +from pymongo.encryption import Algorithm, ClientEncryption, QueryType from pymongo.encryption_options import _HAVE_PYMONGOCRYPT, AutoEncryptionOpts from pymongo.errors import ( BulkWriteError, @@ -212,11 +212,11 @@ def assertBinaryUUID(self, val): BASE = os.path.join(os.path.dirname(os.path.realpath(__file__)), "client-side-encryption") SPEC_PATH = os.path.join(BASE, "spec") -OPTS = CodecOptions(uuid_representation=STANDARD) +OPTS = CodecOptions() # Use SON to preserve the order of fields while parsing json. Use tz_aware # =False to match how CodecOptions decodes dates. -JSON_OPTS = JSONOptions(document_class=SON, uuid_representation=STANDARD, tz_aware=False) +JSON_OPTS = JSONOptions(document_class=SON, tz_aware=False) def read(*paths): @@ -324,7 +324,7 @@ def test_use_after_close(self): class TestEncryptedBulkWrite(BulkTestBase, EncryptionIntegrationTest): - def test_upsert_uuid_standard_encrypte(self): + def test_upsert_uuid_standard_encrypt(self): opts = AutoEncryptionOpts(KMS_PROVIDERS, "keyvault.datakeys") client = rs_or_single_client(auto_encryption_opts=opts) self.addCleanup(client.close) @@ -449,11 +449,19 @@ def test_validation(self): msg = "key_id must be a bson.binary.Binary with subtype 4" algo = Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic + uid = uuid.uuid4() with self.assertRaisesRegex(TypeError, msg): - client_encryption.encrypt("str", algo, key_id=uuid.uuid4()) # type: ignore[arg-type] + client_encryption.encrypt("str", algo, key_id=uid) # type: ignore[arg-type] with self.assertRaisesRegex(TypeError, msg): client_encryption.encrypt("str", algo, 
key_id=Binary(b"123")) + msg = "index_key_id must be a bson.binary.Binary with subtype 4" + algo = Algorithm.INDEXED + with self.assertRaisesRegex(TypeError, msg): + client_encryption.encrypt("str", algo, index_key_id=uid) # type: ignore[arg-type] + with self.assertRaisesRegex(TypeError, msg): + client_encryption.encrypt("str", algo, index_key_id=Binary(b"123")) + def test_bson_errors(self): client_encryption = ClientEncryption( KMS_PROVIDERS, "keyvault.datakeys", client_context.client, OPTS @@ -466,7 +474,7 @@ def test_bson_errors(self): client_encryption.encrypt( unencodable_value, Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic, - key_id=Binary(uuid.uuid4().bytes, UUID_SUBTYPE), + key_id=Binary.from_uuid(uuid.uuid4()), ) def test_codec_options(self): @@ -475,7 +483,7 @@ def test_codec_options(self): KMS_PROVIDERS, "keyvault.datakeys", client_context.client, None # type: ignore[arg-type] ) - opts = CodecOptions(uuid_representation=JAVA_LEGACY) + opts = CodecOptions(uuid_representation=UuidRepresentation.JAVA_LEGACY) client_encryption_legacy = ClientEncryption( KMS_PROVIDERS, "keyvault.datakeys", client_context.client, opts ) @@ -493,8 +501,9 @@ def test_codec_options(self): self.assertEqual(decrypted_value_legacy, value) # Encrypt the same UUID with STANDARD codec options. 
+        opts = CodecOptions(uuid_representation=UuidRepresentation.STANDARD)
         client_encryption = ClientEncryption(
-            KMS_PROVIDERS, "keyvault.datakeys", client_context.client, OPTS
+            KMS_PROVIDERS, "keyvault.datakeys", client_context.client, opts
         )
         self.addCleanup(client_encryption.close)
         encrypted_standard = client_encryption.encrypt(
@@ -986,9 +995,7 @@ def _test_corpus(self, opts):
         )
         self.addCleanup(vault.drop)
 
-        client_encrypted = rs_or_single_client(
-            auto_encryption_opts=opts, uuidRepresentation="standard"
-        )
+        client_encrypted = rs_or_single_client(auto_encryption_opts=opts)
         self.addCleanup(client_encrypted.close)
 
         client_encryption = ClientEncryption(
@@ -1436,7 +1443,7 @@ def _test_explicit(self, expectation):
         ciphertext = client_encryption.encrypt(
             "string0",
             algorithm=Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic,
-            key_id=Binary.from_uuid(self.DEK["_id"], STANDARD),
+            key_id=self.DEK["_id"],
         )
 
         self.assertEqual(bytes(ciphertext), base64.b64decode(expectation))
@@ -1972,9 +1979,132 @@ def test_04_kmip(self):
         with self.assertRaisesRegex(EncryptionError, "expired|certificate verify failed"):
             self.client_encryption_expired.create_data_key("kmip")
         # Invalid cert hostname error.
-        with self.assertRaisesRegex(EncryptionError, "IP address mismatch|wronghost"):
+        with self.assertRaisesRegex(
+            EncryptionError, "IP address mismatch|wronghost|IPAddressMismatch"
+        ):
             self.client_encryption_invalid_hostname.create_data_key("kmip")
 
 
+# https://github.com/mongodb/specifications/blob/d4c9432/source/client-side-encryption/tests/README.rst#explicit-encryption
+class TestExplicitQueryableEncryption(EncryptionIntegrationTest):
+    """Explicit encryption tests for the ``Indexed`` and ``Unindexed`` algorithms."""
+
+    @client_context.require_no_standalone
+    @client_context.require_version_min(6, 0, -1)
+    def setUp(self):
+        """Create the encrypted collection, the key vault and both encryption clients."""
+        super().setUp()
+        self.encrypted_fields = json_data("etc", "data", "encryptedFields.json")
+        self.key1_document = json_data("etc", "data", "keys", "key1-document.json")
+        self.key1_id = self.key1_document["_id"]
+        self.db = self.client.test_queryable_encryption
+        self.client.drop_database(self.db)
+        self.db.command("create", self.encrypted_fields["escCollection"])
+        self.db.command("create", self.encrypted_fields["eccCollection"])
+        self.db.command("create", self.encrypted_fields["ecocCollection"])
+        self.db.command("create", "explicit_encryption", encryptedFields=self.encrypted_fields)
+        key_vault = create_key_vault(self.client.keyvault.datakeys, self.key1_document)
+        self.addCleanup(key_vault.drop)
+        self.key_vault_client = self.client
+        self.client_encryption = ClientEncryption(
+            {"local": {"key": LOCAL_MASTER_KEY}}, key_vault.full_name, self.key_vault_client, OPTS
+        )
+        self.addCleanup(self.client_encryption.close)
+        opts = AutoEncryptionOpts(
+            {"local": {"key": LOCAL_MASTER_KEY}},
+            key_vault.full_name,
+            bypass_query_analysis=True,
+        )
+        self.encrypted_client = rs_or_single_client(auto_encryption_opts=opts)
+        self.addCleanup(self.encrypted_client.close)
+
+    def test_01_insert_encrypted_indexed_and_find(self):
+        """Insert one Indexed payload and find it again with an equality query payload."""
+        val = "encrypted indexed value"
+        insert_payload = self.client_encryption.encrypt(val, Algorithm.INDEXED, self.key1_id)
+        self.encrypted_client[self.db.name].explicit_encryption.insert_one(
+            {"encryptedIndexed": insert_payload}
+        )
+
+        find_payload = self.client_encryption.encrypt(
+            val, Algorithm.INDEXED, self.key1_id, query_type=QueryType.EQUALITY
+        )
+        docs = list(
+            self.encrypted_client[self.db.name].explicit_encryption.find(
+                {"encryptedIndexed": find_payload}
+            )
+        )
+        self.assertEqual(len(docs), 1)
+        self.assertEqual(docs[0]["encryptedIndexed"], val)
+
+    def test_02_insert_encrypted_indexed_and_find_contention(self):
+        """Insert ``contention`` Indexed payloads and find them with and without the factor."""
+        val = "encrypted indexed value"
+        contention = 10
+        for _ in range(contention):
+            insert_payload = self.client_encryption.encrypt(
+                val, Algorithm.INDEXED, self.key1_id, contention_factor=contention
+            )
+            self.encrypted_client[self.db.name].explicit_encryption.insert_one(
+                {"encryptedIndexed": insert_payload}
+            )
+
+        # Without contention_factor the result size is non-deterministic; only bound it above.
+        find_payload = self.client_encryption.encrypt(
+            val, Algorithm.INDEXED, self.key1_id, query_type=QueryType.EQUALITY
+        )
+        docs = list(
+            self.encrypted_client[self.db.name].explicit_encryption.find(
+                {"encryptedIndexed": find_payload}
+            )
+        )
+        self.assertLessEqual(len(docs), contention)
+        for doc in docs:
+            self.assertEqual(doc["encryptedIndexed"], val)
+
+        # With the same contention_factor the find must return every inserted document.
+        find_payload = self.client_encryption.encrypt(
+            val,
+            Algorithm.INDEXED,
+            self.key1_id,
+            query_type=QueryType.EQUALITY,
+            contention_factor=contention,
+        )
+        docs = list(
+            self.encrypted_client[self.db.name].explicit_encryption.find(
+                {"encryptedIndexed": find_payload}
+            )
+        )
+        self.assertEqual(len(docs), contention)
+        for doc in docs:
+            self.assertEqual(doc["encryptedIndexed"], val)
+
+    def test_03_insert_encrypted_unindexed(self):
+        """Insert one Unindexed payload and read it back by ``_id``."""
+        val = "encrypted unindexed value"
+        insert_payload = self.client_encryption.encrypt(val, Algorithm.UNINDEXED, self.key1_id)
+        self.encrypted_client[self.db.name].explicit_encryption.insert_one(
+            {"_id": 1, "encryptedUnindexed": insert_payload}
+        )
+
+        docs = list(self.encrypted_client[self.db.name].explicit_encryption.find({"_id": 1}))
+        self.assertEqual(len(docs), 1)
+        self.assertEqual(docs[0]["encryptedUnindexed"], val)
+
+    def test_04_roundtrip_encrypted_indexed(self):
+        """Encrypt with the Indexed algorithm and check decrypt returns the input."""
+        val = "encrypted indexed value"
+        payload = self.client_encryption.encrypt(val, Algorithm.INDEXED, self.key1_id)
+        decrypted = self.client_encryption.decrypt(payload)
+        self.assertEqual(decrypted, val)
+
+    def test_05_roundtrip_encrypted_unindexed(self):
+        """Encrypt with the Unindexed algorithm and check decrypt returns the input."""
+        val = "encrypted unindexed value"
+        payload = self.client_encryption.encrypt(val, Algorithm.UNINDEXED, self.key1_id)
+        decrypted = self.client_encryption.decrypt(payload)
+        self.assertEqual(decrypted, val)
+
+
 if __name__ == "__main__":
     unittest.main()