Define the Widgets for variables that need to be parameterized in this notebook

In [0]:
-- Notebook parameters. Every widget is a free-text widget whose default is an empty
-- string, so each one has to be supplied before the cells below are run.

-- Catalog that qualifies the UDFs and the attribute table (always with the `default` schema).
CREATE WIDGET TEXT envlp_encryp_core_catalog_name DEFAULT "";
-- Name under which the KEK/DSK generation UDF is created.
CREATE WIDGET TEXT envlp_encryp_generate_kek_dsk_function_name DEFAULT "";
-- Name under which the DSK decryption UDF is created.
CREATE WIDGET TEXT envlp_encryp_get_decrypted_dsk_function_name DEFAULT "";
-- Secret scope that the try_secret() calls below read from.
CREATE WIDGET TEXT envlp_encryp_secret_scope_name DEFAULT "";
-- Keys, inside that secret scope, of the service principal's tenant id, client id and client secret.
CREATE WIDGET TEXT envlp_encryp_sp_tenant_id_key DEFAULT "";
CREATE WIDGET TEXT envlp_encryp_sp_client_id_key DEFAULT "";
CREATE WIDGET TEXT envlp_encryp_sp_client_secret_key DEFAULT "";
-- Passed to the UDFs as `key_vault_url`, where it is used as the host part of
-- "https://<value>/keys/..." and "https://<value>/secrets/...".
CREATE WIDGET TEXT envlp_encryp_key_vault_name DEFAULT "";
-- Table that supplies the pci_pii_attribute_name, kek_name and dsk_name columns.
CREATE WIDGET TEXT envlp_encryp_pci_pii_attribute_table_name DEFAULT "";
-- Name under which the KEK rotation UDF is created.
CREATE WIDGET TEXT envlp_encryp_rotate_kek_function_name DEFAULT "";

Create a Python-backed SQL UDF that generates a KEK (Key Encryption Key) and a DSK (Data Salt Key) for each PCI and PII attribute. The KEK is stored in the key vault as a key and the DSK is stored as a secret; the DSK is encrypted with its KEK before it is stored.

In [0]:
-- Python-backed SQL UDF that provisions, for one attribute, a KEK (Key Vault key)
-- and a DSK (Key Vault secret holding the KEK-encrypted salt). Existing KEKs and
-- DSKs are never replaced.
CREATE OR REPLACE FUNCTION ${envlp_encryp_core_catalog_name}.default.${envlp_encryp_generate_kek_dsk_function_name}(
  tenant_id STRING,
  client_id STRING,
  client_secret STRING,
  key_vault_url STRING,
  kek_name STRING,
  dsk_secret_name STRING
)
RETURNS STRING
LANGUAGE PYTHON
AS
$$
# Parameters
#   tenant_id, client_id, client_secret : service principal used for the
#       client-credentials token request.
#   key_vault_url   : host name of the vault; it is placed directly after "https://".
#   kek_name        : name of the Key Vault key acting as KEK.
#   dsk_secret_name : name of the Key Vault secret that stores the encrypted DSK.
# Returns
#   A status message: either "already exists, skipping" for the DSK or the
#   creation summary.
# Raises
#   ValueError for NULL/empty arguments, requests.HTTPError for any failed
#   Key Vault / login call, RuntimeError if the KEK has no versions.
import base64
import os

import requests

# Upper bound (seconds) for every HTTP call so a stalled endpoint cannot hang the query.
HTTP_TIMEOUT_SECONDS = 30

# SQL NULLs arrive as None (for example when a secret lookup yields NULL). Fail
# early instead of formatting "None" into a URL or a credential field.
required_args = {
    "tenant_id": tenant_id,
    "client_id": client_id,
    "client_secret": client_secret,
    "key_vault_url": key_vault_url,
    "kek_name": kek_name,
    "dsk_secret_name": dsk_secret_name,
}
missing_args = [arg_name for arg_name, arg_value in required_args.items() if not arg_value]
if missing_args:
    raise ValueError("Missing required argument(s): " + ", ".join(missing_args))

# Step 1: Authenticate and get access token
token_url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"
token_data = {
    "grant_type": "client_credentials",
    "client_id": client_id,
    "client_secret": client_secret,
    "scope": "https://vault.azure.net/.default"
}
token_response = requests.post(token_url, data=token_data, timeout=HTTP_TIMEOUT_SECONDS)
token_response.raise_for_status()
access_token = token_response.json()["access_token"]

headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}


def latest_kek_version():
    """Return the version id of the most recently created version of the KEK."""
    versions = []
    page_url = f"https://{key_vault_url}/keys/{kek_name}/versions?api-version=7.4"
    # The listing is paged; keep following nextLink so that the newest version is
    # found even when it is not on the first page.
    while page_url:
        page_response = requests.get(page_url, headers=headers, timeout=HTTP_TIMEOUT_SECONDS)
        page_response.raise_for_status()
        page = page_response.json()
        versions.extend(page.get("value") or [])
        page_url = page.get("nextLink")
    if not versions:
        raise RuntimeError(f"No versions found for KEK '{kek_name}'.")
    newest = max(versions, key=lambda version: version["attributes"]["created"])
    # The kid is a URL whose last path segment is the version id.
    return newest["kid"].split("/")[-1]


# Step 2: Check if KEK already exists (do not create if it exists)
check_kek_url = f"https://{key_vault_url}/keys/{kek_name}?api-version=7.4"
check_kek_response = requests.get(check_kek_url, headers=headers, timeout=HTTP_TIMEOUT_SECONDS)

if check_kek_response.status_code == 404:
    # KEK does not exist, create it
    create_key_url = f"https://{key_vault_url}/keys/{kek_name}/create?api-version=7.4"
    create_key_payload = {
        "kty": "RSA",
        "key_size": 2048
    }
    create_response = requests.post(
        create_key_url, headers=headers, json=create_key_payload, timeout=HTTP_TIMEOUT_SECONDS
    )
    create_response.raise_for_status()
    print(f"KEK '{kek_name}' created successfully.")

    # Step 2.1: Set key rotation policy (365-day expiry, rotate 30 days prior)
    rotation_policy_url = f"https://{key_vault_url}/keys/{kek_name}/rotationpolicy?api-version=7.4"
    rotation_policy_payload = {
        "attributes": {
            "expiryTime": "P365D"
        },
        "lifetimeActions": [
            {
                "trigger": {
                    "timeBeforeExpiry": "P30D"
                },
                "action": {
                    "type": "Rotate"
                }
            }
        ]
    }
    rotation_response = requests.put(
        rotation_policy_url, headers=headers, json=rotation_policy_payload, timeout=HTTP_TIMEOUT_SECONDS
    )
    rotation_response.raise_for_status()
    print(f"Rotation policy set for KEK '{kek_name}'.")
else:
    # Only a 404 means "not there". Any other failure (auth, throttling, outage)
    # must stop here: posting to /create for a key that does exist would add a
    # new version to it.
    check_kek_response.raise_for_status()
    print(f"KEK '{kek_name}' already exists.")

# Step 3: Check if DSK already exists
check_dsk_url = f"https://{key_vault_url}/secrets/{dsk_secret_name}?api-version=7.4"
check_dsk_response = requests.get(check_dsk_url, headers=headers, timeout=HTTP_TIMEOUT_SECONDS)

if check_dsk_response.status_code == 200:
    return f"DSK secret '{dsk_secret_name}' already exists. Skipping creation."
if check_dsk_response.status_code != 404:
    # Never fall through on an inconclusive answer: writing a fresh random salt
    # over an existing one would make previously produced hashes unreproducible.
    check_dsk_response.raise_for_status()

# Step 4: Get latest KEK version
latest_version = latest_kek_version()

# Step 5: Generate a 256-bit (32-byte) DSK
dsk_bytes = os.urandom(32)
# The encrypt payload carries the value as unpadded URL-safe base64.
dsk_b64 = base64.urlsafe_b64encode(dsk_bytes).decode("utf-8").rstrip("=")

# Step 6: Encrypt DSK using KEK
encrypt_url = f"https://{key_vault_url}/keys/{kek_name}/{latest_version}/encrypt?api-version=7.4"
encrypt_payload = {
    "alg": "RSA-OAEP-256",
    "value": dsk_b64
}
encrypt_response = requests.post(
    encrypt_url, headers=headers, json=encrypt_payload, timeout=HTTP_TIMEOUT_SECONDS
)
encrypt_response.raise_for_status()
encrypted_dsk = encrypt_response.json()["value"]

# Step 7: Store encrypted DSK with KEK version as a tag
# The tag records which KEK version is able to decrypt this secret.
store_url = f"https://{key_vault_url}/secrets/{dsk_secret_name}?api-version=7.4"
store_payload = {
    "value": encrypted_dsk,
    "tags": {
        "kek_version": latest_version
    }
}
store_response = requests.put(
    store_url, headers=headers, json=store_payload, timeout=HTTP_TIMEOUT_SECONDS
)
store_response.raise_for_status()

return f"Key Encryption Key '{kek_name}' is created and stored in the vault as Keys with Rotation Policy enabled. Data Salt Key '{dsk_secret_name}' is created, encrypted using KEK '{kek_name}', added KEK version as tag and stored as secret in the vault"
$$


Call the function to generate the KEK and DSK for each PCI / PII attribute

In [0]:
-- Run the KEK/DSK generation UDF once per row of the PCI / PII attribute table.
-- The service principal credentials are looked up in the configured secret scope.
SELECT
  pci_pii_attribute_name,
  kek_name,
  dsk_name,
  ${envlp_encryp_core_catalog_name}.default.${envlp_encryp_generate_kek_dsk_function_name}(
    try_secret('${envlp_encryp_secret_scope_name}', '${envlp_encryp_sp_tenant_id_key}'),
    try_secret('${envlp_encryp_secret_scope_name}', '${envlp_encryp_sp_client_id_key}'),
    try_secret('${envlp_encryp_secret_scope_name}', '${envlp_encryp_sp_client_secret_key}'),
    '${envlp_encryp_key_vault_name}',
    kek_name,
    dsk_name
  ) AS generated_kek_dsk
FROM
  ${envlp_encryp_core_catalog_name}.default.${envlp_encryp_pci_pii_attribute_table_name};

pci_pii_attribute_name,kek_name,dsk_name,generated_kek_dsk
dob,envlp-encryp-key-encryption-key-dob,envlp-encryp-data-salt-key-dob,"Key Encryption Key 'envlp-encryp-key-encryption-key-dob' is created and stored in the vault as Keys with Rotation Policy enabled. Data Salt Key 'envlp-encryp-data-salt-key-dob' is created, encrypted using KEK 'envlp-encryp-key-encryption-key-dob', added KEK version as tag and stored as secret in the vault"
email,envlp-encryp-key-encryption-key-email,envlp-encryp-data-salt-key-email,"Key Encryption Key 'envlp-encryp-key-encryption-key-email' is created and stored in the vault as Keys with Rotation Policy enabled. Data Salt Key 'envlp-encryp-data-salt-key-email' is created, encrypted using KEK 'envlp-encryp-key-encryption-key-email', added KEK version as tag and stored as secret in the vault"
pan,envlp-encryp-key-encryption-key-pan,envlp-encryp-data-salt-key-pan,"Key Encryption Key 'envlp-encryp-key-encryption-key-pan' is created and stored in the vault as Keys with Rotation Policy enabled. Data Salt Key 'envlp-encryp-data-salt-key-pan' is created, encrypted using KEK 'envlp-encryp-key-encryption-key-pan', added KEK version as tag and stored as secret in the vault"
exp-date,envlp-encryp-key-encryption-key-exp-date,envlp-encryp-data-salt-key-exp-date,"Key Encryption Key 'envlp-encryp-key-encryption-key-exp-date' is created and stored in the vault as Keys with Rotation Policy enabled. Data Salt Key 'envlp-encryp-data-salt-key-exp-date' is created, encrypted using KEK 'envlp-encryp-key-encryption-key-exp-date', added KEK version as tag and stored as secret in the vault"
name,envlp-encryp-key-encryption-key-name,envlp-encryp-data-salt-key-name,"Key Encryption Key 'envlp-encryp-key-encryption-key-name' is created and stored in the vault as Keys with Rotation Policy enabled. Data Salt Key 'envlp-encryp-data-salt-key-name' is created, encrypted using KEK 'envlp-encryp-key-encryption-key-name', added KEK version as tag and stored as secret in the vault"
phone,envlp-encryp-key-encryption-key-phone,envlp-encryp-data-salt-key-phone,"Key Encryption Key 'envlp-encryp-key-encryption-key-phone' is created and stored in the vault as Keys with Rotation Policy enabled. Data Salt Key 'envlp-encryp-data-salt-key-phone' is created, encrypted using KEK 'envlp-encryp-key-encryption-key-phone', added KEK version as tag and stored as secret in the vault"
ssn,envlp-encryp-key-encryption-key-ssn,envlp-encryp-data-salt-key-ssn,"Key Encryption Key 'envlp-encryp-key-encryption-key-ssn' is created and stored in the vault as Keys with Rotation Policy enabled. Data Salt Key 'envlp-encryp-data-salt-key-ssn' is created, encrypted using KEK 'envlp-encryp-key-encryption-key-ssn', added KEK version as tag and stored as secret in the vault"
address,envlp-encryp-key-encryption-key-address,envlp-encryp-data-salt-key-address,"Key Encryption Key 'envlp-encryp-key-encryption-key-address' is created and stored in the vault as Keys with Rotation Policy enabled. Data Salt Key 'envlp-encryp-data-salt-key-address' is created, encrypted using KEK 'envlp-encryp-key-encryption-key-address', added KEK version as tag and stored as secret in the vault"


Create a Python-backed SQL UDF that returns the decrypted DSK (Data Salt Key), which is used for hashing the PCI / PII attributes while loading them into the vault catalog.

In [0]:
-- Python-backed SQL UDF that fetches the encrypted DSK secret from the key vault
-- and decrypts it with the KEK version that encrypted it.
CREATE OR REPLACE FUNCTION ${envlp_encryp_core_catalog_name}.default.${envlp_encryp_get_decrypted_dsk_function_name}(
  tenant_id STRING
  ,client_id STRING
  ,client_secret STRING
  ,key_vault_url STRING
  ,kek_name STRING
  ,dsk_name STRING
)
RETURNS STRING
LANGUAGE PYTHON
AS
$$
# Parameters
#   tenant_id, client_id, client_secret : service principal used for the
#       client-credentials token request.
#   key_vault_url : host name of the vault; it is placed directly after "https://".
#   kek_name      : name of the Key Vault key acting as KEK.
#   dsk_name      : name of the Key Vault secret that stores the encrypted DSK.
# Returns
#   The decoded DSK (see the NOTE(review) at the return statement).
# Raises
#   ValueError for NULL/empty arguments, requests.HTTPError for any failed
#   Key Vault / login call, RuntimeError if the KEK has no versions.
import base64

import requests

# Upper bound (seconds) for every HTTP call so a stalled endpoint cannot hang the query.
HTTP_TIMEOUT_SECONDS = 30

scope = "https://vault.azure.net/.default"

# SQL NULLs arrive as None (for example when a secret lookup yields NULL). Fail
# early instead of formatting "None" into a URL or a credential field.
required_args = {
    "tenant_id": tenant_id,
    "client_id": client_id,
    "client_secret": client_secret,
    "key_vault_url": key_vault_url,
    "kek_name": kek_name,
    "dsk_name": dsk_name,
}
missing_args = [arg_name for arg_name, arg_value in required_args.items() if not arg_value]
if missing_args:
    raise ValueError("Missing required argument(s): " + ", ".join(missing_args))

# Step 1. Define the token url and the credentials to get the access token
token_url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"
token_data = {
    "grant_type": "client_credentials",
    "client_id": client_id,
    "client_secret": client_secret,
    "scope": scope
}

# Step 2. Call the token url and get the access token
token_response = requests.post(token_url, data=token_data, timeout=HTTP_TIMEOUT_SECONDS)
token_response.raise_for_status()
access_token = token_response.json()["access_token"]

# Step 3. Define the headers for subsequent API calls
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}


def latest_kek_version():
    """Return the version id of the most recently created version of the KEK."""
    versions = []
    page_url = f"https://{key_vault_url}/keys/{kek_name}/versions?api-version=7.4"
    # The listing is paged; keep following nextLink so that the newest version is
    # found even when it is not on the first page.
    while page_url:
        page_response = requests.get(page_url, headers=headers, timeout=HTTP_TIMEOUT_SECONDS)
        page_response.raise_for_status()
        page = page_response.json()
        versions.extend(page.get("value") or [])
        page_url = page.get("nextLink")
    if not versions:
        raise RuntimeError(f"No versions found for KEK '{kek_name}'.")
    newest = max(versions, key=lambda version: version["attributes"]["created"])
    # The kid is a URL whose last path segment is the version id.
    return newest["kid"].split("/")[-1]


# Step 4. Make an API call to retrieve the encrypted dsk from the secrets
encrypted_dsk_response = requests.get(
    f"https://{key_vault_url}/secrets/{dsk_name}?api-version=7.4",
    headers=headers,
    timeout=HTTP_TIMEOUT_SECONDS
)
encrypted_dsk_response.raise_for_status()
secret_bundle = encrypted_dsk_response.json()
encrypted_dsk = secret_bundle.get("value")

# Step 5. Pick the KEK version to decrypt with.
# The secret carries the version that encrypted it in its "kek_version" tag. That
# version has to be used: once the KEK is rotated, the newest version is a
# different key and cannot decrypt a DSK that has not been re-encrypted yet.
kek_version = (secret_bundle.get("tags") or {}).get("kek_version")
if not kek_version:
    # Secret without the tag: fall back to the most recently created KEK version.
    kek_version = latest_kek_version()

# Step 6. Make API Call to Decrypt DSK using that version of the key encryption key
decrypt_dsk_url = f"https://{key_vault_url}/keys/{kek_name}/{kek_version}/decrypt?api-version=7.4"
decrypt_dsk_payload = {
    "alg": "RSA-OAEP-256",  # Algorithm used for encryption
    "value": encrypted_dsk  # The encrypted DSK as a base64 string
}

decrypt_dsk_response = requests.post(
    decrypt_dsk_url, headers=headers, json=decrypt_dsk_payload, timeout=HTTP_TIMEOUT_SECONDS
)
decrypt_dsk_response.raise_for_status()

# Get the decrypted dsk which is in base64 format
decrypted_dsk_base64 = decrypt_dsk_response.json()["value"]

# The returned string is unpadded URL-safe base64; restore the "=" padding that
# the decoder requires (adds 0 to 3 characters).
decrypted_dsk_base64 += "=" * (-len(decrypted_dsk_base64) % 4)

# Decode the base64 string to bytes
decrypted_dsk_bytes = base64.urlsafe_b64decode(decrypted_dsk_base64)

# NOTE(review): the function is declared RETURNS STRING but a bytes object is
# returned. The returned value is left unchanged on purpose, because callers hash
# with it and a different representation (e.g. hex) would change every hash.
# Confirm how the runtime converts bytes to STRING before altering this.
return decrypted_dsk_bytes
$$


In [0]:
-- Python-backed SQL UDF that rotates a KEK and re-encrypts its DSK secret with
-- the new KEK version.
CREATE OR REPLACE FUNCTION ${envlp_encryp_core_catalog_name}.default.${envlp_encryp_rotate_kek_function_name}
(
  tenant_id STRING,
  client_id STRING,
  client_secret STRING,
  key_vault_url STRING,
  kek_name STRING,
  dsk_secret_name STRING
)
RETURNS STRING
LANGUAGE PYTHON
AS
$$
# Parameters
#   tenant_id, client_id, client_secret : service principal used for the
#       client-credentials token request.
#   key_vault_url   : host name of the vault; it is placed directly after "https://".
#   kek_name        : name of the Key Vault key acting as KEK.
#   dsk_secret_name : name of the Key Vault secret that stores the encrypted DSK.
# Returns
#   A status message naming the KEK version the DSK is now encrypted with.
# Raises
#   ValueError for NULL/empty arguments or a secret without a "kek_version" tag,
#   requests.HTTPError for any failed Key Vault / login call, RuntimeError if
#   the KEK has no versions.
import requests

# Upper bound (seconds) for every HTTP call so a stalled endpoint cannot hang the query.
HTTP_TIMEOUT_SECONDS = 30

# SQL NULLs arrive as None (for example when a secret lookup yields NULL). Fail
# early instead of formatting "None" into a URL or a credential field.
required_args = {
    "tenant_id": tenant_id,
    "client_id": client_id,
    "client_secret": client_secret,
    "key_vault_url": key_vault_url,
    "kek_name": kek_name,
    "dsk_secret_name": dsk_secret_name,
}
missing_args = [arg_name for arg_name, arg_value in required_args.items() if not arg_value]
if missing_args:
    raise ValueError("Missing required argument(s): " + ", ".join(missing_args))

# Step 1: Authenticate
token_url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"
token_data = {
    "grant_type": "client_credentials",
    "client_id": client_id,
    "client_secret": client_secret,
    "scope": "https://vault.azure.net/.default"
}
token_response = requests.post(token_url, data=token_data, timeout=HTTP_TIMEOUT_SECONDS)
token_response.raise_for_status()
access_token = token_response.json()["access_token"]

headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}


def latest_kek_version():
    """Return the version id of the most recently created version of the KEK."""
    versions = []
    page_url = f"https://{key_vault_url}/keys/{kek_name}/versions?api-version=7.4"
    # The listing is paged; keep following nextLink so that the newest version is
    # found even when it is not on the first page.
    while page_url:
        page_response = requests.get(page_url, headers=headers, timeout=HTTP_TIMEOUT_SECONDS)
        page_response.raise_for_status()
        page = page_response.json()
        versions.extend(page.get("value") or [])
        page_url = page.get("nextLink")
    if not versions:
        raise RuntimeError(f"No versions found for KEK '{kek_name}'.")
    newest = max(versions, key=lambda version: version["attributes"]["created"])
    # The kid is a URL whose last path segment is the version id.
    return newest["kid"].split("/")[-1]


# Step 2: Get the encrypted DSK
get_secret_url = f"https://{key_vault_url}/secrets/{dsk_secret_name}?api-version=7.4"
secret_response = requests.get(get_secret_url, headers=headers, timeout=HTTP_TIMEOUT_SECONDS)
secret_response.raise_for_status()
secret_data = secret_response.json()
encrypted_dsk = secret_data["value"]
# "tags" can be absent or null in the response, so normalise it before the lookup.
old_kek_version = (secret_data.get("tags") or {}).get("kek_version")
if not old_kek_version:
    # Stop before anything is modified: without the tag there is no way to know
    # which KEK version is able to decrypt the stored DSK.
    raise ValueError(
        f"Secret '{dsk_secret_name}' has no 'kek_version' tag; "
        f"cannot determine which version of KEK '{kek_name}' encrypted it."
    )

# Step 3: Decrypt the DSK using old KEK version
# This runs before the rotation so that a DSK that cannot be decrypted aborts the
# function while the KEK is still untouched.
decrypt_url = f"https://{key_vault_url}/keys/{kek_name}/{old_kek_version}/decrypt?api-version=7.4"
decrypt_payload = {
    "alg": "RSA-OAEP-256",
    "value": encrypted_dsk
}
decrypt_response = requests.post(
    decrypt_url, headers=headers, json=decrypt_payload, timeout=HTTP_TIMEOUT_SECONDS
)
decrypt_response.raise_for_status()
dsk_b64 = decrypt_response.json()["value"]

# Step 3.5: Trigger rotation of the KEK to create a new version
rotate_url = f"https://{key_vault_url}/keys/{kek_name}/rotate?api-version=7.4"
rotate_response = requests.post(rotate_url, headers=headers, timeout=HTTP_TIMEOUT_SECONDS)
rotate_response.raise_for_status()

# Step 4: Get latest KEK version
latest_version = latest_kek_version()

# Step 5: Re-encrypt the DSK with the new KEK version
# The decrypted value is passed through as returned, so the DSK itself is unchanged.
encrypt_url = f"https://{key_vault_url}/keys/{kek_name}/{latest_version}/encrypt?api-version=7.4"
encrypt_payload = {
    "alg": "RSA-OAEP-256",
    "value": dsk_b64
}
encrypt_response = requests.post(
    encrypt_url, headers=headers, json=encrypt_payload, timeout=HTTP_TIMEOUT_SECONDS
)
encrypt_response.raise_for_status()
new_encrypted_dsk = encrypt_response.json()["value"]

# Step 6: Update the secret with new encrypted DSK and new tag
update_url = f"https://{key_vault_url}/secrets/{dsk_secret_name}?api-version=7.4"
update_payload = {
    "value": new_encrypted_dsk,
    "tags": {
        "kek_version": latest_version
    }
}
update_response = requests.put(
    update_url, headers=headers, json=update_payload, timeout=HTTP_TIMEOUT_SECONDS
)
update_response.raise_for_status()

return f"DEK '{dsk_secret_name}' successfully re-encrypted with KEK '{kek_name}' (version: {latest_version}) and updated in Key Vault."

$$

In [0]:
-- Rotate the KEK of the `address` attribute and re-encrypt its DSK.
-- The KEK and DSK names are fixed literals here; the credentials come from the secret scope.
SELECT
  ${envlp_encryp_core_catalog_name}.default.${envlp_encryp_rotate_kek_function_name}(
    try_secret('${envlp_encryp_secret_scope_name}', '${envlp_encryp_sp_tenant_id_key}'),
    try_secret('${envlp_encryp_secret_scope_name}', '${envlp_encryp_sp_client_id_key}'),
    try_secret('${envlp_encryp_secret_scope_name}', '${envlp_encryp_sp_client_secret_key}'),
    '${envlp_encryp_key_vault_name}',
    'envlp-encryp-key-encryption-key-address',
    'envlp-encryp-data-salt-key-address'
  ) AS rotated_kek;

rotated_kek
DEK 'envlp-encryp-data-salt-key-address' successfully re-encrypted with KEK 'envlp-encryp-key-encryption-key-address' (version: 87b10d7f3f7643288e9c0714b4631f1a) and updated in Key Vault.
