SOP0126 - Backup Keys For Encryption At Rest
============================================

Description
-----------

Use this notebook to connect to the `controller` database and back up
both system-managed and external keys for encryption at rest.

Steps
-----

### Parameters

Set the `backup_file_path`. This is where the backup file will be
placed. Please make sure it has a `.json` file extension. Set the
`password_to_encrypt`. This is the password which will be used to
encrypt the RSA private key stored in the backup file.

In [None]:
import os
import tempfile

# Destination of the backup file. A fresh temporary directory is used by
# default; change the path here if the file should be written elsewhere.
backup_directory = tempfile.mkdtemp()
backup_file_path = os.path.join(backup_directory, 'bdcEncryptionKeys.json')
print(f"The keys will be backed up at {backup_file_path}")

# Password handed to the backup cell as 'password_to_encrypt'.
password_to_encrypt = "your_password"
print(f"Key(s) will be saved at this path: '{backup_file_path}'. Please make sure you have permission to access this path.")

### Instantiate Kubernetes client

In [None]:
# Instantiate the Python Kubernetes client into 'api' variable

import os
from IPython.display import Markdown

try:
    from kubernetes import client, config
    from kubernetes.stream import stream
except ImportError: 

    # Install the Kubernetes module
    import sys
    !{sys.executable} -m pip install kubernetes    
    
    try:
        from kubernetes import client, config
        from kubernetes.stream import stream
    except ImportError:
        display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))
        raise

if "KUBERNETES_SERVICE_PORT" in os.environ and "KUBERNETES_SERVICE_HOST" in os.environ:
    config.load_incluster_config()
else:
    try:
        config.load_kube_config()
    except:
        display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))
        raise

api = client.CoreV1Api()

print('Kubernetes client instantiated')

### Get the namespace for the big data cluster

Get the namespace of the Big Data Cluster from the Kubernetes API.

**NOTE:**

If there is more than one Big Data Cluster in the target Kubernetes
cluster, then either:

-   set \[0\] to the correct value for the big data cluster.
-   set the environment variable AZDATA\_NAMESPACE, before starting
    Azure Data Studio.

In [None]:
# Resolve the BDC namespace into 'namespace': the AZDATA_NAMESPACE environment
# variable wins; otherwise the first namespace labelled MSSQL_CLUSTER is used.

if "AZDATA_NAMESPACE" not in os.environ:
    try:
        labelled_namespaces = api.list_namespace(label_selector='MSSQL_CLUSTER').items
        namespace = labelled_namespaces[0].metadata.name
    except IndexError:
        # No labelled namespace was returned: show the troubleshooting hints and fail
        from IPython.display import Markdown
        namespace_hints = (
            'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.',
            'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.',
            'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.',
        )
        for hint in namespace_hints:
            display(Markdown(hint))
        raise
else:
    namespace = os.environ["AZDATA_NAMESPACE"]

print('The kubernetes namespace for your big data cluster is: ' + namespace)

### Python functions to query the `controller` database and return results.

In [None]:
try:
  import pandas
except ModuleNotFoundError:
  !{sys.executable} -m pip install --user pandas
  import pandas
from io import StringIO
# None removes the column-width limit so long values are shown in full.
# The former sentinel -1 was deprecated in pandas 1.0 and is rejected by
# current pandas releases.
pandas.set_option('display.max_colwidth', None)

# Pod and container in which sqlcmd is executed
name = 'controldb-0'
container = 'mssql-server'

def get_dataframe(query):
    """Run `query` against the `controller` database and return the result as a pandas DataFrame.

    sqlcmd is executed through the Kubernetes exec API in the pod/container
    given by the notebook-level `name`, `namespace` and `container` variables.
    It emits '^'-separated text; `sed 2d` drops the second output line, so the
    header line is followed directly by the data lines.

    Both stdout and stderr of the command are captured into the parsed text.
    """
    shell_script=f"""export SQLCMDPASSWORD=$(cat /var/run/secrets/credentials/mssql-sa-password/password);
    /opt/mssql-tools/bin/sqlcmd -b -S . -U sa -Q "SET NOCOUNT ON;
    {query}" -d controller  -s"^" -W | sed 2d
    """
    sqlcmd_text = stream(
        api.connect_get_namespaced_pod_exec,
        name,
        namespace,
        command=['/bin/sh', '-c', shell_script],
        container=container,
        stderr=True,
        stdout=True)
    text_buffer = StringIO(sqlcmd_text)
    return pandas.read_csv(text_buffer, sep='^')

print("Function 'get_dataframe' defined")

def run_sqlcmd(query):
    """Run `query` against the `controller` database and return the raw sqlcmd output as a string.

    The command line is the same one `get_dataframe` uses ('^' column
    separator, second output line removed by `sed 2d`), but the text is
    returned unparsed, exactly as the exec stream delivered it.
    """
    shell_script=f"""export SQLCMDPASSWORD=$(cat /var/run/secrets/credentials/mssql-sa-password/password);
    /opt/mssql-tools/bin/sqlcmd -b -S . -U sa -Q "SET NOCOUNT ON;
    {query}" -d controller  -s"^" -W | sed 2d
    """
    sqlcmd_text = stream(
        api.connect_get_namespaced_pod_exec,
        name,
        namespace,
        command=['/bin/sh', '-c', shell_script],
        container=container,
        stderr=True,
        stdout=True)
    return str(sqlcmd_text)

print("Function 'run_sqlcmd' defined")

### Python function to execute a command in a Kubernetes pod.

In [None]:
# Pod and container in which shell commands are executed
pod_name = 'controldb-0'
container = 'mssql-server'

def execute_k8scommand(command):
    """Run `command` with `/bin/sh -c` inside the target container and return its output as a string.

    Both stdout and stderr of the command are captured in the returned text.
    """
    shell_invocation = ['/bin/sh', '-c', command]
    exec_output = stream(
        api.connect_get_namespaced_pod_exec,
        pod_name,
        namespace,
        command=shell_invocation,
        container=container,
        stderr=True,
        stdout=True)
    return str(exec_output)

print("Function 'execute_k8scommand' defined")

### Backup encryption keys.

In [None]:
import base64
import json
import os

from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.serialization.pkcs12 import load_key_and_certificates
from cryptography.hazmat.primitives.asymmetric import padding, rsa
from cryptography.hazmat.backends import default_backend

def create_rsa_key(key_length=3072):
    """Generate a private/public RSA key pair and return the private key object.

    key_length is the key size in bits; the public exponent is fixed at 65537.
    """
    return rsa.generate_private_key(
        public_exponent=65537,
        key_size=key_length,
        backend=default_backend())

def encrypt_rsa_key_as_pkcs8(rsa_key, password_to_encrypt):
    """Serialize `rsa_key` as a PEM-encoded PKCS8 private key encrypted with `password_to_encrypt`.

    The password is UTF-8 encoded before being handed to the serializer.
    Returns the PEM document produced by `private_bytes`.
    """
    password_bytes = password_to_encrypt.encode('utf-8')
    key_protection = serialization.BestAvailableEncryption(password_bytes)
    return rsa_key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.PKCS8,
        encryption_algorithm=key_protection)

def create_aes_256_key():
    """Return 32 bytes from the operating system's random source for use as an AES-256 key."""
    key_size_in_bytes = 256 // 8
    return os.urandom(key_size_in_bytes)

def encrypt_aes_key_with_rsa_key(aes256_key, rsa_key):
    """Encrypt `aes256_key` with the public half of `rsa_key` and return the ciphertext.

    OAEP padding is used, with SHA-256 both as the hash and inside MGF1,
    and no label.
    """
    oaep_padding = padding.OAEP(
        mgf=padding.MGF1(algorithm=hashes.SHA256()),
        algorithm=hashes.SHA256(),
        label=None)
    public_key = rsa_key.public_key()
    return public_key.encrypt(aes256_key, oaep_padding)

def convert_rsa_key_from_pkcs12_to_jwk(rsa_key_as_pkcs12):
    """Load an RSA private key from a password-less PKCS12 blob and return it as a JSON Web Key dict.

    Only the private key of the PKCS12 container is used; the certificates it
    may carry are ignored. Every RSA number is written as URL-safe base64
    text (padding characters retained) of its minimal big-endian byte form.
    The JWK-specific fields 'key_hsm', 'key_ops' and 'kid' are set to None
    and 'kty' to 'RSA'.
    """
    private_key, _certificate, _additional_certificates = load_key_and_certificates(
        data = rsa_key_as_pkcs12,
        password = None,
        backend = default_backend())

    def encode_number(number):
        # RSA numbers must be big endian as per the JWK format
        byte_count = (number.bit_length() + 7) // 8
        big_endian_bytes = number.to_bytes(byte_count, byteorder='big')
        return base64.urlsafe_b64encode(big_endian_bytes).decode('utf-8')

    private_numbers = private_key.private_numbers()
    public_numbers = private_key.public_key().public_numbers()

    return {
        # Fields specific to JSON Web Key
        'key_hsm' : None,
        'key_ops' : None,
        'kid' : None,
        'kty' : 'RSA',
        # Public key
        'e' : encode_number(public_numbers.e),
        'n' : encode_number(public_numbers.n),
        # Private key
        'd' : encode_number(private_numbers.d),
        'p' : encode_number(private_numbers.p),
        'q' : encode_number(private_numbers.q),
        'dp' : encode_number(private_numbers.dmp1),
        'dq' : encode_number(private_numbers.dmq1),
        'qi' : encode_number(private_numbers.iqmp)}

def encrypt_data_with_aes_key(aes256_key, message):
    """Encrypt `message` (bytes) with AES in CBC mode under `aes256_key` using a fresh random IV.

    Plaintext layout: a 4-byte big-endian length of `message`, the message
    itself, then random filler bytes up to the next multiple of the 16-byte
    block size (no filler when already aligned).

    Returns a tuple (ciphertext, iv), both as URL-safe base64 text.
    """
    aes_block_bytes = 16
    length_prefix_bytes = 4

    iv_bytes = os.urandom(aes_block_bytes)
    aes_encryptor = Cipher(
        algorithms.AES(aes256_key),
        modes.CBC(iv_bytes),
        backend=default_backend()).encryptor()

    # The length prefix lets a reader discard the random filler after decryption
    plaintext = len(message).to_bytes(length_prefix_bytes, byteorder='big') + message
    overhang = len(plaintext) % aes_block_bytes
    if overhang:
        plaintext += os.urandom(aes_block_bytes - overhang)

    ciphertext = aes_encryptor.update(plaintext) + aes_encryptor.finalize()
    ciphertext_text = base64.urlsafe_b64encode(ciphertext).decode('utf-8')
    iv_text = base64.urlsafe_b64encode(iv_bytes).decode('utf-8')
    return (ciphertext_text, iv_text)

# Password that opens ControllerDbSymmetricKey, read (base64-decoded) from the
# 'controller-db-rw-secret' Kubernetes secret.
symmetric_key_protection_password = base64.b64decode(str(api.read_namespaced_secret("controller-db-rw-secret", namespace).data['encryptionPassword'])).decode('utf-8')

# Counts the columns named 'version' on the Credentials table; a count of 0
# selects the query without the versioning columns below.
test_version_stmt = """
SELECT count(*) FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 'Credentials'  AND COLUMN_NAME = 'version' """

# run_sqlcmd returns the raw sqlcmd text, header line and line breaks
# included, so comparing it directly with '0' cannot match. The count is the
# last non-blank line of that output.
version_output_lines = [line.strip() for line in run_sqlcmd(test_version_stmt).splitlines() if line.strip()]
version = version_output_lines[-1] if version_output_lines else ''
if version == '0':
    tsql_template = """
    OPEN SYMMETRIC KEY ControllerDbSymmetricKey DECRYPTION BY PASSWORD = '{0}'
    select account_name, CAST(DecryptByKey([encrypted_password]) AS NVARCHAR(4000)) as unencrypted_password, CAST(application_metadata AS NVARCHAR(2000)) as application_metadata
    from Credentials where ISJSON(application_metadata) > 0 and JSON_VALUE(application_metadata, '$.vaultname') in ('hdfsvault-svc', 'mssqlvault-svc') and type = '{1}'
    """
else:
    tsql_template = """
    OPEN SYMMETRIC KEY ControllerDbSymmetricKey DECRYPTION BY PASSWORD = '{0}'
    select account_name, CAST(DecryptByKey([encrypted_password]) AS NVARCHAR(4000)) as unencrypted_password, CAST(application_metadata AS NVARCHAR(2000)) as application_metadata,
    version, creation_timestamp_utc, credential_sequence_id
    from Credentials where ISJSON(application_metadata) > 0 and JSON_VALUE(application_metadata, '$.vaultname') in ('hdfsvault-svc', 'mssqlvault-svc') and type = '{1}'
    """

# Credentials of type '2' are exported as secrets
tsql_secrets_retrieval = tsql_template.format(symmetric_key_protection_password, '2')
df_secrets = get_dataframe(tsql_secrets_retrieval)

# Credentials of type '3' are exported as asymmetric keys
tsql_asymmetric_keys_retrieval = tsql_template.format(symmetric_key_protection_password, '3')
df_asymmetric_keys = get_dataframe(tsql_asymmetric_keys_retrieval)

# Fresh key material for this backup: an AES-256 key that is itself encrypted
# with a new RSA key, whose private half is protected by 'password_to_encrypt'.
rsa_key = create_rsa_key()
aes256_key = create_aes_256_key()
pem_with_encrypted_private_key = encrypt_rsa_key_as_pkcs8(rsa_key, password_to_encrypt)
encrypted_aes_key = encrypt_aes_key_with_rsa_key(aes256_key, rsa_key)

# Text forms that can be stored in the JSON backup
private_key_pem_text = pem_with_encrypted_private_key.decode('utf-8')
encrypted_aes_key_text = base64.urlsafe_b64encode(encrypted_aes_key).decode('utf-8')

encryptor = {
    'private_key' : private_key_pem_text,
    'symmetric_key' : encrypted_aes_key_text}
# Positional layout of every row returned by the retrieval queries:
#   0 account_name, 1 unencrypted_password, 2 application_metadata
# and, when the versioned query was used,
#   3 version, 4 creation_timestamp_utc, 5 credential_sequence_id.
# Values are read with .iloc because integer keys on a label-indexed Series
# are deprecated as positional lookups in pandas.
has_version_columns = version != '0'

json_keys_entries = []
for index, row in df_secrets.iterrows():
    # Secrets (type 2) are copied into the backup as returned by the query
    data = {
        'id' : row.iloc[0],
        'tags': row.iloc[2],
        'value': row.iloc[1],
        'type': 2}
    if has_version_columns:
        data.update({
            'version' : row.iloc[3],
            'creation_timestamp_utc' : row.iloc[4],
            'credential_sequence_id' : row.iloc[5]})
    json_keys_entries.append(data)

for index, row in df_asymmetric_keys.iterrows():
    message = row.iloc[1].encode('utf-8')

    # Python requires optional base64 padding
    #
    raw_message = base64.urlsafe_b64decode(message + b'==')
    try:
        json.loads(raw_message)
        convert_to_jwk = False
    except ValueError:
        # Convert system managed keys stored as PKCS12 to JSON Web Key
        #
        convert_to_jwk = True

    if convert_to_jwk:
        jwk = convert_rsa_key_from_pkcs12_to_jwk(raw_message)
        message = base64.urlsafe_b64encode(json.dumps(jwk).encode('utf-8'))

    # All the keys at this point are either originally in JWK format
    # or converted from PKCS12 to JWK.
    application_metadata = json.loads(row.iloc[2])
    application_metadata['IsJsonWebKey'] = 'True'
    application_metadata = json.dumps(application_metadata)

    # Asymmetric keys (type 3) are stored AES-encrypted together with their IV
    aes_ciphertext, iv = encrypt_data_with_aes_key(aes256_key, message)

    # parameter 'tags' represents application metadata
    data = {
        'id' : row.iloc[0],
        'tags': application_metadata,
        'value': aes_ciphertext,
        'iv': iv,
        'type': 3}
    if has_version_columns:
        data.update({
            'version' : row.iloc[3],
            'creation_timestamp_utc' : row.iloc[4],
            'credential_sequence_id' : row.iloc[5]})
    json_keys_entries.append(data)

# Backup document: the key-wrapping material plus every exported entry
backup = {
    'encryptor' : encryptor,
    'keys' : json_keys_entries,
}

# Serialize the document as JSON to the configured backup path
with open(backup_file_path, "w+") as backup_file:
    json.dump(backup, backup_file)

print(f"Encryption Key(s) Backed up at location: {backup_file_path}")

In [None]:
# Completion marker printed when this last cell runs.
print("Notebook execution is complete.")