# The following cells contain different methods to access data from Azure Storage in Databricks

## 1. Access data using Storage Access Keys

In [0]:
# Placeholders: fill these in before running the cells below.
storage_account = ""  # storage account name; used to build the <account>.dfs.core.windows.net host
container_name = ""  # container within that storage account whose root is listed below
access_key = ""  # storage account access key; do not commit a real key with the notebook

In [0]:
# Set the access key for this storage account's dfs endpoint in the Spark conf.
spark.conf.set(f"fs.azure.account.key.{storage_account}.dfs.core.windows.net", access_key)

In [0]:
# List the container root to verify access. The URI scheme must be "abfss"
# (ABFS driver over TLS); "adfss" is not a registered filesystem scheme.
display(dbutils.fs.ls(f"abfss://{container_name}@{storage_account}.dfs.core.windows.net"))

## 2. Access data using Shared Access Signature (SAS) tokens
**Note:**
1. provides fine-grained access to storage
2. restricts access to specific resource types
3. allows specific permissions, such as read-only
4. restricts access to a specific time period
5. limits access to specific IP addresses
6. recommended access pattern for external clients

In [0]:
# Placeholders: fill these in before running the cells below.
storage_account = ""  # storage account name; used to build the <account>.dfs.core.windows.net host
container_name = ""  # container within that storage account whose root is listed below
sas_token = ""  # SAS token string; do not commit a real token with the notebook

In [0]:
# Use SAS as the authentication type for this storage account's dfs endpoint.
spark.conf.set(f"fs.azure.account.auth.type.{storage_account}.dfs.core.windows.net", "SAS")
# Select FixedSASTokenProvider as the SAS token provider class.
spark.conf.set(f"fs.azure.sas.token.provider.type.{storage_account}.dfs.core.windows.net",
               "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider"
              )
# Supply the SAS token value itself.
spark.conf.set(f"fs.azure.sas.fixed.token.{storage_account}.dfs.core.windows.net", sas_token)

In [0]:
# List the container root to verify access. The URI scheme must be "abfss"
# (ABFS driver over TLS); "adfss" is not a registered filesystem scheme.
display(dbutils.fs.ls(f"abfss://{container_name}@{storage_account}.dfs.core.windows.net"))

## 3. Access data using Service Principal
**Note:**
1. quite similar to user accounts
2. these are registered in AAD and assigned permissions to access resources using role-based access control (RBAC)
3. recommended to use in databricks jobs and CI/CD pipelines
4. provides better security and monitoring as they can be audited

**Steps to follow:**
1. register azure AD application / Service principal
2. generate a secret/password for the principal
3. set spark config with app/client ID, directory/tenant ID and secret
4. assign role

In [0]:
# Placeholders: fill these in before running the cells below.
storage_account = ""  # storage account name; used to build the <account>.dfs.core.windows.net host
container_name = ""  # container within that storage account whose root is listed below
client_id = ""  # application (client) ID of the service principal
tenant_id = ""  # directory (tenant) ID; inserted into the login.microsoftonline.com token URL
client_secret = ""  # service principal secret; do not commit a real secret with the notebook

In [0]:
# Use OAuth as the authentication type for this storage account's dfs endpoint.
spark.conf.set(f"fs.azure.account.auth.type.{storage_account}.dfs.core.windows.net", "OAuth")
# Select ClientCredsTokenProvider as the OAuth token provider class.
spark.conf.set(f"fs.azure.account.oauth.provider.type.{storage_account}.dfs.core.windows.net",
               "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
              )
# Service principal identity: client ID and client secret.
spark.conf.set(f"fs.azure.account.oauth2.client.id.{storage_account}.dfs.core.windows.net", client_id)
spark.conf.set(f"fs.azure.account.oauth2.client.secret.{storage_account}.dfs.core.windows.net", client_secret)
# Token endpoint of the tenant the service principal belongs to.
spark.conf.set(f"fs.azure.account.oauth2.client.endpoint.{storage_account}.dfs.core.windows.net",
               f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
              )

In [0]:
# List the container root to verify access. The URI scheme must be "abfss"
# (ABFS driver over TLS); "adfss" is not a registered filesystem scheme.
display(dbutils.fs.ls(f"abfss://{container_name}@{storage_account}.dfs.core.windows.net"))

## 4. Access data using AAD Credential Passthrough
**Note:**
1. used to restrict users based on what they can see via AAD account
2. Databricks will pass the user's AAD credentials to the storage account to authenticate. If the user has the required role assigned in RBAC for the storage account, they'll be able to access it; otherwise, they won't.

# Fetch secrets from Azure key vaults using Databricks secret scope

In [0]:
# Placeholders: fill these in before running the cells below.
scope_name = ""  # name of the Databricks secret scope to inspect
client_key = ""  # key (name) of the secret to read from that scope

In [0]:
# Show the built-in help for the dbutils.secrets utility.
dbutils.secrets.help()

In [0]:
# List the secret scopes visible from this workspace.
dbutils.secrets.listScopes()

In [0]:
# List the entries stored in the chosen scope.
dbutils.secrets.list(scope=scope_name)

In [0]:
# Fetch a single secret from the scope by its key.
dbutils.secrets.get(scope=scope_name,key=client_key)

# Mount azure blob storage using service principal
**Steps to follow:**
1. Get client ID, tenant ID and client secret from Key Vault
2. Set Spark config with App/Client ID, Directory/Tenant ID and Secret
3. Call the file system utility mount, to mount the storage

In [0]:
def mount_adfs(storage_account, container_name, scope_name, client_key, tenant_key, secret_key):
    """Mount a storage container at /mnt/<storage_account>/<container_name> via a service principal.

    The service principal's client ID, tenant ID and client secret are read
    from the given Databricks secret scope. If the target mount point is
    already present in the mount table it is unmounted first, then the
    container is mounted with OAuth client-credential settings. Finally the
    current mount table is displayed.

    Args:
        storage_account: Storage account name (used in the abfss URI and mount path).
        container_name: Container name (used in the abfss URI and mount path).
        scope_name: Secret scope that holds the three secrets below.
        client_key: Secret key under which the client (application) ID is stored.
        tenant_key: Secret key under which the tenant (directory) ID is stored.
        secret_key: Secret key under which the client secret is stored.

    Returns:
        None. The mount table is shown through ``display``.
    """
    # Pull the service principal credentials from the secret scope
    # (client ID, then tenant ID, then client secret).
    app_id, directory_id, app_secret = (
        dbutils.secrets.get(scope=scope_name, key=secret_name)
        for secret_name in (client_key, tenant_key, secret_key)
    )

    # Spark configuration for OAuth client-credential authentication.
    token_endpoint = f"https://login.microsoftonline.com/{directory_id}/oauth2/token"
    oauth_settings = {
        "fs.azure.account.auth.type": "OAuth",
        "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
        "fs.azure.account.oauth2.client.id": app_id,
        "fs.azure.account.oauth2.client.secret": app_secret,
        "fs.azure.account.oauth2.client.endpoint": token_endpoint,
    }

    target = f"/mnt/{storage_account}/{container_name}"

    # Drop any existing mount at the target so the mount call below starts clean.
    already_mounted = any(entry.mountPoint == target for entry in dbutils.fs.mounts())
    if already_mounted:
        dbutils.fs.unmount(target)

    # Mount the storage account container.
    dbutils.fs.mount(
        source=f"abfss://{container_name}@{storage_account}.dfs.core.windows.net",
        mount_point=target,
        extra_configs=oauth_settings,
    )

    display(dbutils.fs.mounts())

In [0]:
# Show the current mount table.
display(dbutils.fs.mounts())