In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from datetime import datetime, timedelta


In [0]:
dbutils.secrets.help()

In [0]:
dbutils.secrets.listScopes()
dbutils.secrets.list(scope= 'test')

[SecretMetadata(key='clientid'),
 SecretMetadata(key='dbpassword'),
 SecretMetadata(key='secretvalue'),
 SecretMetadata(key='tenantid'),
 SecretMetadata(key='test')]

In [0]:
application_id = dbutils.secrets.get(scope='test', key='clientid')

In [0]:
print(application_id)

[REDACTED]


In [0]:
## seting up mount points

## 1. get the available containers in the adls gen2 for instance stg, silver, raw

## 2. create a mount points for each of these containers 



In [0]:
def get_secrets():
    try:
        
        application_id = dbutils.secrets.get(scope='test', key='clientid')
        authenticationKey = dbutils.secrets.get(scope='test', key='secretvalue')
        tenant_id = dbutils.secrets.get(scope='test', key='tenantid')

        return application_id, authenticationKey, tenant_id
    
    except Exception as e :
        print(f"An error occure in get_secrets method:: {str(e)}")
        raise e 


In [0]:
## define a function , to create mount point for raw container and only for folder called "test" inside of it 

def mount_point_for_folder(adls_container_name, storage_account_name, adls_folder_name, mount_point):
    try:
            
        application_id, authenticationKey, tenant_id = get_secrets()
           
        source = f"abfss://{adls_container_name}@{storage_account_name}.dfs.core.windows.net/{adls_folder_name}/"
        print(source)
        endpoint = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"

        configs = {
            "fs.azure.account.auth.type": "OAuth",
            "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
            "fs.azure.account.oauth2.client.id": f"{application_id}",
            "fs.azure.account.oauth2.client.secret": f"{authenticationKey}",
            "fs.azure.account.oauth2.client.endpoint": f"{endpoint}",
            "fs.azure.createRemoteFileSystemDuringInitialization": "true"
        }

        if not any(mount.mountPoint == mount_point for mount in dbutils.fs.mounts()):
            dbutils.fs.mount(
                source=source,
                mount_point=mount_point,
                extra_configs=configs
            )
            
        
    except Exception as e:
        print(f"An error occure in mount_adls_folde method:: {str(e)}")
        raise e 

In [0]:
adlsContainerName = 'raw'
storageAccountName = 'adlstraininggen2'
adls_folder_name = 'test'
mount_point = f'/mnt/testcontainer2/{adls_folder_name}'

mount_point_for_folder(adls_container_name= adlsContainerName, storage_account_name= storageAccountName, adls_folder_name= adls_folder_name, mount_point= mount_point)


## we create a mount point to raw container only for the folder called "test" inside the Raw container 

## final steps 

# create the app , copy the clientid, tenantid 
# create the secrets inside the app and copy the secret value
# in kv create the secrets for clientid, tenantid, secret value 

# in the IAM of Kv add role called secret office for the app

# in the IAM of Storage account add role called Storage Data Contributor for the app 

# create the scope in the databricks 


abfss://raw@adlstraininggen2.dfs.core.windows.net/test/


In [0]:
## mount point fot the entire Raw container
def mount_point_for_container(adls_container_name, storage_account_name, mount_point):
    try:
        # Retrieve secrets
        application_id, authenticationKey, tenant_id = get_secrets()

        # Source for the entire container (without specifying folder)
        source = f"abfss://{adls_container_name}@{storage_account_name}.dfs.core.windows.net/"
        print(source)
        endpoint = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"

        # Configuration settings for OAuth authentication
        configs = {
            "fs.azure.account.auth.type": "OAuth",
            "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
            "fs.azure.account.oauth2.client.id": f"{application_id}",
            "fs.azure.account.oauth2.client.secret": f"{authenticationKey}",
            "fs.azure.account.oauth2.client.endpoint": f"{endpoint}",
            "fs.azure.createRemoteFileSystemDuringInitialization": "true"
        }

        # Check if the mount point already exists
        if not any(mount.mountPoint == mount_point for mount in dbutils.fs.mounts()):
            dbutils.fs.mount(
                source=source,
                mount_point=mount_point,
                extra_configs=configs
            )
        else:
            print(f"Mount point {mount_point} already exists.")
            
    except Exception as e:
        print(f"An error occurred in mount_point_for_container method: {str(e)}")
        raise e

# Define the parameters
adls_container_name = 'raw'
storage_account_name = 'adlstraininggen2'
mount_point = f'/mnt/{adls_container_name}'

# Call the function to mount the entire container
mount_point_for_container(adls_container_name=adls_container_name, storage_account_name=storage_account_name, mount_point=mount_point)


abfss://raw@adlstraininggen2.dfs.core.windows.net/


In [0]:
print(mount_pointt)

/mnt/testcontainer/raw


In [0]:
dbutils.fs.ls('/FileStore/')
#dbfs:/FileStore/ >> /mnt/Raw

[FileInfo(path='dbfs:/FileStore/tables/', name='tables/', size=0, modificationTime=1729399552000)]

In [0]:
dbutils.fs.help()

In [0]:
dbutils.fs.mounts()

[MountInfo(mountPoint='/databricks-datasets', source='databricks-datasets', encryptionType=''),
 MountInfo(mountPoint='/mnt/RawContainer', source='abfss://raw@adlstraininggen2.dfs.core.windows.net/raw/', encryptionType=''),
 MountInfo(mountPoint='/mnt/stg', source='abfss://stg@adlstraininggen2.dfs.core.windows.net/stg/', encryptionType=''),
 MountInfo(mountPoint='/Volumes', source='UnityCatalogVolumes', encryptionType=''),
 MountInfo(mountPoint='/databricks/mlflow-tracking', source='databricks/mlflow-tracking', encryptionType=''),
 MountInfo(mountPoint='/databricks-results', source='databricks-results', encryptionType=''),
 MountInfo(mountPoint='/mnt/raw', source='abfss://raw@adlstraininggen2.dfs.core.windows.net/', encryptionType=''),
 MountInfo(mountPoint='/databricks/mlflow-registry', source='databricks/mlflow-registry', encryptionType=''),
 MountInfo(mountPoint='/mnt/testcontainer/raw', source='abfss://raw@adlstraininggen2.dfs.core.windows.net/raw/', encryptionType=''),
 MountInfo(

In [0]:
dbutils.fs.ls('/mnt/testcontainer2/test/')

[FileInfo(path='dbfs:/mnt/testcontainer2/test/Pyspark_oct_19.txt', name='Pyspark_oct_19.txt', size=6052, modificationTime=1729402374000),
 FileInfo(path='dbfs:/mnt/testcontainer2/test/Pyspark_oct_6.txt', name='Pyspark_oct_6.txt', size=2294, modificationTime=1729402374000)]

In [0]:
dbutils.fs.ls('/mnt/raw/')

[FileInfo(path='dbfs:/mnt/raw/2-WheelSales.csv', name='2-WheelSales.csv', size=2085500, modificationTime=1720248338000),
 FileInfo(path='dbfs:/mnt/raw/2_wheels_mapping.csv', name='2_wheels_mapping.csv', size=2032342, modificationTime=1718431607000),
 FileInfo(path='dbfs:/mnt/raw/4-WheelSales.csv', name='4-WheelSales.csv', size=10992762, modificationTime=1720248369000),
 FileInfo(path='dbfs:/mnt/raw/API/', name='API/', size=0, modificationTime=1719121722000),
 FileInfo(path='dbfs:/mnt/raw/Book1_01.csv', name='Book1_01.csv', size=70, modificationTime=1725164419000),
 FileInfo(path='dbfs:/mnt/raw/Events/', name='Events/', size=0, modificationTime=1722662144000),
 FileInfo(path='dbfs:/mnt/raw/IncrementalOutput/', name='IncrementalOutput/', size=0, modificationTime=1717307858000),
 FileInfo(path='dbfs:/mnt/raw/LOGS/', name='LOGS/', size=0, modificationTime=1716617717000),
 FileInfo(path='dbfs:/mnt/raw/Logs2/', name='Logs2/', size=0, modificationTime=1716698955000),
 FileInfo(path='dbfs:/mnt