In [0]:
from datetime import date, timedelta

from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, FloatType
import pyspark.sql.functions as F

In [0]:
# =====================================================================
# Notebook configuration
# Every value below is fetched through dbutils.secrets from the
# 'FlightData-scope' secret scope (noted by the author as Azure Key Vault
# secrets), so nothing sensitive is written in the notebook itself.
# =====================================================================


def _read_secret(secret_key):
    """Return the secret stored under `secret_key` in the 'FlightData-scope' scope."""
    return dbutils.secrets.get(scope='FlightData-scope', key=secret_key)


# --- (JSON) files creation -------------------------------------------
var_2_arrivals = _read_secret('var-2-arrivals')
var_2_departures = _read_secret('var-2-departures')

# --- Ingestion & transformation --------------------------------------
# Service-principal credentials; mount_adls_using_sp reads these globals.
client_id = _read_secret('client-id')
tenant_id = _read_secret('tenant-id')
client_secret = _read_secret('client-secret')

# Storage account name and the two container names.
storage_account = _read_secret('storage-account')
storage_ingestion_container = _read_secret('storage-ingestion-container')
storage_transformation_container = _read_secret('storage-transformation-container')

In [0]:
# Obtain the Spark session for this notebook under the application name
# "FlightDataAnalysis"; getOrCreate() hands back an already-active session
# when one exists instead of building a second one.
spark = (
    SparkSession.builder
    .appName("FlightDataAnalysis")
    .getOrCreate()
)

In [0]:
# Function to access (mount) the ADLS containers using Service Principals

def mount_adls_using_sp(storage_account_name, storage_container_name):
    """Mount an ADLS container on DBFS, authenticating as a service principal.

    The container is mounted at
    ``/mnt/<storage_account_name>/<storage_container_name>``. If that mount
    point is already listed by ``dbutils.fs.mounts()`` it is unmounted first,
    so the function can be re-run safely.

    Relies on the module-level ``client_id``, ``client_secret`` and
    ``tenant_id`` values and on the ``dbutils`` object being in scope.

    Args:
        storage_account_name: Name of the storage account.
        storage_container_name: Name of the container inside that account.

    Returns:
        None. The result of ``dbutils.fs.mount`` is not propagated.
    """
    # OAuth (client-credentials) settings passed to the mount call.
    oauth_settings = {}
    oauth_settings["fs.azure.account.auth.type"] = "OAuth"
    oauth_settings["fs.azure.account.oauth.provider.type"] = (
        "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
    )
    oauth_settings["fs.azure.account.oauth2.client.id"] = client_id
    oauth_settings["fs.azure.account.oauth2.client.secret"] = client_secret
    oauth_settings["fs.azure.account.oauth2.client.endpoint"] = (
        f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
    )

    # Single source of truth for where the container lands in DBFS.
    target_mount_point = f"/mnt/{storage_account_name}/{storage_container_name}"

    # Drop a stale mount (if any) before mounting again; stop scanning at the
    # first match, since one unmount is all that is needed.
    for existing in dbutils.fs.mounts():
        if existing.mountPoint == target_mount_point:
            dbutils.fs.unmount(target_mount_point)
            break

    container_uri = f"abfss://{storage_container_name}@{storage_account_name}.dfs.core.windows.net/"
    dbutils.fs.mount(
        source=container_uri,
        mount_point=target_mount_point,
        extra_configs=oauth_settings,
    )