In [0]:
import dlt
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.functions import current_timestamp, from_utc_timestamp
# DLT requires explicit table definitions so it can register metadata before execution.

In [0]:
from cryptography.fernet import Fernet
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType

**Creating DLT Tables**

In [0]:
# Fetch the PII encryption key from the "PII-secret" secret scope
# (Azure Key Vault, per the original note), so the key is never hard-coded here.
key = dbutils.secrets.get(scope="PII-secret", key="pii-key")
# Fernet cipher built from that key; encrypt_val uses it to encrypt column values.
fernet = Fernet(key)

In [0]:
# Entity names are taken from the entries listed directly under the landing-zone
# folder, with every "/" removed from each entry name (presumably to drop the
# trailing slash on directory names — TODO confirm). One bronze table is later
# registered per name in this list.
entities = [f.name.replace("/", "") for f in dbutils.fs.ls("abfss://trainingexternal@bayadapoc.dfs.core.windows.net/learners/Piyush/Landing_zone/AdventureWorks_Piyush")]

def encrypt_val(val):
    """Encrypt a single column value with the module-level ``fernet`` cipher.

    Args:
        val: The value to encrypt. Strings are encrypted as-is; any other
            non-null value is converted with ``str()`` first so that a
            non-string column value does not fail on ``.encode()``.

    Returns:
        The encrypted token decoded to ``str``, or ``None`` when ``val`` is
        ``None`` so that nulls stay null.
    """
    if val is None:
        return None
    # Fernet works on bytes, so normalise to text before encoding.
    text = val if isinstance(val, str) else str(val)
    return fernet.encrypt(text.encode()).decode()
 
# Wrap encrypt_val as a Spark UDF with a string return type so it can be
# applied to DataFrame columns.
encrypt_udf = udf(encrypt_val, StringType())

def create_bronze_table(entity_name):
    """Build the streaming bronze DataFrame for one landing-zone entity.

    Reads the entity's CSV files as a ``cloudFiles`` stream, adds audit
    columns (``Entity``, ``File_Name``, ``Year``, ``Inserted_by``,
    ``Ingested_time``) and, for the entities listed below, replaces one
    sensitive column with its encrypted value.

    Args:
        entity_name: Name of the entity folder under the landing zone.

    Returns:
        The streaming DataFrame with the audit columns added and, where
        applicable, the sensitive column encrypted.
    """
    # Column to encrypt per entity; entities not listed pass through unchanged.
    sensitive_column_by_entity = {
        "Customer": "AccountNumber",
        "Address": "PostalCode",
        "EmailAddress": "EmailAddress",
        "Person": "PersonType",
    }

    # Both path regexes share the same prefix up to the entity folder.
    entity_path_prefix = r'Landing_zone/AdventureWorks_Piyush/' + entity_name
    source_path = col("_metadata.file_path")

    # NOTE(review): Auto Loader documents `cloudFiles.inferColumnTypes` for type
    # inference; confirm whether `inferSchema` has any effect on this reader.
    raw_stream = (
        spark.readStream.format("cloudFiles")
        .option("cloudFiles.format", "csv")
        .option("header", "true")
        .option("inferSchema", "true")
        .option("cloudFiles.schemaEvolutionMode", "rescue")
        .option("recursiveFileLookup", "true")
        .load(f"abfss://trainingexternal@bayadapoc.dfs.core.windows.net/learners/Piyush/Landing_zone/AdventureWorks_Piyush/{entity_name}/*")
    )

    bronze_df = (
        raw_stream
        .withColumn("Entity", lit(entity_name))
        # Path segment that follows the 4-digit folder under the entity.
        .withColumn("File_Name", regexp_extract(source_path, entity_path_prefix + r'/\d{4}/([^/]+)', 1))
        # The 4-digit folder directly under the entity.
        .withColumn("Year", regexp_extract(source_path, entity_path_prefix + r'/(\d{4})', 1))
        .withColumn("Inserted_by", current_user())
        .withColumn("Ingested_time", from_utc_timestamp(current_timestamp(), "Asia/Kolkata"))
    )

    sensitive_column = sensitive_column_by_entity.get(entity_name)
    if sensitive_column is not None:
        bronze_df = bronze_df.withColumn(sensitive_column, encrypt_udf(col(sensitive_column)))

    return bronze_df

# Register one DLT bronze table per discovered entity.
for entity in entities:

    @dlt.table(
        name=f"training.piyush.{entity}_bronze",
        # Name the entity in the comment so each table's metadata is
        # self-describing instead of identical across all tables.
        comment=f"Bronze table for {entity}",
    )
    def bronze_table(entity_name=entity):
        """Return the bronze streaming DataFrame for ``entity_name``.

        Binding ``entity`` as a default argument freezes the current loop
        value for this function, rather than reading the loop variable
        later when the function is eventually invoked.
        """
        return create_bronze_table(entity_name)