In [0]:
from pyspark.sql.functions import col, sum, avg, when

def clean_column_names(df):
    """Return ``df`` with every column name normalised.

    Each name has surrounding whitespace stripped, is lower-cased, and has
    spaces and dots replaced by underscores, e.g. ``" Unit.Price "`` becomes
    ``"unit_price"``.

    Args:
        df: DataFrame whose ``columns`` should be renamed.

    Returns:
        A new DataFrame with the same columns, in the same order, carrying
        the cleaned names.
    """
    cleaned_names = [
        name.strip().lower().replace(" ", "_").replace(".", "_")
        for name in df.columns
    ]
    # Rename every column in a single projection rather than issuing one
    # withColumnRenamed call (and one projection) per column.
    return df.toDF(*cleaned_names)

# SAS token for the storage container.
# It is read from a Databricks secret scope instead of being pasted into the
# notebook, so the credential never lands in source control or exports.
# TODO: point the scope/key below at wherever the token is actually stored.
sas_secret_scope = "dwcontoso"
sas_secret_key = "dataset-sas-token"
sas_token = dbutils.secrets.get(scope=sas_secret_scope, key=sas_secret_key)

# Source storage location and the files to ingest from it
storage_account = "dwcontoso"
container = "dataset"
csv_files = [
    "Product.csv",
    "Region.csv",
    "Reseller.csv",
    "Sales.csv",
    "Salesperson.csv",
    "SalespersonRegion.csv",
    "Targets.csv"
]

# Root folder for the bronze Delta tables; one sub-folder is written per file.
# NOTE: the variable name is kept as-is ("bath" looks like a typo for "base")
# in case other cells reference it.
bronze_bath_path="/mnt/bronze"

# Configure Spark to access the container with the SAS token
spark.conf.set(
    f"fs.azure.sas.{container}.{storage_account}.blob.core.windows.net",
    sas_token
)

# Ingest each file: read it, clean its column names, write it as a Delta table
for file_name in csv_files:

    # WASBS URL of the source file
    file_path = f"wasbs://{container}@{storage_account}.blob.core.windows.net/{file_name}"

    # Table name is the file name with ".csv" removed
    table_name = file_name.replace(".csv","")

    # Read as tab-delimited (sep="\t") even though the extension is .csv;
    # the first row is the header and column types are inferred from the data.
    df = spark.read.options(header=True, inferSchema=True, sep="\t").csv(file_path)
    df = clean_column_names(df)

    # Save as a Delta table, replacing whatever an earlier run wrote there
    bronze_path = f"{bronze_bath_path}/{table_name}"

    df.write.format("delta").mode("overwrite").save(bronze_path)

In [0]:
# List the contents of the bronze root folder (same listing as `%fs ls /mnt/bronze`)
display(dbutils.fs.ls("/mnt/bronze"))


path,name,size,modificationTime
dbfs:/mnt/bronze/Product/,Product/,0,1747416805000
dbfs:/mnt/bronze/Region/,Region/,0,1747416819000
dbfs:/mnt/bronze/Reseller/,Reseller/,0,1747416822000
dbfs:/mnt/bronze/Sales/,Sales/,0,1747416826000
dbfs:/mnt/bronze/Salesperson/,Salesperson/,0,1747416829000
dbfs:/mnt/bronze/SalespersonRegion/,SalespersonRegion/,0,1747416831000
dbfs:/mnt/bronze/Targets/,Targets/,0,1747416833000
