In [0]:
# --- ADLS via SAS (FixedSASTokenProvider) — short & robust -----------------
# Storage account / container and the derived ABFS endpoint + root URL that
# the rest of the notebook builds its paths from.
storage_account = "stnzrentdev"
container = "nz-rent"
dfs_fqdn = f"{storage_account}.dfs.core.windows.net"
abfss_url = f"abfss://{container}@{dfs_fqdn}/"

# SECURITY: the SAS token used to be pasted here as a string literal. A SAS is
# a bearer credential, so it must not live in notebook source or version
# control. Revoke/rotate the previously committed token and keep the new one
# in a Databricks secret scope instead.
# NOTE(review): the scope/key names below are placeholders — create them with
# the Databricks CLI / Secrets API, or point these at an existing secret.
sas_secret_scope = "nz-rent"
sas_secret_key = "adls-sas-token"

# The token must be supplied WITHOUT a leading '?'; strip surrounding
# whitespace and any leading '?' so a token copied straight from the portal
# still works.
sas_token_raw = (
    dbutils.secrets.get(scope=sas_secret_scope, key=sas_secret_key)
    .strip()
    .lstrip("?")
)

# 0) Remove possibly conflicting settings first. Unsetting is best-effort:
#    any error (e.g. the key was never set) is deliberately ignored.
stale_conf_keys = (
    f"fs.azure.account.key.{dfs_fqdn}",
    f"fs.azure.sas.{container}.{storage_account}.dfs.core.windows.net",
)
for conf_key in stale_conf_keys:
    try:
        spark.conf.unset(conf_key)
    except Exception:
        pass

# 1) Switch this account to SAS auth backed by FixedSASTokenProvider.
#    Settings are applied in the order listed.
sas_settings = {
    f"fs.azure.account.auth.type.{dfs_fqdn}": "SAS",
    f"fs.azure.sas.token.provider.type.{dfs_fqdn}":
        "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider",
    f"fs.azure.sas.fixed.token.{dfs_fqdn}": sas_token_raw,
}
for setting_name, setting_value in sas_settings.items():
    spark.conf.set(setting_name, setting_value)

# 2) Look at the container root; when it is empty, create /bronze with a
#    small marker file and list again.
entries = dbutils.fs.ls(abfss_url)
if not entries:
    dbutils.fs.mkdirs(abfss_url + "bronze")
    dbutils.fs.put(
        abfss_url + "bronze/_sanity.txt",
        "hello databricks",
        overwrite=True,
    )
    entries = dbutils.fs.ls(abfss_url)

# 3) Render the listing as a table ordered by path
rows = [
    (entry.path, entry.size, entry.modificationTime)
    for entry in entries
]
listing_df = spark.createDataFrame(rows, ["path", "size", "mtime"])
display(listing_df.orderBy("path"))

In [0]:
# Source: the CSV uploaded to the workspace file store.
local_src = "/FileStore/shared_uploads/zym0170@autuni.ac.nz/staging_rent.csv"
# Destination: a single file under the container's bronze/ folder.
bronze_dst = f"{abfss_url}bronze/staging_rent_from_workspace.csv"

# Single-file copy (no recursion), then show what bronze/ now contains.
dbutils.fs.cp(local_src, bronze_dst, recurse=False)
bronze_listing = dbutils.fs.ls(f"{abfss_url}bronze/")
display(bronze_listing)

In [0]:
from pyspark.sql.functions import col
from pyspark.sql.types import *

# Path of the CSV stored under bronze/
bronze_csv = f"{abfss_url}bronze/staging_rent_from_workspace.csv"

# Read it with a header row and let Spark infer the column types
df_bronze = spark.read.csv(bronze_csv, header=True, inferSchema=True)

# Quick look: row count, inferred schema, first 10 rows
bronze_row_count = df_bronze.count()
print("Bronze rows:", bronze_row_count)
df_bronze.printSchema()
display(df_bronze.limit(10))

# Sorted distinct values of each listed column
for column_name in ('region', 'property_type'):
    display(df_bronze.select(column_name).distinct().orderBy(column_name))

In [0]:
# Target location for the Delta copy of the bronze data
bronze_delta = f"{abfss_url}bronze_delta/staging_rent_delta"

# Write in Delta format; 'overwrite' replaces whatever already exists there
bronze_writer = df_bronze.write.format('delta').mode('overwrite')
bronze_writer.save(bronze_delta)