In [1]:
# Synapse: read ADF Notebook activity parameters via job tags.
# When you run manually in Studio, it falls back to the defaults below.

from notebookutils import mssparkutils

def get_param(name: str, default: str):
    """Return the job tag called ``name``, or ``default`` when it is unusable.

    The value is read with ``mssparkutils.env.getJobTag(name)``. ``default`` is
    returned instead when that call raises, or when it yields a non-string, an
    empty or whitespace-only string, or the text "null" in any letter case.

    A failed lookup is still tolerated, but the reason is printed so that a run
    which quietly used the interactive defaults can be spotted in the cell
    output.

    NOTE(review): confirm that ``getJobTag`` is available on
    ``mssparkutils.env`` in the target runtime -- if it is not, every call
    takes the fallback path and the printed notice will say so.

    Args:
        name: Tag / parameter name to look up.
        default: Value to use when no usable tag value is available.

    Returns:
        The tag value with surrounding whitespace removed, or ``default``.
    """
    try:
        raw = mssparkutils.env.getJobTag(name)  # set by ADF baseParameters
    except Exception as exc:
        # Deliberately best-effort, but report the cause instead of hiding it.
        print(
            f"get_param: could not read job tag {name!r} "
            f"({type(exc).__name__}: {exc}); using default {default!r}"
        )
        return default

    # Anything that is not text cannot be a usable parameter value.
    if not isinstance(raw, str):
        return default

    # Trim padding so stray spaces/newlines never end up inside storage paths.
    value = raw.strip()
    if not value or value.lower() == "null":
        return default
    return value

# ---- the defaults below apply only when get_param finds no usable job tag
# ---- (e.g. an interactive run in Studio)
ingest_date = get_param(name="ingest_date", default="2025-07-01")
file_name = get_param(name="file_name", default="yellow_tripdata_2025-07.parquet")

# Echo the resolved values so the cell output records what this run will process.
for label, resolved in (("ingest_date:", ingest_date), ("file_name  :", file_name)):
    print(label, resolved)


StatementMeta(spsmall01, 2, 2, Finished, Available, Finished)

ingest_date: 2025-07-01
file_name  : yellow_tripdata_2025-07.parquet


In [2]:
from notebookutils import mssparkutils

acct = "eltazr1adls"  # your default workspace storage account
test_dir = f"abfss://raw@{acct}.dfs.core.windows.net/_syn_auth_test/"

# Storage-permission smoke test: create a directory, write a tiny file into it,
# then list the directory. If any step fails with an authorization error, fix
# RBAC on the storage account by granting "Storage Blob Data Contributor" to
# the Synapse workspace's managed identity.
probe_file = test_dir + "ok.txt"
mssparkutils.fs.mkdirs(test_dir)
mssparkutils.fs.put(probe_file, "hello", True)  # True: overwrite an existing file
probe_listing = mssparkutils.fs.ls(test_dir)
display(probe_listing)


StatementMeta(spsmall01, 2, 3, Finished, Available, Finished)

[FileInfo(path=abfss://raw@eltazr1adls.dfs.core.windows.net/_syn_auth_test/ok.txt, name=ok.txt, size=5)]

In [3]:
from notebookutils import mssparkutils

acct = "eltazr1adls"

# src is the single ingested parquet file; dst is a folder keyed by
# ingest_date and file_name under nyc_taxi_snappy.
src = f"abfss://raw@{acct}.dfs.core.windows.net/nyc_taxi/ingest_date={ingest_date}/{file_name}"
dst = f"abfss://raw@{acct}.dfs.core.windows.net/nyc_taxi_snappy/ingest_date={ingest_date}/file_name={file_name}/"

# Load the original parquet (expected to be ZSTD-compressed).
df = spark.read.parquet(src)

# coalesce(1) keeps the output to one Snappy part file per input file_name;
# it can be removed when the inputs are large. mode("overwrite") replaces any
# earlier output at dst, so reruns and backfills can be repeated safely.
snappy_writer = (
    df.coalesce(1)
    .write
    .mode("overwrite")
    .option("compression", "snappy")
)
snappy_writer.parquet(dst)

print("Wrote:", dst)
written_files = mssparkutils.fs.ls(dst)
display(written_files)


StatementMeta(spsmall01, 2, 4, Finished, Available, Finished)

Wrote: abfss://raw@eltazr1adls.dfs.core.windows.net/nyc_taxi_snappy/ingest_date=2025-07-01/file_name=yellow_tripdata_2025-07.parquet/


[FileInfo(path=abfss://raw@eltazr1adls.dfs.core.windows.net/nyc_taxi_snappy/ingest_date=2025-07-01/file_name=yellow_tripdata_2025-07.parquet/_SUCCESS, name=_SUCCESS, size=0),
 FileInfo(path=abfss://raw@eltazr1adls.dfs.core.windows.net/nyc_taxi_snappy/ingest_date=2025-07-01/file_name=yellow_tripdata_2025-07.parquet/part-00000-e4dcff02-b1ac-4c15-9fa9-c689418fd50b-c000.snappy.parquet, name=part-00000-e4dcff02-b1ac-4c15-9fa9-c689418fd50b-c000.snappy.parquet, size=81696094)]