In [0]:
# Load IPython's autoreload extension. Mode 2 reloads all imported modules
# (except those excluded with %aimport) before each cell is executed, so edits
# to project modules take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2


In [0]:
import importlib
from scripts_spark import extract_spark_discogs
# Explicitly re-execute the module so the class imported below is bound from
# the freshly reloaded code rather than from a copy cached in sys.modules.
importlib.reload(extract_spark_discogs)

# Must stay after the reload above: a from-import done earlier would keep
# pointing at the pre-reload class object.
from scripts_spark.extract_spark_discogs import Step02ExtractSparkDiscogs


In [0]:
# Apply Keys
# Reads the storage account key from the "markscope" secret scope and registers
# it on the active Spark session's runtime configuration.
from pyspark.dbutils import DBUtils

dbutils = DBUtils(spark)

# Surrounding whitespace is stripped from the stored secret value before use.
adls_key = dbutils.secrets.get("markscope", "azure-storage-account-key").strip()

# Spark configuration entry that carries the account key for the
# ungcapstor01 DFS endpoint.
adls_conf_name = "fs.azure.account.key.ungcapstor01.dfs.core.windows.net"
spark.conf.set(adls_conf_name, adls_key)

print("✅ Azure ADLS key successfully configured.")


In [0]:
# Discogs creds test
# Smoke test: issues one search request against the Discogs API and prints the
# HTTP status code so the credentials can be validated.
import requests

# Credentials are read from the Databricks secret scope instead of being
# embedded in notebook source.
# NOTE(review): the secret names "discogs-consumer-key" and
# "discogs-consumer-secret" are assumed — create them in the "markscope" scope
# or adjust these names to the ones that already exist.
# NOTE(review): the key/secret pair previously committed in this cell should be
# treated as leaked and rotated in the Discogs developer settings.
params = {
    "q": "Inception soundtrack",
    "type": "release",
    "key": dbutils.secrets.get("markscope", "discogs-consumer-key").strip(),
    "secret": dbutils.secrets.get("markscope", "discogs-consumer-secret").strip(),
}
headers = {"User-Agent": "DataEngineeringCapstone/1.0"}

# An explicit timeout is required: without one, requests waits indefinitely
# for a response and the cell can hang.
response = requests.get(
    "https://api.discogs.com/database/search",
    params=params,
    headers=headers,
    timeout=30,
)
print(response.status_code)


In [0]:
# Databricks notebook: Step 01 - Extract Spark TMDB Notebook
# ----------------------------------------------------------
# Purpose: Run Step01ExtractSparkTMDB (scripts_spark.extract_spark_tmdb) on Databricks Runtime 16.4 LTS
# Environment: ungcap-dlws / markscope secrets
# Author: M. Holahan

# COMMAND ----------
from scripts_spark.extract_spark_tmdb import Step01ExtractSparkTMDB
from pyspark.sql import SparkSession

# Storage account key from the "markscope" secret scope, whitespace-trimmed.
adls_key = dbutils.secrets.get("markscope", "azure-storage-account-key").strip()

# Build (or fetch) the session in explicit steps: app name first, then the
# account-key setting for the ungcapstor01 DFS endpoint.
session_builder = SparkSession.builder.appName("Step01ExtractSparkTMDB")
session_builder = session_builder.config(
    "fs.azure.account.key.ungcapstor01.dfs.core.windows.net", adls_key
)
# getOrCreate() returns the already-running session when one exists.
spark = session_builder.getOrCreate()

# Connectivity check: list the root of the "raw" container and render it.
raw_root_listing = dbutils.fs.ls("abfss://raw@ungcapstor01.dfs.core.windows.net/")
display(raw_root_listing)

# Point the job at the TMDB location, then execute it.
job = Step01ExtractSparkTMDB(spark)
job.container_uri = "abfss://raw@ungcapstor01.dfs.core.windows.net/raw/tmdb/"
job.run()


In [0]:
# Databricks notebook: Step 02 - Extract Spark Discogs Notebook
# ----------------------------------------------------------
# Purpose: Run Step02ExtractSparkDiscogs (scripts_spark.extract_spark_discogs) on Databricks Runtime 16.4 LTS
# Environment: ungcap-dlws / markscope secrets
# Author: M. Holahan

# COMMAND ----------
from scripts_spark.extract_spark_discogs import Step02ExtractSparkDiscogs
from pyspark.sql import SparkSession

# Storage account key from the "markscope" secret scope, whitespace-trimmed.
adls_key = dbutils.secrets.get("markscope", "azure-storage-account-key").strip()

# Assemble the session builder step by step: app name, then the account-key
# setting for the ungcapstor01 DFS endpoint.
session_builder = SparkSession.builder.appName("Step02ExtractSparkDiscogs")
session_builder = session_builder.config(
    "fs.azure.account.key.ungcapstor01.dfs.core.windows.net", adls_key
)
# getOrCreate() returns the already-running session when one exists.
spark = session_builder.getOrCreate()

# Connectivity check: list the root of the "raw" container and render it.
raw_root_listing = dbutils.fs.ls("abfss://raw@ungcapstor01.dfs.core.windows.net/")
display(raw_root_listing)

# Point the job at the Discogs location, then execute it.
job = Step02ExtractSparkDiscogs(spark)
job.container_uri = "abfss://raw@ungcapstor01.dfs.core.windows.net/raw/discogs/"
job.run()


In [0]:
# Databricks notebook: Step 03 - Prepare TMDB Discog Candidates Notebook
# ---------------------------------------------------------------
# Purpose: Run Step03PrepareTMDBDiscogsCandidates (scripts_pandas.prepare_tmdb_discogs_candidates) on Databricks Runtime 16.4 LTS
# Environment: ungcap-dlws / markscope secrets
# Author: M. Holahan

# COMMAND ----------
from scripts_pandas.prepare_tmdb_discogs_candidates import Step03PrepareTMDBDiscogsCandidates

# Value forwarded as run(limit=...).
# NOTE(review): presumably caps how many records are processed — confirm
# against Step03PrepareTMDBDiscogsCandidates.run.
candidate_limit = 1000

step = Step03PrepareTMDBDiscogsCandidates()
# Keep the run's return value available to later cells as df_out.
df_out = step.run(limit=candidate_limit)