In [0]:
# Install required dependencies for ABFSS and Azure Blob access.
# fsspec + adlfs back the abfs:// paths used with pandas further down, azure-storage-blob
# is imported for blob listing / SAS generation, and streamlit for the player UI cell.
# NOTE(review): versions are unpinned and --upgrade is used, so a re-run may resolve
# different releases than the ones recorded in the output below — consider pinning.
%pip install --upgrade fsspec adlfs azure-storage-blob streamlit


Collecting fsspec
  Obtaining dependency information for fsspec from https://files.pythonhosted.org/packages/44/4b/e0cfc1a6f17e990f3e64b7d941ddc4acdc7b19d6edd51abf495f32b1a9e4/fsspec-2025.3.2-py3-none-any.whl.metadata
  Downloading fsspec-2025.3.2-py3-none-any.whl.metadata (11 kB)
Collecting adlfs
  Obtaining dependency information for adlfs from https://files.pythonhosted.org/packages/cb/ed/d1bf75c089857d38332cf45416e419b47382b345ba5dfc4fae69397830d9/adlfs-2024.12.0-py3-none-any.whl.metadata
  Downloading adlfs-2024.12.0-py3-none-any.whl.metadata (7.7 kB)
Collecting azure-storage-blob
  Obtaining dependency information for azure-storage-blob from https://files.pythonhosted.org/packages/57/33/085d9352d416e617993821b9d9488222fbb559bc15c3641d6cbd6d16d236/azure_storage_blob-12.25.1-py3-none-any.whl.metadata
  Downloading azure_storage_blob-12.25.1-py3-none-any.whl.metadata (26 kB)
Collecting azure-datalake-store<0.1,>=0.0.53 (from adlfs)
  Obtaining dependency information for azure-datala

In [0]:
# Restart the Python process after the %pip install above.
# NOTE(review): presumably this clears all Python state (imports, variables), so every
# later cell must import/define what it uses — confirm against the Databricks docs.
%restart_python

In [0]:
# Imports needed by this cell (and by sanitize_filename below). They are declared here
# because nothing earlier in the notebook imports them after the Python restart.
import re

import pandas as pd

# NOTE(review): container_name, storage_account_name and storage_account_key are not
# defined in any cell above this one; they must be set before running this cell.
# Load storage_account_key from a secret store rather than pasting it into the notebook.

# Paths using abfs syntax, compatible with fsspec+adlfs
input_path = f"abfs://{container_name}/BooksDatasetWithSummary.parquet"
output_path = f"abfs://{container_name}/BooksDatasetWithAudioPreview.parquet"

# Credentials handed to the abfs filesystem through pandas' storage_options.
adls_read_options = {
    "account_name": storage_account_name,
    "account_key": storage_account_key,
}

# Read Parquet from ADLS
df = pd.read_parquet(input_path, engine="pyarrow", storage_options=adls_read_options)

# Print schema / columns
print("📄 Columns:", df.columns.tolist())

# Add sanitized audio_path column
def sanitize_filename(title):
    """Turn a book title into an ``.mp3`` file name.

    Characters that are not allowed in file names (backslash, slash, ``*``, ``?``,
    ``:``, double quote, ``<``, ``>``, ``|``) are removed, surrounding whitespace is
    stripped, and remaining spaces become underscores.

    Args:
        title: The book title. Normally a ``str``. ``None`` and float NaN (how
            missing values typically show up in a pandas column) are treated as a
            missing title; any other non-string value is converted with ``str()``.

    Returns:
        The sanitized name with ``.mp3`` appended. A missing title yields
        ``"untitled.mp3"``. String inputs produce exactly what they always did.
    """
    if not isinstance(title, str):
        # NaN is the only float that is not equal to itself.
        is_missing = title is None or (isinstance(title, float) and title != title)
        title = "untitled" if is_missing else str(title)
    return re.sub(r'[\\/*?:"<>|]', "", title).strip().replace(" ", "_") + ".mp3"

def _audio_blob_path(book_title):
    """Return the path under audio_summaries/ for the given title's audio file."""
    return f"audio_summaries/{sanitize_filename(book_title)}"


# One audio path per row, derived from the Title column.
df["audio_path"] = df["Title"].apply(_audio_blob_path)

# Save updated Parquet back to ADLS
adls_write_options = {
    "account_name": storage_account_name,
    "account_key": storage_account_key,
}
df.to_parquet(
    output_path,
    engine="pyarrow",
    index=False,
    storage_options=adls_write_options,
)

print("✅ Saved with audio_path column to BooksDatasetWithAudioPreview.parquet")

📄 Columns: ['Title', 'Authors', 'Description', 'Category', 'Publisher', 'Publish Date', 'Price', 'Summary', 'AudioPath']
✅ Saved with audio_path column to BooksDatasetWithAudioPreview.parquet


In [0]:
import os
from datetime import datetime, timedelta, timezone
from urllib.parse import quote

import streamlit as st
from azure.storage.blob import BlobServiceClient, generate_blob_sas, BlobSasPermissions

# The connection string embeds the storage account key, so it is read from the
# environment instead of being pasted into the notebook source.
connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING", "")
if not connection_string:
    raise ValueError(
        "AZURE_STORAGE_CONNECTION_STRING is not set; export it (e.g. from a secret "
        "store) before running this cell."
    )

container_name = "gold"
folder_name = "audio_summaries"

# Initialize blob service
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
container_client = blob_service_client.get_container_client(container_name)

blob_list = container_client.list_blobs(name_starts_with=f"{folder_name}/")

audio_extension = ".mp3"

# Timezone-aware expiry shared by every link generated in this run.
# datetime.utcnow() is deprecated and returns a naive datetime.
sas_expiry = datetime.now(timezone.utc) + timedelta(hours=1)

# Prepare audio file list: (display title, time-limited read-only URL) per .mp3 blob
audio_files = []
for blob in blob_list:
    if not blob.name.endswith(audio_extension):
        continue
    sas_token = generate_blob_sas(
        account_name=blob_service_client.account_name,
        container_name=container_name,
        blob_name=blob.name,
        # Only available when the connection string carries an account key.
        account_key=blob_service_client.credential.account_key,
        permission=BlobSasPermissions(read=True),
        expiry=sas_expiry,
    )
    public_url = f"https://{blob_service_client.account_name}.blob.core.windows.net/{container_name}/{quote(blob.name)}?{sas_token}"
    file_name = blob.name.split("/")[-1]
    # Drop only the trailing extension; str.replace would also strip ".mp3"
    # occurring in the middle of a title.
    audio_files.append((file_name[: -len(audio_extension)], public_url))


In [0]:
# UI navigation: the position in audio_files is kept in session state so it persists
# across Streamlit reruns.
# NOTE(review): the recorded output of this cell shows Streamlit asking to be launched
# with `streamlit run`, so the widgets may not render inside the notebook itself.
if 'index' not in st.session_state:
    st.session_state.index = 0

if not audio_files:
    # Nothing to play; indexing audio_files would raise IndexError.
    st.warning("No audio summaries found.")
else:
    last_index = len(audio_files) - 1
    # Keep the stored index valid even if the list shrank since the last run.
    st.session_state.index = max(0, min(st.session_state.index, last_index))

    # Buttons
    col1, col2, col3 = st.columns([1, 2, 1])
    with col1:
        if st.button("⬅️ Previous") and st.session_state.index > 0:
            st.session_state.index -= 1
    with col3:
        if st.button("Next ➡️") and st.session_state.index < last_index:
            st.session_state.index += 1

    # Read the current item only after the buttons have updated the index; reading it
    # earlier would display the previous item for the run in which a button was clicked.
    book_title, audio_url = audio_files[st.session_state.index]

    # Show current book
    st.markdown(f"### 🎧 Now Playing: `{book_title}`")
    st.audio(audio_url)

# Audio UI styling
st.markdown("""
<style>
    .stAudio audio {
        width: 100% !important;
    }
</style>
""", unsafe_allow_html=True)


2025-04-07 12:16:50.802 
  command:

    streamlit run /databricks/python_shell/scripts/db_ipykernel_launcher.py [ARGUMENTS]


DeltaGenerator()

In [0]:
import html

from IPython.display import Audio, display, HTML

# Preview one of the audio files directly
if audio_files:
    book_title, audio_url = audio_files[0]

    # The title comes from a blob name, so escape it before embedding it in HTML
    # (characters such as & would otherwise be interpreted as markup).
    display(HTML(f"<h2>Now Playing: {html.escape(book_title)}</h2>"))
    display(Audio(audio_url))
else:
    # Indexing an empty list would raise IndexError; say what happened instead.
    print("No audio files available to preview.")


In [0]:
import shutil

# Step 1: Mount the 'gold' container of your Azure Data Lake Gen2
# SECURITY: the storage account key used to be hard-coded in this cell. Any key that
# was committed should be rotated. It is now read from a Databricks secret scope.
# TODO(review): the scope and key names below are placeholders — replace them with the
# names used in your workspace.
_gold_account_key = dbutils.secrets.get(scope="adls-secrets", key="kkstoragemo-account-key")
configs = {
  "fs.azure.account.key.kkstoragemo.dfs.core.windows.net": _gold_account_key
}

# Mount only if not already mounted
mount_point = "/mnt/gold"
if not any(m.mountPoint == mount_point for m in dbutils.fs.mounts()):
    dbutils.fs.mount(
        source = "abfss://gold@kkstoragemo.dfs.core.windows.net/",
        mount_point = mount_point,
        extra_configs = configs
    )

# Step 2: Copy audio_summaries folder to local /tmp
# Clear any copy left by a previous run so files deleted from the container do not
# end up in the archive.
shutil.rmtree("/tmp/audio_summaries", ignore_errors=True)
dbutils.fs.cp("/mnt/gold/audio_summaries", "file:/tmp/audio_summaries", recurse=True)

# Step 3: Zip the folder so it's downloadable (writes /tmp/audio_summaries.zip)
shutil.make_archive("/tmp/audio_summaries", 'zip', "/tmp/audio_summaries")

# Step 4: Move the zip to /dbfs/FileStore so you can download via browser
dbutils.fs.cp("file:/tmp/audio_summaries.zip", "dbfs:/FileStore/audio_summaries.zip")

# Step 5: Show download link
displayHTML("""
  <h3>✅ Your download is ready:</h3>
  <a href="/files/audio_summaries.zip" target="_blank">📥 Click here to download audio_summaries.zip</a>
""")
