# Full Idempotent Setup

## 1. Setup

In [0]:
# Notebook parameters, exposed as Databricks widgets.
# Each widget is created and then read straight away; the values come back as strings.

# 1. Target catalog (free-text widget, default "apex_bank_demo")
dbutils.widgets.text("catalog_name", "apex_bank_demo", "1. Catalog Name")
catalog_name = dbutils.widgets.get("catalog_name")

# 2. Reset switch (dropdown of the strings "True"/"False", default "False")
dbutils.widgets.dropdown("reset_mode", "False", ["True", "False"], "2. Nuke & Reset?")
reset_mode = dbutils.widgets.get("reset_mode")

## 2. Cleanup

In [0]:
# Destructive reset: drops the whole catalog (CASCADE) when reset_mode is the string "True".
if reset_mode == "True":
    print(f"Dropping catalog {catalog_name}...")
    # The catalog name is free text from a widget. Backtick-delimit it (doubling any
    # embedded backtick) so it is always parsed as one identifier, including names
    # that contain hyphens or other special characters.
    quoted_catalog = "`" + catalog_name.replace("`", "``") + "`"
    spark.sql(f"DROP CATALOG IF EXISTS {quoted_catalog} CASCADE")

## 3. Infrastructure

In [0]:
# Create the catalog, its schemas, and the landing-zone volume.
# Every statement uses IF NOT EXISTS, so re-running this cell is safe.
print(f"Building architecture in {catalog_name}...")

# The catalog name is free text from a widget. Backtick-delimit it (doubling any
# embedded backtick) so it is always parsed as one identifier, including names
# that contain hyphens or other special characters.
quoted_catalog = "`" + catalog_name.replace("`", "``") + "`"

# Create Catalog and make it the current one, so the unqualified
# schema/volume names below resolve inside it.
spark.sql(f"CREATE CATALOG IF NOT EXISTS {quoted_catalog}")
spark.sql(f"USE CATALOG {quoted_catalog}")

# Create Schemas
for schema_name in ("raw_data", "analytics"):
    spark.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")

# Create Volume (The Landing Zone)
spark.sql("CREATE VOLUME IF NOT EXISTS raw_data.landing_zone")

print("Infrastructure ready")

Building architecture in apex_bank_demo...
Infrastructure ready


## 4. Seed Data

_Note: this step is blocked on shared clusters, which restrict access to the local filesystem._

In [0]:
import os

# Copy every CSV from the repo's data/ folder (next to this notebook) into the
# landing-zone volume.

# Dynamic path retrieval for the Git repo: the data/ folder is looked up
# relative to this notebook's own workspace path.
notebook_path = (
    dbutils.notebook.entry_point.getDbutils()
    .notebook()
    .getContext()
    .notebookPath()
    .get()
)
repo_root = f"/Workspace{os.path.dirname(notebook_path)}"
source_path = os.path.join(repo_root, "data")

# Target Volume Path (trailing slash required: file names are appended directly)
destination_volume_path = f"/Volumes/{catalog_name}/raw_data/landing_zone/"

print(
    f"Copying data...\nFrom: {source_path}\nTo: {destination_volume_path}"
)

# Defined up front so the name exists even when the data directory is missing.
files = []

# isdir rather than exists: the path is handed to os.listdir below.
if not os.path.isdir(source_path):
    print(f"Error: Data directory not found at {source_path}")
else:
    # Sorted so the copy order (and the log) is deterministic.
    files = sorted(
        name for name in os.listdir(source_path) if name.endswith(".csv")
    )
    for file in files:
        # The source is addressed with the file: scheme; the target is the volume path.
        source_file_uri = f"file:{os.path.join(source_path, file)}"
        target_file_uri = f"{destination_volume_path}{file}"
        dbutils.fs.cp(source_file_uri, target_file_uri)
        print(f"  -> Copied {file}")

    # Only report completion when the copy loop actually ran.
    print(
        f"Data seeding complete. {len(files)} files transferred."
    )

Copying data...
From: /Workspace/Users/philip@thephilipjones.com/apex-bank-demo/data
To: /Volumes/apex_bank_demo/raw_data/landing_zone/


[0;31m---------------------------------------------------------------------------[0m
[0;31mExecutionError[0m                            Traceback (most recent call last)
File [0;32m<command-7095524988749325>, line 27[0m
[1;32m     25[0m         source_file_uri [38;5;241m=[39m [38;5;124mf[39m[38;5;124m"[39m[38;5;124mfile:[39m[38;5;132;01m{[39;00mos[38;5;241m.[39mpath[38;5;241m.[39mjoin(source_path, file)[38;5;132;01m}[39;00m[38;5;124m"[39m
[1;32m     26[0m         target_file_uri [38;5;241m=[39m [38;5;124mf[39m[38;5;124m"[39m[38;5;132;01m{[39;00mdestination_volume_path[38;5;132;01m}[39;00m[38;5;132;01m{[39;00mfile[38;5;132;01m}[39;00m[38;5;124m"[39m
[0;32m---> 27[0m         dbutils[38;5;241m.[39mfs[38;5;241m.[39mcp(
[1;32m     28[0m             source_file_uri, 
[1;32m     29[0m             target_file_uri
[1;32m     30[0m         )
[1;32m     31[0m         [38;5;28mprint[39m([38;5;124mf[39m[38;5;124m"[39m[38;5;124m  -> Co

## 5. Verify

In [0]:
# List the landing-zone volume and render the listing.
display(dbutils.fs.ls(destination_volume_path))

path,name,size,modificationTime
dbfs:/Volumes/apex_bank_demo/raw_data/landing_zone/synthetic_accounts.csv,synthetic_accounts.csv,961096,1763478289000
dbfs:/Volumes/apex_bank_demo/raw_data/landing_zone/synthetic_fraud_labels.csv,synthetic_fraud_labels.csv,8812501,1763478290000
dbfs:/Volumes/apex_bank_demo/raw_data/landing_zone/synthetic_transactions.csv,synthetic_transactions.csv,11383193,1763478290000
