 ## Install Dependencies

In [0]:
!pip install kagglehub

## STEP 1: Create a Unity Catalog Volume (one-time)

In [0]:
%sql
-- One-time setup of the catalog and schema that hold the raw credit data.
-- Every statement uses IF NOT EXISTS, so re-running this cell is a no-op
-- once the objects exist.
CREATE CATALOG IF NOT EXISTS credit_catalog;
CREATE SCHEMA IF NOT EXISTS credit_catalog.credit_schema;

-- Volume that receives the downloaded dataset files; the notebook addresses
-- it as /Volumes/credit_catalog/credit_schema/raw_data.
CREATE VOLUME IF NOT EXISTS credit_catalog.credit_schema.raw_data;


## STEP 2: Download dataset using KaggleHub (Python)

In [0]:
import os
import kagglehub

# Fetch the Kaggle dataset through KaggleHub and keep the path it returns;
# the copy step in a later cell lists this directory.
dataset_path = kagglehub.dataset_download("parisrohan/credit-score-classification")

# Trailing expression so the notebook displays the downloaded entries.
os.listdir(dataset_path)


## STEP 3: Copy files to Unity Catalog Volume

In [0]:
# Imported here as well so this cell does not depend on an earlier cell
# having already brought `os` into scope.
import os
import shutil

# Destination: the Unity Catalog volume created in STEP 1.
volume_path = "/Volumes/credit_catalog/credit_schema/raw_data"

# Copy each downloaded file into the volume. `dataset_path` comes from the
# KaggleHub download cell.
for file in os.listdir(dataset_path):
    src = os.path.join(dataset_path, file)
    if not os.path.isfile(src):
        # shutil.copy only handles regular files and raises on directories;
        # report the entry instead of aborting the whole copy.
        print(f"Skipping non-file entry: {src}")
        continue
    dst = os.path.join(volume_path, file)
    shutil.copy(src, dst)

# The original literal was mojibake ("âœ…"): the UTF-8 bytes of the check-mark
# emoji decoded with a single-byte code page.
print("✅ Files copied to Unity Catalog Volume")


## STEP 4: Read CSV from Volume using Spark

In [0]:
# Load the two raw CSV files from the Unity Catalog volume. The first row of
# each file is used as the header and Spark infers the column types.
train_df = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv("/Volumes/credit_catalog/credit_schema/raw_data/train.csv")
)

test_df = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv("/Volumes/credit_catalog/credit_schema/raw_data/test.csv")
)


## STEP 5: Save into Unity Catalog tables

In [0]:
# Persist both DataFrames as Unity Catalog tables; "overwrite" replaces the
# contents of a table that already exists under the same name.
train_df.write.saveAsTable("credit_catalog.credit_schema.credit_train", mode="overwrite")
test_df.write.saveAsTable("credit_catalog.credit_schema.credit_test", mode="overwrite")


## STEP 6: Create Medallion Schemas

In [0]:
%sql
-- Same catalog as in STEP 1; IF NOT EXISTS makes the repeated statement a
-- no-op when the catalog is already there.
CREATE CATALOG IF NOT EXISTS credit_catalog;

-- One schema per medallion layer (bronze, silver, gold), each created only
-- if it does not already exist.
CREATE SCHEMA IF NOT EXISTS credit_catalog.bronze;
CREATE SCHEMA IF NOT EXISTS credit_catalog.silver;
CREATE SCHEMA IF NOT EXISTS credit_catalog.gold;
