main

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from delta.tables import *
import re

# Configuration
catalog_name = "main"
database_name = "retail_lakehouse"

# Paths
bronze_path = f"/Volumes/{catalog_name}/{database_name}/bronze"
silver_path = f"/Volumes/{catalog_name}/{database_name}/silver"
checkpoints_path = f"/Volumes/{catalog_name}/{database_name}/checkpoints"

# Create silver directory
try:
    dbutils.fs.mkdirs(silver_path)
except:
    print("Note: dbutils not available - running in local environment")

# Use the database
try:
    spark.sql(f"USE {database_name}")
except:
    print(f"Note: Database {database_name} may not exist yet")

print(f"Bronze Path: {bronze_path}")
print(f"Silver Path: {silver_path}")
print(f"Using database: {database_name}")


def clean_sales_data():
    """Clean and enrich sales data from bronze to silver layer"""
    
    print("Processing Sales Data...")
    
    try:
        # Read directly from Delta path
        sales_bronze = spark.read.format("delta").load(f"{bronze_path}/sales_bronze")
    except Exception as e:
        print(f"Could not read sales bronze data from path: {e}")
        print("Creating empty sales dataframe with expected schema...")
        # Create empty dataframe with schema
        sales_schema = StructType([
            StructField("transaction_id", StringType(), True),
            StructField("transaction_date", StringType(), True),
            StructField("transaction_time", StringType(), True),
            StructField("store_id", StringType(), True),
            StructField("store_name", StringType(), True),
            StructField("region", StringType(), True),
            StructField("product_id", StringType(), True),
            StructField("product_name", StringType(), True),
            StructField("category", StringType(), True),
            StructField("quantity", IntegerType(), True),
            StructField("unit_price", DoubleType(), True),
            StructField("total_amount", DoubleType(), True),
            StructField("discount_percent", IntegerType(), True),
            StructField("discount_amount", DoubleType(), True),
            StructField("final_amount", DoubleType(), True),
            StructField("payment_method", StringType(), True),
            StructField("customer_id", StringType(), True),
            StructField("customer_segment", StringType(), True),
            StructField("sales_person_id", StringType(), True),
            StructField("promotion_code", StringType(), True),
            StructField("file_name", StringType(), True),
            StructField("processing_time", TimestampType(), True),
            StructField("bronze_layer", StringType(), True)
        ])
        sales_bronze = spark.createDataFrame([], sales_schema)
    
    if sales_bronze.count() == 0:
        print("No sales data found, returning empty dataframe")
        return sales_bronze
    
    # Data cleaning and enrichment
    sales_silver = (sales_bronze
                    # Data type conversions and cleaning
                    .withColumn("transaction_date", to_date(col("transaction_date"), "yyyy-MM-dd"))
                    .withColumn("transaction_datetime", 
                               to_timestamp(concat(col("transaction_date"), lit(" "), col("transaction_time")), 
                                          "yyyy-MM-dd HH:mm:ss"))
                    
                    # Clean and validate numeric fields
                    .withColumn("quantity", 
                               when(col("quantity") > 0, col("quantity")).otherwise(1))
                    .withColumn("unit_price", 
                               when(col("unit_price") > 0, col("unit_price")).otherwise(0.0))
                    .withColumn("final_amount", 
                               when(col("final_amount") >= 0, col("final_amount")).otherwise(0.0))
                    
                    # Clean text fields
                    .withColumn("store_name", trim(upper(col("store_name"))))
                    .withColumn("product_name", trim(initcap(col("product_name"))))
                    .withColumn("category", trim(initcap(col("category"))))
                    .withColumn("region", trim(upper(col("region"))))
                    .withColumn("payment_method", trim(initcap(col("payment_method"))))
                    .withColumn("customer_segment", trim(initcap(col("customer_segment"))))
                    
                    # Add enrichment columns
                    .withColumn("year", year(col("transaction_date")))
                    .withColumn("month", month(col("transaction_date")))
                    .withColumn("quarter", quarter(col("transaction_date")))
                    .withColumn("day_of_week", dayofweek(col("transaction_date")))
                    .withColumn("day_name", date_format(col("transaction_date"), "EEEE"))
                    .withColumn("month_name", date_format(col("transaction_date"), "MMMM"))
                    .withColumn("hour", hour(col("transaction_datetime")))
                    
                    # Business logic enrichment
                    .withColumn("is_weekend", 
                               when(col("day_of_week").isin([1, 7]), True).otherwise(False))
                    .withColumn("time_of_day", 
                               when(col("hour") < 6, "Night")
                               .when(col("hour") < 12, "Morning")
                               .when(col("hour") < 18, "Afternoon")
                               .otherwise("Evening"))
                    
                    # Profit calculation (assuming margin of 30%)
                    .withColumn("estimated_cost", col("final_amount") * 0.7)
                    .withColumn("estimated_profit", col("final_amount") * 0.3)
                    
                    # Transaction size categorization
                    .withColumn("transaction_size", 
                               when(col("final_amount") < 25, "Small")
                               .when(col("final_amount") < 100, "Medium")
                               .when(col("final_amount") < 500, "Large")
                               .otherwise("Very Large"))
                    
                    # Data quality flags
                    .withColumn("data_quality_flag", 
                               when((col("transaction_id").isNull()) | 
                                    (col("final_amount") < 0) | 
                                    (col("quantity") <= 0), "Poor")
                               .when(col("promotion_code").isNull() & 
                                     (col("discount_percent") > 0), "Questionable")
                               .otherwise("Good"))
                    
                    # Add processing metadata
                    .withColumn("silver_processing_time", current_timestamp())
                    .withColumn("record_source", lit("sales_bronze"))
                    
                    # Remove duplicates based on transaction_id
                    .dropDuplicates(["transaction_id"])
                    
                    # Filter out poor quality records
                    .filter(col("data_quality_flag") != "Poor")
    )
    
    return sales_silver


def clean_customer_data():
    """Clean and enrich customer data from bronze to silver layer"""
    
    print("Processing Customer Data...")
    
    try:
        # Read directly from Delta path
        customer_bronze = spark.read.format("delta").load(f"{bronze_path}/customer_bronze")
    except Exception as e:
        print(f"Could not read customer bronze data from path: {e}")
        print("Creating empty customer dataframe with expected schema...")
        # Create empty dataframe with schema
        customer_schema = StructType([
            StructField("customer_id", StringType(), True),
            StructField("first_name", StringType(), True),
            StructField("last_name", StringType(), True),
            StructField("email", StringType(), True),
            StructField("phone", StringType(), True),
            StructField("date_of_birth", StringType(), True),
            StructField("gender", StringType(), True),
            StructField("address_line1", StringType(), True),
            StructField("city", StringType(), True),
            StructField("state", StringType(), True),
            StructField("zip_code", StringType(), True),
            StructField("country", StringType(), True),
            StructField("customer_segment", StringType(), True),
            StructField("registration_date", StringType(), True),
            StructField("is_active", BooleanType(), True),
            StructField("file_name", StringType(), True),
            StructField("processing_time", TimestampType(), True),
            StructField("bronze_layer", StringType(), True)
        ])
        customer_bronze = spark.createDataFrame([], customer_schema)
    
    if customer_bronze.count() == 0:
        print("No customer data found, returning empty dataframe")
        return customer_bronze
    
    # Data cleaning and enrichment
    customer_silver = (customer_bronze
                       # Data type conversions
                       .withColumn("date_of_birth", to_date(col("date_of_birth"), "yyyy-MM-dd"))
                       .withColumn("registration_date", to_date(col("registration_date"), "yyyy-MM-dd"))
                       
                       # Clean text fields
                       .withColumn("first_name", trim(initcap(col("first_name"))))
                       .withColumn("last_name", trim(initcap(col("last_name"))))
                       .withColumn("email", trim(lower(col("email"))))
                       .withColumn("city", trim(initcap(col("city"))))
                       .withColumn("state", trim(upper(col("state"))))
                       .withColumn("country", trim(upper(col("country"))))
                       .withColumn("customer_segment", trim(initcap(col("customer_segment"))))
                       
                       # Clean phone numbers (remove special characters)
                       .withColumn("phone_clean", 
                                  regexp_replace(col("phone"), "[^0-9]", ""))
                       
                       # Calculate age
                       .withColumn("age", 
                                  floor(datediff(current_date(), col("date_of_birth")) / 365.25))
                       
                       # Age group categorization
                       .withColumn("age_group", 
                                  when(col("age") < 25, "18-24")
                                  .when(col("age") < 35, "25-34")
                                  .when(col("age") < 45, "35-44")
                                  .when(col("age") < 55, "45-54")
                                  .when(col("age") < 65, "55-64")
                                  .otherwise("65+"))
                       
                       # Customer tenure
                       .withColumn("customer_tenure_days", 
                                  datediff(current_date(), col("registration_date")))
                       .withColumn("customer_tenure_years", 
                                  floor(col("customer_tenure_days") / 365.25))
                       
                       # Tenure category
                       .withColumn("tenure_category", 
                                  when(col("customer_tenure_years") < 1, "New")
                                  .when(col("customer_tenure_years") < 3, "Regular")
                                  .when(col("customer_tenure_years") < 5, "Loyal")
                                  .otherwise("VIP"))
                       
                       # Email validation
                       .withColumn("email_valid", 
                                  when(col("email").rlike("^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"), True)
                                  .otherwise(False))
                       
                       # Data quality assessment
                       .withColumn("data_quality_score", 
                                  (when(col("first_name").isNotNull(), 1).otherwise(0) +
                                   when(col("last_name").isNotNull(), 1).otherwise(0) +
                                   when(col("email_valid") == True, 1).otherwise(0) +
                                   when(col("phone_clean").isNotNull(), 1).otherwise(0) +
                                   when(col("date_of_birth").isNotNull(), 1).otherwise(0)))
                       
                       .withColumn("data_quality_flag", 
                                  when(col("data_quality_score") >= 4, "Excellent")
                                  .when(col("data_quality_score") >= 3, "Good")
                                  .when(col("data_quality_score") >= 2, "Fair")
                                  .otherwise("Poor"))
                       
                       # Add processing metadata
                       .withColumn("silver_processing_time", current_timestamp())
                       .withColumn("record_source", lit("customer_bronze"))
                       
                       # Remove duplicates
                       .dropDuplicates(["customer_id"])
                       
                       # Filter out poor quality records
                       .filter(col("data_quality_flag") != "Poor")
    )
    
    return customer_silver


def clean_product_data():
    """Clean and enrich product data from bronze to silver layer"""
    
    print("Processing Product Data...")
    
    try:
        # Read directly from Delta path
        product_bronze = spark.read.format("delta").load(f"{bronze_path}/product_bronze")
    except Exception as e:
        print(f"Could not read product bronze data from path: {e}")
        print("Creating empty product dataframe with expected schema...")
        # Create empty dataframe with schema
        product_schema = StructType([
            StructField("product_id", StringType(), True),
            StructField("product_name", StringType(), True),
            StructField("category", StringType(), True),
            StructField("sub_category", StringType(), True),
            StructField("brand", StringType(), True),
            StructField("supplier_id", StringType(), True),
            StructField("cost_price", DoubleType(), True),
            StructField("selling_price", DoubleType(), True),
            StructField("weight", DoubleType(), True),
            StructField("dimensions", StringType(), True),
            StructField("color", StringType(), True),
            StructField("size", StringType(), True),
            StructField("launch_date", StringType(), True),
            StructField("is_active", BooleanType(), True),
            StructField("file_name", StringType(), True),
            StructField("processing_time", TimestampType(), True),
            StructField("bronze_layer", StringType(), True)
        ])
        product_bronze = spark.createDataFrame([], product_schema)
    
    if product_bronze.count() == 0:
        print("No product data found, returning empty dataframe")
        return product_bronze
    
    # Data cleaning and enrichment
    product_silver = (product_bronze
                      .withColumn("launch_date", to_date(col("launch_date"), "yyyy-MM-dd"))
                      .withColumn("product_name", trim(initcap(col("product_name"))))
                      .withColumn("category", trim(initcap(col("category"))))
                      .withColumn("sub_category", trim(initcap(col("sub_category"))))
                      .withColumn("brand", trim(initcap(col("brand"))))
                      .withColumn("color", trim(initcap(col("color"))))
                      .withColumn("size", trim(upper(col("size"))))
                      .withColumn("cost_price", 
                                 when(col("cost_price") > 0, col("cost_price")).otherwise(0.0))
                      .withColumn("selling_price", 
                                 when(col("selling_price") > 0, col("selling_price")).otherwise(0.0))
                      .withColumn("margin_amount", col("selling_price") - col("cost_price"))
                      .withColumn("margin_percentage", 
                                 when(col("selling_price") > 0, 
                                      round((col("margin_amount") / col("selling_price")) * 100, 2))
                                 .otherwise(0.0))
                              .withColumn("price_category", 
                                 when(col("selling_price") < 20, "Budget")
                                 .when(col("selling_price") < 100, "Mid-Range")
                                 .when(col("selling_price") < 300, "Premium")
                                 .otherwise("Luxury"))
                                            .withColumn("product_age_days", 
                                 datediff(current_date(), col("launch_date")))
                      .withColumn("product_age_category", 
                                 when(col("product_age_days") < 30, "New")
                                 .when(col("product_age_days") < 180, "Recent")
                                 .when(col("product_age_days") < 365, "Established")
                                 .otherwise("Mature"))
                      .withColumn("weight_category", 
                                 when(col("weight") < 1, "Light")
                                 .when(col("weight") < 5, "Medium")
                                 .when(col("weight") < 20, "Heavy")
                                 .otherwise("Very Heavy"))
                      .withColumn("data_quality_score", 
                                 (when(col("product_name").isNotNull(), 1).otherwise(0) +
                                  when(col("category").isNotNull(), 1).otherwise(0) +
                                  when(col("selling_price") > 0, 1).otherwise(0) +
                                  when(col("cost_price") > 0, 1).otherwise(0) +
                                  when(col("launch_date").isNotNull(), 1).otherwise(0)))
                      
                      .withColumn("data_quality_flag", 
                                 when(col("data_quality_score") >= 4, "Excellent")
                                 .when(col("data_quality_score") >= 3, "Good")
                                 .otherwise("Fair"))
                      .withColumn("silver_processing_time", current_timestamp())
                      .withColumn("record_source", lit("product_bronze"))
                      .dropDuplicates(["product_id"])
    )
    
    return product_silver


# Process all datasets
print(" Starting Silver Layer Data Processing...")

# Clean sales data
sales_silver_df = clean_sales_data()

# Clean customer data
customer_silver_df = clean_customer_data()

# Clean product data
product_silver_df = clean_product_data()

print("Data cleaning completed!")

# COMMAND ----------

# Write sales silver data
print("Writing Sales Silver data...")
try:
    if sales_silver_df.count() > 0:
        (sales_silver_df
         .write
         .mode("overwrite")
         .option("overwriteSchema", "true")
         .format("delta")
         .save(f"{silver_path}/sales_silver")
        )
        print("Sales Silver data saved successfully!")
    else:
        print(" No sales data to write")
except Exception as e:
    print(f"Error writing sales silver data: {e}")

# Write customer silver data
print("Writing Customer Silver data...")
try:
    if customer_silver_df.count() > 0:
        (customer_silver_df
         .write
         .mode("overwrite")
         .option("overwriteSchema", "true")
         .format("delta")
         .save(f"{silver_path}/customer_silver")
        )
        print("Customer Silver data saved successfully!")
    else:
        print(" No customer data to write")
except Exception as e:
    print(f"Error writing customer silver data: {e}")

# Write product silver data
print("Writing Product Silver data...")
try:
    if product_silver_df.count() > 0:
        (product_silver_df
         .write
         .mode("overwrite")
         .option("overwriteSchema", "true")
         .format("delta")
         .save(f"{silver_path}/product_silver")
        )
        print("Product Silver data saved successfully!")
    else:
        print(" No product data to write")
except Exception as e:
    print(f"Error writing product silver data: {e}")

print("Silver layer data saving process completed!")


# Create Delta tables for silver layer
print("Creating Silver Layer Delta Tables...")

try:
    # Sales Silver Table
    spark.sql(f"""
    CREATE TABLE IF NOT EXISTS {database_name}.sales_silver
    USING DELTA
    LOCATION '{silver_path}/sales_silver'
    """)
    print("Sales Silver table created!")
except Exception as e:
    print(f"Error creating sales silver table: {e}")

try:
    # Customer Silver Table
    spark.sql(f"""
    CREATE TABLE IF NOT EXISTS {database_name}.customer_silver
    USING DELTA
    LOCATION '{silver_path}/customer_silver'
    """)
    print("Customer Silver table created!")
except Exception as e:
    print(f"Error creating customer silver table: {e}")

try:
    # Product Silver Table
    spark.sql(f"""
    CREATE TABLE IF NOT EXISTS {database_name}.product_silver
    USING DELTA
    LOCATION '{silver_path}/product_silver'
    """)
    print("Product Silver table created!")
except Exception as e:
    print(f"Error creating product silver table: {e}")

print("Silver layer tables creation process completed!")


# Data quality assessment using direct Delta paths
print("=== SILVER LAYER DATA QUALITY ASSESSMENT ===\n")

try:
    # Sales data quality
    print(" Sales Data Quality:")
    sales_quality = spark.sql(f"""
    SELECT 
        data_quality_flag,
        COUNT(*) as record_count,
        ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM delta.`{silver_path}/sales_silver`), 2) as percentage
    FROM delta.`{silver_path}/sales_silver` 
    GROUP BY data_quality_flag 
    ORDER BY record_count DESC
    """)
    sales_quality.show()
except Exception as e:
    print(f"Could not assess sales data quality: {e}")

try:
    # Customer data quality
    print(" Customer Data Quality:")
    customer_quality = spark.sql(f"""
    SELECT 
        data_quality_flag,
        COUNT(*) as record_count,
        ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM delta.`{silver_path}/customer_silver`), 2) as percentage
    FROM delta.`{silver_path}/customer_silver` 
    GROUP BY data_quality_flag 
    ORDER BY record_count DESC
    """)
    customer_quality.show()
except Exception as e:
    print(f"Could not assess customer data quality: {e}")

try:
    # Product data quality
    print(" Product Data Quality:")
    product_quality = spark.sql(f"""
    SELECT 
        data_quality_flag,
        COUNT(*) as record_count,
        ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM delta.`{silver_path}/product_silver`), 2) as percentage
    FROM delta.`{silver_path}/product_silver` 
    GROUP BY data_quality_flag 
    ORDER BY record_count DESC
    """)
    product_quality.show()
except Exception as e:
    print(f"Could not assess product data quality: {e}")

# COMMAND ----------

# Generate data statistics using direct Delta paths
print("=== DATA PROFILING SUMMARY ===\n")

try:
    # Sales data statistics
    print(" Sales Data Statistics:")
    spark.sql(f"""
    SELECT 
        COUNT(*) as total_records,
        COUNT(DISTINCT transaction_id) as unique_transactions,
        COUNT(DISTINCT customer_id) as unique_customers,
        COUNT(DISTINCT product_id) as unique_products,
        MIN(transaction_date) as earliest_date,
        MAX(transaction_date) as latest_date,
        ROUND(AVG(final_amount), 2) as avg_transaction_amount,
        ROUND(SUM(final_amount), 2) as total_revenue
    FROM delta.`{silver_path}/sales_silver`
    """).show()
except Exception as e:
    print(f"Could not generate sales statistics: {e}")

try:
    # Top categories by revenue
    print(" Top Categories by Revenue:")
    spark.sql(f"""
    SELECT 
        category,
        COUNT(*) as transactions,
        ROUND(SUM(final_amount), 2) as total_revenue,
        ROUND(AVG(final_amount), 2) as avg_transaction_amount
    FROM delta.`{silver_path}/sales_silver`
    GROUP BY category
    ORDER BY total_revenue DESC
    LIMIT 10
    """).show()
except Exception as e:
    print(f"Could not generate category revenue data: {e}")

try:
    # Customer segments distribution
    print(" Customer Segments Distribution:")
    spark.sql(f"""
    SELECT 
        customer_segment,
        COUNT(*) as customer_count,
        ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM delta.`{silver_path}/customer_silver`), 2) as percentage
    FROM delta.`{silver_path}/customer_silver`
    GROUP BY customer_segment
    ORDER BY customer_count DESC
    """).show()
except Exception as e:
    print(f"Could not generate customer segment data: {e}")

try:
    # Product categories distribution
    print(" Product Categories Distribution:")
    spark.sql(f"""
    SELECT 
        category,
        COUNT(*) as product_count,
        ROUND(AVG(selling_price), 2) as avg_price,
        ROUND(AVG(margin_percentage), 2) as avg_margin_pct
    FROM delta.`{silver_path}/product_silver`
    WHERE is_active = true
    GROUP BY category
    ORDER BY product_count DESC
    """).show()
except Exception as e:
    print(f"Could not generate product category data: {e}")

# COMMAND ----------

print("=== SILVER LAYER SAMPLE DATA ===\n")

try:
    # Sales silver sample
    print(" Sales Silver Sample:")
    sales_sample = spark.sql(f"SELECT * FROM delta.`{silver_path}/sales_silver` ORDER BY transaction_date DESC LIMIT 5")
    sales_sample.show()
except Exception as e:
    print(f"Could not show sales sample: {e}")

try:
    # Customer silver sample
    print(" Customer Silver Sample:")
    customer_sample = spark.sql(f"SELECT * FROM delta.`{silver_path}/customer_silver` WHERE data_quality_flag = 'Excellent' LIMIT 5")
    customer_sample.show()
except Exception as e:
    print(f"Could not show customer sample: {e}")

try:
    # Product silver sample
    print(" Product Silver Sample:")
    product_sample = spark.sql(f"SELECT * FROM delta.`{silver_path}/product_silver` WHERE is_active = true LIMIT 5")
    product_sample.show()
except Exception as e:
    print(f"Could not show product sample: {e}")

# COMMAND ----------

print("Creating Silver Layer Views...")

try:
    # Active customers view
    spark.sql(f"""
    CREATE OR REPLACE VIEW {database_name}.active_customers_vw AS
    SELECT *
    FROM delta.`{silver_path}/customer_silver`
    WHERE is_active = true AND data_quality_flag IN ('Excellent', 'Good')
    """)
    print("Active customers view created!")
except Exception as e:
    print(f"Could not create active customers view: {e}")

try:
    # Active products view
    spark.sql(f"""
    CREATE OR REPLACE VIEW {database_name}.active_products_vw AS
    SELECT *
    FROM delta.`{silver_path}/product_silver`
    WHERE is_active = true
    """)
    print("Active products view created!")
except Exception as e:
    print(f"Could not create active products view: {e}")

try:
    # Clean sales view
    spark.sql(f"""
    CREATE OR REPLACE VIEW {database_name}.clean_sales_vw AS
    SELECT *
    FROM delta.`{silver_path}/sales_silver`
    WHERE data_quality_flag = 'Good'
    """)
    print("Clean sales view created!")
except Exception as e:
    print(f"Could not create clean sales view: {e}")

print("Silver layer views creation process completed!")

print("Silver Layer Processing Completed!")

# Final record counts
try:
    sales_count = spark.sql(f"SELECT COUNT(*) FROM delta.`{silver_path}/sales_silver`").collect()[0][0]
    customer_count = spark.sql(f"SELECT COUNT(*) FROM delta.`{silver_path}/customer_silver`").collect()[0][0]
    product_count = spark.sql(f"SELECT COUNT(*) FROM delta.`{silver_path}/product_silver`").collect()[0][0]

    print(f"\n Final Record Counts:")
    print(f"   Sales: {sales_count:,}")
    print(f"   Customers: {customer_count:,}")
    print(f"   Products: {product_count:,}")
except Exception as e:
    print(f"Could not get final record counts: {e}")

Bronze Path: /Volumes/main/retail_lakehouse/bronze
Silver Path: /Volumes/main/retail_lakehouse/silver
Using database: retail_lakehouse
🚀 Starting Silver Layer Data Processing...
Processing Sales Data...
Processing Customer Data...
Processing Product Data...
✅ Data cleaning completed!
Writing Sales Silver data...
✅ Sales Silver data saved successfully!
Writing Customer Silver data...
✅ Customer Silver data saved successfully!
Writing Product Silver data...
✅ Product Silver data saved successfully!
✅ Silver layer data saving process completed!
Creating Silver Layer Delta Tables...
Error creating sales silver table: [RequestId=3aef6a60-186a-49e6-b2ed-4492277543a4 ErrorClass=INVALID_PARAMETER_VALUE.INVALID_PARAMETER_VALUE] Missing cloud file system scheme

JVM stacktrace:
com.databricks.sql.managedcatalog.UnityCatalogServiceException
	at com.databricks.managedcatalog.ErrorDetailsHandler.wrapServiceException(ErrorDetailsHandler.scala:126)
	at com.databricks.managedcatalog.ErrorDetailsHandle