In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *



In [0]:
# Bootstrap the catalog objects this notebook relies on: the `practise`
# catalog and its bronze / silver / gold schemas. Safe to re-run.

# The catalog listing is only used to choose the status message below.
catalogs = [row.catalog for row in spark.sql("SHOW CATALOGS").collect()]
catalog_existed = 'practise' in catalogs

# IF NOT EXISTS keeps the statement idempotent even if the catalog is created
# by someone else between the listing above and this call, and matches the
# CREATE SCHEMA statements further down.
spark.sql("CREATE CATALOG IF NOT EXISTS practise")

if catalog_existed:
    print("✅ Catalog 'practise' already exists")
else:
    print("✅ Catalog 'practise' created")

# Make `practise` the session's current catalog so the unqualified schema
# names below are created inside it.
spark.sql("USE CATALOG practise")

# One schema per medallion layer.
for schema_name in ("bronze", "silver", "gold"):
    spark.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")

print("✅ Schemas created: bronze, silver, gold")

In [0]:
from pyspark.sql.functions import current_timestamp

# Bronze ingestion: read the raw CSV files from the volume and persist each
# dataset as a Delta table, adding only an ingestion-time column.
volume_path = "/Volumes/practise/bronze/raw_data"


def read_raw_csv(file_pattern):
    """Read the CSV files under ``volume_path`` that match ``file_pattern``.

    The first line of each file is used as the header, column types are
    inferred by Spark, and a ``Bronze_Ing_Time`` column populated from
    ``current_timestamp()`` is appended.

    Args:
        file_pattern: File name or glob relative to ``volume_path``,
            e.g. ``"sales*.csv"``.

    Returns:
        A Spark DataFrame with the file columns plus ``Bronze_Ing_Time``.
    """
    return (
        spark.read.format("csv")
        .option("header", True)
        .option("inferSchema", True)
        .load(f"{volume_path}/{file_pattern}")
        .withColumn("Bronze_Ing_Time", current_timestamp())
    )


def write_bronze_table(df, table_name):
    """Overwrite the Delta table ``table_name`` with the contents of ``df``.

    ``overwriteSchema`` is enabled so that a change in the inferred CSV schema
    replaces the table schema instead of failing the write.

    Args:
        df: Spark DataFrame to persist.
        table_name: Target table identifier passed to ``saveAsTable``.
    """
    (
        df.write.format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable(table_name)
    )


df_features = read_raw_csv("Features*.csv")
df_sales = read_raw_csv("sales*.csv")
df_stores = read_raw_csv("stores*.csv")

# Table names are fully qualified with the same catalog that `volume_path`
# points at, so this cell writes to the right place even when the session's
# current catalog has not been switched to `practise`.
write_bronze_table(df_features, "practise.bronze.features")
write_bronze_table(df_sales, "practise.bronze.sales")
write_bronze_table(df_stores, "practise.bronze.stores")

In [0]:
# Preview the persisted bronze table rather than the in-memory `df_features`:
# `df_features` is not materialised, so displaying it would re-read the CSV
# files and re-evaluate current_timestamp(), showing a Bronze_Ing_Time that
# differs from the value actually written to the table.
display(spark.table("practise.bronze.features").limit(5))