In [0]:
# Point the session at the project catalog, then at its gold schema, so
# unqualified table names resolve there.
catalog_stmt, schema_stmt = "USE CATALOG workspace_webcreative1", "USE gold"
spark.sql(catalog_stmt)
spark.sql(schema_stmt)

In [0]:
# --- Catalog and its bronze / silver / gold schemas ---
CATALOG = "workspace_webcreative1"
BRONZE_SCHEMA, SILVER_SCHEMA, GOLD_SCHEMA = "bronze", "silver", "gold"

# --- Raw storage location ---
ACCOUNT = "stdbxfinalwebcreative01"
RAW_CONTAINER = "raw"
FQDN = ACCOUNT + ".dfs.core.windows.net"

# abfss:// URIs of the two source CSV files inside the raw container.
AUTOMOBILE_CSV = "abfss://{}@{}/automobile/Automobile.csv".format(RAW_CONTAINER, FQDN)
ECOMMERCE_CSV = "abfss://{}@{}/ecommerce/ecommerce.csv".format(RAW_CONTAINER, FQDN)

# --- Fully qualified table names (catalog.schema.table), one pair per schema ---
BRONZE_AUTO = ".".join((CATALOG, BRONZE_SCHEMA, "automobile_raw"))
BRONZE_ECOM = ".".join((CATALOG, BRONZE_SCHEMA, "ecommerce_raw"))

SILVER_AUTO = ".".join((CATALOG, SILVER_SCHEMA, "automobile_clean"))
SILVER_ECOM = ".".join((CATALOG, SILVER_SCHEMA, "ecommerce_clean"))

GOLD_AUTO_KPI = ".".join((CATALOG, GOLD_SCHEMA, "automobile_kpis"))
GOLD_ECOM_KPI = ".".join((CATALOG, GOLD_SCHEMA, "ecommerce_kpis"))

In [0]:
from pyspark.sql.functions import count, avg, sum as fsum, desc
# Make CATALOG the session's current catalog.
use_catalog_stmt = f"USE CATALOG {CATALOG}"
spark.sql(use_catalog_stmt)

In [0]:
from pyspark.sql.functions import avg, count

# Per-make KPIs: average price and number of rows for each value of "make",
# persisted as a Delta table in the gold schema.
# NOTE(review): this reads silver.automobile, while SILVER_AUTO in the config
# cell points at silver.automobile_clean — confirm which table is intended.
auto = spark.table("workspace_webcreative1.silver.automobile")

per_make_metrics = [
    avg("price").alias("avg_price"),
    count("*").alias("cars"),
]
gold_auto_make = auto.groupBy("make").agg(*per_make_metrics)

# Replace the table contents on every run.
make_kpi_writer = gold_auto_make.write.mode("overwrite").format("delta")
make_kpi_writer.saveAsTable("workspace_webcreative1.gold.auto_kpi_by_make")

In [0]:
# Ensure the gold schema exists in the project catalog (no-op when it already does).
CAT = "workspace_webcreative1"
create_gold_schema_stmt = f"CREATE SCHEMA IF NOT EXISTS {CAT}.gold"
spark.sql(create_gold_schema_stmt)

In [0]:
# Drop the per-make KPI table if it is present.
CAT = "workspace_webcreative1"
drop_by_make_stmt = f"DROP TABLE IF EXISTS {CAT}.gold.auto_kpi_by_make"
spark.sql(drop_by_make_stmt)

In [0]:
# Drop the per-make KPI table if it is present.
# NOTE(review): same statements as the preceding cell — one of the two is
# probably left over from debugging.
CAT = "workspace_webcreative1"
by_make_table = f"{CAT}.gold.auto_kpi_by_make"
spark.sql(f"DROP TABLE IF EXISTS {by_make_table}")

In [0]:
# Diagnostics: show the session's current catalog and the catalogs listed by
# SHOW CATALOGS. Both results are handed to display() as DataFrames (no
# collect() to a Python list) so they render as tables.
display(spark.sql("SELECT current_catalog()"))
display(spark.sql("SHOW CATALOGS"))

In [0]:
# Ensure the gold schema exists; CAT is assigned in earlier cells.
gold_schema_ddl = f"CREATE SCHEMA IF NOT EXISTS {CAT}.gold"
spark.sql(gold_schema_ddl)

In [0]:
CAT = "workspace_webcreative1"
by_make_table = f"{CAT}.gold.auto_kpi_by_make"

# Remove any existing copy, then persist the per-make aggregate
# (gold_auto_make, built in an earlier cell) as a Delta table.
# NOTE(review): the KPI cells further down rely on overwriteSchema rather than
# an explicit DROP — consider aligning the two approaches.
spark.sql(f"DROP TABLE IF EXISTS {by_make_table}")
gold_auto_make.write.mode("overwrite").format("delta").saveAsTable(by_make_table)

In [0]:
# Preview up to 20 rows of the per-make KPI table.
preview_query = f"SELECT * FROM {CAT}.gold.auto_kpi_by_make LIMIT 20"
display(spark.sql(preview_query))

In [0]:
# Basic KPIs for the silver automobile table: total row count plus the mean of
# up to ten numeric columns, written to GOLD_AUTO_KPI as a Delta table.
auto = spark.table(SILVER_AUTO)

# A column counts as numeric when its Spark type string is in this list.
# NOTE(review): only two decimal precisions are listed, so other decimal(p,s),
# smallint and tinyint columns are skipped — confirm this is intended.
num_cols = [c for c, t in auto.dtypes if t in ("int","bigint","double","float","decimal(10,0)","decimal(38,18)")]

exprs = [count("*").alias("rows")]
# Cap at the first 10 numeric columns so the KPI table does not blow up in width.
# avg() takes the column name directly, so no col() import is needed.
exprs.extend(avg(c).alias(f"avg_{c}") for c in num_cols[:10])

auto_kpi = auto.select(*exprs)
# overwriteSchema lets the table's columns change when the numeric column set changes.
(auto_kpi.write.format("delta").mode("overwrite").option("overwriteSchema","true").saveAsTable(GOLD_AUTO_KPI))
display(spark.table(GOLD_AUTO_KPI))

In [0]:
# Basic KPIs for the silver ecommerce table: total row count plus the mean of
# up to ten numeric columns, written to GOLD_ECOM_KPI as a Delta table.
ecom = spark.table(SILVER_ECOM)

# A column counts as numeric when its Spark type string is in this list.
# NOTE(review): only two decimal precisions are listed, so other decimal(p,s),
# smallint and tinyint columns are skipped — confirm this is intended.
num_cols = [c for c, t in ecom.dtypes if t in ("int","bigint","double","float","decimal(10,0)","decimal(38,18)")]

exprs = [count("*").alias("rows")]
# Cap at the first 10 numeric columns.
# avg() takes the column name directly, so no col() import is needed.
exprs.extend(avg(c).alias(f"avg_{c}") for c in num_cols[:10])

ecom_kpi = ecom.select(*exprs)
# overwriteSchema lets the table's columns change when the numeric column set changes.
(ecom_kpi.write.format("delta").mode("overwrite").option("overwriteSchema","true").saveAsTable(GOLD_ECOM_KPI))
display(spark.table(GOLD_ECOM_KPI))

In [0]:
# Print the full contents of both KPI tables without truncating cell values.
for kpi_table in (GOLD_AUTO_KPI, GOLD_ECOM_KPI):
    spark.sql(f"SELECT * FROM {kpi_table}").show(truncate=False)

# List the tables in each schema of the catalog: bronze, silver, then gold.
for schema_name in (BRONZE_SCHEMA, SILVER_SCHEMA, GOLD_SCHEMA):
    spark.sql(f"SHOW TABLES IN {CATALOG}.{schema_name}").show(truncate=False)