In [0]:
%sql
-- Gold layer: (re)build gold.daily_product_metrics as a Delta table from
-- silver.events. One output row per (calendar date of event_time, product_id),
-- carrying the number of 'view', 'cart' and 'purchase' events in that group
-- and the mean of price over all of the group's events.
-- CREATE OR REPLACE overwrites any existing table of the same name.
CREATE OR REPLACE TABLE gold.daily_product_metrics
USING DELTA
AS
SELECT
    DATE(event_time)                  AS event_date,
    product_id,
    -- Per-event-type tallies; rows with any other (or NULL) event_type
    -- contribute to none of the three counters.
    COUNT_IF(event_type = 'view')     AS views,
    COUNT_IF(event_type = 'cart')     AS cart_adds,
    COUNT_IF(event_type = 'purchase') AS purchases,
    -- Averaged across every event in the group, not only purchases.
    AVG(price)                        AS avg_price
FROM silver.events
GROUP BY
    DATE(event_time),
    product_id;



num_affected_rows,num_inserted_rows


In [0]:
%sql
-- List the tables registered in the gold schema, to confirm that
-- gold.daily_product_metrics is present.
SHOW TABLES IN gold;


database,tableName,isTemporary
gold,daily_events,False
gold,daily_product_metrics,False
gold,events_view,False


In [0]:
%sql
-- List every schema (database) visible in the current catalog.
SHOW SCHEMAS;


databaseName
bronze
default
ecommerce
gold
information_schema
silver


In [0]:
%sql
-- List the tables in the gold schema again (same statement as an earlier
-- cell in this notebook).
SHOW TABLES IN gold;


database,tableName,isTemporary
gold,daily_events,False
gold,daily_product_metrics,False
gold,events_view,False


In [0]:
%sql
-- Preview a handful of rows from the aggregated table.
-- There is no ORDER BY, so which five rows come back is not guaranteed
-- and may differ between runs.
SELECT * FROM gold.daily_product_metrics LIMIT 5;


event_date,product_id,views,cart_adds,purchases,avg_price
2019-11-16,26300804,117,4,4,214.15999999999988
2019-11-16,13201310,179,1,0,985.1000000000016
2019-11-16,26401414,1443,59,11,59.72000000000064
2019-11-16,10800006,94,1,1,46.30999999999997
2019-11-16,28720375,34,3,2,89.57999999999997


In [0]:
import mlflow
import mlflow.sklearn
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Split hyperparameters, defined once so the values logged to MLflow are
# exactly the values used by train_test_split, and so the split (and therefore
# the reported R²) is repeatable from one run to the next.
TEST_SIZE = 0.2
RANDOM_SEED = 42

# Prepare data
# Pull the aggregated gold table into pandas; `views` and `cart_adds` are the
# features and `purchases` is the regression target.
df = spark.table("gold.daily_product_metrics").toPandas()
X = df[["views", "cart_adds"]]
y = df["purchases"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_SEED
)

# MLflow experiment
with mlflow.start_run(run_name="linear_regression_v1"):
    # Log parameters (including the seed, so the run can be reproduced)
    mlflow.log_param("model_type", "LinearRegression")
    mlflow.log_param("test_size", TEST_SIZE)
    mlflow.log_param("random_state", RANDOM_SEED)

    # Train
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Evaluate on the held-out split; score() is logged as the R² metric
    score = model.score(X_test, y_test)
    mlflow.log_metric("r2_score", score)

    # Log model
    mlflow.sklearn.log_model(model, "model")

print(f"R² Score: {score:.4f}")




R² Score: 0.5988
