In [0]:
%sql
-- Count the rows currently in silver.events.
SELECT COUNT(*) FROM silver.events;



In [0]:
%sql
-- (Re)build gold.products from silver.events: one row per product_id with
-- the number of view / cart / purchase events and the summed price of
-- purchase events, rounded to 2 decimal places.
CREATE OR REPLACE TABLE gold.products AS
SELECT
  product_id,
  COUNT(*) FILTER (WHERE event_type = 'view')     AS views,
  COUNT(*) FILTER (WHERE event_type = 'cart')     AS cart_adds,
  COUNT(*) FILTER (WHERE event_type = 'purchase') AS purchases,
  -- Non-purchase rows contribute 0, so the sum is never NULL for a product
  -- that has no purchases.
  ROUND(SUM(CASE WHEN event_type = 'purchase' THEN price ELSE 0 END), 2) AS revenue
FROM silver.events
GROUP BY product_id;


In [0]:
%sql
-- Top 10 products by number of purchases.
-- product_id is added as a tiebreaker: gold.products has one row per
-- product_id, so products with equal purchase counts now come back in a
-- stable order instead of an arbitrary one from run to run.
SELECT *
FROM gold.products
ORDER BY purchases DESC, product_id
LIMIT 10;


In [0]:
import mlflow
import mlflow.sklearn
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Settings a reader might tune. They are named once so that the values used
# for training are exactly the values recorded on the MLflow run.
FEATURES = ["views", "cart_adds"]
TARGET = "purchases"
TEST_SIZE = 0.2
RANDOM_STATE = 42

# Load the per-product aggregates into pandas for scikit-learn.
df = spark.table("gold.products").toPandas()

X = df[FEATURES]
y = df[TARGET]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

mlflow.set_experiment("/Shared/day12_mlflow_experiment")

# Everything logged inside this block is attached to the same named run;
# the run is closed automatically when the block exits.
with mlflow.start_run(run_name="linear_regression_v1"):
    mlflow.log_param("features", ", ".join(FEATURES))
    mlflow.log_param("model", "LinearRegression")
    # Record the split settings as well, so the run can be reproduced.
    mlflow.log_param("test_size", TEST_SIZE)
    mlflow.log_param("random_state", RANDOM_STATE)

    model = LinearRegression()
    model.fit(X_train, y_train)

    # R^2 on the held-out split.
    r2 = model.score(X_test, y_test)
    mlflow.log_metric("r2_score", r2)

    # Pass an input example so the logged model is stored together with a
    # sample of the input it was trained on.
    mlflow.sklearn.log_model(model, "model", input_example=X_train.head())

print(f"R2 Score: {r2:.4f}")


In [0]:
# Read the gold product table through Spark, then collect it into pandas.
products_table = spark.table("gold.products")
df = products_table.toPandas()


In [0]:
# Feature matrix (two count columns) and the target column.
feature_columns = ["views", "cart_adds"]
target_column = "purchases"

X = df[feature_columns]
y = df[target_column]


In [0]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# the same on every run.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [0]:
from sklearn.linear_model import LinearRegression

# fit() returns the estimator itself, so construction and training can be
# chained; the bare name on the last line keeps the estimator as the cell's
# displayed output.
model = LinearRegression().fit(X_train, y_train)
model


In [0]:
# Score the fitted model on the held-out split.
r2 = model.score(X=X_test, y=y_test)

In [0]:
import mlflow

# Select the experiment that subsequent runs are recorded under.
experiment_path = "/Shared/day12_mlflow_experiment"
mlflow.set_experiment(experiment_path)


In [0]:
import mlflow.sklearn

# All logging calls must execute while the named run is active. Previously
# the `with` block was empty, so the run was opened and closed with nothing
# in it, and the log_* calls in the following cells ran with no active run.
# MLflow then starts a separate run implicitly, which is left open. Keeping
# everything inside one `with` block attaches params, metric and model to
# "linear_regression_v1" and closes the run when the block exits.
with mlflow.start_run(run_name="linear_regression_v1"):
    mlflow.log_param("model_type", "LinearRegression")
    mlflow.log_param("test_size", 0.2)

    mlflow.log_metric("r2_score", r2)

    # Log the model once (it was previously logged twice under the same
    # artifact path), keeping the variant that records an input example.
    mlflow.sklearn.log_model(model, "model", input_example=X_train.head())
