# Mister Car Wash - ML Model Training

This notebook trains three machine learning models for the Mister Car Wash Intelligence Agent:
1. **Churn Risk Predictor**: Predicts if a member will cancel.
2. **Equipment Failure Predictor**: Predicts if equipment needs maintenance.
3. **Upsell Propensity Scorer**: Predicts if a member is likely to upgrade.

**Single Source of Truth**: All models train on the `V_*_FEATURES` views (`V_CHURN_RISK_FEATURES`, `V_MAINTENANCE_RISK_FEATURES`, `V_UPSELL_FEATURES`) in the `ANALYTICS` schema.

In [None]:
from snowflake.snowpark import Session
from snowflake.ml.registry import Registry
from snowflake.ml.modeling.linear_model import LogisticRegression
from snowflake.ml.modeling.ensemble import RandomForestClassifier
from snowflake.snowpark.functions import col
import pandas as pd

# Connect to Snowflake
# Note: In Snowflake Notebooks, session is automatically available
# session = Session.builder.create()

In [None]:
# Snowflake objects this notebook works against, named once so the session
# context and the registry location cannot drift apart.
database_name = "MISTER_CAR_WASH_INTELLIGENCE"
schema_name = "ANALYTICS"
warehouse_name = "MISTER_CAR_WASH_WH"

# Point the session at the database first, then the schema inside it,
# then the warehouse that will run the queries.
session.use_database(database_name)
session.use_schema(schema_name)
session.use_warehouse(warehouse_name)

# Model Registry handle bound to the same database and schema
reg = Registry(
    session=session,
    database_name=database_name,
    schema_name=schema_name,
)

## 1. Train Churn Risk Predictor

In [None]:
# Churn features come from the V_CHURN_RISK_FEATURES view
churn_df = session.table("V_CHURN_RISK_FEATURES")

# Numeric model inputs, the ground-truth column, and the column that
# will hold the model's prediction
feature_cols = [
    "LTV_SCORE",
    "TENURE_DAYS",
    "DAYS_SINCE_LAST_WASH",
    "TOTAL_WASHES",
]
label_col = "IS_CHURNED"
output_col = "CHURN_LABEL"

# 80/20 train/test split, seeded with 42
churn_splits = churn_df.random_split([0.8, 0.2], seed=42)
train_df, test_df = churn_splits

# Configure and fit the logistic regression on the training split
lr_params = {
    "input_cols": feature_cols,
    "label_cols": label_col,
    "output_cols": output_col,
}
lr = LogisticRegression(**lr_params)
lr_model = lr.fit(train_df)

# Score the held-out split and preview actual vs. predicted labels
eval_df = lr_model.predict(test_df)
print("Churn Model Evaluation:")
churn_preview = eval_df.select(label_col, output_col)
churn_preview.show(5)

In [None]:
# Register the fitted churn model under a fixed name, replacing any
# previous registration of that name.
model_name = "CHURN_RISK_PREDICTOR"

# Best-effort cleanup: the delete is allowed to fail (e.g. on a first run
# when nothing is registered yet), but the reason is reported instead of
# being silently discarded. Catching Exception rather than using a bare
# `except` lets KeyboardInterrupt / SystemExit propagate.
try:
    reg.delete_model(model_name)
except Exception as exc:
    print(f"Skipped deleting {model_name}: {exc}")
else:
    print(f"Deleted existing model {model_name}")

# `Registry.log_model` names its version argument `version_name`; the
# `model_version` keyword belongs to the legacy preview registry API and is
# rejected here. Leaving `version_name` out lets the registry auto-generate
# a version name.
mv = reg.log_model(
    lr_model,
    model_name=model_name,
    # A few feature rows so the registry can infer the input signature
    sample_input_data=train_df.select(feature_cols).limit(10),
)

print(f"Registered {model_name} version {mv.version_name}")

## 2. Train Equipment Failure Predictor

In [None]:
# Maintenance features come from the V_MAINTENANCE_RISK_FEATURES view
maint_df = session.table("V_MAINTENANCE_RISK_FEATURES")

# Model inputs, the ground-truth column, and the column that will hold
# the model's prediction
maint_features = [
    "DAYS_SINCE_LAST_SERVICE",
    "LAST_SERVICE_COST",
    "SEVERITY_SCORE",
]
maint_label = "FAILURE_RISK_LABEL"
maint_output = "FAILURE_LABEL"

# 80/20 train/test split, seeded with 42
maint_splits = maint_df.random_split([0.8, 0.2], seed=42)
m_train, m_test = maint_splits

# Configure and fit a 50-tree random forest on the training split
rf_params = {
    "input_cols": maint_features,
    "label_cols": maint_label,
    "output_cols": maint_output,
    "n_estimators": 50,
}
rf = RandomForestClassifier(**rf_params)
rf_model = rf.fit(m_train)

# Score the held-out split and preview actual vs. predicted labels
m_eval = rf_model.predict(m_test)
print("Maintenance Model Evaluation:")
maint_preview = m_eval.select(maint_label, maint_output)
maint_preview.show(5)

In [None]:
# Register the fitted maintenance model under a fixed name, replacing any
# previous registration of that name.
model_name = "EQUIPMENT_FAILURE_PREDICTOR"

# Best-effort cleanup: the delete is allowed to fail (e.g. on a first run
# when nothing is registered yet), but the reason is reported instead of
# being silently discarded. Catching Exception rather than using a bare
# `except` lets KeyboardInterrupt / SystemExit propagate.
try:
    reg.delete_model(model_name)
except Exception as exc:
    print(f"Skipped deleting {model_name}: {exc}")
else:
    print(f"Deleted existing model {model_name}")

# `Registry.log_model` names its version argument `version_name`; the
# `model_version` keyword belongs to the legacy preview registry API and is
# rejected here. Leaving `version_name` out lets the registry auto-generate
# a version name.
mv = reg.log_model(
    rf_model,
    model_name=model_name,
    # A few feature rows so the registry can infer the input signature
    sample_input_data=m_train.select(maint_features).limit(10),
)

print(f"Registered {model_name} version {mv.version_name}")

## 3. Train Upsell Propensity Scorer

In [None]:
# Upsell features come from the V_UPSELL_FEATURES view
upsell_df = session.table("V_UPSELL_FEATURES")

# Model inputs, the ground-truth column, and the column that will hold
# the model's prediction
upsell_cols = [
    "LTV_SCORE",
    "VISIT_COUNT",
    "AVG_RATING",
]
upsell_label = "UPSELL_LABEL"
upsell_output = "UPSELL_LABEL_PRED"

# 80/20 train/test split, seeded with 42
upsell_splits = upsell_df.random_split([0.8, 0.2], seed=42)
u_train, u_test = upsell_splits

# Configure and fit the logistic regression on the training split
upsell_params = {
    "input_cols": upsell_cols,
    "label_cols": upsell_label,
    "output_cols": upsell_output,
}
upsell_lr = LogisticRegression(**upsell_params)
upsell_model = upsell_lr.fit(u_train)

# Score the held-out split and preview actual vs. predicted labels
u_eval = upsell_model.predict(u_test)
print("Upsell Model Evaluation:")
upsell_preview = u_eval.select(upsell_label, upsell_output)
upsell_preview.show(5)

In [None]:
# Register the fitted upsell model under a fixed name, replacing any
# previous registration of that name.
model_name = "UPSELL_PROPENSITY_SCORER"

# Best-effort cleanup: the delete is allowed to fail (e.g. on a first run
# when nothing is registered yet), but the reason is reported instead of
# being silently discarded. Catching Exception rather than using a bare
# `except` lets KeyboardInterrupt / SystemExit propagate.
try:
    reg.delete_model(model_name)
except Exception as exc:
    print(f"Skipped deleting {model_name}: {exc}")
else:
    print(f"Deleted existing model {model_name}")

# `Registry.log_model` names its version argument `version_name`; the
# `model_version` keyword belongs to the legacy preview registry API and is
# rejected here. Leaving `version_name` out lets the registry auto-generate
# a version name.
mv = reg.log_model(
    upsell_model,
    model_name=model_name,
    # A few feature rows so the registry can infer the input signature
    sample_input_data=u_train.select(upsell_cols).limit(10),
)

print(f"Registered {model_name} version {mv.version_name}")

In [None]:
# Verify all models are registered: fetch the registry's model listing and
# print it so the three registrations from the earlier cells can be
# confirmed by eye.
# NOTE(review): presumably show_models() returns a tabular listing with one
# row per registered model; confirm against the snowflake-ml-python docs.
models = reg.show_models()
print(models)