# Register Model with MLflow
This notebook trains a lightweight model and registers it in MLflow.

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
import joblib
import os

try:
    import mlflow
    import mlflow.sklearn
    mlflow_available = True
except ImportError:
    mlflow_available = False

# Load cleaned dataset
df = pd.read_csv("/content/sample_data/cleaned_data.csv")
print(df.columns) # Print columns to diagnose the KeyError
X = df.drop(columns=["num__Exited"]) # Corrected column name
y = df["num__Exited"].astype(int) # Convert target to integer

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Train model
model = RandomForestClassifier(random_state=42, n_estimators=100)
model.fit(X_train, y_train)
preds = model.predict(X_test)
f1 = f1_score(y_test, preds)

# Save locally
os.makedirs("models", exist_ok=True)
joblib.dump(model, "models/random_forest.pkl")

if mlflow_available:
    mlflow.set_experiment("bank_churn_pipeline")
    with mlflow.start_run(run_name="register_model") as run:
        mlflow.log_param("model_type", "RandomForest")
        mlflow.log_metric("f1_score", f1)
        mlflow.sklearn.log_model(model, "model")
        print(f"✅ Model logged to MLflow (Run ID: {run.info.run_id})")
else:
    print("✅ Model saved locally at models/random_forest.pkl (MLflow not installed)")

Index(['num__CreditScore', 'num__Age', 'num__Tenure', 'num__Balance',
       'num__NumOfProducts', 'num__HasCrCard', 'num__IsActiveMember',
       'num__EstimatedSalary', 'num__Exited', 'cat__Geography_France',
       'cat__Geography_Germany', 'cat__Geography_Spain', 'cat__Gender_Female',
       'cat__Gender_Male', 'cat__Gender_nan'],
      dtype='object')
✅ Model saved locally at models/random_forest.pkl (MLflow not installed)
