### Implementing and Logging an ML Experiment with MLflow
**Description**: Train an ML model for an e-commerce recommendation engine, using MLflow to track the model and its experiment runs.

**Steps**:
1. MLflow Integration Setup
2. Training the Model
3. Logging the Experiment
4. Accessing the MLflow UI

In [None]:
# write your code from here

In [1]:
import mlflow
import mlflow.sklearn
import pandas as pd
import numpy as np
import logging
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, roc_auc_score

# Configure the root logger at INFO level so that the logging.info /
# logging.error messages emitted by run_experiment() are displayed.
logging.basicConfig(level=logging.INFO)

# Step 1: Generate Sample Data (E-commerce click prediction)
def generate_sample_data(n_samples: int = 500, seed: int = 42) -> pd.DataFrame:
    """Build a synthetic e-commerce click-prediction dataset.

    Every column is drawn independently at random, so the features carry no
    real signal about the target; the data is only meant to exercise the
    training / tracking pipeline.

    Args:
        n_samples: Number of rows to generate. Defaults to 500.
        seed: Value passed to ``np.random.seed``. Defaults to 42. Note that
            this reseeds NumPy's *global* random state, so any later code
            that draws from the global state is repeatable as well.

    Returns:
        A DataFrame with ``n_samples`` rows and the columns ``price``
        (uniform in [10, 1000)), ``user_rating`` (uniform in [1, 5)),
        ``time_on_site`` (uniform in [1, 30)) and the binary target
        ``clicked`` (1 with probability 0.3).
    """
    np.random.seed(seed)

    # The draw order below (price, user_rating, time_on_site, clicked) fixes
    # which random numbers land in which column; keep it stable.
    data = pd.DataFrame({
        'price': np.random.uniform(10, 1000, n_samples),
        'user_rating': np.random.uniform(1, 5, n_samples),
        'time_on_site': np.random.uniform(1, 30, n_samples),
        'clicked': np.random.choice([0, 1], size=n_samples, p=[0.7, 0.3]),  # target
    })

    # Internal sanity checks on the freshly generated frame.
    feature_columns = ['price', 'user_rating', 'time_on_site']
    assert not data.isnull().any().any(), "Missing data found"
    assert (data[feature_columns] >= 0).all().all(), "Negative values found"
    return data

# Step 2: Split Data
def prepare_data(data):
    """Separate features from the target and split into train/test sets.

    Args:
        data: DataFrame containing the columns ``price``, ``user_rating``,
            ``time_on_site`` and ``clicked``.

    Returns:
        The result of ``train_test_split``: train features, test features,
        train target, test target, with 20% of the rows held out for testing
        and a fixed ``random_state`` of 42.
    """
    feature_columns = ['price', 'user_rating', 'time_on_site']
    features = data[feature_columns]
    target = data['clicked']

    splits = train_test_split(
        features,
        target,
        test_size=0.2,
        random_state=42,
    )
    return splits

# Step 3: Train & Log with MLflow
def run_experiment():
    """Train a gradient-boosting click classifier and record it in MLflow.

    Generates the sample data, splits it, fits a
    ``GradientBoostingClassifier`` and logs the hyperparameters, the
    accuracy / ROC-AUC on the test split, and the fitted model to an MLflow
    run named ``ecommerce_recommendation``.

    Any exception raised while the run is active is logged (with traceback)
    rather than propagated, so the function always returns ``None``. The
    ``try`` deliberately wraps the ``with`` block: the exception leaves the
    MLflow context manager first, which lets MLflow end the run as failed
    instead of recording a broken run as finished.
    """
    data = generate_sample_data()
    X_train, X_test, y_train, y_test = prepare_data(data)

    # Single source of truth for the hyperparameters: the same dict builds
    # the model and is logged, so the two can never drift apart.
    hyperparams = {"n_estimators": 100, "max_depth": 4, "learning_rate": 0.1}

    try:
        with mlflow.start_run(run_name="ecommerce_recommendation"):
            model = GradientBoostingClassifier(**hyperparams)
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            # Column 1 holds the predicted probability of the positive class.
            y_proba = model.predict_proba(X_test)[:, 1]

            acc = accuracy_score(y_test, y_pred)
            auc = roc_auc_score(y_test, y_proba)

            # Log params and metrics
            mlflow.log_params(
                {"model_type": "GradientBoostingClassifier", **hyperparams}
            )
            mlflow.log_metrics({"accuracy": acc, "roc_auc": auc})

            # Log model
            mlflow.sklearn.log_model(model, "ecommerce_model")

            logging.info(
                "✅ Experiment completed - Accuracy: %.3f, AUC: %.3f", acc, auc
            )
    except Exception as e:
        # Best-effort: report the failure (including traceback) but do not
        # crash the caller.
        logging.exception("❌ Experiment failed: %s", e)

# Step 4: Run the complete pipeline
if __name__ == "__main__":
    # Execute the pipeline only when run as a script, then tell the user
    # how to inspect the logged run.
    run_experiment()
    print(
        "To view results, run 'mlflow ui' in your terminal "
        "and open http://localhost:5000"
    )


INFO:root:✅ Experiment completed - Accuracy: 0.600, AUC: 0.386


To view results, run 'mlflow ui' in your terminal and open http://localhost:5000
