### Implementing and Logging an ML Experiment with MLflow
**Description**: Train an ML model for an e-commerce recommendation engine, using MLflow to track the model and its experiments.

**Steps**:
1. MLflow Integration Setup
2. Training the Model
3. Logging the Experiment
4. Accessing the MLflow UI

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score
import mlflow
import mlflow.sklearn
import logging
import unittest

# Setup logging: configure the root logger so INFO-level (and higher) messages
# from the pipeline functions below are emitted.
logging.basicConfig(level=logging.INFO)

# -----------------------------
# STEP 1: Generate Synthetic E-commerce Data
# -----------------------------
def create_synthetic_data(n=1000, seed=42):
    """Build a reproducible synthetic e-commerce dataset.

    Args:
        n: Number of rows to generate; must be greater than 0.
        seed: Seed for the private random generator. The default (42)
            yields the same values as calling ``np.random.seed(42)`` and
            then drawing from the module-level ``np.random`` functions.

    Returns:
        A DataFrame with ``n`` rows and the columns ``user_age``
        (integers in [18, 65)), ``product_views`` (Poisson, mean 3),
        ``cart_adds`` (Poisson, mean 1) and ``purchase_made`` (0 or 1,
        drawn with probabilities 0.7 and 0.3).

    Raises:
        ValueError: If ``n`` is not greater than 0. Any failure is logged
            before being re-raised.
    """
    try:
        if n <= 0:
            raise ValueError("Number of samples must be > 0")
        # A private RandomState produces the same stream as the seeded global
        # generator, but leaves NumPy's global random state untouched so that
        # calling this function does not affect randomness elsewhere.
        rng = np.random.RandomState(seed)
        data = pd.DataFrame({
            "user_age": rng.randint(18, 65, n),
            "product_views": rng.poisson(3, n),
            "cart_adds": rng.poisson(1, n),
            # NOTE: the label is drawn independently of the feature columns.
            "purchase_made": rng.choice([0, 1], size=n, p=[0.7, 0.3])
        })
        return data
    except Exception as e:
        logging.error(f"Data generation failed: {e}")
        raise

# -----------------------------
# STEP 2: Train Model
# -----------------------------
def train_model(data):
    """Fit a scaled logistic-regression classifier and score it on a hold-out split.

    Args:
        data: DataFrame containing the feature columns plus the binary
            target column ``purchase_made``.

    Returns:
        A tuple ``(model, scaler, metrics)``: the fitted
        ``LogisticRegression``, the ``StandardScaler`` fitted on the
        training split, and a dict with the keys ``"accuracy"`` and
        ``"precision"`` computed on the 20% test split.

    Raises:
        ValueError: If ``data`` is empty or has no ``purchase_made``
            column. Any failure is logged before being re-raised.
    """
    try:
        if data.empty or "purchase_made" not in data.columns:
            raise ValueError("Invalid data format")

        X = data.drop("purchase_made", axis=1)
        y = data["purchase_made"]

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # The scaler is fitted on the training split only and then reused on
        # the test split, so no test-set statistics leak into training.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        model = LogisticRegression(max_iter=1000)
        model.fit(X_train_scaled, y_train)

        y_pred = model.predict(X_test_scaled)

        accuracy = accuracy_score(y_test, y_pred)
        # Precision is undefined when the model predicts no positive sample;
        # zero_division=0 reports 0.0 in that case instead of emitting an
        # UndefinedMetricWarning.
        precision = precision_score(y_test, y_pred, zero_division=0)

        return model, scaler, {"accuracy": accuracy, "precision": precision}
    except Exception as e:
        logging.error(f"Model training failed: {e}")
        raise

# -----------------------------
# STEP 3: Log to MLflow
# -----------------------------
def log_experiment(model, scaler, metrics):
    """Record one training run in MLflow.

    Selects the experiment "E-Commerce_Recommendation_Tracking", opens a
    run, and logs two descriptive parameters, every entry of ``metrics``
    as a metric, and both fitted estimators as sklearn models.

    Args:
        model: Fitted estimator logged under the artifact path "model".
        scaler: Fitted scaler logged under the artifact path "scaler".
        metrics: Mapping of metric name to numeric value.

    Raises:
        Exception: Any error raised by MLflow is logged and re-raised.
    """
    run_params = {"model_type": "LogisticRegression", "scaler": "StandardScaler"}
    estimators = ((model, "model"), (scaler, "scaler"))

    try:
        mlflow.set_experiment("E-Commerce_Recommendation_Tracking")

        with mlflow.start_run():
            # Parameters first, then metrics, then the model artifacts.
            for param_name, param_value in run_params.items():
                mlflow.log_param(param_name, param_value)

            for metric_name, metric_value in metrics.items():
                mlflow.log_metric(metric_name, metric_value)

            for estimator, artifact_path in estimators:
                mlflow.sklearn.log_model(estimator, artifact_path)

            logging.info("MLflow logging completed.")
    except Exception as exc:
        logging.error(f"MLflow logging failed: {exc}")
        raise

# -----------------------------
# STEP 4: Unit Tests
# -----------------------------
class TestMLPipeline(unittest.TestCase):
    """Unit tests for the data-generation and model-training steps."""

    def test_data_creation(self):
        """The generated frame has the requested row count and a target column."""
        df = create_synthetic_data(100)
        self.assertEqual(df.shape[0], 100)
        self.assertIn("purchase_made", df.columns)

    def test_data_creation_rejects_non_positive_n(self):
        """A non-positive sample count is rejected with ValueError."""
        with self.assertRaises(ValueError):
            create_synthetic_data(0)

    def test_model_training_success(self):
        """Training on valid data returns usable accuracy and precision values."""
        df = create_synthetic_data(200)
        _, _, metrics = train_model(df)
        # assertGreater/assertGreaterEqual report the compared values on
        # failure, unlike assertTrue(a > b).
        self.assertGreater(metrics["accuracy"], 0)
        self.assertGreaterEqual(metrics["precision"], 0)

    def test_training_with_empty_data(self):
        """An empty DataFrame is rejected with ValueError."""
        with self.assertRaises(ValueError):
            train_model(pd.DataFrame())

    def test_training_without_target_column(self):
        """Data lacking the purchase_made column is rejected with ValueError."""
        with self.assertRaises(ValueError):
            train_model(pd.DataFrame({"user_age": [25, 40]}))

# -----------------------------
# STEP 5: Main Execution
# -----------------------------
def main():
    """Run data generation, training and MLflow logging end to end.

    Any exception raised by a pipeline step is caught here and reported
    on stdout instead of being propagated to the caller.
    """
    print("🚀 Running MLflow Experiment Pipeline...\n")
    try:
        dataset = create_synthetic_data()
        fitted_model, fitted_scaler, scores = train_model(dataset)
        log_experiment(fitted_model, fitted_scaler, scores)
        print("\n✅ Run complete! Use `mlflow ui` to view results.")
    except Exception as err:
        print(f"❌ Pipeline error: {err}")

if __name__ == "__main__":
    # Run the pipeline once, then the unit tests, in the same process.
    main()
    print("\n🧪 Running Tests...\n")
    # argv=[''] keeps unittest from parsing the real command-line arguments,
    # and exit=False stops it from calling sys.exit() once the tests finish.
    unittest.main(argv=[''], exit=False)

ModuleNotFoundError: No module named 'mlflow'

In [1]:
# write your code from here

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, classification_report
import mlflow
import mlflow.sklearn
from sklearn.preprocessing import StandardScaler
import os

# -----------------------------
# STEP 1: Simulate E-commerce Data
# -----------------------------
def create_synthetic_data(n=1000, seed=42):
    """Simulate a reproducible e-commerce dataset.

    Args:
        n: Number of rows to generate; must be greater than 0.
            Defaults to 1000.
        seed: Seed for the private random generator. The default (42)
            yields the same values as calling ``np.random.seed(42)`` and
            then drawing from the module-level ``np.random`` functions.

    Returns:
        A DataFrame with ``n`` rows and the columns ``user_age``
        (integers in [18, 65)), ``product_views`` (Poisson, mean 3),
        ``cart_adds`` (Poisson, mean 1) and ``purchase_made`` (0 or 1,
        drawn with probabilities 0.7 and 0.3).

    Raises:
        ValueError: If ``n`` is not greater than 0.
    """
    if n <= 0:
        raise ValueError("Number of samples must be > 0")
    # A private RandomState produces the same stream as the seeded global
    # generator, but leaves NumPy's global random state untouched so that
    # calling this function does not affect randomness elsewhere.
    rng = np.random.RandomState(seed)
    data = pd.DataFrame({
        "user_age": rng.randint(18, 65, n),
        "product_views": rng.poisson(3, n),
        "cart_adds": rng.poisson(1, n),
        # NOTE: the label is drawn independently of the feature columns.
        "purchase_made": rng.choice([0, 1], size=n, p=[0.7, 0.3])
    })
    return data

# -----------------------------
# STEP 2: Train ML Model
# -----------------------------
def train_model(data):
    """Fit a scaled logistic-regression classifier and print its hold-out scores.

    Args:
        data: DataFrame containing the feature columns plus the binary
            target column ``purchase_made``.

    Returns:
        A tuple ``(model, acc, prec, scaler)``: the fitted
        ``LogisticRegression``, the accuracy and precision measured on the
        20% test split, and the ``StandardScaler`` fitted on the training
        split.

    Raises:
        KeyError: If ``data`` has no ``purchase_made`` column.
    """
    X = data.drop("purchase_made", axis=1)
    y = data["purchase_made"]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # The scaler is fitted on the training split only and then reused on the
    # test split, so no test-set statistics leak into training.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = LogisticRegression()
    model.fit(X_train_scaled, y_train)

    y_pred = model.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    # Precision is undefined when the model predicts no positive sample;
    # zero_division=0 reports 0.0 in that case instead of emitting an
    # UndefinedMetricWarning.
    prec = precision_score(y_test, y_pred, zero_division=0)

    print("[INFO] Model Training Completed.")
    print("[INFO] Accuracy:", acc)
    print("[INFO] Precision:", prec)

    return model, acc, prec, scaler

# -----------------------------
# STEP 3: MLflow Logging
# -----------------------------
def log_experiment(model, accuracy, precision, scaler):
    """Record one training run in MLflow.

    Selects the experiment "E-Commerce-Recommendation-Engine", opens a run,
    and logs two descriptive parameters, the two metrics and both fitted
    estimators as sklearn models.

    Args:
        model: Fitted estimator logged under the artifact path "model".
        accuracy: Value logged as the "accuracy" metric.
        precision: Value logged as the "precision" metric.
        scaler: Fitted scaler logged under the artifact path "scaler".
    """
    mlflow.set_experiment("E-Commerce-Recommendation-Engine")

    run_params = (("model_type", "LogisticRegression"), ("scaler", "StandardScaler"))
    run_metrics = (("accuracy", accuracy), ("precision", precision))
    estimators = ((model, "model"), (scaler, "scaler"))

    with mlflow.start_run():
        for param_name, param_value in run_params:
            mlflow.log_param(param_name, param_value)
        for metric_name, metric_value in run_metrics:
            mlflow.log_metric(metric_name, metric_value)

        # Log the classifier and the scaler as separate model artifacts.
        for estimator, artifact_path in estimators:
            mlflow.sklearn.log_model(estimator, artifact_path)
        print("[MLflow] Experiment logged successfully.")

# -----------------------------
# MAIN: Full Pipeline Execution
# -----------------------------
if __name__ == "__main__":
    # Script entry point: runs the three pipeline steps in order. The names
    # assigned below (df, model, acc, prec, scaler) are module-level.
    print("=== MLflow Experiment: E-Commerce Recommendation ===\n")

    # Step 1: Generate the synthetic e-commerce data
    df = create_synthetic_data()

    # Step 2: Train the model and collect its accuracy, precision and scaler
    model, acc, prec, scaler = train_model(df)

    # Step 3: Log parameters, metrics and both estimators to MLflow
    log_experiment(model, acc, prec, scaler)

    print("\n=== DONE: Check the MLflow UI with `mlflow ui` ===")

ModuleNotFoundError: No module named 'mlflow'