### Basic ML Model Monitoring with MLflow
**Description**: Set up a basic ML model monitoring pipeline that uses MLflow to track experiment parameters, evaluation metrics, and the trained model.

**Steps**:
1. Installation
2. Set Up MLflow Tracking
3. Training a Simple Model
4. Logging Model and Metrics
5. View Logged Data
    - Access the MLflow UI (for example, run `mlflow ui` and open the address it prints) to view your logged parameters, metrics, and models.

In [None]:
# Write your code below

In [1]:
pip install mlflow scikit-learn pandas


Defaulting to user installation because normal site-packages is not writeable
Collecting mlflow
  Downloading mlflow-2.22.0-py3-none-any.whl (29.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 29.0/29.0 MB 18.7 MB/s eta 0:00:00
Collecting gunicorn<24
  Downloading gunicorn-23.0.0-py3-none-any.whl (85 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 85.0/85.0 kB 18.0 MB/s eta 0:00:00
Collecting mlflow-skinny==2.22.0
  Downloading mlflow_skinny-2.22.0-py3-none-any.whl (6.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.3/6.3 MB 40.6 MB/s eta 0:00:00
Collecting graphene<4
  Downloading graphene-3.4.3-py2.py3-none-any.whl (114 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 114.9/114.9 kB 19.1 MB/s eta 0:00:00
Collecting alembic!=1.10.0,<2
  Downloading alembic-1.16.1-

In [3]:
import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)

# Step 1: Data Loading and Validation
def load_data():
    """Build the toy diagnosis dataset, validate it, and split it for training.

    The dataset is a hard-coded 10-row frame with two numeric feature columns
    (``age`` and ``cholesterol``) and a 0/1 ``diagnosis`` target column.

    Returns:
        The result of ``train_test_split`` on the features and target, i.e.
        ``X_train, X_test, y_train, y_test``, using ``test_size=0.2`` and
        ``random_state=42``.

    Raises:
        AssertionError: If the frame contains a missing value, or if either
            feature column contains a negative value.
    """
    feature_columns = ['age', 'cholesterol']
    data = pd.DataFrame({
        'age': [25, 45, 35, 50, 23, 30, 44, 37, 29, 41],
        'cholesterol': [180, 220, 190, 210, 170, 200, 230, 210, 185, 195],
        'diagnosis': [0, 1, 0, 1, 0, 0, 1, 1, 0, 1]
    })

    # Raise explicitly instead of using ``assert`` statements: asserts are
    # removed when Python runs with -O, which would silently disable the
    # validation. AssertionError is kept as the exception type so code that
    # already catches it keeps working.
    if data.isnull().any().any():
        raise AssertionError("Missing values found in dataset")
    if not (data[feature_columns] >= 0).all().all():
        raise AssertionError("Negative values in input features")

    return train_test_split(
        data[feature_columns], data['diagnosis'], test_size=0.2, random_state=42
    )

# Step 2: Train Model and Log to MLflow
def train_and_log_model(X_train, X_test, y_train, y_test, *, n_estimators=100, max_depth=3):
    """Train a random forest, score it on the test split, and log the run to MLflow.

    Everything happens inside a single ``mlflow.start_run()`` context: the
    model is fitted, accuracy/precision/recall are computed on the test split,
    and the hyperparameters, the three metrics, and the fitted model are
    logged to that run.

    Args:
        X_train: Training features passed to ``model.fit``.
        X_test: Test features passed to ``model.predict``.
        y_train: Training labels passed to ``model.fit``.
        y_test: True labels the predictions are scored against.
        n_estimators: Number of trees in the forest (keyword-only).
        max_depth: Maximum tree depth (keyword-only).

    Raises:
        Exception: Any error raised while training, scoring, or logging is
            written to the log and then re-raised unchanged.
    """
    with mlflow.start_run():
        try:
            # The same variables feed the model and the MLflow params below,
            # so the logged hyperparameters cannot drift from the ones used.
            model = RandomForestClassifier(
                n_estimators=n_estimators, max_depth=max_depth, random_state=42
            )
            model.fit(X_train, y_train)

            y_pred = model.predict(X_test)
            acc = accuracy_score(y_test, y_pred)
            prec = precision_score(y_test, y_pred)
            rec = recall_score(y_test, y_pred)

            # Log parameters and metrics
            mlflow.log_param("n_estimators", n_estimators)
            mlflow.log_param("max_depth", max_depth)
            mlflow.log_metric("accuracy", acc)
            mlflow.log_metric("precision", prec)
            mlflow.log_metric("recall", rec)

            # Store the fitted estimator under the "model" artifact path.
            mlflow.sklearn.log_model(model, "model")

            logging.info(f"Run complete. Accuracy: {acc:.3f}, Precision: {prec:.3f}, Recall: {rec:.3f}")
        except Exception as e:
            # Record the failure, then let the caller see the original error.
            logging.error(f"Error during training or logging: {e}")
            raise

# Step 3: Unit Test Functions
def test_data_integrity():
    """Check the shape of the training split returned by ``load_data``.

    Verifies that the training features have exactly two columns and that the
    training target holds at most two distinct values. The outcome is written
    to the log; an ``AssertionError`` is logged rather than propagated.
    """
    try:
        features_train, _features_test, target_train, _target_test = load_data()

        column_count = features_train.shape[1]
        assert column_count == 2, "Expected 2 features"

        distinct_targets = target_train.nunique()
        assert distinct_targets <= 2, "Target column not binary"
    except AssertionError as failure:
        logging.error(f"❌ Data integrity test failed: {failure}")
    else:
        logging.info("✅ Data integrity test passed.")

def test_model_training():
    """Smoke-test that a default random forest can be fitted and can predict.

    Fits a ``RandomForestClassifier`` with default settings on the training
    split from ``load_data`` and checks that it returns one prediction per
    test label. The outcome is written to the log; any exception is logged
    rather than propagated.
    """
    try:
        features_train, features_test, target_train, target_test = load_data()

        classifier = RandomForestClassifier()
        classifier.fit(features_train, target_train)
        predictions = classifier.predict(features_test)

        predicted_count = len(predictions)
        expected_count = len(target_test)
        assert predicted_count == expected_count, "Prediction length mismatch"

        logging.info("✅ Model training test passed.")
    except Exception as failure:
        logging.error(f"❌ Model training test failed: {failure}")

# Main Execution
if __name__ == "__main__":
    # Run both smoke checks first; each one logs its own outcome.
    for smoke_check in (test_data_integrity, test_model_training):
        smoke_check()

    # Build a fresh split, then train on it and record the run in MLflow.
    X_train, X_test, y_train, y_test = load_data()
    train_and_log_model(
        X_train=X_train,
        X_test=X_test,
        y_train=y_train,
        y_test=y_test,
    )


INFO:root:✅ Data integrity test passed.


INFO:root:✅ Model training test passed.
INFO:root:Run complete. Accuracy: 1.000, Precision: 1.000, Recall: 1.000
