# Logging Plots in MLflow

This is a minimal notebook demonstrating how to log plots in MLflow.

In [None]:
# Import common libraries
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

import mlflow

In [None]:
# Load the Iris dataset and hold out 20% of the samples as a test set.
# The fixed random_state keeps the split reproducible between runs.
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit a 100-tree random forest on the training split (seeded for
# reproducibility); fit() returns the estimator itself, so it can be chained.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train, y_train
)

# Predict on the held-out split: hard class labels and per-class probabilities.
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)

In [None]:
# Create and log plots with MLflow
#
# Each figure is handed straight to `mlflow.log_figure`, which saves it as an
# artifact of the active run. This avoids writing to a hard-coded `/tmp` path
# (which does not exist on every platform and leaves stray files behind).
with mlflow.start_run():
    # Log model parameters; read the value from the fitted model so the
    # logged parameter always matches what was actually trained.
    mlflow.log_param("n_estimators", model.n_estimators)

    # Create and log confusion matrix (rows: true labels, columns: predictions)
    cm = confusion_matrix(y_test, y_pred)
    cm_fig, cm_ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=cm_ax)
    cm_ax.set_title("Confusion Matrix")
    cm_ax.set_xlabel("Predicted")
    cm_ax.set_ylabel("True")

    # Log the figure under the run's artifact root, then release it so
    # figures do not accumulate in memory across repeated cell executions.
    mlflow.log_figure(cm_fig, "confusion_matrix.png")
    plt.close(cm_fig)

    # Create and log feature importance plot
    feature_importance = model.feature_importances_
    # Ascending sort: with barh the last bar is drawn at the top, so the most
    # important feature ends up first when reading the chart top-down.
    sorted_idx = np.argsort(feature_importance)
    bar_positions = np.arange(len(sorted_idx))

    fi_fig, fi_ax = plt.subplots(figsize=(10, 6))
    fi_ax.barh(bar_positions, feature_importance[sorted_idx])
    fi_ax.set_yticks(bar_positions)
    fi_ax.set_yticklabels(np.array(iris.feature_names)[sorted_idx])
    fi_ax.set_title("Feature Importance")

    # Log and release the feature importance figure
    mlflow.log_figure(fi_fig, "feature_importance.png")
    plt.close(fi_fig)