In [1]:
!pip install mlflow

Collecting mlflow
  Downloading mlflow-2.21.3-py3-none-any.whl.metadata (30 kB)
Collecting mlflow-skinny==2.21.3 (from mlflow)
  Downloading mlflow_skinny-2.21.3-py3-none-any.whl.metadata (31 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.21.3->mlflow)
  Downloading databricks_sdk-0.50.0-py3-none-any.whl.metadata (38 kB)
Collecting fastapi<1 (from mlflow-skinny==2.21.3->mlflow)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn<1 (from mlflow-skinny==2.21.3->mlflow)
  Downloading uvicorn-0.34.2-py3-none-any.whl.metadata (6.5 k

In [4]:
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# ------------------------
# Configuration
# ------------------------
# Every tunable value lives here so that the hyperparameters handed to the
# model and the ones recorded in MLflow cannot silently drift apart.
EXPERIMENT_NAME = "Breast Cancer Prediction"
MODEL_ARTIFACT_PATH = "model"  # shared by log_model() and the runs:/ URI below
TEST_SIZE = 0.2
RANDOM_STATE = 42
N_ESTIMATORS = 100
N_SAMPLE_PREDICTIONS = 10  # upper bound on how many test rows are printed

# Load the data
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Set the experiment
mlflow.set_experiment(EXPERIMENT_NAME)

# Training and Logging
with mlflow.start_run() as run:
    clf = RandomForestClassifier(n_estimators=N_ESTIMATORS, random_state=RANDOM_STATE)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    # Record everything that influences the result, sourced from the same
    # constants used above rather than re-typed literals.
    mlflow.log_params(
        {
            "n_estimators": N_ESTIMATORS,
            "random_state": RANDOM_STATE,
            "test_size": TEST_SIZE,
        }
    )
    mlflow.log_metric("accuracy", acc)

    # A one-row example is stored alongside the model.
    input_example = X_train.iloc[:1]
    mlflow.sklearn.log_model(clf, MODEL_ARTIFACT_PATH, input_example=input_example)

    # Save run ID for loading the model later
    run_id = run.info.run_id

print(f"\nModel trained and saved! Run ID: {run_id}\n")

# ------------------------
# Testing the Saved Model
# ------------------------

print("🔍 Testing the saved model on the test data...\n")

# Load the model from MLflow
model_uri = f"runs:/{run_id}/{MODEL_ARTIFACT_PATH}"
loaded_model = mlflow.sklearn.load_model(model_uri)

# Predict using the loaded model
y_loaded_pred = loaded_model.predict(X_test)

# The reloaded model is the same fitted estimator, so it must reproduce the
# predictions made inside the run; fail loudly if the round-trip changed it.
assert np.array_equal(y_pred, y_loaded_pred), "Reloaded model predictions differ from the in-run predictions"

# Check accuracy
test_accuracy = accuracy_score(y_test, y_loaded_pred)
print(f"✅ Accuracy of the loaded model on test data: {test_accuracy:.4f}")

# Show some predictions
print("\n🔮 Sample Predictions:")
# Clamp to the size of the test split so a smaller dataset or test_size
# cannot raise IndexError.
for i in range(min(N_SAMPLE_PREDICTIONS, len(X_test))):
    print(f"Features: {X_test.iloc[i].values}")
    print(f"True Label: {y_test.iloc[i]} - Predicted: {y_loaded_pred[i]}\n")





Model trained and saved! Run ID: ef906ee4023d430792dd6d830d7a4bff

🔍 Testing the saved model on the test data...

✅ Accuracy of the loaded model on test data: 0.9649

🔮 Sample Predictions:
Features: [1.247e+01 1.860e+01 8.109e+01 4.819e+02 9.965e-02 1.058e-01 8.005e-02
 3.821e-02 1.925e-01 6.373e-02 3.961e-01 1.044e+00 2.497e+00 3.029e+01
 6.953e-03 1.911e-02 2.701e-02 1.037e-02 1.782e-02 3.586e-03 1.497e+01
 2.464e+01 9.605e+01 6.779e+02 1.426e-01 2.378e-01 2.671e-01 1.015e-01
 3.014e-01 8.750e-02]
True Label: 1 - Predicted: 1

Features: [1.894e+01 2.131e+01 1.236e+02 1.130e+03 9.009e-02 1.029e-01 1.080e-01
 7.951e-02 1.582e-01 5.461e-02 7.888e-01 7.975e-01 5.486e+00 9.605e+01
 4.444e-03 1.652e-02 2.269e-02 1.370e-02 1.386e-02 1.698e-03 2.486e+01
 2.658e+01 1.659e+02 1.866e+03 1.193e-01 2.336e-01 2.687e-01 1.789e-01
 2.551e-01 6.589e-02]
True Label: 0 - Predicted: 0

Features: [1.546e+01 1.948e+01 1.017e+02 7.489e+02 1.092e-01 1.223e-01 1.466e-01
 8.087e-02 1.931e-01 5.796e-02 4.743e