In [3]:
import joblib
import pandas as pd
import mlflow
import os
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset, DataQualityPreset, TargetDriftPreset
from evidently import ColumnMapping
import warnings

# Suppress every Python warning for the rest of this session.
warnings.filterwarnings("ignore")

# ✅ Choose the model source: MLflow (True) or the local pickle (False)
use_mlflow = True

if not use_mlflow:
    # Local fallback: fail early with a clear message if the pickle is absent.
    model_path = 'models/iris_model.pkl'
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"🚨 Model file not found at {model_path}")
    model = joblib.load(model_path)
    print(f"✅ Model loaded from {model_path}")
else:
    # NOTE: this is a run-scoped URI ('runs:/<run_id>/model'), so it pins one
    # specific MLflow run rather than a registered model name.
    # MLflow may log dependency-mismatch messages while loading when the local
    # package versions differ from those recorded with the model;
    # `mlflow.pyfunc.get_model_dependencies(model_uri)` fetches the model's
    # environment file.
    logged_model = 'runs:/af5a50e2b83d4fbfb59ef85f4b9c8acc/model'
    model = mlflow.pyfunc.load_model(logged_model)
    print("✅ Model loaded from MLflow")

# ✅ Load the Iris splits: the train split is the drift reference, the test
# split is the "current" data compared against it.
data_paths = dict(
    reference="data/processed/train_iris.csv",
    current="data/processed/test_iris.csv",
)

# Maps split name -> DataFrame read from the matching CSV.
datasets = {}
for split_name in data_paths:
    csv_path = data_paths[split_name]
    if os.path.exists(csv_path):
        datasets[split_name] = pd.read_csv(csv_path)
        print(f"✅ {split_name.capitalize()} data loaded from {csv_path}")
        continue
    # Stop at the first missing file; splits loaded before it stay in `datasets`.
    raise FileNotFoundError(f"🚨 Data file not found: {csv_path}")

# ✅ Name the target / feature columns and attach model predictions
target = 'species'
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

# Each frame is updated in place: the model only sees the feature columns and
# its output is stored in a new 'prediction' column.
for frame in datasets.values():
    frame['prediction'] = model.predict(frame[feature_columns])

# ✅ Tell Evidently which columns hold the target, the prediction and the features
column_mapping = ColumnMapping(
    target=target,
    prediction='prediction',
    # All four measurements are declared numerical; no categorical features.
    numerical_features=feature_columns,
    categorical_features=[],
)

# ✅ Build the Evidently report (data drift + data quality + target drift)
# and evaluate the current split against the reference split.
drift_presets = [DataDriftPreset(), DataQualityPreset(), TargetDriftPreset()]
data_drift_report = Report(metrics=drift_presets)
data_drift_report.run(
    reference_data=datasets["reference"],
    current_data=datasets["current"],
    column_mapping=column_mapping,
)

# ✅ Write the report to disk as HTML
drift_report_path = "reports/iris_drift_report.html"
# Create the output directory first; exist_ok makes reruns harmless.
report_dir = os.path.dirname(drift_report_path)
os.makedirs(report_dir, exist_ok=True)
data_drift_report.save_html(drift_report_path)
print(f"✅ Data drift report saved at {drift_report_path}")

# ✅ Attach the saved HTML report to a new MLflow run
experiment_name = "Iris Data Drift Monitoring"
mlflow.set_experiment(experiment_name)

# The run is opened only to carry the report artifact and is closed when the
# `with` block exits.
with mlflow.start_run():
    mlflow.log_artifact(drift_report_path)
    print("✅ Drift report logged to MLflow")

print("\n🎯 Iris Data Drift Analysis Complete. Check MLflow UI for logs.")


 - cloudpickle (current: 3.0.0, required: cloudpickle==3.1.1)
 - psutil (current: 5.9.0, required: psutil==7.0.0)
 - scikit-learn (current: 1.5.1, required: scikit-learn==1.6.1)
 - scipy (current: 1.13.1, required: scipy==1.15.2)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


✅ Model loaded from MLflow
✅ Reference data loaded from data/processed/train_iris.csv
✅ Current data loaded from data/processed/test_iris.csv


2025/03/14 12:35:21 INFO mlflow.tracking.fluent: Experiment with name 'Iris Data Drift Monitoring' does not exist. Creating a new experiment.


✅ Data drift report saved at reports/iris_drift_report.html
✅ Drift report logged to MLflow

🎯 Iris Data Drift Analysis Complete. Check MLflow UI for logs.
