# MLflow Multiple Model Ensemble Tutorial

This is a minimal notebook demonstrating how to combine multiple models into a single ensemble and log that ensemble with MLflow.

In [None]:
# Import common libraries
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

import mlflow
from mlflow.pyfunc import PythonModel

In [None]:
# Load the Iris dataset and hold out 20% of the samples as a test split
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is the same on every run
)

In [None]:
# Build the two base classifiers first, then fit each one on the training split
rf_model = RandomForestClassifier(random_state=42, n_estimators=100)
lr_model = LogisticRegression(max_iter=200, random_state=42)

rf_model.fit(X_train, y_train)
lr_model.fit(X_train, y_train)

In [None]:
# Define a custom ensemble model
class EnsembleModel(PythonModel):
    """Weighted soft-voting ensemble over classifiers exposing ``predict_proba``.

    Each member's class-probability matrix is scaled by its weight, the scaled
    matrices are summed, and the highest-scoring class is returned per row.
    """

    def __init__(self, models, weights=None):
        """Store the member models and their voting weights.

        Args:
            models: Non-empty sequence of fitted classifiers, each providing
                ``predict_proba``.
            weights: Optional sequence with one weight per model, in the same
                order as ``models``. Defaults to equal weights of
                ``1 / len(models)``. Weights are used as given; they are not
                normalized.

        Raises:
            ValueError: If ``models`` is empty, or ``weights`` does not contain
                exactly one entry per model.
        """
        models = list(models)
        if not models:
            raise ValueError("EnsembleModel requires at least one model")

        if weights is None:
            weights = [1 / len(models)] * len(models)
        else:
            # Copy so later mutation of the caller's sequence cannot change
            # the ensemble, and so any sequence type (tuple, array) works.
            weights = list(weights)
            if len(weights) != len(models):
                raise ValueError(
                    f"Expected {len(models)} weights (one per model), "
                    f"got {len(weights)}"
                )

        self.models = models
        self.weights = weights

    def predict(self, context, model_input, params=None):
        """Return the predicted class for each row of ``model_input``.

        Args:
            context: MLflow model context; not used by this model.
            model_input: Feature data passed unchanged to every member's
                ``predict_proba``.
            params: Optional inference parameters, accepted to match MLflow's
                ``predict(context, model_input, params=None)`` signature; not
                used by this model.

        Returns:
            A 1-D array with one prediction per input row. When the first
            member exposes ``classes_``, the entries are the corresponding
            class labels; otherwise they are the winning column indices.
        """
        probabilities = [
            np.asarray(model.predict_proba(model_input), dtype=float)
            for model in self.models
        ]

        # Accumulate the weighted sum in float so the in-place add is valid
        # regardless of the dtype a member returned.
        weighted_sum = np.zeros_like(probabilities[0])
        for proba, weight in zip(probabilities, self.weights):
            weighted_sum += proba * weight

        winners = np.argmax(weighted_sum, axis=1)

        # argmax yields a column index; translate it to the actual label so
        # models trained on labels other than 0..n-1 predict correctly.
        # Assumes all members share the first member's class ordering.
        classes = getattr(self.models[0], "classes_", None)
        if classes is None:
            return winners
        return np.asarray(classes)[winners]

In [None]:
# Combine both fitted classifiers; the random forest gets weight 0.7 and
# the logistic regression gets weight 0.3
ensemble = EnsembleModel(
    models=[rf_model, lr_model],
    weights=[0.7, 0.3],
)

# Log the ensemble as a pyfunc model inside an MLflow run
with mlflow.start_run():
    mlflow.pyfunc.log_model(
        python_model=ensemble,
        name="ensemble_model",
    )