## Using Parameters to wrap multiple models in a single MLflow model.

In [None]:
from mlflow_for_ml_dev.experiments.exp_utils import get_or_create_experiment
from mlflow_for_ml_dev.utils.utils import get_root_project

from mlflow_for_ml_dev.experiments.custom_models import MultiModel

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import mlflow
from mlflow.models.signature import infer_signature

from mlflow.models.signature import ModelSignature
from mlflow.models.signature import Schema
from mlflow.types.schema import ColSpec
from mlflow.types.schema import TensorSpec
from mlflow.types.schema import ParamSpec
from mlflow.types.schema import ParamSchema
import numpy as np

In [None]:
# Get or create the MLflow experiment that groups the runs of this notebook.
experiment_name = "wrapping_multiple_models"
experiment = get_or_create_experiment(
    experiment_name=experiment_name,
    tags={
        # Tags are free-form key/value metadata attached to the experiment.
        "project_name": "multiple_models",
        "topic": "pyfunc_flavor",
        # "mlflow.note.content" is the reserved key MLflow uses for the description.
        "mlflow.note.content": "This experiment is to show how to wrap multiple models in a single model",
    },
)

## Load Iris Dataset

In [None]:
# Load the iris dataset as pandas objects and separate features from target.
iris = load_iris(as_frame=True)
X, y = iris.data, iris.target

# Hold out 20% of the rows for testing; the seed is fixed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Defining signature of the model

We can build the model signature explicitly, using MLflow's schema classes (`Schema`, `ColSpec`, `TensorSpec`, `ParamSchema` and `ParamSpec`).

In [None]:
# Input schema: one column per feature of the training frame.
# The iris features are float64 in pandas, which corresponds to MLflow's
# "double" type. "float" means 32-bit and float64 input is not safely
# convertible to it, so schema enforcement would reject the data at predict time.
input_schema = Schema([ColSpec(name=feature_name, type="double") for feature_name in x_train.columns])

# Output schema: a 1-D tensor of variable length holding the predicted class labels.
# NOTE(review): the output tensor is named "model_id", the same as the parameter
# below; presumably a name describing the prediction was intended - confirm.
output_schema = Schema([TensorSpec(name="model_id", shape=(-1,), type=np.dtype(np.int32))])

# Param schema: "model_id" selects which wrapped model to use; defaults to "rfc".
param_schema = ParamSchema(params=[ParamSpec(name="model_id", dtype="string", default="rfc")])

# Combine inputs, outputs and params into the model signature.
model_signature = ModelSignature(inputs=input_schema, outputs=output_schema, params=param_schema)
print(model_signature)

Alternatively, we can let MLflow infer the signature from an example of the model's input, output and parameters using `infer_signature`.

In [None]:
# Two candidate estimators, keyed by the identifier later passed as "model_id".
models = {
    "rfc": RandomForestClassifier(),
    "gbc": GradientBoostingClassifier(),
}
model = MultiModel(models=models)

# Train the wrapped estimators on the training split.
model.fit_estimators(x_train, y_train)

# Sample prediction on the test split; used as the example output below.
output = model.predict(None, x_test, params={"model_id": "rfc"})

# Let MLflow derive the signature from example input, output and params.
model_signature = infer_signature(
    model_input=x_train,
    model_output=output,
    params={"model_id": "rfc"},
)

print(model_signature)

### Training and Logging the model with Signature

In [None]:
# Fresh, unfitted estimators wrapped in a single MultiModel instance.
models = {
    "rfc": RandomForestClassifier(),
    "gbc": GradientBoostingClassifier(),
}
multi_model = MultiModel(models=models)

# Source file that defines MultiModel; it is handed to log_model via code_path.
project_dir = get_root_project()
code_path = project_dir / "mlflow_for_ml_dev/experiments/custom_models.py"

# Fit the estimators and log the wrapper inside a single MLflow run.
with mlflow.start_run(run_name="multi_model", experiment_id=experiment.experiment_id) as run:
    multi_model.fit_estimators(x_train, y_train)
    mlflow.pyfunc.log_model(
        artifact_path="multi_model",
        python_model=multi_model,
        signature=model_signature,
        code_path=[code_path],
    )

## Scoring model

Now we can use the same MLflow model to score with either the Random Forest Classifier or the Gradient Boosting Classifier, selecting the one we want through the `model_id` parameter.

In [None]:
# Load the model logged by the run above back as a generic pyfunc model.
model_uri = f"runs:/{run.info.run_id}/multi_model"
loaded_model = mlflow.pyfunc.load_model(model_uri=model_uri)

In [None]:
# Score the test split with model_id="rfc" (the key given to the
# RandomForestClassifier); presumably MultiModel dispatches on this value.
loaded_model.predict(x_test, params={"model_id":"rfc"})

In [None]:
# Score the test split with model_id="gbc" (the key given to the
# GradientBoostingClassifier); presumably MultiModel dispatches on this value.
loaded_model.predict(x_test, params={"model_id":"gbc"})

: 