# Managing Your Model Registry

## Registering a Model with MLFlow

In [1]:
import mlflow
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Prepare a dataset and train a simple model
X, y = make_classification()
X_train, X_test, y_train, y_test = train_test_split(X, y)
model = RandomForestClassifier().fit(X_train, y_train)

# Setting the MLflow tracking URI and experiment
mlflow.set_tracking_uri('http://127.0.0.1:5000')  # Adjust as necessary
experiment_name = 'MLBook_Experiment4'
mlflow.set_experiment(experiment_name)

# Start an MLflow run and log the model
with mlflow.start_run():
    mlflow.sklearn.log_model(model, "RandomForest")
    run_id = mlflow.active_run().info.run_id

# Register the model in the model registry
model_uri = f"runs:/{run_id}/RandomForest"
mlflow.register_model(model_uri, "RandomForestClassifier")


2024/06/17 16:24:06 INFO mlflow.tracking.fluent: Experiment with name 'MLBook_Experiment4' does not exist. Creating a new experiment.
Successfully registered model 'RandomForestClassifier'.
2024/06/17 16:24:08 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: RandomForestClassifier, version 1
Created version '1' of model 'RandomForestClassifier'.


<ModelVersion: aliases=[], creation_timestamp=1718637848361, current_stage='None', description='', last_updated_timestamp=1718637848361, name='RandomForestClassifier', run_id='1db3d60f0b934badbfc1704ed35e6a24', run_link='', source='/Users/sebasmos/Desktop/Data-Drift-in-Machine-Learning_book/mlruns/2/1db3d60f0b934badbfc1704ed35e6a24/artifacts/RandomForest', status='READY', status_message='', tags={}, user_id='', version='1'>

## Updating Model Registry

In [2]:
# Assume drift_detection_function() is a function that detects drift
def drift_detection_function(X_test, y_test):
    return True

if drift_detection_function(X_test, y_test):
    # Retrain the model with new data
    new_model = RandomForestClassifier().fit(X_train, y_train)
    
    # Log and register the new model version
    with mlflow.start_run():
        mlflow.sklearn.log_model(new_model, "RandomForest")
        new_run_id = mlflow.active_run().info.run_id
        model_uri = f"runs:/{new_run_id}/RandomForest"
        mlflow.register_model(model_uri, "RandomForestClassifier")
        # Promote this model to staging or production after validation
        client = mlflow.tracking.MlflowClient()
        run_id = mlflow.active_run().info.run_id
        client.transition_model_version_stage(
            name="RandomForestClassifier",
            version=client.get_latest_versions("RandomForestClassifier")[0].version,
            stage="Production"
        )


Registered model 'RandomForestClassifier' already exists. Creating a new version of this model...
2024/06/17 16:24:09 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: RandomForestClassifier, version 2
Created version '2' of model 'RandomForestClassifier'.
  version=client.get_latest_versions("RandomForestClassifier")[0].version,
  client.transition_model_version_stage(


## Managing Model Metadata and Access

In [3]:
# When logging the model, include additional metadata
with mlflow.start_run() as run:
    mlflow.log_params(model.get_params())
    mlflow.log_metrics({'accuracy': accuracy_score(y_test, model.predict(X_test))})
    mlflow.set_tags({'data_version': 'v1', 'model_type': 'RandomForest'})

    mlflow.sklearn.log_model(model, "my_model", registered_model_name="RandomForestClassifier")

# Set up access control
client.update_model_version(
    name="RandomForestClassifier",
    version=1,
    description="This version is for users in the analytics team."
)

# Specify who can transition models to production
client.update_registered_model(
    name="RandomForestClassifier",
    description="Production models can only be updated by the ML team."
)


Registered model 'RandomForestClassifier' already exists. Creating a new version of this model...
2024/06/17 16:24:11 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: RandomForestClassifier, version 3
Created version '3' of model 'RandomForestClassifier'.


<RegisteredModel: aliases={}, creation_timestamp=1718637848342, description='Production models can only be updated by the ML team.', last_updated_timestamp=1718637851356, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1718637851334, current_stage='None', description='', last_updated_timestamp=1718637851334, name='RandomForestClassifier', run_id='61835ab485994d6fb5017822863c67e6', run_link='', source='/Users/sebasmos/Desktop/Data-Drift-in-Machine-Learning_book/mlruns/2/61835ab485994d6fb5017822863c67e6/artifacts/my_model', status='READY', status_message='', tags={}, user_id='', version='3'>,
 <ModelVersion: aliases=[], creation_timestamp=1718637849962, current_stage='Production', description='', last_updated_timestamp=1718637849977, name='RandomForestClassifier', run_id='0012ed58f8064c6b941b47921e6a3245', run_link='', source='/Users/sebasmos/Desktop/Data-Drift-in-Machine-Learning_book/mlruns/2/0012ed58f8064c6b941b47921e6a3245/artifacts/RandomForest', status='READY', statu