## Tracking MLflow

In [2]:
import mlflow
from mlflow.models import infer_signature

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# Fetch the Iris features and labels as a pair of arrays
X, y = datasets.load_iris(return_X_y=True)

# Hold out 20% of the samples for evaluation (fixed seed for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameters of the logistic regression, kept in one dict so the same
# values can be passed to the estimator and logged to MLflow
params = dict(
    solver="lbfgs",
    max_iter=500,
    multi_class="auto",
    random_state=8888,
)

# Build the classifier and fit it on the training split
lr = LogisticRegression(**params).fit(X_train, y_train)

# Predicted classes for the held-out split
y_pred = lr.predict(X_test)

# Accuracy of the predictions on the held-out split
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)


In [4]:
# Set our tracking server uri for logging
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")

# Select the MLflow experiment under which the run below is recorded
mlflow.set_experiment("MLflow Quickstart")

# Start an MLflow run; every logging call inside the block is made within this run
with mlflow.start_run():
    # Log the hyperparameters
    mlflow.log_params(params)

    # Log the accuracy computed on the test set (a score, not a loss)
    mlflow.log_metric("accuracy", accuracy)

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "Basic LR model for iris data")

    # Infer the model signature from the training inputs and the model's
    # predictions on those same inputs
    signature = infer_signature(X_train, lr.predict(X_train))

    # Log the model under the "iris_model" artifact path and register it as
    # "tracking-quickstart"; the returned object is kept in `model_info`.
    # NOTE(review): the whole training array is passed as input_example —
    # confirm that a small slice would not be preferable.
    model_info = mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="iris_model",
        signature=signature,
        input_example=X_train,
        registered_model_name="tracking-quickstart",
    )


Registered model 'tracking-quickstart' already exists. Creating a new version of this model...
2024/11/22 15:24:14 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart, version 4
Created version '4' of model 'tracking-quickstart'.
2024/11/22 15:24:14 INFO mlflow.tracking._tracking_service.client: 🏃 View run efficient-turtle-368 at: http://127.0.0.1:5000/#/experiments/767523613443453729/runs/5961b9381632465dafe145c0c0c40b20.
2024/11/22 15:24:14 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/767523613443453729.


In [5]:
# Load the model back for predictions as a generic Python Function model.
# The URI comes from the `model_info` object returned by `log_model`, so this
# cell always loads the model that was just logged. A hard-coded
# "mlflow-artifacts:/<experiment_id>/<run_id>/..." location silently points at
# an older run and breaks on any other tracking server.
model_uri = model_info.model_uri
loaded_model = mlflow.pyfunc.load_model(model_uri=model_uri)

# Predict on the held-out split with the reloaded model
predictions = loaded_model.predict(X_test)

# Column names for the feature part of the result table
iris_feature_names = datasets.load_iris().feature_names

# Features next to the true and the predicted class of each test sample
result = pd.DataFrame(X_test, columns=iris_feature_names)
result["actual_class"] = y_test
result["predicted_class"] = predictions

# Show the first four rows
result[:4]

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),actual_class,predicted_class
0,6.1,2.8,4.7,1.2,1,1
1,5.7,3.8,1.7,0.3,0,0
2,7.7,2.6,6.9,2.3,2,2
3,6.0,2.9,4.5,1.5,1,1


In [None]:
# NOTE(review): a bare `break` outside of a loop is a SyntaxError. It looks like
# it is used on purpose to make "Run All" stop here, before the cells below —
# confirm, and consider removing this cell once the notebook is finalized.
break

# Chargement d'une run spécifique d'une expérience réalisée sur MLflow

## Liste des runs dans une expérience spécifique

In [20]:
from mlflow.tracking import MlflowClient

# Client used to query the MLflow tracking server
client = MlflowClient()

# Name of the experiment whose runs are listed
experiment_name = "MLflow Quickstart"

# Step 1: look the experiment up by name and fail fast when it does not exist
experiment = client.get_experiment_by_name(experiment_name)
if experiment is None:
    raise ValueError(f"Aucune expérience trouvée avec le nom '{experiment_name}'")
experiment_id = experiment.experiment_id
print(f"ID de l'expérience : {experiment_id}")

# Step 2: list the runs of the experiment — no filter, most recent first,
# at most 10 results
runs = client.search_runs(
    experiment_ids=[experiment_id],
    filter_string="",
    max_results=10,
    order_by=["start_time DESC"],
)
print(f"{len(runs)} runs trouvées dans l'expérience '{experiment_name}'.")

# Print the ID, start time and tags of every run found, numbered from 1
for run_number, run in enumerate(runs, start=1):
    print(f"Run {run_number}: ID={run.info.run_id}, Start Time={run.info.start_time}, Tags={run.data.tags}")

ID de l'expérience : 767523613443453729
3 runs trouvées dans l'expérience 'MLflow Quickstart'.
Run 1: ID=1e3ed0bef25a429d9863777008ba11d1, Start Time=1732193496195, Tags={'mlflow.user': 'photoli93', 'mlflow.runName': 'wistful-bear-622', 'mlflow.source.name': '/opt/anaconda3/envs/ocr_p7/lib/python3.8/site-packages/ipykernel_launcher.py', 'mlflow.log-model.history': '[{"run_id": "1e3ed0bef25a429d9863777008ba11d1", "artifact_path": "iris_model", "utc_time_created": "2024-11-21 12:51:36.241990", "model_uuid": "8019383ca1e44fb296188727af386d6a", "flavors": {"python_function": {"model_path": "model.pkl", "predict_fn": "predict", "loader_module": "mlflow.sklearn", "python_version": "3.8.20", "env": {"conda": "conda.yaml", "virtualenv": "python_env.yaml"}}, "sklearn": {"pickled_model": "model.pkl", "sklearn_version": "1.3.2", "serialization_format": "cloudpickle", "code": null}}}]', 'mlflow.source.type': 'LOCAL', 'Training Info': 'Basic LR model for iris data'}
Run 2: ID=e1f860a07d84445ea97b80

## Sélectionner une run spécifique

In [70]:
# Step 3: select a specific run
# `runs` was requested ordered by start time, most recent first, so index 0 is
# the latest run. This cell picks the run at position `run_index` (the third
# most recent one); filtering by tag or run ID would work as well.
run_index = 2

# Fail with an explicit message when the search returned too few runs,
# instead of a bare "list index out of range"
if run_index >= len(runs):
    raise IndexError(
        f"Index de run {run_index} hors limites : {len(runs)} run(s) disponible(s)"
    )

selected_run = runs[run_index]
run_id = selected_run.info.run_id
run_name = selected_run.info.run_name
print(f"Run sélectionnée : ID={run_id}")
print(f"Run sélectionnée : Name={run_name}")

Run sélectionnée : ID=6403a55bd6ab445e9d4a596d07bf5964
Run sélectionnée : Name=abundant-mule-155


## Charger le modèle logué

In [71]:
# Step 4: load something that was logged in the selected run — here, the model
artifact_path = 'iris_model'
model_uri = "runs:/{}/{}".format(run_id, artifact_path)
print(f"URI du modèle : {model_uri}")

# Load the model behind that URI through the pyfunc flavor
loaded_model = mlflow.pyfunc.load_model(model_uri=model_uri)

URI du modèle : runs:/6403a55bd6ab445e9d4a596d07bf5964/iris_model


In [72]:
# Predict on the held-out split with the model loaded from the selected run
predictions = loaded_model.predict(X_test)

# Column names for the feature part of the result table
iris_feature_names = datasets.load_iris().feature_names

# Features next to the true and the predicted class of each test sample
result = pd.DataFrame(X_test, columns=iris_feature_names).assign(
    actual_class=y_test,
    predicted_class=predictions,
)

# Show the first four rows
result.head(4)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),actual_class,predicted_class
0,6.1,2.8,4.7,1.2,1,1
1,5.7,3.8,1.7,0.3,0,0
2,7.7,2.6,6.9,2.3,2,2
3,6.0,2.9,4.5,1.5,1,1
