<a href="https://colab.research.google.com/github/jpantojaj/Credit_Scoring_Specialization/blob/main/Sesi%C3%B3n_4_MLFlow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **1. Registro de modelos con MlFlow**

In [1]:
!pip install mlflow --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m25.0/25.0 MB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.8/147.8 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m128.2/128.2 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.1/60.1 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.1/106.1 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.4/84.4 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━

In [2]:
import mlflow
from mlflow.models import infer_signature
from mlflow.tracking import MlflowClient
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
import warnings

In [3]:
# informacion del modelo
model_name = "modelo_clase"

In [4]:
# preprocesamiento de datos
X, y = datasets.load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# registrar experimento
experiment_info = mlflow.set_experiment(model_name)
experiment_id = int(dict(experiment_info)["experiment_id"])

2024/05/29 00:20:08 INFO mlflow.tracking.fluent: Experiment with name 'modelo_clase' does not exist. Creating a new experiment.


In [6]:
# registro del modelo con mlflow
with mlflow.start_run(experiment_id=experiment_id):
    # estimacion del modelo
    model_rf = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=42)
    model_rf.fit(X_train, y_train)

    # parametros del modelo
    params = model_rf.get_params()

    # metricas del modelo
    y_pred_train = model_rf.predict(X_train)
    y_pred_test = model_rf.predict(X_test)
    metricas = {"accuracy_train": accuracy_score(y_train, y_pred_train),
                "accuracy_test": accuracy_score(y_test, y_pred_test)}

    # log parametros del modelo
    mlflow.log_params(params)

    # log metricas
    mlflow.log_metrics(metricas)

    # log firma de input y outpur
    signature = infer_signature(X_train, model_rf.predict(X_train))

    # registro del modelo
    model_info = mlflow.sklearn.log_model(model_rf,
                                          artifact_path=model_name,
                                          registered_model_name=model_name,
                                          signature=signature)

    # informacion del modelo
    print(model_info.model_uri)

runs:/b20534cc399249adbadf5aeb02f92680/modelo_clase


Successfully registered model 'modelo_clase'.
Created version '1' of model 'modelo_clase'.


In [7]:
# ver modelos registrados y versiones
client = MlflowClient()

# lista de modelos registrados
print("Lista de todos los modelo registrados:")
print("=" * 80)
for model in client.search_registered_models():
  print(model)
print()

# lista de modelos y versiones
print(f"Lista de versiones del modelo {model_name}:")
print("=" * 80)
for version in client.search_model_versions(f"name='{model_name}'"):
  print(version)

Lista de todos los modelo registrados:
<RegisteredModel: aliases={}, creation_timestamp=1716942135265, description=None, last_updated_timestamp=1716942135269, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1716942135269, current_stage='None', description=None, last_updated_timestamp=1716942135269, name='modelo_clase', run_id='b20534cc399249adbadf5aeb02f92680', run_link=None, source='file:///content/mlruns/198174227049351734/b20534cc399249adbadf5aeb02f92680/artifacts/modelo_clase', status='READY', status_message=None, tags={}, user_id=None, version=1>], name='modelo_clase', tags={}>

Lista de versiones del modelo modelo_clase:
<ModelVersion: aliases=[], creation_timestamp=1716942135269, current_stage='None', description=None, last_updated_timestamp=1716942135269, name='modelo_clase', run_id='b20534cc399249adbadf5aeb02f92680', run_link=None, source='file:///content/mlruns/198174227049351734/b20534cc399249adbadf5aeb02f92680/artifacts/modelo_clase', status='READY', status_m

## **2. Mostrar información registrada del modelo**

In [8]:
# obtener informacion del modelo registrado
mlflow.search_runs(experiment_ids=[str(experiment_id)])

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.accuracy_test,metrics.accuracy_train,params.max_features,params.min_impurity_decrease,...,params.min_samples_leaf,params.warm_start,params.ccp_alpha,params.criterion,params.min_weight_fraction_leaf,tags.mlflow.log-model.history,tags.mlflow.source.type,tags.mlflow.source.name,tags.mlflow.user,tags.mlflow.runName
0,b20534cc399249adbadf5aeb02f92680,198174227049351734,FINISHED,file:///content/mlruns/198174227049351734/b205...,2024-05-29 00:22:09.646000+00:00,2024-05-29 00:22:15.274000+00:00,1.0,0.958333,sqrt,0.0,...,1,False,0.0,gini,0.0,"[{""run_id"": ""b20534cc399249adbadf5aeb02f92680""...",LOCAL,/usr/local/lib/python3.10/dist-packages/colab_...,root,beautiful-roo-247


## **3. Carga de modelo registrado para predicciones**

In [9]:
# obtener ultima version del modelo
model_version = client.get_latest_versions(name=model_name)[0].version

# cargar el modelo registrado
loaded_model = mlflow.sklearn.load_model(model_uri=f"models:/{model_name}/{model_version}")

  model_version = client.get_latest_versions(name=model_name)[0].version


In [10]:
# hacer predicciones con modelo registrado
X, y = datasets.load_iris(return_X_y=True)
y_pred = loaded_model.predict(X)
print(y_pred)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1
 1 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 2 2 2 2
 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]
