In [1]:
import os
from dotenv import load_dotenv
import pandas as pd
from typing import Dict
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import mlflow
from mlflow.tracking.client import MlflowClient
from mlflow_utils import create_dataset

# Load variables from a .env file into the process environment.
# NOTE(review): presumably this is where KEY_ID / ACCESS_KEY (read further down) come from — confirm.
load_dotenv()

True

In [2]:
def get_classification_metrics(
    y_true: pd.Series, y_pred: pd.Series, prefix: str) -> Dict[str, float]:
    """
    Compute accuracy, precision, recall and F1 for a set of predictions.

    Every scorer is called with only ``y_true`` and ``y_pred``, so each one
    runs with its own default settings.

    :param y_true: The true target values.
    :param y_pred: The predicted target values.
    :param prefix: Text placed, followed by an underscore, in front of each metric name.
    :return: A dict mapping ``<prefix>_<metric>`` to its score, in the order
        accuracy, precision, recall, f1.
    """

    # (metric suffix, scoring function) pairs; the order fixes the key order of the result.
    scorers = (
        ("accuracy", accuracy_score),
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1", f1_score),
    )

    return {
        f"{prefix}_{metric_name}": scorer(y_true=y_true, y_pred=y_pred)
        for metric_name, scorer in scorers
    }

In [3]:
# Connect to MLflow and MinIO (over HTTP).
mlflow.set_tracking_uri("http://127.0.0.1:5000")

os.environ['MLFLOW_S3_ENDPOINT_URL'] = "http://127.0.0.1:9000"

# Copy the object-store credentials into the AWS_* variables.
# os.getenv returns None for an unset variable, and assigning None into
# os.environ raises an opaque "str expected, not NoneType" TypeError, so
# check explicitly and name the variable that is missing.
for aws_var, source_var in (
    ('AWS_ACCESS_KEY_ID', 'KEY_ID'),
    ('AWS_SECRET_ACCESS_KEY', 'ACCESS_KEY'),
):
    credential = os.getenv(source_var)
    if credential is None:
        raise RuntimeError(
            f"Environment variable '{source_var}' is not set; "
            f"it is required to populate '{aws_var}'."
        )
    os.environ[aws_var] = credential

# Inference

In [4]:
# Name of the registered model that is looked up in the MLflow registry and loaded below.
model_name = "r_model3"

In [5]:
df = create_dataset()

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    df.drop(columns="target"),
    df.loc[:, "target"],
    random_state=42,
    test_size=0.2,
)

# Training columns whose name begins with "feature".
numerical_features = [
    column for column in x_train.columns if column.startswith("feature")
]

In [6]:
client = MlflowClient()

# Fetch every registered version of the model.
# `RegisteredModel.latest_versions` only carries the newest version per stage,
# so query the model-version list directly to really get all of them.
registered_models = client.search_model_versions(
    filter_string=f"name='{model_name}'"
)

# Fail here with a clear message instead of letting a later step choke on an empty list.
if not registered_models:
    raise LookupError(f"No registered versions found for model '{model_name}'")

registered_models

[<ModelVersion: aliases=[], creation_timestamp=1718841769510, current_stage='None', description='', last_updated_timestamp=1718841769510, name='r_model3', run_id='089d26dde9b245fe964a7eb91ca874ea', run_link='', source='s3://mlflow/0/089d26dde9b245fe964a7eb91ca874ea/artifacts/random_forest_classifier2', status='READY', status_message='', tags={}, user_id='', version='1'>]

In [7]:
# Filtrar la última versión
latest_version = max(registered_models, key=lambda x: int(x.version))
latest_version_number = latest_version.version
latest_version_number

'1'

In [8]:
# Build a registry URI of the form models:/<name>/<version> and load that
# model version through MLflow's sklearn flavor.
model_uri = f"models:/{model_name}/{latest_version_number}"
model = mlflow.sklearn.load_model(model_uri=model_uri)

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 9/9 [00:00<00:00, 259.68it/s]  


In [9]:
# Display the loaded model.
model

In [10]:
# Show the model's bound predict method (inspection only; nothing is called here).
model.predict

<bound method Pipeline.predict of Pipeline(steps=[('preprocessing',
                 ColumnTransformer(transformers=[('numerical',
                                                  SimpleImputer(strategy='median'),
                                                  ['feature_0', 'feature_1',
                                                   'feature_2', 'feature_3',
                                                   'feature_4', 'feature_5',
                                                   'feature_6', 'feature_7',
                                                   'feature_8', 'feature_9',
                                                   'feature_10', 'feature_11',
                                                   'feature_12', 'feature_13',
                                                   'feature_14', 'feature_15',
                                                   'feature_16', 'feature_17',
                                                   'feature_18', 'feature_19',
     

In [11]:
# Use the model for predictions on the held-out rows, then place the
# predicted labels next to the true ones (indexed like y_test).
y_pred = model.predict(x_test)
new_data = y_test.to_frame(name="target").assign(predictions=y_pred)
new_data.head()

Unnamed: 0,target,predictions
6252,1,1
4684,1,1
1731,0,0
4742,1,1
4521,1,1


In [12]:
# Score the test-split predictions; every metric name is prefixed with "test_".
metrics = get_classification_metrics(y_true=y_test, y_pred=y_pred, prefix="test")
metrics

{'test_accuracy': 0.902,
 'test_precision': 0.9278996865203761,
 'test_recall': 0.8748768472906404,
 'test_f1': 0.9006085192697769}

In [None]:
# NOTE(review): no cell visible here starts an MLflow run, so this looks like
# a leftover cleanup call — confirm before relying on or removing it.
mlflow.end_run()

In [13]:
# Marker printed when execution reaches the final cell.
print('ok_')

ok_
