In [2]:
from datetime import datetime
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import (
    GridSearchCV
)
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import mlflow
from mlflow.models import infer_signature

%env AWS_ACCESS_KEY_ID=root  
%env AWS_SECRET_ACCESS_KEY=12345678
%env MLFLOW_S3_ENDPOINT_URL=http://localhost:9010

env: AWS_ACCESS_KEY_ID=root
env: AWS_SECRET_ACCESS_KEY=12345678
env: MLFLOW_S3_ENDPOINT_URL=http://localhost:9010


Configuramos el experimento con MLflow

In [3]:
MLFLOW_TRACKING_URI = "http://localhost:5000"
MLFLOW_EXPERIMENT_NAME = "machine_failure_prediction_svc"

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
experiment = mlflow.get_experiment_by_name(MLFLOW_EXPERIMENT_NAME)

if experiment is None:
    experiment_id = mlflow.create_experiment(MLFLOW_EXPERIMENT_NAME)
else:
    experiment_id = experiment.experiment_id
print(f"Using experiment name {MLFLOW_EXPERIMENT_NAME} ID: {experiment_id}")

Using experiment name machine_failure_prediction_svc ID: 1


Cargamos el dataset balanceado y dividido en train y test.

In [6]:
dataset_train = pd.read_csv('./data_train_balanced.csv')
dataset_test = pd.read_csv('./data_test.csv')

X_train = dataset_train.drop(columns=['target'])
y_train = dataset_train['target']

X_test = dataset_test.drop(columns=['target'])
y_test = dataset_test['target']

dataset_test.head()

Unnamed: 0,airtemperature_k,process_temperature_k,rotational_speed_rpm,torque_nm,tool_wear_min,type_l,type_m,target
0,300.5,309.8,1345,62.7,153,True,False,0
1,303.7,312.4,1513,40.1,135,True,False,0
2,302.5,311.4,1559,37.6,209,True,False,0
3,295.6,306.3,1509,35.8,60,False,False,0
4,300.5,310.0,1358,60.4,102,False,False,0


Creamos el pipeline con los hiperparámetros que queremos probar.

In [7]:
X_train['type_l'] = X_train['type_l'].astype('category')
X_train['type_m'] = X_train['type_m'].astype('category')

X_test['type_l'] = X_test['type_l'].astype('category')
X_test['type_m'] = X_test['type_m'].astype('category')

numeric_features = X_train.select_dtypes(exclude=['category']).columns
categorical_features = X_train.select_dtypes(include=['category']).columns


preprocessor_svm = ColumnTransformer(
    transformers=[("num", StandardScaler(), numeric_features),
                  ("cat", "passthrough", categorical_features)]
)

# Build pipeline: preprocessing + classifier
model_svm = Pipeline(steps=[
    ("preprocessor", preprocessor_svm),
    ("classifier", SVC())
])

Busqueda de hiperparametros con GridSearchCV

In [8]:
run_name_parent = \
    "best_hyperparams_" + datetime.today().strftime('%Y/%m/%d-%H:%M:%S')

with mlflow.start_run(
    experiment_id=experiment_id, run_name=run_name_parent, nested=True):

    param_grid = {
        'classifier__C': [0.01, 0.11, 10],
        'classifier__kernel': ['linear']
    }
    grid_search = GridSearchCV(model_svm, param_grid, cv=5, scoring='accuracy')
    grid_search.fit(X_train, y_train)

    mlflow.log_params(grid_search.best_params_)
    mlflow.log_metric("best_train_accuracy", grid_search.best_score_)

    mlflow.set_tags(
        tags={
            "project": "Predictive Maintenance",
            "optimizer_engine": "GridSearchCV",
            "model_family": "sklearn",
            "feature_set_version": 1,
        }
    )

    # Hacemos fit con los mejores parametros sobre el dataset completo
    model_svm = Pipeline(steps=[
        ("preprocessor", preprocessor_svm),
        ("classifier", SVC(C=grid_search.best_params_['classifier__C'],
                            kernel=grid_search.best_params_['classifier__kernel']))
    ])

    test_score = model_svm.fit(X_train, y_train).score(X_test, y_test)
    mlflow.log_metric("best_test_accuracy", test_score)

    print("Best Accuracy train :", grid_search.best_score_)
    print("Best accuracy test:", test_score)
    print("Best params:", grid_search.best_params_)

    # guardamos el modelo como artefacto
    # Guardamos el artefacto del modelo
    artifact_path = "model"

    signature = infer_signature(X_train, model_svm.predict(X_train))

    mlflow.sklearn.log_model(
        sk_model=model_svm,
        artifact_path=artifact_path,
        signature=signature,
        serialization_format='cloudpickle',
        registered_model_name="machine_failure_svm",
        metadata={"model_data_version": 1}
    )

    # Obtenemos la ubicación del modelo guardado en MLFlow
    model_uri = mlflow.get_artifact_uri(artifact_path)
    print(f"Model artifact saved in: {model_uri}")

Best Accuracy train : 0.8388538188112801
Best accuracy test: 0.849
Best params: {'classifier__C': 10, 'classifier__kernel': 'linear'}


Successfully registered model 'machine_failure_svm'.
2025/10/14 12:48:05 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: machine_failure_svm, version 1
Successfully registered model 'machine_failure_svm'.
2025/10/14 12:48:05 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: machine_failure_svm, version 1
Created version '1' of model 'machine_failure_svm'.
Created version '1' of model 'machine_failure_svm'.


Model artifact saved in: s3://mlflow/1/b8e46fff288849f6a80f5f0a7fb2532f/artifacts/model
🏃 View run best_hyperparams_2025/10/14-12:47:25 at: http://localhost:5000/#/experiments/1/runs/b8e46fff288849f6a80f5f0a7fb2532f
🧪 View experiment at: http://localhost:5000/#/experiments/1


Registramos el modelo `champion` que usaremos en produccion

In [20]:
def save_model_with_alias(
    model, 
    model_name,
    alias,
    run_context=None,
    artifact_path="model",
    description=None,
    tags=None,
    metadata=None
):
    """
    Save a model to MLflow registry and set an alias for easy retrieval
    
    Parameters:
    - model: The trained model to save
    - model_name: Name for the registered model
    - alias: Alias to set (e.g., 'champion', 'challenger', 'production')
    - run_context: MLflow run context (if saving within a run)
    - artifact_path: Path where model artifacts are stored
    - description: Description for the registered model
    - tags: Dictionary of tags to add to the model version
    - metadata: Dictionary of metadata for the model
    
    Returns:
    - model_version: The created model version object
    """
    
    client = mlflow.MlflowClient()
    
    # Step 1: Ensure the registered model exists
    try:
        registered_model = client.get_registered_model(model_name)
        print(f"✅ Found existing registered model: {model_name}")
    except Exception:
        print(f"📝 Creating new registered model: {model_name}")
        client.create_registered_model(
            name=model_name,
            description=description or f"Machine learning model: {model_name}"
        )
    
    # Step 2: Log the model (within or outside a run)
    if run_context or mlflow.active_run():
        # We're inside an MLflow run
        print(f"💾 Logging model within active run...")
        
        # Create signature if we have training data
        signature = None
        if 'X_train' in globals():
            signature = infer_signature(X_train, model.predict(X_train))
        
        model_info = mlflow.sklearn.log_model(
            sk_model=model,
            artifact_path=artifact_path,
            signature=signature,
            registered_model_name=model_name,
            metadata=metadata or {}
        )
        
        model_uri = model_info.model_uri
        run_id = mlflow.active_run().info.run_id
        
    else:
        # We need to create a run
        print(f"📦 Creating new run to save model...")
        with mlflow.start_run() as run:
            # Create signature if we have training data
            signature = None
            if 'X_train' in globals():
                signature = infer_signature(X_train, model.predict(X_train))
            
            model_info = mlflow.sklearn.log_model(
                sk_model=model,
                artifact_path=artifact_path,
                signature=signature,
                registered_model_name=model_name,
                metadata=metadata or {}
            )
            
            model_uri = model_info.model_uri
            run_id = run.info.run_id
    
    # Step 3: Get the model version that was just created
    versions = client.search_model_versions(f"name='{model_name}'")
    latest_version = max(versions, key=lambda x: int(x.version))
    
    # Step 4: Add tags if provided
    if tags:
        print(f"🏷️  Adding tags to model version {latest_version.version}...")
        for key, value in tags.items():
            client.set_model_version_tag(
                name=model_name,
                version=latest_version.version,
                key=key,
                value=str(value)
            )
    
    # Step 5: Set the alias
    print(f"🎯 Setting alias '{alias}' for model version {latest_version.version}...")
    client.set_registered_model_alias(
        name=model_name,
        alias=alias,
        version=latest_version.version
    )
    
    print(f"✅ Model saved successfully!")
    print(f"   - Model Name: {model_name}")
    print(f"   - Version: {latest_version.version}")
    print(f"   - Alias: {alias}")
    print(f"   - URI: {model_uri}")
    print(f"   - Load with: mlflow.sklearn.load_model('models:/{model_name}@{alias}')")
    
    return latest_version


def load_model_by_alias(model_name, alias="champion"):
    """
    Load model from MLflow registry using model name and alias
    """
    try:
        # Format: models:/model_name@alias
        model_uri = f"models:/{model_name}@{alias}"
        print(f"Loading model from URI: {model_uri}")
        
        model = mlflow.sklearn.load_model(model_uri)
        print(f"✅ Successfully loaded model {model_name} with alias '{alias}'")
        return model
    
    except Exception as e:
        print(f"❌ Error loading model {model_name} with alias '{alias}': {e}")
        return None

In [19]:
model_version = save_model_with_alias(
    model=model_svm,
    model_name="svm_prod", 
    alias="champion",
    tags={"accuracy": 0.95, "version": "v2.0"},
    metadata={"features": ["f1", "f2", "f3"]}
)
print(model_version)

✅ Found existing registered model: svm_prod
📦 Creating new run to save model...


Registered model 'svm_prod' already exists. Creating a new version of this model...
2025/10/14 13:10:55 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: svm_prod, version 2
Registered model 'svm_prod' already exists. Creating a new version of this model...
2025/10/14 13:10:55 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: svm_prod, version 2
Created version '2' of model 'svm_prod'.
Created version '2' of model 'svm_prod'.


🏃 View run shivering-moth-340 at: http://localhost:5000/#/experiments/0/runs/08374961c297447484cc2c76d0a902e9
🧪 View experiment at: http://localhost:5000/#/experiments/0
🏷️  Adding tags to model version 2...
🎯 Setting alias 'champion' for model version 2...
✅ Model saved successfully!
   - Model Name: svm_prod
   - Version: 2
   - Alias: champion
   - URI: models:/m-7a6354995a4746de88fdd44e6afc640e
   - Load with: mlflow.sklearn.load_model('models:/svm_prod@champion')
<ModelVersion: aliases=[], creation_timestamp=1760458255630, current_stage='None', deployment_job_state=<ModelVersionDeploymentJobState: current_task_name='', job_id='', job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', run_id='', run_state='DEPLOYMENT_JOB_RUN_STATE_UNSPECIFIED'>, description='', last_updated_timestamp=1760458255630, metrics=None, model_id=None, name='svm_prod', params=None, run_id='08374961c297447484cc2c76d0a902e9', run_link='', source='models:/m-7a6354995a4746de88fdd44e6afc640e', status='READY', 

Probamos levantar el modelo champion desde mlflow

In [22]:
loaded_model = load_model_by_alias(model_name='svm_prod', alias='champion')
print(loaded_model)

Loading model from URI: models:/svm_prod@champion


Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 2910.29it/s] 

✅ Successfully loaded model svm_prod with alias 'champion'
Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('num', StandardScaler(),
                                                  Index(['airtemperature_k', 'process_temperature_k', 'rotational_speed_rpm',
       'torque_nm', 'tool_wear_min'],
      dtype='object')),
                                                 ('cat', 'passthrough',
                                                  Index(['type_l', 'type_m'], dtype='object'))])),
                ('classifier', SVC(C=10, kernel='linear'))])



