## Configuración

In [0]:
import yaml
from pathlib import Path
import mlflow
import mlflow.sklearn
from sklearn.metrics import roc_auc_score
from mlflow.models.signature import infer_signature
from mlflow.tracking import MlflowClient
from models import FraudRandomForest
from config_loader import Config

# Load the project's path and parameter configuration through the Config helper.
paths = Config.get_paths()
params = Config.get_params()

# Feature-engineering settings: the target column name and the list of
# feature column names (both are later passed to DataFrame.select()).
_fe_cfg = params['feature_engineering']
target_col = _fe_cfg['target_column']
features_list = _fe_cfg['features']

# Model settings, plus the three-part registered-model name built as
# <catalog>.<feature_store schema>.<model_name>.
model_params = params['model_params']
_catalog = paths['catalog']
_fs_schema = paths['schemas']['feature_store']
model_full_name = f"{_catalog}.{_fs_schema}.{model_params['model_name']}"

## Extraer datos

In [0]:
# Fully qualified names (<catalog>.<schema>.<table>) of the train/test feature tables.
train_path = f"{paths['catalog']}.{paths['schemas']['feature_store']}.{paths['tables']['features']['train_set']}"
test_path = f"{paths['catalog']}.{paths['schemas']['feature_store']}.{paths['tables']['features']['test_set']}"


def _load_features_and_target(table_name):
    """Read a table once and split it into a feature frame and a target array.

    Features and target are collected in a single Spark action so that row i
    of X always corresponds to element i of y. Two independent
    ``read -> select -> toPandas`` actions carry no row-order guarantee
    relative to each other, and they also scan and collect the table twice.

    Args:
        table_name: Fully qualified name of the table to read.

    Returns:
        A ``(X, y)`` tuple: ``X`` is a pandas DataFrame holding the
        ``features_list`` columns, ``y`` is a 1-D NumPy array holding the
        ``target_col`` values.
    """
    # De-duplicate while preserving order so the target is selected only once
    # even if it also appears in the feature list.
    columns = list(dict.fromkeys([*features_list, target_col]))
    frame = spark.read.table(table_name).select(*columns).toPandas()
    return frame[list(features_list)], frame[target_col].to_numpy()


X_train, y_train = _load_features_and_target(train_path)
X_test, y_test = _load_features_and_target(test_path)

## Entrenar modelo

In [0]:
%run "./evaluation"

In [0]:
# Train the model inside an MLflow run, gate it on the Go/No-Go check and,
# when it passes, log it, register it and point the configured alias at the
# new version.
user_name = "matiasadell@hotmail.com"
# NOTE(review): the original inline comment read "full path in config" —
# presumably the complete experiment path (including the user folder) is meant
# to come from config instead of the hard-coded user above; confirm.
experiment_path = f"/Users/{user_name}/{paths['mlflow']['experiment_prefix']}"
mlflow.set_experiment(experiment_path)

with mlflow.start_run(run_name="model") as run:
    # Log every parameter defined in the YAML
    mlflow.log_params(model_params)

    # Initialise and train using the external class
    fraud_obj = FraudRandomForest(
        n_estimators=model_params['n_estimators'],
        max_depth=model_params['max_depth'],
    )
    fraud_obj.fit(X_train, y_train)

    # Go/No Go: score the positive-class probabilities on the test set.
    # check_go_no_go is not defined in this notebook's visible cells —
    # presumably it is brought in by the `%run "./evaluation"` cell.
    probs = fraud_obj.predict_proba(X_test)[:, 1]
    is_go, auc, threshold = check_go_no_go(y_test, probs)

    mlflow.log_metric("auc_roc", auc)

    if not is_go:
        # Raising inside the `with` block lets the exception propagate out of
        # the run context, so the notebook stops here and nothing is registered.
        raise RuntimeError(f"NO GO: El AUC ({auc:.4f}) no alcanzó el umbral de {threshold}.")

    print(f"✅ GO: El AUC ({auc:.4f}) superó el umbral de {threshold}. Registrando modelo...")

    # The signature is only needed when the model is actually logged, so it is
    # inferred on the GO path only.
    signature = infer_signature(X_train, fraud_obj.predict(X_test))

    # Log the underlying estimator (fraud_obj.clf) as an artifact of this run
    mlflow.sklearn.log_model(
        sk_model=fraud_obj.clf,
        artifact_path="model",
        signature=signature
    )

    # Register in Unity Catalog (Model Registry), per the original comment.
    # NOTE(review): no registry URI is set in the visible cells; confirm the
    # workspace default points at Unity Catalog.
    model_uri = f"runs:/{run.info.run_id}/model"
    result = mlflow.register_model(model_uri=model_uri, name=model_full_name)

    # Assign the alias so that the deploy endpoint picks up this version
    client = MlflowClient()
    client.set_registered_model_alias(
        name=model_full_name,
        alias=model_params['alias'],
        version=result.version
    )
    print(f"🚀 Modelo {model_full_name} registrado exitosamente (v{result.version}) con alias '{model_params['alias']}'")
        