In [1]:
# aplicacao.py

import mlflow
import pandas as pd
from sklearn.metrics import log_loss, f1_score

def pipeline_aplicacao() -> pd.DataFrame:
    """Score the production dataset with the final model and log results to MLflow.

    Steps:
      1. Load ``dataset_kobe_prod`` from ``catalog`` and keep only the model
         columns, dropping every row with a missing value in any of them
         (including the target).
      2. Load ``modelo_final`` from ``catalog``.
      3. Inside an MLflow run named ``PipelineAplicacao``, predict
         probabilities, threshold them at 0.5, compute log loss and F1, log
         both metrics, and log the scored table as ``resultado_pipeline.csv``.

    ``catalog`` is not defined in this file; it must already exist in the
    executing namespace (presumably a Kedro data catalog -- confirm).

    Returns:
        The cleaned production rows with two extra columns: ``y_proba``
        (predicted probability taken from column 1 of ``predict_proba``)
        and ``y_pred`` (1 when ``y_proba >= 0.5``, otherwise 0).

    Raises:
        ValueError: If no rows remain after dropping missing values.
    """
    # Single source of truth for the columns, so the NaN filter and the
    # column selection can never drift apart.
    feature_cols = [
        "lat", "long", "minutes_remaining",
        "period", "playoffs", "shot_distance",
    ]
    target_col = "shot_made_flag"
    model_cols = [*feature_cols, target_col]

    # 1. Load production data and keep only complete rows of the model columns.
    df = catalog.load("dataset_kobe_prod")
    df_clean = df.dropna(subset=model_cols).loc[:, model_cols]

    # Fail early with a clear message instead of opening an MLflow run that
    # would crash inside the model/metrics on an empty frame.
    if df_clean.empty:
        raise ValueError(
            "dataset_kobe_prod has no rows left after dropping missing values "
            f"in columns {model_cols}"
        )

    # 2. Load the trained model.
    model = catalog.load("modelo_final")

    # 3. Start the MLflow run.
    with mlflow.start_run(run_name="PipelineAplicacao"):
        # 4. Generate predictions.
        # The model must expose ``predict_proba`` and receives the feature
        # columns only; column 1 is used as the positive-class probability.
        X = df_clean[feature_cols]
        y_true = df_clean[target_col].values

        y_proba = model.predict_proba(X)[:, 1]
        y_pred = (y_proba >= 0.5).astype(int)

        # 5. Compute metrics.
        # Passing ``labels`` explicitly keeps log_loss from raising when the
        # production batch happens to contain only one class; the result is
        # unchanged when both classes are present.
        ll = log_loss(y_true, y_proba, labels=[0, 1])
        f1 = f1_score(y_true, y_pred)

        mlflow.log_metric("logloss_prod", ll)
        mlflow.log_metric("f1_prod", f1)

        # 6. Save the scored table as a run artifact.
        df_out = df_clean.copy()
        df_out["y_proba"] = y_proba
        df_out["y_pred"] = y_pred

        mlflow.log_text(df_out.to_csv(index=False), "resultado_pipeline.csv")

    print(f"Pipeline concluído. logloss={ll:.4f}, f1={f1:.4f}")
    return df_out

# Execute the pipeline immediately when this cell/script runs; the call
# evaluates to the scored DataFrame returned by the function.
pipeline_aplicacao()
