# Random Forest com MLflow: Registro e Comparação de Modelos em Classificação de Crédito

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
#from sklearn.metrics import *
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, log_loss, ConfusionMatrixDisplay, RocCurveDisplay
import matplotlib.pyplot as plt

import mlflow
import mlflow.sklearn

In [3]:
credito = pd.read_csv('Credit.csv')
credito.shape

(1000, 21)

In [5]:
credito.head()

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,class
0,<0,6,'critical/other existing credit',radio/tv,1169,'no known savings',>=7,4,'male single',none,...,'real estate',67,none,own,2,skilled,1,yes,yes,good
1,0<=X<200,48,'existing paid',radio/tv,5951,<100,1<=X<4,2,'female div/dep/mar',none,...,'real estate',22,none,own,1,skilled,1,none,yes,bad
2,'no checking',12,'critical/other existing credit',education,2096,<100,4<=X<7,2,'male single',none,...,'real estate',49,none,own,1,'unskilled resident',2,none,yes,good
3,<0,42,'existing paid',furniture/equipment,7882,<100,4<=X<7,2,'male single',guarantor,...,'life insurance',45,none,'for free',1,skilled,2,none,yes,good
4,<0,24,'delayed previously','new car',4870,<100,1<=X<4,3,'male single',none,...,'no known property',53,none,'for free',2,skilled,2,none,yes,bad


In [7]:
for col in credito.columns:
    if credito[col].dtype == 'object':
        credito[col] = credito[col].astype('category').cat.codes

In [9]:
credito.head()

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,class
0,2,6,1,7,1169,0,3,4,3,2,...,2,67,1,1,2,3,1,1,1,1
1,1,48,3,7,5951,3,0,2,0,2,...,2,22,1,1,1,3,1,0,1,0
2,0,12,1,4,2096,3,1,2,3,2,...,2,49,1,1,1,2,2,0,1,1
3,2,42,3,5,7882,3,1,2,3,1,...,0,45,1,0,1,3,2,0,1,1
4,2,24,2,1,4870,3,0,3,3,2,...,1,53,1,0,2,3,2,0,1,0


In [11]:
previsores = credito.iloc[:,0:20].values
classe = credito.iloc[:,20].values

In [13]:
X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(previsores,classe,
                                        test_size=0.3,random_state=123)

In [15]:
resultados = []

def treina_rf(n_estimators, max_depth, min_samples_split):
    mlflow.set_experiment("RFexperimento")
    with mlflow.start_run():
        modelorf = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            random_state=42
        )
        modelorf.fit(X_treinamento, y_treinamento)
        previsoes = modelorf.predict(X_teste)

        # Log de hiperparâmetros
        mlflow.log_param("n_estimators", n_estimators)
        mlflow.log_param("max_depth", max_depth)
        mlflow.log_param("min_samples_split", min_samples_split)

        # Métricas
        acuracia = accuracy_score(y_teste, previsoes)
        recall = recall_score(y_teste, previsoes)
        precision = precision_score(y_teste, previsoes)
        f1 = f1_score(y_teste, previsoes)
        auc = roc_auc_score(y_teste, previsoes)
        log = log_loss(y_teste, previsoes)

        mlflow.log_metric("acuracia", acuracia)
        mlflow.log_metric("recall", recall)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric("f1", f1)
        mlflow.log_metric("auc", auc)
        mlflow.log_metric("log", log)

        # Gráficos
        ConfusionMatrixDisplay.from_estimator(modelorf, X_teste, y_teste)
        plt.savefig("confusionrf.png")
        plt.close()

        RocCurveDisplay.from_estimator(modelorf, X_teste, y_teste)
        plt.savefig("rocfr.png")
        plt.close()

        mlflow.log_artifact("confusionrf.png")
        mlflow.log_artifact("rocfr.png")

        mlflow.sklearn.log_model(modelorf, "ModeloRF")

        run_id = mlflow.active_run().info.run_uuid
        print(f"Modelo treinado com n_estimators={n_estimators}, max_depth={max_depth}, min_samples_split={min_samples_split}")
        print("Run ID:", run_id)

    mlflow.end_run()

    # Salvar os resultados para análise posterior
    resultados.append({
        "n_estimators": n_estimators,
        "max_depth": max_depth,
        "min_samples_split": min_samples_split,
        "acuracia": acuracia,
        "recall": recall,
        "precision": precision,
        "f1": f1,
        "auc": auc,
        "log_loss": log,
        "run_id": run_id
    })

In [17]:
# ====== execução dos testes ======

arvores = [50, 100, 500, 750, 1000]
profundidades = [None, 10, 20]
min_samples = [2, 5, 10]

for n_estimators in arvores:
    for max_depth in profundidades:
        for min_samples_split in min_samples:
            treina_rf(n_estimators, max_depth, min_samples_split)



2025/04/22 09:11:39 INFO mlflow.tracking.fluent: Experiment with name 'RFexperimento' does not exist. Creating a new experiment.


Modelo treinado com n_estimators=50, max_depth=None, min_samples_split=2
Run ID: 2ddbdbddb24a4fcfb6a6ba2fa6858b86




Modelo treinado com n_estimators=50, max_depth=None, min_samples_split=5
Run ID: 45ef29432b764a6fab371006d518fd67




Modelo treinado com n_estimators=50, max_depth=None, min_samples_split=10
Run ID: 970fce7f17644688a28db527aa07b65c




Modelo treinado com n_estimators=50, max_depth=10, min_samples_split=2
Run ID: ac2868e6da7941e7a0a555707746f0e1




Modelo treinado com n_estimators=50, max_depth=10, min_samples_split=5
Run ID: 5dd857d8962347e783e96bfade05b81b




Modelo treinado com n_estimators=50, max_depth=10, min_samples_split=10
Run ID: 18142a75356243f292b2ea52e7ef0ab1




Modelo treinado com n_estimators=50, max_depth=20, min_samples_split=2
Run ID: 8c4cdbc6c28b4fcb83c545bd227015da




Modelo treinado com n_estimators=50, max_depth=20, min_samples_split=5
Run ID: 86822e22e31847d5b0e3314459d6a892




Modelo treinado com n_estimators=50, max_depth=20, min_samples_split=10
Run ID: 250a550e6a7542a7805e41440b59e439




Modelo treinado com n_estimators=100, max_depth=None, min_samples_split=2
Run ID: 59b8f0e24a3d474bb40797557a8f57dd




Modelo treinado com n_estimators=100, max_depth=None, min_samples_split=5
Run ID: a1a363ce68ba4ebc83b609269a657a8c




Modelo treinado com n_estimators=100, max_depth=None, min_samples_split=10
Run ID: d9a49fb3e52a40f5baa52dae451ee97a




Modelo treinado com n_estimators=100, max_depth=10, min_samples_split=2
Run ID: da6c2aa6f12148aa8fcf2d3161cc92c2




Modelo treinado com n_estimators=100, max_depth=10, min_samples_split=5
Run ID: 6075604463c84f70ac0bc1d068739c45




Modelo treinado com n_estimators=100, max_depth=10, min_samples_split=10
Run ID: fc94899796be4befa7b2ed5d68252c20




Modelo treinado com n_estimators=100, max_depth=20, min_samples_split=2
Run ID: 205443ec0ad94b3da22be266cf1be2e2




Modelo treinado com n_estimators=100, max_depth=20, min_samples_split=5
Run ID: 6708ca72d4bf44d5b68f956e6d231d65




Modelo treinado com n_estimators=100, max_depth=20, min_samples_split=10
Run ID: 0220c05f5fb6446ea9fe848fe2632c9b




Modelo treinado com n_estimators=500, max_depth=None, min_samples_split=2
Run ID: 2e1f9692caf44b4fb5430bd68afb4493




Modelo treinado com n_estimators=500, max_depth=None, min_samples_split=5
Run ID: 18ef493bda0f4124bd3aa7082072b812




Modelo treinado com n_estimators=500, max_depth=None, min_samples_split=10
Run ID: e63fe3b4de67410c9c8ad825be8a31f5




Modelo treinado com n_estimators=500, max_depth=10, min_samples_split=2
Run ID: ef2229113c30487298073c216b28db2c




Modelo treinado com n_estimators=500, max_depth=10, min_samples_split=5
Run ID: f0c866a7193d4e248a8216c8e4f8c7ca




Modelo treinado com n_estimators=500, max_depth=10, min_samples_split=10
Run ID: cecc020425324bd296d2df6adf0f9d01




Modelo treinado com n_estimators=500, max_depth=20, min_samples_split=2
Run ID: e63f9b34624a4f8695cac8ecdd7eb647




Modelo treinado com n_estimators=500, max_depth=20, min_samples_split=5
Run ID: b7eef5a7b54b4587bfca3f82142942d2




Modelo treinado com n_estimators=500, max_depth=20, min_samples_split=10
Run ID: 093bea0c6e484c2997470e61da5a9e6e




Modelo treinado com n_estimators=750, max_depth=None, min_samples_split=2
Run ID: 5c24c2ca78f246ed8421cfff422c89a3




Modelo treinado com n_estimators=750, max_depth=None, min_samples_split=5
Run ID: 22b57a4b96544f29897941e2c8492de2




Modelo treinado com n_estimators=750, max_depth=None, min_samples_split=10
Run ID: a86bc8d6201f45feb6d154d5d15b06f7




Modelo treinado com n_estimators=750, max_depth=10, min_samples_split=2
Run ID: a6f349cc8eda45669e4fd0406e9c2654




Modelo treinado com n_estimators=750, max_depth=10, min_samples_split=5
Run ID: 27856835f09f43939c38b96713c53704




Modelo treinado com n_estimators=750, max_depth=10, min_samples_split=10
Run ID: d5a32cc47e4444218b33a6e30fae98c9




Modelo treinado com n_estimators=750, max_depth=20, min_samples_split=2
Run ID: 851806829d114626b023eebef4edf16a




Modelo treinado com n_estimators=750, max_depth=20, min_samples_split=5
Run ID: a9899340561d47ef805a1c341a6ad2f1




Modelo treinado com n_estimators=750, max_depth=20, min_samples_split=10
Run ID: b63fcf6742404697883cfd7378b4b310




Modelo treinado com n_estimators=1000, max_depth=None, min_samples_split=2
Run ID: 1a7ebf442fc649628c52b59c2566e4b2




Modelo treinado com n_estimators=1000, max_depth=None, min_samples_split=5
Run ID: 4239f85e4b704493a4baefac80d7f0f5




Modelo treinado com n_estimators=1000, max_depth=None, min_samples_split=10
Run ID: 60a8a2229b5b43cd9a7806a40ee3230a




Modelo treinado com n_estimators=1000, max_depth=10, min_samples_split=2
Run ID: 6ccd5b88f6c940c1ba5703185092d0d2




Modelo treinado com n_estimators=1000, max_depth=10, min_samples_split=5
Run ID: af33b6c20de44a8a98f433ea2c794e45




Modelo treinado com n_estimators=1000, max_depth=10, min_samples_split=10
Run ID: 11ca710a69f84a509d12a5be34cb545b




Modelo treinado com n_estimators=1000, max_depth=20, min_samples_split=2
Run ID: f3f81d8e4da841049d2ddbc6adeb699e




Modelo treinado com n_estimators=1000, max_depth=20, min_samples_split=5
Run ID: 8988c54047004aae8854b5c0bc81f095




Modelo treinado com n_estimators=1000, max_depth=20, min_samples_split=10
Run ID: 94eac843769a4aaabdc3361b0b7b8e04


In [21]:
# ====== Selecionar o melhor modelo e registrar ======
from mlflow import register_model

df_resultados = pd.DataFrame(resultados)

melhor_metrica = "acuracia"
melhor_modelo = df_resultados.sort_values(by=melhor_metrica, ascending=False).iloc[0]
melhor_run_id = melhor_modelo["run_id"]

print("\n📊 Top 5 modelos por acurácia:")
print(df_resultados.sort_values(by=melhor_metrica, ascending=False).head())

print(f"\n✅ Melhor modelo com base em {melhor_metrica}:")
print(melhor_modelo)

# Registrar o melhor modelo no Model Registry
model_uri = f"runs:/{melhor_run_id}/ModeloRF"
nome_do_modelo = "MelhorModeloRF"
register_model(model_uri, nome_do_modelo)

print(f"\n📦 Modelo registrado no MLflow com nome '{nome_do_modelo}' a partir do run_id: {melhor_run_id}")



📊 Top 5 modelos por acurácia:
    n_estimators  max_depth  min_samples_split  acuracia  recall  precision  \
10           100        NaN                  5  0.773333   0.945   0.768293   
16           100       20.0                  5  0.773333   0.945   0.768293   
41          1000       10.0                 10  0.770000   0.960   0.758893   
27           750        NaN                  2  0.770000   0.945   0.765182   
36          1000        NaN                  2  0.770000   0.940   0.767347   

          f1     auc  log_loss                            run_id  
10  0.847534  0.6875  8.169895  a1a363ce68ba4ebc83b609269a657a8c  
16  0.847534  0.6875  8.169895  6708ca72d4bf44d5b68f956e6d231d65  
41  0.847682  0.6750  8.290040  11ca710a69f84a509d12a5be34cb545b  
27  0.845638  0.6825  8.290040  5c24c2ca78f246ed8421cfff422c89a3  
36  0.844944  0.6850  8.290040  1a7ebf442fc649628c52b59c2566e4b2  

✅ Melhor modelo com base em acuracia:
n_estimators                                      100

Successfully registered model 'MelhorModeloRF'.
Created version '1' of model 'MelhorModeloRF'.
