In [1]:
import mlflow
from mlflow.models import infer_signature
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Leitura da base
dataset_path = '../data/raw/heart.csv'
dataset = pd.read_csv(dataset_path)
# Sanity test - 5 primeiros registros
dataset.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [4]:
X = dataset.iloc[:, 0:13].values
y = dataset.iloc[:, 13].values

In [5]:
X_train, X_test, y_train, y_test, = train_test_split(X, y, test_size=0.2, random_state=0) 

In [6]:
# Melhores par√¢metros definidos pelo cross-validation/tunning de hiperpar√¢metros
tree_params = {
    'criterion': 'entropy', 
    'min_samples_leaf':1, 
    'min_samples_split': 5,
    'splitter':'best'
}

rf_params = {
    'criterion': 'entropy',
    'min_samples_leaf': 1,
    'n_estimators': 40
}

gb_params = {
    'criterion': 'friedman_mse', 
    'learning_rate': 0.1,
    'loss': 'exponential',
    'n_estimators': 300
}

knn_params = {
    'n_neighbors': 20,
    'p': 1
}

lr_params = {
    'C': 1.0,
    'solver': 'lbfgs',
    'tol': 0.0001
}

svm_params = {
    'C': 1.5,
    'kernel': 'rbf',
    'tol': 0.001
}

nn_params = {
    'activation': 'relu',
    'batch_size': 10,
    'solver': 'adam'
}


In [9]:
mlflow.set_tracking_uri(uri='http://localhost:5000')
mlflow.set_experiment('(teste) Heart disease experiment')

<Experiment: artifact_location='mlflow-artifacts:/757413839863936445', creation_time=1764852381665, experiment_id='757413839863936445', last_update_time=1764852381665, lifecycle_stage='active', name='(teste) Heart disease experiment', tags={}>

In [10]:
def train_and_log_models(estimator, X_train, X_test, y_train, y_test, params, dataset, dataset_path):

    mlflow_dataset = mlflow.data.from_pandas(
        dataset, 
        source=dataset_path,
        name=dataset_path.split('/')[-1]
    )
    
    estimator_name = estimator.__name__
    estimator = estimator(**params)
    estimator.fit(X_train, y_train)
    y_pred = estimator.predict(X_test)
    
    
    with mlflow.start_run():
        mlflow.log_params(params)
        
        mlflow.log_metric('Accuracy', accuracy_score(y_test, y_pred))
        mlflow.log_metric('Precision', precision_score(y_test, y_pred))
        mlflow.log_metric('Recall', recall_score(y_test, y_pred))
        mlflow.log_metric('F1-Score', f1_score(y_test, y_pred))
        
        mlflow.log_input(mlflow_dataset, context='raw-data')
        
        signature = infer_signature(X_train, estimator.predict(X_test))
        
        model_info = mlflow.sklearn.log_model(
            sk_model=estimator,
            signature=signature, 
            input_example=X_test[[0]],
            registered_model_name=estimator_name
        )
        
            
        print(f'Caminho do artefato registrado: {model_info.artifact_path}')
        print(f'URI do modelo registrado: {model_info.model_uri}')
        

In [11]:
train_and_log_models(DecisionTreeClassifier, X_train, X_test, y_train, y_test, tree_params, dataset, dataset_path)

Registered model 'DecisionTreeClassifier' already exists. Creating a new version of this model...
2025/12/16 16:48:26 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: DecisionTreeClassifier, version 10


Caminho do artefato registrado: mlflow-artifacts:/757413839863936445/models/m-ada1b4ad927349e79d2283b0a2791184/artifacts
URI do modelo registrado: models:/m-ada1b4ad927349e79d2283b0a2791184
üèÉ View run resilient-vole-848 at: http://localhost:5000/#/experiments/757413839863936445/runs/00304af731114d5bb320e9a12e774c97
üß™ View experiment at: http://localhost:5000/#/experiments/757413839863936445


Created version '10' of model 'DecisionTreeClassifier'.
