In [1]:
# Importando as bibliotecas necessárias
import mlflow
import prefect
from prefect import task, Flow
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Select (or create) the MLflow experiment that the runs below are recorded under.
EXPERIMENT_NAME = "Pipeline com MLFlow e Prefect"
mlflow.set_experiment(experiment_name=EXPERIMENT_NAME)

2024/09/30 07:24:25 INFO mlflow.tracking.fluent: Experiment with name 'Pipeline com MLFlow e Prefect' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///Users/marcosf/Desktop/inteli-exercises/mlruns/837391395039808788', creation_time=1727691865549, experiment_id='837391395039808788', last_update_time=1727691865549, lifecycle_stage='active', name='Pipeline com MLFlow e Prefect', tags={}>

In [3]:
# Load the data (the small iris dataset bundled with scikit-learn)
@task
def carregar_dados():
    """Load the iris dataset and return its features and labels.

    The dataset is requested with ``as_frame=True``.

    Returns:
        A ``(data, target)`` tuple holding the ``data`` and ``target``
        entries of the object returned by ``load_iris``.
    """
    from sklearn.datasets import load_iris

    dataset = load_iris(as_frame=True)
    return dataset["data"], dataset["target"]

In [4]:
# Split the data into training and test sets
@task
def dividir_dados(data, target):
    """Split features and labels into training and test partitions.

    Args:
        data: Feature table to split.
        target: Labels aligned with ``data``.

    Returns:
        A ``(X_train, X_test, y_train, y_test)`` tuple. 20% of the samples
        go to the test partition and the shuffle is seeded with 42.
    """
    features_train, features_test, labels_train, labels_test = train_test_split(
        data,
        target,
        random_state=42,
        test_size=0.2,
    )
    return features_train, features_test, labels_train, labels_test

In [5]:
# Train the model
@task
def treinar_modelo(X_train, y_train):
    """Fit a random forest classifier and log it to MLflow.

    Training and model logging both happen inside an MLflow run named
    "Treinando RandomForest"; the run is closed when the ``with`` block
    exits.

    Args:
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted ``RandomForestClassifier`` (100 trees, seed 42).
    """
    with mlflow.start_run(run_name="Treinando RandomForest"):
        # ``fit`` returns the estimator itself, so construction and fitting chain.
        classifier = RandomForestClassifier(
            n_estimators=100,
            random_state=42,
        ).fit(X_train, y_train)
        mlflow.sklearn.log_model(classifier, "random_forest_model")
    return classifier

In [6]:
# Evaluate the model
@task
def avaliar_modelo(model, X_test, y_test):
    """Score ``model`` on the test split and log its accuracy to MLflow.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Test features.
        y_test: True labels for ``X_test``.

    Returns:
        The value computed by ``accuracy_score(y_test, predictions)``.
    """
    predictions = model.predict(X_test)
    acc = accuracy_score(y_test, predictions)

    # Calling mlflow.log_metric with no active run makes MLflow start one
    # implicitly and leave it open. That dangling run then makes the next
    # mlflow.start_run(...) fail with "run already active" when the pipeline
    # is re-run in the same kernel. Log into a scoped run in that case, and
    # keep logging to the caller's run when one is already active.
    if mlflow.active_run() is not None:
        mlflow.log_metric("accuracy", acc)
    else:
        with mlflow.start_run(run_name="Avaliando RandomForest"):
            mlflow.log_metric("accuracy", acc)

    print(f"Acurácia: {acc}")
    return acc

In [7]:
# Define the Prefect flow.
# The `with Flow("...") as flow:` context manager and `flow.run()` belong to the
# Prefect 1.x API. From Prefect 2 onwards `Flow` wraps a callable (hence the
# "'fn' must be callable" TypeError), so the pipeline is declared as a
# decorated function and executed by calling it.
@prefect.flow(name="Pipeline de MLFlow com Prefect")
def pipeline_mlflow_prefect():
    """Run the pipeline: load data, split it, train the model, evaluate it.

    Returns:
        The accuracy returned by ``avaliar_modelo``.
    """
    data, target = carregar_dados()
    X_train, X_test, y_train, y_test = dividir_dados(data, target)
    modelo = treinar_modelo(X_train, y_train)
    resultado = avaliar_modelo(modelo, X_test, y_test)
    return resultado


# Keep the original module-level name bound to the flow object.
flow = pipeline_mlflow_prefect

# Run the flow; calling the flow object executes it and returns the accuracy.
flow()