In [None]:
# notebook: pipeline_boston.ipynb
# ==========================================================
# 🚀 Pipeline interativo do Boston Housing
# ==========================================================

from pathlib import Path
import ipywidgets as widgets
from IPython.display import display, clear_output

import pandas as pd

# Importar módulos customizados
from boston_housing.config import PROCESSED_DATA_DIR, FIGURES_DIR, MODELS_DIR
from boston_housing.features import main as features_main
from boston_housing.modeling import train as train_module
from boston_housing.plots import plot_predictions, plot_residuals

# ==========================================================
# Widgets para seleção de modelo
# ==========================================================
# Dropdown used to pick which model type the pipeline will train.
model_selector = widgets.Dropdown(
    description='Modelo:',
    options=['random_forest', 'linear_regression', 'svr', 'knn'],
    value='random_forest',
)

# Button that starts a pipeline run when clicked.
run_button = widgets.Button(button_style='success', description="Rodar pipeline")

# Output area that receives everything printed while the pipeline runs.
output = widgets.Output()

# Render the three widgets, in this order, below the cell.
display(model_selector, run_button, output)

# ==========================================================
# Função que executa o pipeline
# ==========================================================
def run_pipeline(model_type: str) -> None:
    """Run feature generation, training, and plotting for one model type.

    Everything printed by this function is captured by the ``output`` widget,
    which is cleared at the start of each run.

    Args:
        model_type: Model identifier forwarded to ``train_module.main`` and
            embedded in the file names of the generated figures.
    """
    with output:
        output.clear_output()
        print(f"🚀 Pipeline iniciado com modelo: {model_type}")

        # --- 1. Generate features and labels ---
        features_path, labels_path = features_main(PROCESSED_DATA_DIR / "boston.csv")
        print(f"✅ Features e labels gerados em: {PROCESSED_DATA_DIR}")

        # --- 2. Train the model ---
        print("🏋️ Treinando modelo...")
        model_path = train_module.main(
            features_path=features_path,
            labels_path=labels_path,
            overwrite=True,
            experiment_name="boston_housing_experiment",
            model_type=model_type
        )
        print(f"💾 Modelo salvo em: {model_path}")

        # --- 3. Predict and generate plots ---
        X = pd.read_csv(features_path)
        # Squeeze only the column axis: a bare squeeze() would turn a
        # one-row, one-column labels file into a scalar, and y.index is
        # needed just below.
        y = pd.read_csv(labels_path).squeeze("columns")

        from joblib import load
        model = load(model_path)
        # NOTE(review): predictions are made on the same feature file that was
        # passed to training; whether train_module.main holds out a test split
        # is not visible here -- confirm before reading the plots as
        # generalization performance.
        y_pred = pd.Series(model.predict(X), index=y.index)

        # Plot destinations, one pair of files per model type.
        pred_plot_path = FIGURES_DIR / f"pred_vs_real_{model_type}.png"
        resid_plot_path = FIGURES_DIR / f"residuals_{model_type}.png"

        # Make sure the target directory exists before handing the paths to
        # the plot helpers (presumably they write the files there -- their
        # implementation is not visible from this cell).
        FIGURES_DIR.mkdir(parents=True, exist_ok=True)

        plot_predictions(y, y_pred, pred_plot_path)
        plot_residuals(y, y_pred, resid_plot_path)

        print(f"📊 Plots salvos em: {FIGURES_DIR}")
        print("✅ Pipeline concluído!")

# ==========================================================
# Conectar botão ao pipeline
# ==========================================================
def _handle_run_click(_button):
    """Start the pipeline with the model currently selected in the dropdown."""
    # The dropdown is read at click time, so each click uses the latest choice.
    run_pipeline(model_selector.value)


run_button.on_click(_handle_run_click)


Dropdown(description='Modelo:', options=('random_forest', 'linear_regression', 'svr', 'knn'), value='random_fo…

Button(button_style='success', description='Rodar pipeline', style=ButtonStyle())

Output()