In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import mlflow
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import os

# Configure MLflow.
# The tracking server address is read from the MLFLOW_TRACKING_URI environment
# variable when it is set to a non-empty value, so the notebook can be pointed
# at a different server without editing code. Otherwise it falls back to the
# original "http://mlflow:5000" address, keeping existing setups unchanged.
mlflow.set_tracking_uri(os.environ.get("MLFLOW_TRACKING_URI") or "http://mlflow:5000")
mlflow.set_experiment("thermal_comfort_prediction")

## 1. Carregamento e Limpeza de Dados

In [None]:
# Location of the prepared thermal dataset (CSV)
data_path = "/app/data/sample_thermal_data.csv"

# Fail early, pointing the user at the preparation script, when the CSV is absent
dataset_is_missing = not os.path.exists(data_path)
if dataset_is_missing:
    raise FileNotFoundError(
        f"Arquivo {data_path} não encontrado. "
        "Execute 'python scripts/convert_inmet_data.py' para preparar os dados."
    )

print("Carregando dados existentes...")
df = pd.read_csv(data_path)

# Report how many rows were read, then the smallest and largest values found
# in the 'timestamp' column
n_records = len(df)
print(f"Total de registros: {n_records}")
timestamps = df['timestamp']
print(f"Período: {timestamps.min()} a {timestamps.max()}")

# Last expression of the cell: preview of the first rows
df.head()

In [None]:
# Per-column count of missing values, printed before any rows are removed
null_counts = df.isnull().sum()
print(null_counts)

# Basic cleanup: discard every row that has at least one missing value
# (axis=0 / how="any" are the dropna defaults, spelled out here for clarity)
df = df.dropna(axis=0, how="any")

## 2. Preparação dos Dados

In [None]:
# Features and target
feature_columns = ['temperature', 'humidity', 'wind_velocity', 'pressure', 'solar_radiation']
X = df[feature_columns]
y = df['comfort_zone']

# Train/test split (80% / 20%, fixed seed for reproducibility).
# The split is stratified on the target so that both partitions keep the class
# proportions of `comfort_zone`; a plain random split can leave rare classes
# under-represented in (or absent from) the test set, which distorts the
# reported accuracy and confusion matrix.
# Stratification requires at least two samples of every class, so when that
# does not hold (or `y` is empty) fall back to the plain random split.
can_stratify = y.value_counts(dropna=False).min() >= 2
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y if can_stratify else None,
)

print(f"Treino: {X_train.shape}, Teste: {X_test.shape}")

## 3. Treinamento e Registro no MLflow

In [None]:
with mlflow.start_run():
    # Model hyperparameters. The seed is logged together with the other
    # parameters because it also determines the fitted model.
    n_estimators = 100
    max_depth = 10
    random_state = 42

    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("max_depth", max_depth)
    mlflow.log_param("random_state", random_state)

    # Train the model
    clf = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=random_state,
    )
    clf.fit(X_train, y_train)

    # Predictions on the held-out set
    y_pred = clf.predict(X_test)

    # Metrics
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Acurácia: {accuracy:.4f}")

    mlflow.log_metric("accuracy", accuracy)

    # Persist the fitted model as a run artifact
    mlflow.sklearn.log_model(clf, "random_forest_model")

    # Confusion matrix.
    # Fix the label order explicitly and reuse it for the tick labels, so the
    # rows/columns of the heatmap show the actual class names instead of
    # positional indices. The label set is every class the model knows plus
    # any class that only appears in the test targets, so the matrix keeps the
    # same layout even when a class is missing from this particular split.
    class_labels = sorted(set(clf.classes_) | set(y_test))
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)

    plt.figure(figsize=(10, 8))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_labels,
        yticklabels=class_labels,
    )
    plt.title("Matriz de Confusão")
    plt.ylabel("Real")
    plt.xlabel("Previsto")
    # The image is written to the current working directory and then uploaded
    # to the active run.
    plt.savefig("confusion_matrix.png")
    mlflow.log_artifact("confusion_matrix.png")

    print("Experimento registrado no MLflow com sucesso!")