In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import os
import sys

# Configure MLflow.
# The tracking server address is taken from the MLFLOW_TRACKING_URI environment
# variable when it is set, so the notebook can run in environments where the
# host name "mlflow" does not resolve. When the variable is unset, the original
# address "http://mlflow:5000" is used.
mlflow.set_tracking_uri(os.environ.get("MLFLOW_TRACKING_URI", "http://mlflow:5000"))
mlflow.set_experiment("thermal_comfort_prediction")

MlflowException: API request to http://mlflow:5000/api/2.0/mlflow/experiments/get-by-name failed with exception HTTPConnectionPool(host='mlflow', port=5000): Max retries exceeded with url: /api/2.0/mlflow/experiments/get-by-name?experiment_name=thermal_comfort_prediction (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x78746ef8d670>: Failed to resolve 'mlflow' ([Errno -3] Temporary failure in name resolution)"))

## 1. Carregamento e Limpeza de Dados

In [None]:
# Load the dataset from the CSV file, generating it first when necessary.
data_path = "/app/data/sample_thermal_data.csv"

# Set to True to rebuild the CSV even when it already exists.
FORCE_REGENERATE = False

# Generate the data when the file is missing or regeneration is forced
if FORCE_REGENERATE or not os.path.exists(data_path):
    print("Gerando dados sintéticos (2023-2025)...")
    # Make the generator script importable; skip the append on re-runs so
    # sys.path does not accumulate duplicate entries.
    if "/app/scripts" not in sys.path:
        sys.path.append("/app/scripts")
    from generate_data import generate_thermal_data
    # NOTE(review): years_range=(2023, 2025) is presumably inclusive, i.e. three
    # years of data; confirm against generate_data.
    df = generate_thermal_data(years_range=(2023, 2025))
    # Create the target directory so to_csv does not fail when it is missing.
    os.makedirs(os.path.dirname(data_path), exist_ok=True)
    df.to_csv(data_path, index=False)
else:
    print("Carregando dados existentes...")
    df = pd.read_csv(data_path)

# Quick summary: row count and the covered time span, then a preview of the rows
print(f"Total de registros: {len(df)}")
print(f"Período: {df['timestamp'].min()} a {df['timestamp'].max()}")
df.head()

In [None]:
# Report how many missing values each column contains
missing_per_column = df.isna().sum()
print(missing_per_column)

# Basic treatment: discard every row that has at least one missing value
df = df.dropna(axis=0, how="any")

## 2. Preparação dos Dados

In [None]:
# Input feature columns and the target column
feature_columns = ['temperature', 'humidity', 'wind_velocity', 'pressure', 'solar_radiation']
X = df[feature_columns]
y = df['comfort_zone']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

print(f"Treino: {X_train.shape}, Teste: {X_test.shape}")

## 3. Treinamento e Registro no MLflow

In [None]:
# Train a random forest inside a single MLflow run and log its parameters,
# accuracy, fitted model and confusion-matrix figure to that run.
with mlflow.start_run():
    # Model hyperparameters
    n_estimators = 100
    max_depth = 10

    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("max_depth", max_depth)

    # Train the model (fixed random_state for repeatable results)
    clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
    clf.fit(X_train, y_train)

    # Predictions on the held-out test set
    y_pred = clf.predict(X_test)

    # Metrics
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Acurácia: {accuracy:.4f}")

    mlflow.log_metric("accuracy", accuracy)

    # Save the fitted model as a run artifact
    mlflow.sklearn.log_model(clf, "random_forest_model")

    # Confusion matrix.
    # Use the sorted union of true and predicted classes as the explicit label
    # order, so the matrix rows/columns and the tick labels are guaranteed to
    # line up and the plot shows class values instead of positional indices.
    class_labels = sorted(set(y_test).union(y_pred))
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)

    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_labels,
        yticklabels=class_labels,
        ax=ax,
    )
    ax.set_title("Matriz de Confusão")
    ax.set_ylabel("Real")
    ax.set_xlabel("Previsto")
    # The figure is written to the working directory and then attached to the run
    fig.savefig("confusion_matrix.png")
    mlflow.log_artifact("confusion_matrix.png")

    print("Experimento registrado no MLflow com sucesso!")