# Treinamento de Modelo de Machine Learning

Este notebook executa o fluxo de treino/teste de um modelo Random Forest usando dados simulados do pipeline de observabilidade.


In [2]:
import json
import os
from typing import Dict, List, Optional, Tuple

import joblib
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

class MLTrainingService:
    """Service that trains, evaluates, persists and serves a Random Forest classifier.

    Attributes:
        model_path: Filesystem path the fitted model is written to by
            ``train`` and read back from by ``load_model``.
        model: The fitted estimator, or ``None`` until ``train`` or
            ``load_model`` has completed successfully.
    """

    def __init__(self, model_path: str = "model.joblib"):
        """Remember where the model is persisted; no model is loaded yet.

        Args:
            model_path: Path used by ``joblib.dump`` / ``joblib.load``.
        """
        self.model_path = model_path
        self.model = None

    def load_data(self, file_path: str) -> pd.DataFrame:
        """Read a CSV file into a DataFrame.

        Args:
            file_path: Path of the CSV file to read.

        Returns:
            The DataFrame produced by ``pandas.read_csv``.
        """
        return pd.read_csv(file_path)

    def preprocess(self, df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]:
        """Split a DataFrame into a feature matrix and a label vector.

        Args:
            df: Frame holding a ``"target"`` column; every other column is
                treated as a feature.

        Returns:
            ``(X, y)`` where ``X`` contains the values of all non-target
            columns and ``y`` the values of the ``"target"`` column.

        Raises:
            KeyError: If ``df`` has no ``"target"`` column.
        """
        X = df.drop("target", axis=1).values
        y = df["target"].values
        return X, y

    def train(
        self,
        X: np.ndarray,
        y: np.ndarray,
        test_size: float = 0.2,
        n_estimators: int = 100,
        random_state: Optional[int] = 42,
    ) -> None:
        """Fit a Random Forest on a train split, report on the test split, save it.

        The classification report for the held-out split is printed, then the
        fitted model is stored on ``self.model`` and dumped to
        ``self.model_path``.

        Args:
            X: Feature matrix.
            y: Labels aligned with the rows of ``X``.
            test_size: Forwarded to ``train_test_split``.
            n_estimators: Number of trees in the forest.
            random_state: Seed used for both the split and the forest.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

        # Fit on a local first so a failed fit never leaves an unfitted
        # estimator on self.model.
        model = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
        model.fit(X_train, y_train)
        self.model = model

        preds = self.model.predict(X_test)
        print(classification_report(y_test, preds))

        # Create the destination directory when model_path points into one
        # that does not exist yet; a bare filename has an empty dirname.
        model_dir = os.path.dirname(self.model_path)
        if model_dir:
            os.makedirs(model_dir, exist_ok=True)
        joblib.dump(self.model, self.model_path)
        print(f"Modelo salvo em {self.model_path}")

    def load_model(self):
        """Load the persisted model from ``self.model_path`` into ``self.model``.

        Raises:
            FileNotFoundError: If nothing exists at ``self.model_path``.
        """
        if not os.path.exists(self.model_path):
            raise FileNotFoundError("Nenhum modelo encontrado. Treine antes.")
        # SECURITY: joblib.load unpickles the file, which can execute arbitrary
        # code. Only load model files that come from a trusted source.
        self.model = joblib.load(self.model_path)
        print("Modelo carregado com sucesso!")

    def predict(self, X: np.ndarray):
        """Return the model's predictions for ``X``.

        Args:
            X: Feature matrix to score.

        Returns:
            Whatever ``self.model.predict(X)`` returns.

        Raises:
            ValueError: If no model has been trained or loaded yet.
        """
        # Compare against None explicitly: estimator objects may define
        # __len__, so a truthiness test can misreport a loaded model as
        # missing (or raise) instead of answering "is a model set?".
        if self.model is None:
            raise ValueError("Modelo não carregado.")
        return self.model.predict(X)


In [3]:
# Simulated end-to-end run: generate a synthetic dataset, persist it as CSV,
# then load, train and evaluate through the service.
DATA_PATH = "observability_data.csv"
N_SAMPLES = 500
N_FEATURES = 5
SEED = 42

service = MLTrainingService()

# Generate the simulated dataset. The in-memory arrays get their own names so
# they are not confused with the arrays reloaded from disk below.
X_sim, y_sim = make_classification(
    n_samples=N_SAMPLES,
    n_features=N_FEATURES,
    n_informative=3,
    n_redundant=0,
    random_state=SEED,
)
df = pd.DataFrame(X_sim, columns=[f"feature_{i}" for i in range(N_FEATURES)])
df["target"] = y_sim
df.to_csv(DATA_PATH, index=False)

# Load from disk, train and evaluate.
data = service.load_data(DATA_PATH)
# Sanity check: the CSV round trip must keep every row, feature and the target.
assert data.shape == (N_SAMPLES, N_FEATURES + 1), f"unexpected shape {data.shape}"
X, y = service.preprocess(data)
service.train(X, y)


              precision    recall  f1-score   support

           0       0.91      0.96      0.93        50
           1       0.96      0.90      0.93        50

    accuracy                           0.93       100
   macro avg       0.93      0.93      0.93       100
weighted avg       0.93      0.93      0.93       100

Modelo salvo em model.joblib
