In [3]:
import torch
from torch import nn, optim
from deep_river import classification
from river import metrics, ensemble, stream, preprocessing, compose
import time
import os
import psutil

In [4]:
import torch

# Define o dispositivo globalmente
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Dispositivo detectado: {device}")
if device == "cuda":
    print(f"GPU em uso: {torch.cuda.get_device_name(0)}")

Dispositivo detectado: cuda
GPU em uso: Quadro P620


In [None]:
# 1. Arquitetura Shallow
class ShallowNN(nn.Module):
    def __init__(self, n_features, n_classes):
        super(ShallowNN, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(n_features, 10),
            nn.ReLU(),
            nn.Linear(10, n_classes)
        )

    def forward(self, x):
        return self.net(x)

def get_memory_usage():
    process = psutil.Process(os.getpid())
    return process.memory_info().rss / (1024 * 1024)

# 2. Configurações de Dados (Elec2)
base_path = os.path.expanduser("~/moa/aldopaim/AdaptiveRegularizedEnsemble/datasets")
filename = "elecNormNew.arff"
file_path = os.path.join(base_path, filename)
target_column = "class"

# Dimensões iniciais para Elec2
n_features = 8 
n_classes = 2

# 3. Classificador Base Deep River (Corrigido conforme seu feedback)
base_nn = classification.Classifier(
    module=ShallowNN(n_features=n_features, n_classes=n_classes),
    loss_fn=nn.CrossEntropyLoss(),
    optimizer_fn=optim.SGD,
    lr=0.5, # Aumentado para convergir mais rápido
    is_feature_incremental=True,
    is_class_incremental=True,
    device=device
)

# 4. Ensemble ADWINBagging
# Mais simples que o ARTE, ideal para validar o aprendizado base
model = ensemble.ADWINBaggingClassifier(
    model=base_nn,
    n_models=50, # Aumentado de 5 para 10 para melhor robustez
    seed=42
)

# 5. Pipeline de Normalização e Tratamento Categórico
pipeline = compose.Pipeline(
    preprocessing.OneHotEncoder(), 
    preprocessing.StandardScaler(),
    model
)

# 6. Loop de Execução
label_map = {}
metric = metrics.Accuracy()

print(f"Iniciando PoC: ADWINBagging + 10 Redes Neurais (Deep River)")
print(f"Dataset: ElecNorm | Algoritmo: Online Bagging com ADWIN")
print("-" * 65)

start_time = time.perf_counter()
count = 0

try:
    dataset_stream = stream.iter_arff(file_path, target=target_column)
    
    for x, y in dataset_stream:
        if y not in label_map:
            label_map[y] = len(label_map)
        y_numeric = label_map[y]

        # Test-then-Train
        y_pred = pipeline.predict_one(x)
        if y_pred is not None:
            metric.update(y_numeric, y_pred)
        
        pipeline.learn_one(x, y_numeric)
        count += 1
        
        if count % 2000 == 0:
            elapsed = time.perf_counter() - start_time
            mem = get_memory_usage()
            print(f"{count:<12} | {metric.get():>9.2%} | {elapsed:>9.2f} | {mem:>12.2f}")

    print("-" * 65)
    print(f"FINAL: Acc: {metric.get():.2%} | Tempo: {time.perf_counter()-start_time:.2f}s")

except Exception as e:
    print(f"Erro: {e}")

Iniciando PoC: ADWINBagging + 10 Redes Neurais (Deep River)
Dataset: ElecNorm | Algoritmo: Online Bagging com ADWIN
-----------------------------------------------------------------


In [10]:
class ARTE(base.Ensemble, base.Classifier):
    """Adaptive Random Tree Ensemble (ARTE) portado do MOA.
    
    Algoritmo adaptativo para fluxos de dados evolutivos de Paim e Enembreck.
    """

    def __init__(
        self,
        model: base.Classifier = None,
        n_models: int = 100,
        lambd: float = 6.0,
        drift_detector: base.DriftDetector = None,
        window_size: int = 1000,
        n_rejections: int = 5,
        seed: int = 1
    ):
        # O modelo base sugerido no original é a ARFHoeffdingTree
        # No River, usamos HoeffdingTreeClassifier como base
        self.model = model or tree.HoeffdingTreeClassifier()
        self.n_models = n_models
        self.lambd = lambd
        self.drift_detector = drift_detector or drift.ADWIN(delta=1e-3)
        self.window_size = window_size
        self.n_rejections = n_rejections
        self.seed = seed
        self._rng = np.random.RandomState(self.seed)
        
        # Inicialização dos membros conforme a estrutura AREBaseLearner do original
        self._ensemble_members = []
        for i in range(self.n_models):
            m = {
                'model': self.model.clone(),
                'detector': self.drift_detector.clone(),
                'untrained_counts': collections.defaultdict(int),
                'window_acc': utils.Rolling(stats.Mean(), window_size=self.window_size),
                'instances_trained': 0
            }
            self._ensemble_members.append(m)
            
        super().__init__(models=[m['model'] for m in self._ensemble_members])
        self._avg_window_acc = 0.0

    def learn_one(self, x, y):
        all_accs = []
        
        for m in self._ensemble_members:
            # Predição para controle de erro e lógica de rejeição
            y_pred = m['model'].predict_one(x)
            correct = (y == y_pred)
            
            # Estratégia de Regularização Adaptativa:
            # Para evitar que domínios com ruído dominem, treina no erro
            # ou após N rejeições (acertos)
            will_train = not correct
            
            if correct:
                m['untrained_counts'][y] += 1
                if self.n_rejections > 0 and m['untrained_counts'][y] >= self.n_rejections:
                    m['untrained_counts'][y] = 0
                    will_train = True
            
            if will_train:
                # Online Bagging via Poisson
                k = self._rng.poisson(self.lambd)
                if k > 0:
                    for _ in range(k):
                        m['model'].learn_one(x, y)
                        m['instances_trained'] += 1
            
            # Detecção de Drift individual
            m['detector'].update(0 if correct else 1)
            if m['detector'].drift_detected:
                self._reset_member(m)
            
            # Atualiza estatísticas da janela deslizante
            m['window_acc'].update(1 if correct else 0)
            all_accs.append(m['window_acc'].get())

        # Atualiza média global para critério de votação seletiva
        if all_accs:
            self._avg_window_acc = statistics.mean(all_accs)
            
        return self

    def predict_proba_one(self, x):
        combined_votes = collections.Counter()
        
        # O ARTE filtra votantes cuja acurácia na janela é inferior à média global
        eligible_members = [
            m for m in self._ensemble_members 
            if self.window_size == 0 or m['window_acc'].get() >= self._avg_window_acc
        ]
        
        # Fallback se ninguém estiver acima da média (ex: início do stream)
        if not eligible_members:
            eligible_members = self._ensemble_members

        for m in eligible_members:
            votes = m['model'].predict_proba_one(x)
            if votes:
                total = sum(votes.values())
                if total > 0:
                    for cls, prob in votes.items():
                        combined_votes[cls] += prob / total

        return combined_votes

    def _reset_member(self, m):
        """Reinicia o modelo e estatísticas após detecção de mudança."""
        m['model'] = self.model.clone()
        m['detector'] = self.drift_detector.clone()
        m['untrained_counts'].clear()
        m['window_acc'] = utils.Rolling(stats.Mean(), window_size=self.window_size)

In [18]:
import torch
from torch import nn, optim
from deep_river import classification
from river import metrics, drift, stream, preprocessing, compose
import time
import os
import psutil

# 1. Definição da Arquitetura da Rede Neural (PyTorch)
class ShallowNN(nn.Module):
    def __init__(self, n_features, n_classes):
        super(ShallowNN, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(n_features, 10),
            nn.ReLU(),
            nn.Linear(10, n_classes)
        )

    def forward(self, x):
        return self.net(x)

def get_memory_usage():
    process = psutil.Process(os.getpid())
    return process.memory_info().rss / (1024 * 1024)

# 2. Configurações de Dados
base_path = os.path.expanduser("~/moa/aldopaim/AdaptiveRegularizedEnsemble/datasets")
filename = "elecNormNew.arff"
file_path = os.path.join(base_path, filename)
target_column = "class"

# Nota: O número de features pode mudar após o OneHotEncoder. 
# Para o Elec2 com OneHot no 'day', passamos de 8 para 14 features.
n_features = 8 
n_classes = 2

# 3. Construção do Classificador Base (Deep River)
base_nn = classification.Classifier(
    module=ShallowNN(n_features=n_features, n_classes=n_classes),  # Passamos a classe da rede
    module_params={"n_classes": 2}, # O wrapper injetará n_features
    loss_fn=nn.CrossEntropyLoss(),
    optimizer_fn=optim.SGD,
    lr=0.05,
    # Habilita a expansão dinâmica de entradas e saídas
    is_feature_incremental=True, 
    is_class_incremental=True
)
# base_nn = classification.Classifier(
#     module=ShallowNN(n_features=n_features, n_classes=n_classes),
#     loss_fn=nn.CrossEntropyLoss(),
#     optimizer_fn=optim.SGD,
#     lr=0.05
# )

# 4. Criação do Ensemble ARTE
model = ARTE(
    model=base_nn,
    n_models=5,
    drift_detector=drift.ADWIN(delta=1e-3),
    window_size=500,
    seed=42
)

# 5. Pipeline Robusto:
# - OneHotEncoder: transforma strings em 0/1 (resolve o erro do scaler)
# - StandardScaler: normaliza para a rede neural
pipeline = compose.Pipeline(
    preprocessing.OneHotEncoder(), 
    preprocessing.StandardScaler(),
    model
)

# 6. Loop de Execução com Mapeamento de Classes
label_map = {}
metric = metrics.Accuracy()

print(f"Iniciando ARTE + Deep River (Redes Neurais)")
print(f"Pipeline: OneHot -> Scaler -> Ensemble")
print("-" * 65)

start_time = time.perf_counter()
count = 0

try:
    dataset_stream = stream.iter_arff(file_path, target=target_column)
    
    for x, y in dataset_stream:
        # Mapeia rótulos string para numérico para o CrossEntropy do PyTorch
        if y not in label_map:
            label_map[y] = len(label_map)
        y_numeric = label_map[y]

        # Prequencial: Teste-antes-do-Treino
        y_pred = pipeline.predict_one(x)
        if y_pred is not None:
            metric.update(y_numeric, y_pred)
        
        # O pipeline processa x e o modelo treina com y_numeric
        pipeline.learn_one(x, y_numeric)
        count += 1
        
        if count % 1000 == 0:
            elapsed = time.perf_counter() - start_time
            mem = get_memory_usage()
            print(f"{count:<12} | {metric.get():>9.2%} | {elapsed:>9.2f} | {mem:>12.2f}")

    print("-" * 65)
    print(f"SUCESSO: Acc Final: {metric.get():.2%} | Rótulos: {label_map}")

except Exception as e:
    import traceback
    traceback.print_exc()
    print(f"\nErro durante a execução: {e}")

Iniciando ARTE + Deep River (Redes Neurais)
Pipeline: OneHot -> Scaler -> Ensemble
-----------------------------------------------------------------
1000         |    78.70% |     13.25 |       581.16
2000         |    78.70% |     35.93 |       581.91
3000         |    77.47% |     67.12 |       583.41
4000         |    77.15% |    105.05 |       584.79
5000         |    76.62% |    150.67 |       586.04
6000         |    76.57% |    199.82 |       586.54
7000         |    76.30% |    255.66 |       588.29


KeyboardInterrupt: 