# Aprendizado Federado com LSTM para Estimativa de ETo

Notebook compatível com **Google Colab**, derivado do script `fl_lstm_eto.py`.

Este notebook:
- Define o modelo LSTM
- Simula Aprendizado Federado (FedAvg)
- Executa treinamento e avaliação por rounds


In [10]:
# Instalação (Colab)
# Normalmente já disponível, mas mantido por segurança
!pip install -q numpy pandas scikit-learn torch

In [11]:
# Imports
import os, math, random
from typing import List, Dict
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

In [12]:
# Reproducibility
def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Value handed to every generator (default 42).
    """
    # Same order as always: stdlib first, then NumPy, then PyTorch (CPU).
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    # CUDA generators are only touched when a GPU is actually available.
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

set_seed(42)

In [13]:
# Metric
def rmse(y_true, y_pred):
    """Return the root mean squared error between targets and predictions.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values, aligned with ``y_true``.

    Returns:
        The square root of ``mean_squared_error(y_true, y_pred)`` as a float.
    """
    squared_error = mean_squared_error(y_true, y_pred)
    return math.sqrt(squared_error)

In [14]:
# Dataset of time windows
class WindowDataset(Dataset):
    """Torch dataset that pairs each input window with its target value.

    Both arrays are converted to ``float32`` copies when the dataset is built.
    """

    def __init__(self, X, y):
        """Store float32 copies of the windows ``X`` and the targets ``y``."""
        float_type = 'float32'
        self.X = X.astype(float_type)
        self.y = y.astype(float_type)

    def __len__(self):
        """Return the number of targets, i.e. the number of samples."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return ``(window, target)`` tensors for position ``idx``."""
        window = self.X[idx]
        target = self.y[idx]
        return torch.from_numpy(window), torch.tensor(target)

In [15]:
# Window construction
def make_windows_from_df(df, feature_cols, target_col, seq_len, scaler=None, fit_scaler=True):
    """Turn a data frame into scaled input windows and their targets.

    Rows containing any missing value are dropped first. Sample ``i`` is the
    block of ``seq_len`` consecutive scaled feature rows that immediately
    precede row ``i``, paired with the unscaled target found at row ``i``.

    Args:
        df: Data frame holding the feature and target columns.
        feature_cols: Names of the columns used as model inputs.
        target_col: Name of the column to predict.
        seq_len: Number of rows in every input window.
        scaler: Object exposing ``fit`` and ``transform``; a fresh
            ``StandardScaler`` is created when omitted.
        fit_scaler: When true, ``scaler`` is fitted on this frame's features
            before they are transformed.

    Returns:
        ``(X, y, scaler)`` where ``X`` stacks the windows and ``y`` holds the
        matching targets, or ``(None, None, scaler)`` when the frame is too
        short to yield a single window.
    """
    clean = df.dropna().reset_index(drop=True)
    features = clean[feature_cols].values.astype(float)
    targets = clean[target_col].values.astype(float)

    scaler = StandardScaler() if scaler is None else scaler
    if fit_scaler:
        scaler.fit(features)
    scaled = scaler.transform(features)

    # Every row from index seq_len onwards has a full window behind it.
    positions = range(seq_len, len(scaled))
    X_windows = [scaled[i - seq_len:i] for i in positions]
    y_windows = [targets[i] for i in positions]

    if not X_windows:
        return None, None, scaler
    return np.stack(X_windows), np.array(y_windows), scaler

In [16]:
# LSTM model
class LSTMRegressor(nn.Module):
    """LSTM encoder followed by a small MLP head that emits one value per sequence."""

    def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.2, bidirectional=False):
        """Build the recurrent encoder and the regression head.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of each LSTM layer.
            num_layers: Number of stacked LSTM layers.
            dropout: Dropout between stacked LSTM layers; forced to 0.0 when
                there is only one layer.
            bidirectional: Whether the LSTM runs in both directions.
        """
        super().__init__()
        # Inter-layer dropout is only meaningful when layers are stacked.
        lstm_dropout = 0.0 if num_layers <= 1 else dropout
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=lstm_dropout,
            bidirectional=bidirectional,
        )
        self.num_directions = 1 if not bidirectional else 2

        encoder_width = hidden_size * self.num_directions
        head_width = hidden_size // 2
        self.fc = nn.Sequential(
            nn.Linear(encoder_width, head_width),
            nn.ReLU(),
            nn.Linear(head_width, 1),
        )

    def forward(self, x):
        """Return one prediction per sequence in the batch-first input ``x``."""
        sequence_out, _state = self.lstm(x)
        # Only the output at the final time step feeds the regression head.
        final_step = sequence_out[:, -1, :]
        prediction = self.fc(final_step)
        return prediction.squeeze(1)

In [17]:
# Federated client
class FLClient:
    """One federated-learning participant with its own data, model and optimizer.

    The client splits its data frame into a leading training part and a
    trailing test part, turns both into windows with ``make_windows_from_df``
    (the scaler is fitted on the training part only) and trains an
    ``LSTMRegressor`` locally.
    """

    def __init__(self, client_id, df, feature_cols, target_col, seq_len,
                 device='cpu', batch_size=32, local_epochs=1, lr=1e-3):
        """Prepare the data loaders, model, loss and optimizer.

        Args:
            client_id: Identifier used as the key in evaluation reports.
            df: Data frame holding this client's features and target.
            feature_cols: Names of the input columns.
            target_col: Name of the column to predict.
            seq_len: Number of rows per input window.
            device: Torch device string the model and batches are moved to.
            batch_size: Batch size for both data loaders.
            local_epochs: Passes over the training data per ``local_train``.
            lr: Learning rate of the Adam optimizer.

        Raises:
            ValueError: If the training split is too short to produce even
                one window of length ``seq_len``.
        """
        self.client_id = client_id
        self.feature_cols = feature_cols
        self.target_col = target_col
        self.seq_len = seq_len
        self.device = device
        self.batch_size = batch_size
        self.local_epochs = local_epochs
        self.lr = lr

        # Leading rows train, trailing rows test: 80/20, or 70/30 for frames
        # of ten rows or fewer.
        n = len(df)
        cutoff = int(n * 0.8) if n > 10 else int(n * 0.7)
        df_train = df.iloc[:cutoff]
        df_test = df.iloc[cutoff:]

        X_tr, y_tr, self.scaler = make_windows_from_df(
            df_train, feature_cols, target_col, seq_len
        )
        if X_tr is None:
            # Without this check the failure would surface later as an opaque
            # AttributeError on None inside WindowDataset.
            raise ValueError(
                f"Client {client_id!r}: the training split has too few complete "
                f"rows to build a window of length {seq_len}"
            )
        # The test split reuses the training scaler without refitting it.
        X_te, y_te, _ = make_windows_from_df(
            df_test, feature_cols, target_col, seq_len, scaler=self.scaler, fit_scaler=False
        )

        self.train_loader = DataLoader(
            WindowDataset(X_tr, y_tr), batch_size=batch_size, shuffle=True
        )
        # A test split too short for a single window leaves the client
        # without a test loader; evaluate_local() then reports nothing.
        self.test_loader = DataLoader(
            WindowDataset(X_te, y_te), batch_size=batch_size, shuffle=False
        ) if X_te is not None else None

        # Number of training windows; used by the caller as aggregation weight.
        self.n_samples = len(y_tr)

        self.model = LSTMRegressor(input_size=len(feature_cols)).to(device)
        self.criterion = nn.MSELoss()
        # Created once: the optimizer's internal state is kept across calls
        # to set_weights(), only the model parameters are overwritten.
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)

    def get_weights(self):
        """Return a CPU copy of the local model's state dict."""
        return {k: v.cpu().clone() for k, v in self.model.state_dict().items()}

    def set_weights(self, state_dict):
        """Load ``state_dict`` into the local model."""
        self.model.load_state_dict(state_dict)

    def local_train(self):
        """Train the local model for ``local_epochs`` passes over the training loader."""
        self.model.train()
        for _ in range(self.local_epochs):
            for xb, yb in self.train_loader:
                xb, yb = xb.to(self.device), yb.to(self.device)
                loss = self.criterion(self.model(xb), yb)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

    def evaluate_local(self):
        """Score the local model on the client's test windows.

        Returns:
            A dict with the keys ``'rmse'``, ``'mae'`` and ``'r2'``, or an
            empty dict when the client has no test loader.
        """
        if self.test_loader is None:
            return {}
        self.model.eval()
        y_true, y_pred = [], []
        with torch.no_grad():
            for xb, yb in self.test_loader:
                preds = self.model(xb.to(self.device)).cpu().numpy()
                y_pred.extend(preds)
                y_true.extend(yb.numpy())
        return {
            'rmse': rmse(y_true, y_pred),
            'mae': mean_absolute_error(y_true, y_pred),
            'r2': r2_score(y_true, y_pred)
        }

In [18]:
# Federated server
class FLServer:
    """Coordinator that owns the global model and averages client updates."""

    def __init__(self, clients, device='cpu'):
        """Create the global model sized after the first client's features.

        Args:
            clients: Sequence of client objects; the first one must expose
                ``feature_cols``.
            device: Torch device string the global model is moved to.
        """
        self.clients = clients
        self.device = device
        self.global_model = LSTMRegressor(
            input_size=len(clients[0].feature_cols)
        ).to(device)

    def aggregate(self, states, weights):
        """Replace the global model with the weighted average of client states.

        Args:
            states: One state dict per client, all sharing the same keys.
            weights: One non-negative number per state (for example the
                client's sample count); they are normalised to sum to one.

        Raises:
            ValueError: If no state is given, if ``states`` and ``weights``
                differ in length, or if the weights do not sum to a positive
                value. The global model is left untouched in these cases.
        """
        if not states:
            raise ValueError("aggregate() needs at least one client state")
        if len(states) != len(weights):
            # zip() would silently drop the surplus entries while the
            # normaliser still counted them, shrinking the averaged model.
            raise ValueError(
                f"got {len(states)} states but {len(weights)} weights"
            )
        total = sum(weights)
        if total <= 0:
            raise ValueError("client weights must sum to a positive value")

        new_state = {}
        for k in states[0]:
            new_state[k] = sum(
                sd[k] * (w / total) for sd, w in zip(states, weights)
            )
        self.global_model.load_state_dict(new_state)

    def distribute(self):
        """Return a CPU copy of the global model's state dict."""
        return {k: v.cpu().clone() for k, v in self.global_model.state_dict().items()}

    def evaluate(self):
        """Load the global weights into every client and collect its metrics.

        Returns:
            A dict mapping each client's ``client_id`` to the result of its
            ``evaluate_local()`` call.
        """
        # One snapshot is enough for all clients: set_weights() is given the
        # same unmodified state for each of them.
        global_w = self.distribute()
        metrics = {}
        for c in self.clients:
            c.set_weights(global_w)
            metrics[c.client_id] = c.evaluate_local()
        return metrics

## Execução da Simulação

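Ajuste os caminhos dos CSVs abaixo (um por cliente). Cada arquivo deve usar `;` como separador e conter a coluna-alvo `ETo`; todas as demais colunas, exceto `datetime`, são usadas como atributos de entrada.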

In [24]:
# Example configuration: one CSV path per federated client
client_csvs = [
    '/content/drive/MyDrive/Doutorado/Disciplina Isolada/PCC177/Projeto de Pesquisa/datasets/lat-15.35_lon-55.45_mt.csv',
    '/content/drive/MyDrive/Doutorado/Disciplina Isolada/PCC177/Projeto de Pesquisa/datasets/lat-19.75_lon-44.45_mg.csv',
    '/content/drive/MyDrive/Doutorado/Disciplina Isolada/PCC177/Projeto de Pesquisa/datasets/lat-2.15_lon-59.85_am.csv',
    '/content/drive/MyDrive/Doutorado/Disciplina Isolada/PCC177/Projeto de Pesquisa/datasets/lat-30.75_lon-55.45_rs.csv',
    '/content/drive/MyDrive/Doutorado/Disciplina Isolada/PCC177/Projeto de Pesquisa/datasets/lat-4.35_lon-40.05_ce.csv',
    '/content/drive/MyDrive/Doutorado/Disciplina Isolada/PCC177/Projeto de Pesquisa/datasets/lat-8.75_lon-35.65_pe.csv',
]

# Column to predict
target_col = 'ETo'
# Number of rows in each input window
seq_len = 4
# Number of federated rounds to run
rounds = 5

# Use the GPU whenever PyTorch reports one, otherwise stay on the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [20]:
# Mount Google Drive at /content/drive, the root under which the client CSV
# paths are located (requires the google.colab package).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [25]:
# Run the simulation
# Build one client per CSV file.
clients = []
for path in client_csvs:
    df = pd.read_csv(path, sep=';')
    # Every column except the target and the datetime column is a feature.
    cols = [c for c in df.columns if c != target_col and c.lower() != 'datetime']
    # The client id is the file name without directory and extension.
    cid = os.path.splitext(os.path.basename(path))[0]
    clients.append(FLClient(cid, df, cols, target_col, seq_len, device=device))

server = FLServer(clients, device=device)

# Each round: broadcast the global weights, train every client locally,
# then average the client weights by training sample count and evaluate.
for r in range(rounds):
    print(f'\nRound {r+1}')
    states, weights = [], []
    global_w = server.distribute()
    for c in clients:
        c.set_weights(global_w)
        c.local_train()
        states.append(c.get_weights())
        weights.append(c.n_samples)
    server.aggregate(states, weights)
    # Prints a dict of per-client metrics computed with the new global model.
    print(server.evaluate())


Round 1
{'lat-15.35_lon-55.45_mt': {'rmse': 0.8149028994316663, 'mae': 0.6238318041079488, 'r2': 0.20332069920194695}, 'lat-19.75_lon-44.45_mg': {'rmse': 0.8826057638881883, 'mae': 0.6721318254097733, 'r2': 0.45763957860465543}, 'lat-2.15_lon-59.85_am': {'rmse': 0.9579543799495227, 'mae': 0.7488557158387026, 'r2': -0.24091231554870762}, 'lat-30.75_lon-55.45_rs': {'rmse': 1.4554817945170349, 'mae': 1.2491955683504627, 'r2': 0.3445034601045712}, 'lat-4.35_lon-40.05_ce': {'rmse': 1.2745177239325596, 'mae': 1.1088241364093536, 'r2': -0.07457800748362198}, 'lat-8.75_lon-35.65_pe': {'rmse': 0.7647238195789089, 'mae': 0.6396220438923832, 'r2': 0.4722235853807857}}

Round 2
{'lat-15.35_lon-55.45_mt': {'rmse': 0.7840045124386918, 'mae': 0.5949575658849272, 'r2': 0.26259015388755347}, 'lat-19.75_lon-44.45_mg': {'rmse': 0.8080124377395356, 'mae': 0.5990872874027585, 'r2': 0.5454406841309494}, 'lat-2.15_lon-59.85_am': {'rmse': 1.0058434233876783, 'mae': 0.795157094810135, 'r2': -0.368082227937559