# Federated Learning Atividade - CIFAR-10



In [None]:
# Imports necessários
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
import torchvision
import torchvision.transforms as transforms

import numpy as np
from collections import Counter, defaultdict

# Fix the RNG seeds (NumPy and PyTorch) so that runs are reproducible
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"üîß Usando device: {device}")

# CIFAR-10 class names, indexed by label id
CIFAR10_CLASSES = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Experiment settings
NUM_CLIENTS = 3
NUM_CLASSES = 10  # CIFAR-10 has 10 classes
UNDERSAMPLE_COUNT = 50  # Number of samples for undersampled classes
NORMAL_SAMPLES_PER_CLASS = 800  # Number of samples for normal classes

# Echo the settings in a single write (one line per setting)
print("\n".join([
    "‚úÖ Configura√ß√µes definidas:",
    f"   - N√∫mero de clientes: {NUM_CLIENTS}",
    f"   - N√∫mero de classes: {NUM_CLASSES}",
    f"   - Undersample count: {UNDERSAMPLE_COUNT}",
    f"   - Normal samples per class: {NORMAL_SAMPLES_PER_CLASS}",
]))

üîß Usando device: cuda
‚úÖ Configura√ß√µes definidas:
   - N√∫mero de clientes: 3
   - N√∫mero de classes: 10
   - Undersample count: 50
   - Normal samples per class: 800


## 1. Carregamento de Dados

Carregamos o dataset CIFAR-10 com as transformações necessárias.

In [None]:
# Preprocessing pipeline: convert each image to a tensor, then normalise it
# with the fixed per-channel constants below
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.4914, 0.4822, 0.4465),
        std=(0.2023, 0.1994, 0.2010),
    ),
])

# Build the training split first and the test split second; both share the
# same root directory and transform, and are downloaded when missing
print("üì• Carregando CIFAR-10...")
trainset, testset = (
    torchvision.datasets.CIFAR10(
        root='./data',
        train=is_train,
        download=True,
        transform=transform,
    )
    for is_train in (True, False)
)

# Summary of what was loaded
print(f"‚úÖ Dataset de treino carregado: {len(trainset)} amostras")
print(f"‚úÖ Dataset de teste carregado: {len(testset)} amostras")
print(f"‚úÖ Classes: {CIFAR10_CLASSES}")

üì• Carregando CIFAR-10...
Files already downloaded and verified
Files already downloaded and verified
‚úÖ Dataset de treino carregado: 50000 amostras
‚úÖ Dataset de teste carregado: 10000 amostras
‚úÖ Classes: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


## 2. Separação de Clientes

Criamos 3 clientes, onde cada cliente possui dados com undersampling para 2 labels específicas.

In [None]:
def _extract_labels(dataset):
    """Return the label of every sample as a list of ints, in dataset order.

    When the dataset exposes a ``targets`` attribute with one entry per sample
    and has no ``target_transform`` set, the labels are read from it directly
    so that samples do not have to be loaded (and transformed) just to learn
    their class. Otherwise each sample is indexed and its second element is
    taken as the label.
    """
    targets = getattr(dataset, 'targets', None)
    has_target_transform = getattr(dataset, 'target_transform', None) is not None
    if (targets is not None and not has_target_transform
            and len(targets) == len(dataset)):
        return [int(label) for label in targets]
    return [int(dataset[idx][1]) for idx in range(len(dataset))]


def _undersample_classes_for(client_id, num_classes):
    """Return the two consecutive class ids undersampled for ``client_id``.

    Client 0 gets [0, 1], client 1 gets [2, 3], client 2 gets [4, 5], and so
    on; ids wrap around ``num_classes`` so any number of clients is supported.
    """
    if num_classes <= 0:
        return []
    first = 2 * client_id
    return [first % num_classes, (first + 1) % num_classes]


def create_non_iid_clients(dataset, num_clients, num_classes,
                           undersample_count=50, normal_samples=800):
    """
    Split a dataset into disjoint non-IID client subsets.

    Every client receives ``normal_samples`` samples of each class, except for
    two client-specific classes for which it only receives
    ``undersample_count`` samples. Client ``i`` undersamples classes ``2*i``
    and ``2*i + 1`` (modulo ``num_classes``). No sample is given to more than
    one client; when a class runs out of samples, later clients simply receive
    fewer (possibly zero) samples of it.

    The per-class order is shuffled with NumPy's global RNG, so seed it
    (``np.random.seed``) beforehand for reproducible splits.

    Args:
        dataset: Indexable dataset whose items are ``(sample, label)`` pairs
            with labels convertible to ``int``.
        num_clients: Number of clients to create.
        num_classes: Number of classes; class ids are ``0..num_classes - 1``.
        undersample_count: Samples per undersampled class.
        normal_samples: Samples per regular class.

    Returns:
        A tuple ``(client_datasets, client_info)``. ``client_datasets`` is a
        list with one ``Subset`` per client. ``client_info`` is a list with
        one dict per client holding ``'distribution'`` (class id -> number of
        samples), ``'undersample_classes'`` (list of class ids) and
        ``'total_samples'``.

    Raises:
        ValueError: If ``undersample_count`` or ``normal_samples`` is negative.
    """
    if undersample_count < 0 or normal_samples < 0:
        raise ValueError(
            "undersample_count and normal_samples must be non-negative"
        )

    # Group sample indices by class, in dataset order
    class_indices = defaultdict(list)
    for idx, label in enumerate(_extract_labels(dataset)):
        class_indices[label].append(idx)

    # Shuffle each class independently (classes visited in first-seen order)
    for class_id in class_indices:
        np.random.shuffle(class_indices[class_id])

    client_datasets = []
    client_info = []
    # Position of the first not-yet-assigned index in each shuffled class
    # list. Clients always consume a prefix of what is left, so a cursor is
    # enough to keep the client subsets disjoint.
    next_unused = defaultdict(int)

    for client_id in range(num_clients):
        client_indices = []
        client_dist = {}
        undersample_classes = _undersample_classes_for(client_id, num_classes)

        for class_id in range(num_classes):
            if class_id in undersample_classes:
                samples_to_take = undersample_count
            else:
                samples_to_take = normal_samples

            # Take the next unused samples of this class (fewer if exhausted)
            start = next_unused[class_id]
            selected = class_indices[class_id][start:start + samples_to_take]
            next_unused[class_id] = start + len(selected)

            client_indices.extend(selected)
            client_dist[class_id] = len(selected)

        client_datasets.append(Subset(dataset, client_indices))
        client_info.append({
            'distribution': client_dist,
            'undersample_classes': undersample_classes,
            'total_samples': len(client_indices)
        })

    return client_datasets, client_info

# Build the per-client training subsets
print("üîÑ Criando divis√£o de clientes...\n")
client_trainsets, client_info = create_non_iid_clients(
    trainset, NUM_CLIENTS, NUM_CLASSES,
    undersample_count=UNDERSAMPLE_COUNT,
    normal_samples=NORMAL_SAMPLES_PER_CLASS,
)

# Report header
print("=" * 80)
print("üìã DISTRIBUI√á√ÉO DOS DADOS POR CLIENTE")
print("=" * 80)

# One section per client; clients are displayed 1-based
for client_id, info in enumerate(client_info):
    print(f"\nüë§ Cliente {client_id + 1}:")
    print(f"   Total de amostras: {info['total_samples']}")
    undersample_class_names = [
        CIFAR10_CLASSES[c] for c in info['undersample_classes']
    ]
    print(f"   Classes com UNDERSAMPLE: {undersample_class_names}")
    print("   Distribui√ß√£o por classe:")

    # One row per class, flagging the classes undersampled for this client
    for class_id in range(NUM_CLASSES):
        count = info['distribution'][class_id]
        if class_id in info['undersample_classes']:
            marker = "‚ö†Ô∏è UNDERSAMPLE"
        else:
            marker = ""
        print(f"      {CIFAR10_CLASSES[class_id]:12s}: {count:4d} amostras {marker}")

# Report footer
print("\n" + "=" * 80)

üîÑ Criando divis√£o de clientes...

üìã DISTRIBUI√á√ÉO DOS DADOS POR CLIENTE

üë§ Cliente 1:
   Total de amostras: 6500
   Classes com UNDERSAMPLE: ['airplane', 'automobile']
   Distribui√ß√£o por classe:
      airplane    :   50 amostras ‚ö†Ô∏è UNDERSAMPLE
      automobile  :   50 amostras ‚ö†Ô∏è UNDERSAMPLE
      bird        :  800 amostras 
      cat         :  800 amostras 
      deer        :  800 amostras 
      dog         :  800 amostras 
      frog        :  800 amostras 
      horse       :  800 amostras 
      ship        :  800 amostras 
      truck       :  800 amostras 

üë§ Cliente 2:
   Total de amostras: 6500
   Classes com UNDERSAMPLE: ['bird', 'cat']
   Distribui√ß√£o por classe:
      airplane    :  800 amostras 
      automobile  :  800 amostras 
      bird        :   50 amostras ‚ö†Ô∏è UNDERSAMPLE
      cat         :   50 amostras ‚ö†Ô∏è UNDERSAMPLE
      deer        :  800 amostras 
      dog         :  800 amostras 
      frog        :  800 amostras 
      

## 3. Federated Learning

**Implemente aqui o código de Federated Learning usando o framework Flower.**

Os dados já estão preparados:
- `client_trainsets`: Lista com 3 datasets, um para cada cliente
- `testset`: Dataset de teste para avaliação
- `client_info`: Informações sobre a distribuição de dados de cada cliente

### Dicas:
- Use `flwr` (Flower) para implementar o federated learning
- Cada cliente deve treinar localmente com seu dataset (`client_trainsets[client_id]`)
- O servidor deve agregar os modelos dos clientes
- Use `testset` para avaliar o modelo global

In [None]:
# ============================================================================
# IMPLEMENT THE FEDERATED LEARNING CODE HERE
# ============================================================================

# Placeholder cell: until an implementation is added above, it only prints a
# reminder message.
print("üìù Implemente o c√≥digo de Federated Learning acima!")

üìù Implemente o c√≥digo de Federated Learning acima!
