# Prerequisites
Install Python and the libraries this exercise imports: PyTorch, NumPy, and the Hugging Face `transformers` and `datasets` packages.



# Federated Learning

Your task is to implement a federated learning workflow by completing the code below.

(10 points)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
import copy

# Simple Neural Network
class SimpleNet(nn.Module):
    """Small feed-forward classifier: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features in each input sample.
        hidden_size: Width of the single hidden layer.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size=10, hidden_size=20, num_classes=2):
        super().__init__()
        # Layers are registered in forward order: fc1, relu, fc2.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the raw (unnormalized) class logits for ``x``."""
        hidden = self.fc1(x)
        activated = self.relu(hidden)
        logits = self.fc2(activated)
        return logits

class Client:
    """A federated-learning participant that trains a local ``SimpleNet``.

    Args:
        client_id: Identifier used when reporting this client's results.
        train_loader: Iterable of ``(data, target)`` batches for training.
        test_loader: Iterable of ``(data, target)`` batches for evaluation.
    """

    def __init__(self, client_id, train_loader, test_loader):
        self.client_id = client_id
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.model = SimpleNet()
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.SGD(self.model.parameters(), lr=0.01)

    def get_parameters(self):
        """Return a snapshot of the model parameters as a list of tensors.

        Each tensor is detached before cloning, so the snapshot carries no
        autograd history and is independent of later training steps.
        """
        return [param.detach().clone() for param in self.model.parameters()]

    def set_parameters(self, parameters):
        """Overwrite the model parameters in place from ``parameters``.

        Args:
            parameters: Iterable of tensors, one per model parameter, in the
                order yielded by ``self.model.parameters()``.

        Raises:
            ValueError: If the number of tensors does not match the number of
                model parameters (a partial update would otherwise go
                unnoticed).
        """
        new_values = list(parameters)
        own_params = list(self.model.parameters())
        if len(new_values) != len(own_params):
            raise ValueError(
                f"expected {len(own_params)} parameter tensors, "
                f"got {len(new_values)}"
            )
        # Copy under no_grad so the in-place write is not tracked by autograd.
        with torch.no_grad():
            for param, new_param in zip(own_params, new_values):
                param.copy_(new_param)

    def train_one_epoch(self):
        """Run one pass over ``train_loader`` and return the mean batch loss.

        Returns:
            The average of the per-batch losses, or ``0.0`` when the loader
            yields no batches.
        """
        self.model.train()
        total_loss = 0.0
        num_batches = 0
        for data, target in self.train_loader:
            self.optimizer.zero_grad()
            output = self.model(data)
            loss = self.criterion(output, target)
            loss.backward()
            self.optimizer.step()
            total_loss += loss.item()
            num_batches += 1
        if num_batches == 0:
            # Nothing to train on; avoid dividing by zero.
            return 0.0
        return total_loss / num_batches

    def evaluate(self):
        """Return the accuracy (in percent) of the model on ``test_loader``.

        Returns:
            ``100 * correct / total`` over all evaluated samples, or ``0.0``
            when the loader yields no samples.
        """
        self.model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for data, target in self.test_loader:
                predicted = self.model(data).argmax(dim=1)
                total += target.size(0)
                correct += (predicted == target).sum().item()
        if total == 0:
            # No samples were evaluated; avoid dividing by zero.
            return 0.0
        return 100.0 * correct / total

class Server:
    """Central coordinator that owns the global ``SimpleNet`` model."""

    def __init__(self):
        self.global_model = SimpleNet()

    def get_parameters(self):
        """Return a snapshot of the global model parameters.

        Each tensor is detached before cloning, so the snapshot carries no
        autograd history and does not alias the global model's storage.
        """
        return [param.detach().clone() for param in self.global_model.parameters()]

    def aggregate(self, client_parameters_list, client_weights=None):
        """Average client parameters into the global model (FedAvg).

        Args:
            client_parameters_list: One entry per client; each entry is a
                sequence of tensors ordered like
                ``self.global_model.parameters()``.
            client_weights: Optional non-negative weight per client (for
                example its number of training samples). When omitted, every
                client contributes equally, i.e. a plain arithmetic mean.

        Raises:
            ValueError: If no client parameters are given, if the weights are
                invalid, or if a client supplies a different number of
                tensors than the global model has parameters.
        """
        client_updates = [list(params) for params in client_parameters_list]
        num_clients = len(client_updates)
        if num_clients == 0:
            raise ValueError("aggregate() needs parameters from at least one client")

        if client_weights is None:
            weights = [1.0] * num_clients
        else:
            weights = [float(w) for w in client_weights]
            if len(weights) != num_clients:
                raise ValueError(
                    f"expected {num_clients} client weights, got {len(weights)}"
                )
            if any(w < 0 for w in weights) or sum(weights) <= 0:
                raise ValueError(
                    "client weights must be non-negative with a positive sum"
                )
        total_weight = sum(weights)

        global_params = list(self.global_model.parameters())
        for params in client_updates:
            if len(params) != len(global_params):
                raise ValueError(
                    f"expected {len(global_params)} tensors per client, "
                    f"got {len(params)}"
                )

        # Aggregation is bookkeeping, not part of any loss: keep it out of the
        # autograd graph even if callers pass tensors that require grad.
        with torch.no_grad():
            for idx, global_param in enumerate(global_params):
                accumulated = torch.zeros_like(global_param)
                for weight, params in zip(weights, client_updates):
                    accumulated.add_(params[idx], alpha=weight)
                accumulated.div_(total_weight)
                global_param.copy_(accumulated)

def generate_client_data(num_samples=100, input_size=10):
    """Create a synthetic binary-labelled dataset and split it 80/20.

    Features are drawn from a standard normal distribution and labels are
    drawn uniformly from {0, 1}.

    Args:
        num_samples: Total number of samples to generate.
        input_size: Number of features per sample.

    Returns:
        A ``(train_subset, test_subset)`` pair produced by ``random_split``.
    """
    features = torch.randn(num_samples, input_size)
    labels = torch.randint(0, 2, (num_samples,))
    full_set = TensorDataset(features, labels)

    n_total = len(full_set)
    n_train = int(0.8 * n_total)
    split_sizes = [n_train, n_total - n_train]
    train_part, test_part = random_split(full_set, split_sizes)
    return train_part, test_part

def federated_learning(num_clients=3, num_rounds=5):
    """Simulate federated training and print per-round losses and accuracies.

    Each round broadcasts the server's parameters to every client, trains
    each client for one local epoch, aggregates the resulting parameters on
    the server, and then evaluates every client with the aggregated model.

    Args:
        num_clients: Number of simulated clients to create.
        num_rounds: Number of communication rounds to run.
    """

    def _make_client(client_id):
        # Every client draws its own synthetic dataset and wraps it in loaders.
        train_ds, test_ds = generate_client_data()
        return Client(
            client_id,
            DataLoader(train_ds, batch_size=32, shuffle=True),
            DataLoader(test_ds, batch_size=32, shuffle=False),
        )

    # The server is created before the clients, then one client per id.
    server = Server()
    clients = [_make_client(idx) for idx in range(num_clients)]

    print(f"Starting Federated Learning with {num_clients} clients for {num_rounds} rounds\n")

    for round_num in range(num_rounds):
        print(f"--- Round {round_num + 1}/{num_rounds} ---")

        # Broadcast the current global weights and train each client locally.
        broadcast = server.get_parameters()
        local_updates = []
        for client in clients:
            client.set_parameters(broadcast)
            loss = client.train_one_epoch()
            print(f"  Client {client.client_id}: Loss = {loss:.4f}")
            local_updates.append(client.get_parameters())

        # Combine the local models into the new global model.
        server.aggregate(local_updates)

        # Score the aggregated model on every client's own test split.
        print("  Evaluation:")
        merged = server.get_parameters()
        for client in clients:
            client.set_parameters(merged)
            accuracy = client.evaluate()
            print(f"    Client {client.client_id}: Accuracy = {accuracy:.2f}%")
        print()

# Entry point: run the federated learning simulation (3 clients, 5 rounds)
# only when this code is executed as a script, not when it is imported.
if __name__ == "__main__":
    federated_learning(num_clients=3, num_rounds=5)

Starting Federated Learning with 3 clients for 5 rounds

--- Round 1/5 ---
  Client 0: Loss = 0.7125
  Client 1: Loss = 0.7235
  Client 2: Loss = 0.6964
  Evaluation:
    Client 0: Accuracy = 30.00%
    Client 1: Accuracy = 55.00%
    Client 2: Accuracy = 60.00%

--- Round 2/5 ---
  Client 0: Loss = 0.7147
  Client 1: Loss = 0.7108
  Client 2: Loss = 0.6947
  Evaluation:
    Client 0: Accuracy = 35.00%
    Client 1: Accuracy = 55.00%
    Client 2: Accuracy = 60.00%

--- Round 3/5 ---
  Client 0: Loss = 0.7159
  Client 1: Loss = 0.7204
  Client 2: Loss = 0.6970
  Evaluation:
    Client 0: Accuracy = 35.00%
    Client 1: Accuracy = 55.00%
    Client 2: Accuracy = 60.00%

--- Round 4/5 ---
  Client 0: Loss = 0.7122
  Client 1: Loss = 0.7200
  Client 2: Loss = 0.6951
  Evaluation:
    Client 0: Accuracy = 35.00%
    Client 1: Accuracy = 55.00%
    Client 2: Accuracy = 60.00%

--- Round 5/5 ---
  Client 0: Loss = 0.7112
  Client 1: Loss = 0.7128
  Client 2: Loss = 0.6909
  Evaluation:
    C

# Hugging Face

In this exercise you are asked to complete the code that fine-tunes a pre-trained language model to classify a given review as positive or negative. This involves loading the pre-trained model, configuring basic training settings, connecting your data, and starting the training process. The tokenization function that converts text to token IDs is already given.

(5 points)

In [None]:
import numpy as np
import os
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)

# Disable all wandb (Weights & Biases) logging via its environment variable.
os.environ["WANDB_DISABLED"] = "true"

# Configuration
MODEL_CHECKPOINT = "google-bert/bert-base-uncased"  # pre-trained checkpoint to load
NUM_LABELS = 2  # passed as num_labels when the classification model is loaded

# Load the GLUE SST-2 dataset and keep only small subsets of it:
# 1000 training examples and 200 validation examples.
raw_datasets = load_dataset("glue", "sst2")
small_train = raw_datasets["train"].select(range(1000))  # first 1000 training examples
small_val = raw_datasets["validation"].select(range(200))  # first 200 validation examples

# Tokenizer loaded from the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)

# Tokenize the data
def tokenize_function(examples):
    """Tokenize the ``"sentence"`` entries of ``examples`` for BERT.

    Uses the module-level ``tokenizer`` with ``truncation=True``; no padding
    argument is passed here.
    """
    sentences = examples["sentence"]
    return tokenizer(sentences, truncation=True)

# Apply tokenize_function to both subsets; batched=True maps it over batches
# of examples rather than one example at a time.
tokenized_train = small_train.map(tokenize_function, batched=True)
tokenized_val = small_val.map(tokenize_function, batched=True)

# TODO: Train the model
def _compute_accuracy(eval_pred):
    """Return ``{"accuracy": ...}`` for a ``(logits, labels)`` evaluation pair."""
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": float((predictions == labels).mean())}


def train_model():
    """Fine-tune the pre-trained checkpoint for sentiment classification.

    Loads ``MODEL_CHECKPOINT`` with ``NUM_LABELS`` output classes, trains it
    on ``tokenized_train`` for three epochs, evaluates on ``tokenized_val``
    after every epoch, and prints the final evaluation metrics.

    Returns:
        The metrics dictionary produced by the final ``trainer.evaluate()``
        call, which includes the accuracy computed by ``_compute_accuracy``.
    """
    # Load the pre-trained model with a classification head of NUM_LABELS outputs.
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_CHECKPOINT,
        num_labels=NUM_LABELS,
    )

    # Examples were tokenized without padding, so pad each batch dynamically.
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    training_args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=3,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        eval_strategy="epoch",
        save_strategy="epoch",
        logging_strategy="steps",
        logging_steps=10,
        logging_dir="./logs",
        # Turn off external logging integrations explicitly instead of
        # relying only on an environment variable.
        report_to="none",
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_val,
        data_collator=data_collator,
        # Without this, evaluation reports the loss but no accuracy.
        compute_metrics=_compute_accuracy,
    )

    trainer.train()

    eval_results = trainer.evaluate()
    print(f"Evaluation results: {eval_results}")
    return eval_results

# Entry point: start fine-tuning only when this code is executed as a script.
if __name__ == "__main__":
    train_model()