__import des librairies nécessaires__

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
from tqdm import tqdm
import copy
import argparse
import os
import sys
#from utils import *

__import des autres fichiers__

In [2]:
print(os.getcwd())
os.chdir('..')


/Users/Theo/Documents/Dépots Githubs/2023/chocoEA/impl


In [3]:
print(os.getcwd())

/Users/Theo/Documents/Dépots Githubs/2023/chocoEA


In [4]:
from nv_orga.Models import Net, Net_eNTK
from nv_orga.FedAvg import average_models,client_update
from nv_orga.Eval import evaluate_many_models
from nv_orga.NTK import client_compute_eNTK
from nv_orga.Scaffold import scaffold_update

__définition des hyperparamètres__

In [5]:
args = {
    'num_client_stage1_2': 5,
    'num_client_stage3': 5,
    'seed': 123,
    'num_samples_per_client': 500,
    'rounds_stage1': 50, #100 de base
    'local_epochs_stage1': 5,
    'mini_batchsize_stage1': 64,
    'local_lr_stage1': 0.1,
    'rounds_stage2': 1, #100 de base
    'local_steps_stage2': 100,
    'local_lr_stage2': 0.001,
    'rounds_stage3': 100,
    'local_steps_stage3': 100,
    'local_lr_stage3': 0.001
}

__création d'un dossier de sauvegarde pour les modèles successifs du stage 1__

In [6]:
isExist = os.path.exists('data/ckpt_stage1')
if not isExist:
   os.makedirs('data/ckpt_stage1')

## __Stage 1__

### __hyperparamètres__

In [7]:
num_clients_stage1_2 = args["num_client_stage1_2"]
num_clients_stage3 = args["num_client_stage3"]
num_rounds_stage1 = args["rounds_stage1"]

epochs_stage1 = args["local_epochs_stage1"]
batch_size_stage1 = args["mini_batchsize_stage1"]
lr_stage1 = args["local_lr_stage1"]

### __création des datasets décentralisés (ie non idd)__

In [8]:
## load les data MNIST, les transformer en tensor et les normaliser
traindata = datasets.MNIST('data/data_mnist', train=True, download=True,
                           transform=transforms.Compose([transforms.ToTensor(),
                                                         transforms.Normalize((0.1307,), (0.3081,))]))


target_labels = torch.stack([traindata.targets == i for i in range(10)]) # 10 x 60000 (one-hot qui détermine la label correpondant à la ligne)

In [9]:
target_labels_split = []
torch.manual_seed(args["seed"]) # pour que les splits soient les mêmes à chaque fois
torch.cuda.manual_seed(args["seed"])  # pour que les splits soient les mêmes à chaque fois

for i in range(num_clients_stage3):
    index_split = torch.where(target_labels[(2 * i):(2 * (i + 1))].sum(0))[0] # on prend les labels 2i et 2i+1
    perm_split = torch.randperm(index_split.size(0)) # on mélange les indices
    index_split_subsample = index_split[perm_split[:args["num_samples_per_client"]]] # on prend les 500 premiers
    target_labels_split += [index_split_subsample] # on ajoute à la liste des labels splités

#Chacun des 5 clients reçoit 500 images d'un des deux labels associés

In [10]:
# Training datasets (subsampled)
traindata_split = [torch.utils.data.Subset(traindata, tl) for tl in target_labels_split] # chaque élément contient les images et labels d'un client
train_loader = [torch.utils.data.DataLoader(train_subset, batch_size=batch_size_stage1, shuffle=True)
                for train_subset in traindata_split] # on crée les dataloader associés

### __création du dataset global de test__

In [11]:
# Test dataset (subsampled)
testdata = datasets.MNIST('data/data_mnist', train=False,
                          transform=transforms.Compose([transforms.ToTensor(),
                                                        transforms.Normalize((0.1307,), (0.3081,))])) # on charge les données de test

torch.manual_seed(args["seed"])
torch.cuda.manual_seed(args["seed"])
perm_split_test = torch.randperm(testdata.targets.shape[0])
testdata_subset = torch.utils.data.Subset(testdata, perm_split_test[:1000])
test_loader = torch.utils.data.DataLoader(testdata_subset, batch_size=batch_size_stage1, shuffle=False) #pas de shuffle pour le test

### __modèle de réseau de neurones de base__

In [12]:
global_model = Net() #modifié depuis Net().cuda() #Modèle fédéré
client_models = [Net() for _ in range(num_clients_stage3)] #modifié depuis Net().cuda() #Modèles des clients
for model in client_models:
    model.load_state_dict(global_model.state_dict())
opt = [optim.SGD(model.parameters(), lr=lr_stage1) for model in client_models]

### __imple de FedAvg__

In [13]:
# Run TCT-Stage1 (i.e., FedAvg)
for r in range(num_rounds_stage1):
    # load global weights
    for model in client_models[:num_clients_stage1_2]:
        model.load_state_dict(global_model.state_dict())

    # client update
    loss = 0
    for i in range(num_clients_stage1_2):
        loss += client_update(client_models[i], opt[i], train_loader[i], epoch=epochs_stage1)

    # average params across neighbors
    average_models(global_model, client_models[:num_clients_stage1_2])

    # evaluate
    test_losses, accuracies = evaluate_many_models(client_models[:num_clients_stage1_2], test_loader)
    torch.save(client_models[0].state_dict(), 'data/ckpt_stage1/stage1_20rounds_1workers.pth')

    print('%d-th round: average train loss %0.3g | average test loss %0.3g | average test acc: %0.3f' % (
    r, loss / num_clients_stage1_2, test_losses.mean(), accuracies.mean()))

0-th round: average train loss 0.0963 | average test loss 7.08 | average test acc: 0.197
1-th round: average train loss 0.0635 | average test loss 6.91 | average test acc: 0.199
2-th round: average train loss 0.0918 | average test loss 7.12 | average test acc: 0.198
3-th round: average train loss 0.0236 | average test loss 7.87 | average test acc: 0.199
4-th round: average train loss 0.038 | average test loss 7.58 | average test acc: 0.199
5-th round: average train loss 0.0228 | average test loss 6.46 | average test acc: 0.199
6-th round: average train loss 0.011 | average test loss 6.36 | average test acc: 0.202
7-th round: average train loss 0.0322 | average test loss 6.7 | average test acc: 0.203
8-th round: average train loss 0.0272 | average test loss 6.18 | average test acc: 0.222
9-th round: average train loss 0.00605 | average test loss 6.78 | average test acc: 0.207
10-th round: average train loss 0.00486 | average test loss 5.9 | average test acc: 0.234
11-th round: average t

In [14]:
#torch.save(client_models[0].state_dict(), 'data/ckpt_stage1/random.pth')

## __Stage 2__

### __hyperparamètres__

In [25]:
num_rounds_stage2 = args["rounds_stage2"]
batch_size = args["num_samples_per_client"]

### __modèle eNTK__

In [26]:
# Init and load model ckpt
global_model = Net_eNTK() #supprimer .cuda()
global_model.load_state_dict(torch.load('data/ckpt_stage1/stage1_20rounds_1workers.pth'))
global_model.fc2 = nn.Linear(128, 1) #supprimer .cuda() #récupérer une unique sortie ici #supprimer le dernier layer pour le remplacer (passer de 128->10 à 128->1)
print('load model')

load model


### __Compute eNTK__

In [27]:
# Train
grad_all = []
target_all = []
target_onehot_all = []
for i in range(num_clients_stage1_2):
    grad_i, target_onehot_i, target_i = client_compute_eNTK(global_model, train_loader[i])
    grad_all.append(copy.deepcopy(grad_i).cpu())
    target_all.append(copy.deepcopy(target_i).cpu())
    target_onehot_all.append(copy.deepcopy(target_onehot_i).cpu())
    del grad_i
    del target_onehot_i
    del target_i
    torch.cuda.empty_cache()

  0%|          | 0/64 [00:00<?, ?it/s]

100%|██████████| 64/64 [00:00<00:00, 654.62it/s]
100%|██████████| 64/64 [00:00<00:00, 731.15it/s]
100%|██████████| 64/64 [00:00<00:00, 729.63it/s]
100%|██████████| 64/64 [00:00<00:00, 567.14it/s]


In [28]:
grad_all[0].shape

torch.Size([64, 100000])

In [29]:
# Test
grad_eval, target_eval_onehot, target_eval  = client_compute_eNTK(global_model, test_loader)

100%|██████████| 64/64 [00:00<00:00, 508.98it/s]


### __run stage 2__

In [30]:
# Init linear models
theta_global = torch.zeros(100000, 10) #supprimer .cuda()
theta_global = torch.tensor(theta_global, requires_grad=False)
client_thetas = [torch.zeros_like(theta_global) for _ in range(num_clients_stage3)] #supprimer .cuda()
client_hi_s = [torch.zeros_like(theta_global) for _ in range(num_clients_stage3)] #supprimer .cuda()

  theta_global = torch.tensor(theta_global, requires_grad=False)


In [21]:
# # Run TCT-Stage2
# for round_idx in range(num_rounds_stage2):
#     theta_list = []
#     for i in range(num_clients_stage1_2):
#         theta_hat_update, h_i_client_update = scaffold_update(grad_all[i],
#                                                               target_all[i],
#                                                               client_thetas[i],
#                                                               client_hi_s[i],
#                                                               theta_global,
#                                                               M=args["local_steps_stage2"],
#                                                               lr_local=args["local_lr_stage2"])
#         client_hi_s[i] = h_i_client_update * 1.0
#         client_thetas[i] = theta_hat_update * 1.0
#         theta_list.append(theta_hat_update)

#     # averaging
#     theta_global = torch.zeros_like(theta_list[0]) #supprimer .cuda()
#     for theta_idx in range(num_clients_stage1_2):
#         theta_global += (1.0 / num_clients_stage1_2) * theta_list[theta_idx]

#     # eval on train
#     logits_class_train = torch.cat(grad_all) @ theta_global #supprimer .cuda()
#     _, targets_pred_train = logits_class_train.max(1)
#     train_acc = targets_pred_train.eq(torch.cat(target_all)).sum() / (1.0 * logits_class_train.shape[0]) #supprimer .cuda()
#     # eval on test
#     logits_class_test = grad_eval @ theta_global
#     _, targets_pred_test = logits_class_test.max(1)
#     test_acc = targets_pred_test.eq(target_eval).sum() / (1.0 * logits_class_test.shape[0]) #supprimer .cuda()
#     print('Round %d: train accuracy=%0.5g test accuracy=%0.5g' % (round_idx, train_acc.item(), test_acc.item()))

### Run Stage 3

In [31]:
#hyperparameters
num_rounds_stage3 = args["rounds_stage3"]

In [32]:
#Compute NTK
for i in range(num_clients_stage1_2, num_clients_stage3):
    grad_i, target_onehot_i, target_i = client_compute_eNTK(global_model, train_loader[i])
    grad_all.append(copy.deepcopy(grad_i).cpu())
    target_all.append(copy.deepcopy(target_i).cpu())
    target_onehot_all.append(copy.deepcopy(target_onehot_i).cpu())
    del grad_i
    del target_onehot_i
    del target_i
    torch.cuda.empty_cache()

100%|██████████| 64/64 [00:00<00:00, 664.17it/s]


In [33]:
import os

# Create the directory if it doesn't exist
os.makedirs('./data/dataforplots', exist_ok=True)

# Define the file path
file_path = './data/dataforplots/accuracies_5workers.txt'

# Run TCT-Stage2
accuracies = []
for round_idx in range(num_rounds_stage3):
    theta_list = []
    for i in range(num_clients_stage3):
        theta_hat_update, h_i_client_update = scaffold_update(grad_all[i],
                                                              target_all[i],
                                                              client_thetas[i],
                                                              client_hi_s[i],
                                                              theta_global,
                                                              M=args["local_steps_stage2"],
                                                              lr_local=args["local_lr_stage2"])
        client_hi_s[i] = h_i_client_update * 1.0
        client_thetas[i] = theta_hat_update * 1.0
        theta_list.append(theta_hat_update)

    # averaging
    theta_global = torch.zeros_like(theta_list[0]) #supprimer .cuda()
    for theta_idx in range(num_clients_stage3):
        theta_global += (1.0 / num_clients_stage3) * theta_list[theta_idx]

    # eval on train
    logits_class_train = torch.cat(grad_all) @ theta_global #supprimer .cuda()
    _, targets_pred_train = logits_class_train.max(1)
    train_acc = targets_pred_train.eq(torch.cat(target_all)).sum() / (1.0 * logits_class_train.shape[0]) #supprimer .cuda()
    # eval on test
    logits_class_test = grad_eval @ theta_global
    _, targets_pred_test = logits_class_test.max(1)
    test_acc = targets_pred_test.eq(target_eval).sum() / (1.0 * logits_class_test.shape[0]) #supprimer .cuda()
    print('Round %d: train accuracy=%0.5g test accuracy=%0.5g' % (round_idx, train_acc.item(), test_acc.item()))
    accuracies.append(test_acc.item())

    # Write the accuracy to the text file
    with open(file_path, 'a') as file:
        file.write(f'Round {round_idx}: {test_acc.item()}\n')


Round 0: train accuracy=0.64688 test accuracy=0.64062
Round 1: train accuracy=0.82812 test accuracy=0.875
Round 2: train accuracy=0.8625 test accuracy=0.875
Round 3: train accuracy=0.86875 test accuracy=0.85938
Round 4: train accuracy=0.875 test accuracy=0.875
Round 5: train accuracy=0.89375 test accuracy=0.84375
Round 6: train accuracy=0.89688 test accuracy=0.84375
Round 7: train accuracy=0.90938 test accuracy=0.85938
Round 8: train accuracy=0.91875 test accuracy=0.85938
Round 9: train accuracy=0.92813 test accuracy=0.85938
Round 10: train accuracy=0.94063 test accuracy=0.875
Round 11: train accuracy=0.94687 test accuracy=0.875
Round 12: train accuracy=0.94687 test accuracy=0.89062
Round 13: train accuracy=0.95 test accuracy=0.89062
Round 14: train accuracy=0.95 test accuracy=0.89062
Round 15: train accuracy=0.95 test accuracy=0.875
Round 16: train accuracy=0.95625 test accuracy=0.89062
Round 17: train accuracy=0.9625 test accuracy=0.89062
Round 18: train accuracy=0.96875 test accurac