In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk /kaggle/input and print the full path of every file found.
# `_` discards the list of sub-directory names that os.walk yields for each directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import random
import numpy as np
from tqdm import tqdm
import copy
import matplotlib.pyplot as plt
import time
import timeit

import torch, torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset  
from torch.utils.data import TensorDataset

In [None]:
# Experiment configuration shared by the data loaders, the clients and the server.
args = {
    "batch_size": 64,      # batch size of the global train/val/test loaders
    "num_clients": 2,      # number of clients the training set is split across
    "frac": 1,             # NOTE(review): not read in the visible code; presumably the fraction of clients used per round
    "ep_local": 2,         # local training epochs per client in each round
    "bs_local": 10,        # batch size of each client's local loader
    "epochs": 2,           # number of federated communication rounds
    "dataset": "CIFAR10",  # NOTE(review): the notebook loads icml_face_data.csv, not CIFAR10; key is not read in the visible code
    "model": "CNN",        # not read in the visible code
    "iid": "iid",          # not read in the visible code
    "seed": 42,            # seed applied to every RNG below so that runs are repeatable

    # Unlearning params
    "unlearned_clients": [0],
    "t": 2, # calibration_interval,
    "r": 0.5 # local_calibration_epoch_ratio r = Ecali/Eloc

}

# Seed all random number generators used by the notebook (data split,
# loader shuffling, weight initialisation) so Restart & Run All is reproducible.
random.seed(args["seed"])
np.random.seed(args["seed"])
torch.manual_seed(args["seed"])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Kaggle input directory of the facial-expression-recognition challenge.
# Listing it shows which data files are available before loading anything.
path = '/kaggle/input/challenges-in-representation-learning-facial-expression-recognition-challenge/'
os.listdir(path)

In [None]:
# Load the whole CSV into a DataFrame. The columns used later are 'emotion',
# ' Usage' and ' pixels' (the last two names start with a space).
data = pd.read_csv(path+'icml_face_data.csv')

In [None]:
# Preview the first rows of the raw table.
data.head()

In [None]:
def prepare_data(data, image_size=48):
    """Convert a frame of labelled pixel strings into image and label arrays.

    Args:
        data: DataFrame with an 'emotion' column holding integer-like labels
            and a ' pixels' column (note the leading space) holding
            ``image_size * image_size`` space-separated pixel values per row.
        image_size: Side length of the square images. Defaults to 48.

    Returns:
        Tuple ``(image_array, image_label)``: a float64 array of shape
        ``(len(data), image_size, image_size)`` and a 1-D int64 label array.

    Raises:
        ValueError: If a row cannot be reshaped to
            ``(image_size, image_size)``.
    """
    image_array = np.zeros(shape=(len(data), image_size, image_size))
    # Fix the dtype explicitly so labels are int64 on every platform and
    # also when `data` is empty.
    image_label = np.array(list(map(int, data['emotion'])), dtype=np.int64)

    # Iterate over the column values by position rather than looking each row
    # up with `data.loc`: this also works when index labels repeat (e.g. after
    # concatenating frames) and avoids one label lookup per row.
    for i, pixels in enumerate(data[' pixels'].to_numpy()):
        image = np.fromstring(pixels, dtype=int, sep=' ')
        image_array[i] = np.reshape(image, (image_size, image_size))

    return image_array, image_label

In [None]:
# Count the rows per split named in the ' Usage' column (the column name starts with a space).
data[' Usage'].value_counts()

In [None]:
# Human-readable name for each of the 7 integer class ids
# (presumably the values of the 'emotion' column - confirm against the dataset description).
emotions = {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Sad', 5: 'Surprise', 6: 'Neutral'}

In [None]:
# Build (images, labels) arrays for each split: 'Training' rows are the
# training set, 'PrivateTest' rows the validation set and 'PublicTest' rows
# the test set.
(
    (train_image_array, train_image_label),
    (val_image_array, val_image_label),
    (test_image_array, test_image_label),
) = (
    prepare_data(data[data[' Usage'] == usage])
    for usage in ('Training', 'PrivateTest', 'PublicTest')
)

In [None]:
# Confirm the container type and the overall shape of the parsed training images.
print(type(train_image_array))
train_image_array.shape

In [None]:
# Give every split an explicit channel axis, (N, 48, 48) -> (N, 1, 48, 48),
# cast to float32 and divide by 255 (presumably 8-bit grayscale values, which
# would map them into [0, 1]).
train_images = train_image_array.reshape((len(train_image_array), 1, 48, 48)).astype(np.float32) / 255
val_images = val_image_array.reshape((len(val_image_array), 1, 48, 48)).astype(np.float32) / 255
test_images = test_image_array.reshape((len(test_image_array), 1, 48, 48)).astype(np.float32) / 255

In [None]:
# Check the type and shape after adding the channel axis and scaling.
print(type(train_images))
train_images.shape

In [None]:
# Expose the label arrays under the *_labels names used by the following cells.
train_labels, val_labels, test_labels = (
    train_image_label,
    val_image_label,
    test_image_label,
)

In [None]:
# Convert to tensor.
# torch.as_tensor wraps a NumPy array without copying (like torch.from_numpy)
# and returns an existing tensor unchanged, so this cell can be re-run safely.
# torch.from_numpy raised TypeError on the second run because these names
# already held tensors.
train_images = torch.as_tensor(train_images)
train_labels = torch.as_tensor(train_labels)
val_images = torch.as_tensor(val_images)
val_labels = torch.as_tensor(val_labels)
test_images = torch.as_tensor(test_images)
test_labels = torch.as_tensor(test_labels)

In [None]:
# Only the last expression of a cell is displayed, so the type has to be
# printed explicitly; the bare `type(train_labels)` expression showed nothing.
print(type(train_labels))
train_labels[1]

In [None]:
# Pair each split's image tensor with its label tensor in a TensorDataset.
dataset_train, dataset_val, dataset_test = (
    TensorDataset(images, labels)
    for images, labels in (
        (train_images, train_labels),
        (val_images, val_labels),
        (test_images, test_labels),
    )
)

In [None]:
# Confirm that the training split is now wrapped in a TensorDataset.
type(dataset_train)

In [None]:
# Global (non-federated) loaders over the three splits. All of them use the
# shared batch size and reshuffle on every pass.
train_loader, test_loader, val_loader = (
    DataLoader(split, batch_size=args["batch_size"], shuffle=True)
    for split in (dataset_train, dataset_test, dataset_val)
)

In [None]:
# Peek at a single validation batch to confirm the tensor shapes, then stop.
# Note: `x` and `y` stay defined at notebook scope after this cell.
for x, y in val_loader:
    print(x.shape, y.shape)
    break

In [None]:
# Model
class Net(nn.Module):
    """Small CNN: two conv/ReLU/max-pool stages followed by two linear layers.

    ``forward`` returns log-probabilities over 7 classes. The first linear
    layer expects 64 * 10 * 10 flattened features, which is what the two
    stages produce for single-channel 48x48 inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.fc1 = nn.Linear(in_features=64 * 10 * 10, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=7)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities."""
        # Stage 1 and 2: convolution -> ReLU -> 2x2 max pooling.
        features = F.max_pool2d(F.relu(self.conv1(x)), 2)
        features = F.max_pool2d(F.relu(self.conv2(features)), 2)
        # Classifier head on the flattened feature maps (batch axis kept).
        hidden = F.relu(self.fc1(features.flatten(1)))
        return F.log_softmax(self.fc2(hidden), dim=1)

In [None]:
# Sanity-check the feature-map sizes that determine the input width of
# Net.fc1, using freshly created layers with the same hyper-parameters.
for images, y in val_loader:
    # Print the batch that was just fetched. The previous code printed `x`,
    # a stale variable left over from an earlier cell.
    print(images.shape, y.shape)
    break
conv1 = nn.Conv2d(1, 32, 3)
conv2 = nn.Conv2d(32, 64, 3)
pool = nn.MaxPool2d(2, 2)
print(images.shape)
x = conv1(images)
print(x.shape)
x = pool(x)
x = conv2(x)
print(x.shape)
x = pool(x)
# For 48x48 inputs this last shape is (N, 64, 10, 10), hence 64*10*10 in Net.fc1.
print(x.shape)

In [None]:
class DatasetSplit(Dataset):
    """View of a dataset restricted to a given collection of sample indices.

    Position ``k`` of the split maps to ``dataset_train[idxs[k]]``, where
    ``idxs`` is materialised as a list at construction time.
    """

    def __init__(self, dataset_train, idxs):
        self.idxs = [index for index in idxs]
        self.dataset_train = dataset_train

    def __len__(self):
        """Number of samples in this split."""
        return len(self.idxs)

    def __getitem__(self, item):
        """Return the (image, label) pair stored at split position ``item``."""
        source_index = self.idxs[item]
        image, label = self.dataset_train[source_index]
        return image, label

In [None]:
# Distribute data - IID
# Shuffle all sample indices once and give every client a disjoint, equally
# sized shard. A single permutation replaces the repeated
# choice-then-set-difference over all remaining indices. The generator is
# seeded from the optional args["seed"], so the split is reproducible when a
# seed is configured and random (as before) when it is not.
num_items = int(len(dataset_train)/args["num_clients"])
split_rng = np.random.default_rng(args.get("seed"))
all_idxs = split_rng.permutation(len(dataset_train)).tolist()
dict_users = {
    i: set(all_idxs[i * num_items:(i + 1) * num_items])
    for i in range(args["num_clients"])
}
# As before, `all_idxs` ends up holding the indices not assigned to any client.
all_idxs = all_idxs[args["num_clients"] * num_items:]

# One shuffling DataLoader per client over that client's shard.
local_datasets = [
    DataLoader(DatasetSplit(dataset_train, dict_users[i]),
               batch_size=args["bs_local"], shuffle=True)
    for i in range(args["num_clients"])
]

In [None]:
# local_datasets holds DataLoaders, so this is client 0's number of batches
# (of size args["bs_local"]), not its number of samples.
len(local_datasets[0])

In [None]:
class Client:
    """A simulated federated-learning participant with its own data loader.

    The server assigns ``model`` before each round. ``client_update`` trains
    that model on the client's loader and ``client_test`` evaluates it on the
    same loader.

    Class attributes:
        num_clients: Running count of constructed clients; used to assign ids.
        lr: Default SGD learning rate, overridable through ``setup``.
    """
    num_clients = 0
    lr = 0.01

    def __init__(self, data):
        """Create a client around ``data``, the loader it trains and tests on."""
        self.id = Client.num_clients
        self.dataloader = data
        self.__model = None
        # Models handed to this client by the server, one entry per round.
        self.model_record = []
        self.device = device
        self.unlearned = False
        Client.num_clients += 1


    def setup(self, args):
        """Read training settings from ``args``.

        Args:
            args: Mapping with key "ep_local" (local epochs per round) and an
                optional key "lr" (SGD learning rate, default 0.01).
        """
        self.local_epoch = args["ep_local"]
        self.lr = args.get("lr", Client.lr)


    @property
    def unlearned(self):
        """Whether this client has been marked as one to be unlearned."""
        return self.__unlearned


    @unlearned.setter
    def unlearned(self, unlearned):
        self.__unlearned = unlearned


    @property
    def model(self):
        """The model currently assigned to this client (None until set)."""
        return self.__model


    @model.setter
    def model(self, model):
        self.__model = model


    def client_update(self):
        """Train the assigned model for ``local_epoch`` epochs with SGD.

        Returns:
            Tuple ``(model, loss)``: the trained model (the same object as
            ``self.model``) and the mean over epochs of the mean batch loss,
            as a Python float.

        Raises:
            ZeroDivisionError: If the loader yields no batches or
                ``local_epoch`` is 0.
        """
        self.model.train()
        self.model.to(self.device)

        optimizer = optim.SGD(self.model.parameters(), lr=self.lr)
        epoch_loss = []

        for epoch in range(self.local_epoch):
            batch_loss = []

            for data, labels in self.dataloader:
                # nll_loss needs int64 class indices, hence the cast.
                data, labels = data.to(self.device), labels.long().to(self.device)

                optimizer.zero_grad()
                outputs = self.model(data)
                loss = F.nll_loss(outputs, labels)

                loss.backward()
                optimizer.step()

                batch_loss.append(loss.item())

            epoch_loss.append(sum(batch_loss)/len(batch_loss))

        return self.model, sum(epoch_loss) / len(epoch_loss)


    def client_test(self):
        """Evaluate the assigned model on this client's own loader.

        Prints the average loss and the accuracy.

        Returns:
            Tuple ``(test_accuracy, test_loss)`` of Python floats: the
            fraction of correctly classified samples (0..1) and the mean
            per-sample negative log-likelihood.
        """
        self.model.eval()
        self.model.to(self.device)
        test_loss, correct = 0.0, 0
        with torch.no_grad():
            for data, labels in self.dataloader:
                # Same cast as in client_update so both paths accept the same label dtypes.
                data, labels = data.to(self.device), labels.long().to(self.device)
                outputs = self.model(data)
                test_loss += F.nll_loss(outputs, labels, reduction='sum').item()
                predicted = outputs.argmax(dim=1)
                # .item() keeps the running count a plain int rather than a tensor.
                correct += (predicted == labels).sum().item()

        num_samples = len(self.dataloader.dataset)
        test_loss /= num_samples
        test_accuracy = correct / num_samples

        print(f"Average loss: {test_loss:.4f}, Accuracy: {100. * test_accuracy:.2f}%")

        return test_accuracy, test_loss

In [None]:
class Server:
    """Central coordinator that trains a global ``Net`` with federated averaging.

    In each round the server copies its model to every client, lets the
    clients train locally, averages the returned weights into the global
    model and evaluates it on the test loader. Per-round metrics are stored
    in ``self.history``.
    """

    def __init__(self):
        self.round = 0
        self.model = Net().to(device)
        self.w_glob = None
        self.device = device

        # One entry per round; losses are averages, accuracies are percentages.
        self.history = {
            "train_loss": [],
            "test_loss": [],
            "train_acc": [],
            "test_acc": []
        }


    def setup(self, args):
        """Attach loaders, clients and round settings.

        Args:
            args: Mapping with keys "num_clients", "epochs", "ep_local",
                "batch_size", "clients" and "unlearned_clients". Optional
                keys "train_loader" and "test_loader" override the
                notebook-level loaders of the same name.
        """
        # Prefer injected loaders; fall back to the notebook-level globals.
        self.train_loader = args["train_loader"] if "train_loader" in args else train_loader
        self.test_loader = args["test_loader"] if "test_loader" in args else test_loader

        self.num_clients = args["num_clients"]
        self.num_rounds = args["epochs"]
        self.local_epochs = args["ep_local"]
        self.batch_size = args["batch_size"]
        self.clients = args["clients"]
        self.unlearned_clients = args["unlearned_clients"]

        # local_model_record[c][r] is the model client c returned in round r.
        self.local_model_record = [[] for i in range(self.num_clients)]
        # NOTE(review): never appended to in the visible code.
        self.global_model_record = []


    def fedAvg(self, w):
        """Return the element-wise mean of a list of state dicts.

        Args:
            w: Non-empty list of state dicts with identical keys.

        Returns:
            A new state dict; the inputs are left unmodified.
        """
        w_avg = copy.deepcopy(w[0])
        for k in w_avg.keys():
            for i in range(1, len(w)):
                w_avg[k] += w[i][k]
            w_avg[k] = torch.div(w_avg[k], len(w))
        return w_avg


    def send_global_model(self):
        """Give every client its own deep copy of the current global model."""
        for client in self.clients:
            client.model = copy.deepcopy(self.model)
            # The recorded object is the same instance the client trains
            # afterwards, so the entry reflects any later in-place training.
            client.model_record.append(client.model)


    def train_global_model(self):
        """Run one round: broadcast, local training, aggregation.

        Returns:
            Tuple ``(acc_test_clients, loss_locals)``: per-client accuracy
            from ``client_test`` and per-client training loss from
            ``client_update``, both in client order.
        """
        # Start empty. Pre-filling these lists and then appending averaged
        # the stale global weights in with the client weights and halved the
        # reported mean training loss.
        w_locals = []
        loss_locals = []

        # Send global model to clients
        self.send_global_model()
        print("Send global model to all clients...")

        # Train local model
        acc_test_clients, loss_test_clients = [], []
        for client_idx, client in enumerate(self.clients):
            print(f"\nUpdating client {client_idx}...")
            local_model, loss_train_client = client.client_update()

            # Save local models
            self.local_model_record[client_idx].append(local_model)
            w_locals.append(local_model.state_dict())
            loss_locals.append(loss_train_client)

            print(f"Evaluating client {client_idx}...")
            acc_test_client, loss_test_client = client.client_test()
            acc_test_clients.append(acc_test_client)
            loss_test_clients.append(loss_test_client)

        # Fed Aggregation
        w_glob = self.fedAvg(w_locals)

        # Update global model
        self.model.load_state_dict(w_glob)
        self.w_glob = self.model.state_dict()

        return acc_test_clients, loss_locals


    def federated_learning(self):
        """Run all rounds, record metrics, then print results and plot history."""
        self.model.train()
        self.w_glob = self.model.state_dict()

        print("\tFederated Learning:")
        for round_idx in tqdm(range(self.num_rounds)):
            self.round = round_idx + 1
            print(f"\nRound {self.round}/{self.num_rounds}: Starting...")
            acc_train_clients, loss_train_clients = self.train_global_model()
            mean_client_acc = 100*sum(acc_train_clients)/len(acc_train_clients)
            mean_client_loss = sum(loss_train_clients)/len(loss_train_clients)
            self.history["train_acc"].append(mean_client_acc)
            self.history["train_loss"].append(mean_client_loss)

            print(f"\nRound {self.round}: Evaluating...")
            # Save test accuracy and loss
            acc_test_server, loss_test_server = self.test_global_model()
            self.history["test_acc"].append(acc_test_server)
            self.history["test_loss"].append(loss_test_server)

            print(f"|---- Average Clients Loss: {mean_client_loss}")
            print(f"|---- Average Clients Accuracy: {mean_client_acc:.2f}%")
            print(f"|---- Server Testing Accuracy: {acc_test_server:.2f}%")

            print(f"\nRound {self.round}: Finished!\n")
            print(f"---------------------------------")

        self.show_result()
        self.plot(self.history)


    def show_result(self):
        """Print the final test accuracy and each unlearned client's accuracy."""
        # Testing data
        acc_test_server, loss_test_server = self.test_global_model()
        print(f' \n Results after {self.num_rounds} global rounds of training:')
        print(f"|---- Testing Accuracy: {acc_test_server:.2f}%")

        # Unlearned client
        print(f"\nUnlearned Clients:")
        for i in self.unlearned_clients:
            acc_test, loss_test = self.clients[i].client_test()
            print(f"|---- Unlearned Client - {i} Accuracy: {100*acc_test:.2f}%")


    def plot(self, history):
        """Plot loss (top) and accuracy (bottom) per communication round.

        Args:
            history: Mapping with keys "train_loss", "test_loss",
                "train_acc" and "test_acc", each a per-round sequence.
        """
        fig, axs = plt.subplots(2, 1)
        axs[0].plot(history["train_loss"], color="b", label="Training Loss")
        axs[0].plot(history["test_loss"], color='r', label="Testing Loss")
        axs[0].legend(loc="best", shadow=True)
        axs[0].set_xlabel("Communication Rounds")
        axs[0].set_ylabel("Loss")

        axs[1].plot(history["train_acc"], color="b", label="Training Accuracy")
        axs[1].plot(history["test_acc"], color='r', label="Testing Accuracy")
        axs[1].legend(loc="best", shadow=True)
        axs[1].set_xlabel("Communication Rounds")
        axs[1].set_ylabel("Accuracy")


    def test_global_model(self):
        """Evaluate the global model on ``self.test_loader``.

        Prints a summary line.

        Returns:
            Tuple ``(accuracy, test_loss)`` of Python floats: accuracy as a
            percentage (0..100) and the mean per-sample negative
            log-likelihood.
        """
        self.model.eval()

        test_loss = 0.0
        correct = 0
        with torch.no_grad():
            for data, labels in self.test_loader:
                # nll_loss needs int64 class indices, hence the cast.
                data, labels = data.to(self.device), labels.long().to(self.device)
                output = self.model(data)
                test_loss += F.nll_loss(output, labels, reduction='sum').item()
                pred = output.argmax(dim=1)
                correct += (pred == labels).sum().item()

        # Use the server's own loader throughout, not the notebook-level global.
        num_samples = len(self.test_loader.dataset)
        test_loss /= num_samples
        accuracy = 100. * correct / num_samples

        print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{num_samples} ({accuracy:.2f}%)\n')

        return accuracy, test_loss
    

In [None]:
# Create clients and distribute the dataset
# All clients are stored in list clients
# Reset the class-level id counter first so that re-running this cell again
# produces ids 0..num_clients-1 that match each client's position in the list.
Client.num_clients = 0
args["clients"] = []
for i in range(args["num_clients"]):
    client = Client(local_datasets[i])
    client.setup(args)
    args["clients"].append(client)

# Flag the clients that are to be unlearned.
for unlearned in args["unlearned_clients"]:
    args["clients"][unlearned].unlearned = True

In [None]:
# Create a server
# Server() builds the global model; setup() then attaches the loaders, the
# client list and the round settings taken from `args`.
server = Server()
server.setup(args)

In [None]:
# Run every federated round, then print the final results and plot the loss/accuracy history.
server.federated_learning()