In [None]:
"""
    Cases:
        Case 0: normal federated learning
        Case 1: baseline, retrain from scratch
        Case 2: method 1: continue train
        Case 3: method 2: PGA
        Case 4: method 3: federaser
        Case 5: method 4: flipping
"""

In [None]:
""" 
    List of settings (R = training rounds, UR = unlearning rounds, PR = post-training rounds, OR = onboarding rounds):
    1. MNIST: 
        - R10, UR5, PR15, OR15
        - R10, UR1, PR15, OR15
        - R50, UR5, PR15, OR15
    2. CIFAR10
        - R20, UR10, PR30, OR30
        - R20, UR2, PR30, OR30
        - R100, UR10, PR30, OR30
    List of experiments:
    1. Accuracy
        - compare case 2 with case 1
        - compare case 3 with case 1
        - compare case 4 with case 1
        - compare case 5 with case 1
    2. Accuracy on the last round before onboarding
    3. Params similarity
    4. Prediction Similarity
    5. Unlearning time
"""

In [None]:
# import modules

import pickle
import matplotlib.pyplot as plt
import os
import numpy as np
import sys
sys.path.insert(0, '..')

from utils.model import get_model
import torch

import pandas as pd


In [None]:
# configs for experiments

# Round schedule per dataset, plus the dataset the notebook currently analyses.
configs = dict(
    mnist=dict(num_round=50, num_unlearn_round=5, num_post_training_round=15),
    # CIFAR-10 and CIFAR-100 use the same round schedule.
    cifar10=dict(num_round=100, num_unlearn_round=10, num_post_training_round=30),
    cifar100=dict(num_round=100, num_unlearn_round=10, num_post_training_round=30),
    # Key of the entry above that the following cells read their settings from.
    dataset="cifar100",
)

In [None]:
# result structure
# Programmatic construction: split -> metric -> series key -> list of values.
res = {
    split: {
        metric: {key: [] for key in ("avg", "clean", "backdoor", *range(5))}
        for metric in ("loss", "acc")
    }
    for split in ("train", "val")
}

# or, for better visualization, this is the architecture of res
# (the integer keys 0-4 sit next to the "avg" / "clean" / "backdoor" series)

res = {
    "train": {
        "loss": {"avg": [], "clean": [], "backdoor": [], 0: [], 1: [], 2: [], 3: [], 4: []},
        "acc": {"avg": [], "clean": [], "backdoor": [], 0: [], 1: [], 2: [], 3: [], 4: []},
    },
    "val": {
        "loss": {"avg": [], "clean": [], "backdoor": [], 0: [], 1: [], 2: [], 3: [], 4: []},
        "acc": {"avg": [], "clean": [], "backdoor": [], 0: [], 1: [], 2: [], 3: [], 4: []},
    },
}

In [None]:
# List the entries of the "with_onboarding" directory (relative to the notebook's
# working directory).
# NOTE(review): `paths` is not referenced by any other cell visible in this
# notebook — confirm whether it is still needed.
paths = os.listdir("with_onboarding")

In [None]:
def to_csv(X,Ys, filename, is_cuda = False):
    """Write one x-axis column plus any number of y-series to ``csvs/<filename>``.

    Args:
        X: dict with keys ``"label"`` (column name) and ``"value"`` (column values).
        Ys: dict mapping a column name to its values.
        filename: path of the CSV file relative to the ``csvs/`` directory; it may
            contain sub-directories (e.g. ``"exp1_accuracy/run.csv"``).
        is_cuda: when True, every element of every series in ``Ys`` is converted
            with ``.cpu().item()`` before being stored (i.e. the elements are
            expected to be tensor-like scalars).

    Missing output directories are created, so the call no longer fails on a
    fresh checkout where ``csvs/`` (or a sub-folder of it) does not exist yet.
    """
    df = pd.DataFrame({
        X["label"]: X["value"],
    })

    for label, Y in Ys.items():
        df[label] = [y.cpu().item() for y in Y] if is_cuda else Y

    out_path = "csvs/" + filename
    # dirname is never empty here because of the "csvs/" prefix.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    df.to_csv(out_path, index = False)

In [None]:
def load_gen(filename, type="acc"):
    """Return the ``['val'][type]`` entry of a pickled result file.

    Args:
        filename: path of the pickle file to read.
        type: key looked up inside the ``'val'`` section (default ``"acc"``).

    Note: the file is read with ``pickle.load``; only open files you trust.
    """
    with open(filename, 'rb') as handle:
        history = pickle.load(handle)
    return history['val'][type]


# Whether to read results from the run that includes an onboarding phase, and how
# many rounds that phase adds to the x-axis of the plots.
onboarding = True
num_onboarding_rounds = 30

# Result folder that matches the flag above.
folder = "with_onboarding/" if onboarding else "without_onboarding/"

# Display name of every case, keyed "case0" ... "case5".
name = {
    f"case{index}": label
    for index, label in enumerate(
        ("normal", "Retrain", "Continue to Train", "PGA", "FedEraser", "Flipping")
    )
}


def show_result(path, methods=[1, 2, 3, 4], is_marked=False):
    """Plot clean and backdoor validation accuracy per round for several methods.

    For every method ``i`` the file ``case{i}_{path}`` is loaded from ``folder``.
    Case 1 is plotted as stored; for every other case the ``case0`` (baseline)
    series is prepended to the method's own series. Each plotted pair of series
    is also exported through ``to_csv`` and the figure is saved under
    ``plot/<dataset>/``.

    Args:
        path: result file name without the ``case{i}_`` prefix. Fields 3, 4 and 5
            of the ``_``-separated name (``R..``, ``UR..``, ``PR..``) give the
            round counts that, together with ``num_onboarding_rounds``, define
            the x-axis.
        methods: case numbers to plot. Methods whose file cannot be loaded are
            reported and skipped.
        is_marked: use the per-case line/marker format strings instead of the
            matplotlib defaults.
    """
    # Index 0 is padding: case i uses entry 2*i-1 (clean) and 2*i (backdoor).
    # The last two entries cover case 5, which previously raised IndexError.
    markers = ["", "bo--", "gx--", "m^-", "c+-", "r>-", "y<-", "ks-", "yd-", "bv-.", "g*-."]

    fields = path.split("_")
    num_rounds = int(fields[3][1:]) + int(fields[4][2:]) + int(fields[5][2:])
    num_rounds += num_onboarding_rounds

    fl_rounds = list(range(1, num_rounds + 1))

    baseline = load_gen(folder + f"case0_{path}")
    setting = configs[configs['dataset']]

    for i in methods:
        filename = f"case{i}_{path}"
        try:
            data = load_gen(folder + filename)
        except Exception as err:
            # Best effort: a missing/unreadable result only drops that method.
            # (A bare `except:` would also swallow KeyboardInterrupt.)
            print(f"Skipping {filename}: {err}")
            continue
        case = f"case{i}"

        if i != 1:
            clean_data = baseline["clean"] + data["clean"]
            backdoor_data = baseline["backdoor"] + data["backdoor"]
        else:
            clean_data = data["clean"]
            backdoor_data = data["backdoor"]

        # NOTE(review): the CSV name is derived from the global `configs`, not
        # from `path`; the two only agree when `path` was built from `configs`.
        to_csv(
            {
                "label": "Rounds",
                "value": fl_rounds
            },
            {
                "clean_data": clean_data,
                "backdoor_data": backdoor_data
            },
            f"exp1_accuracy/{configs['dataset']}_case{i}_R{setting['num_round']}_UR{setting['num_unlearn_round']}_PR{setting['num_post_training_round']}.csv"
        )

        if is_marked:
            plt.plot(fl_rounds, clean_data, markers[2*i-1], label=f"{name[case]} clean")
            plt.plot(fl_rounds, backdoor_data, markers[2*i], label=f"{name[case]} backdoor")
        else:
            plt.plot(fl_rounds, clean_data, label=f"{name[case]} clean")
            plt.plot(fl_rounds, backdoor_data, label=f"{name[case]} backdoor")

    plt.xlabel('Rounds')
    plt.ylabel('Accuracy')
    plt.locator_params(axis="x", integer=True)
    plt.grid()
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

    method_string = "".join(str(i) for i in methods)

    # Make sure the output folder exists before saving the figure.
    plot_dir = f"plot/{configs['dataset']}"
    os.makedirs(plot_dir, exist_ok=True)
    plt.savefig(f"{plot_dir}/{path[:-4]}_M{method_string}.png", dpi=1200, bbox_inches='tight')

    plt.show()


In [None]:
def show_result_all(path, methods=[1, 2, 3, 4, 5], is_clean = True, is_marked=True):
    """Plot one accuracy series (clean or backdoor) per method on a shared axis.

    For every method ``i`` the file ``case{i}_{path}`` is loaded from ``folder``.
    Case 1 is plotted as stored; for every other case the ``case0`` (baseline)
    series is prepended. The figure is saved as PNG and PDF under
    ``plot/<dataset>/``.

    Args:
        path: result file name without the ``case{i}_`` prefix. Fields 3, 4 and 5
            of the ``_``-separated name (``R..``, ``UR..``, ``PR..``) give the
            round counts that, together with ``num_onboarding_rounds``, define
            the x-axis.
        methods: case numbers to plot. Methods whose file cannot be loaded are
            reported and skipped.
        is_clean: plot the ``"clean"`` series when True, ``"backdoor"`` otherwise.
        is_marked: draw a per-case marker on every 10th point.
    """
    # Index 0 is padding so that case i uses entry i.
    markers = ["", "^", "s", "<", "o", "v"]
    colors = ["", "b", "orange", "g", "r", "k"]

    fields = path.split("_")
    num_rounds = int(fields[3][1:]) + int(fields[4][2:]) + int(fields[5][2:])
    num_rounds += num_onboarding_rounds

    fl_rounds = list(range(1, num_rounds + 1))

    baseline = load_gen(folder + f"case0_{path}")

    # The clean and backdoor variants differ only in which series is read.
    series_key = "clean" if is_clean else "backdoor"

    for i in methods:
        filename = f"case{i}_{path}"
        try:
            data = load_gen(folder + filename)
        except Exception as err:
            # Best effort: a missing/unreadable result only drops that method.
            # (A bare `except:` would also swallow KeyboardInterrupt.)
            print(f"Skipping {filename}: {err}")
            continue
        case = f"case{i}"

        if i != 1:
            series = baseline[series_key] + data[series_key]
        else:
            series = data[series_key]

        plot_kwargs = {"color": colors[i], "label": f"{name[case]}"}
        if is_marked:
            plot_kwargs.update(marker=markers[i], markevery=10)
        plt.plot(fl_rounds, series, **plot_kwargs)

    plt.xlabel('Rounds')
    plt.ylabel('Accuracy')
    plt.locator_params(axis="x", integer=True)
    plt.grid()
    plt.legend(loc='best')

    method_string = "".join(str(i) for i in methods)

    # Make sure the output folder exists before saving the figure.
    plot_dir = f"plot/{configs['dataset']}"
    os.makedirs(plot_dir, exist_ok=True)
    plt.savefig(f"{plot_dir}/{path[:-4]}_M{method_string}_{series_key}.png", dpi=1200, bbox_inches='tight')
    plt.savefig(f"{plot_dir}/{path[:-4]}_M{method_string}_{series_key}.pdf", dpi=1200, bbox_inches='tight')

    plt.show()


In [None]:
# 1. Accuracy of all methods

# Result file name for the active dataset; the per-dataset round counts fill the
# R / UR / PR fields of the template.
path = "{dataset}_C5_BS128_R{num_round}_UR{num_unlearn_round}_PR{num_post_training_round}_E1_LR0.01.pkl".format(
    dataset=configs['dataset'],
    **configs[configs['dataset']],
)

show_result_all(path, is_clean=False)

In [None]:
def show_last_round_result_before_onboarding(path, methods=[1, 2, 3, 4]):
    """Bar chart of clean vs. backdoor accuracy at one fixed round per method.

    For every method ``i`` the file ``case{i}_{path}`` is loaded from ``folder``
    and the entry ``num_post_training_round + 1`` positions from the end of its
    ``"clean"`` and ``"backdoor"`` series is plotted.

    Args:
        path: result file name without the ``case{i}_`` prefix.
        methods: case numbers to include. Methods whose file cannot be loaded
            are reported and skipped; only the loaded ones get a bar and a tick
            label, so values and labels always line up.
    """
    # NOTE(review): counting back num_post_training_round + 1 entries lands on
    # the last round before onboarding only if the number of onboarding rounds
    # stored at the end of each series equals num_post_training_round — confirm.
    round_index = -configs[configs["dataset"]]["num_post_training_round"] - 1

    method_names = []
    clean_data = []
    backdoor_data = []

    for i in methods:
        filename = f"case{i}_{path}"
        try:
            data = load_gen(folder + filename)
        except Exception as err:
            # Best effort: a missing/unreadable result only drops that method.
            print(f"Skipping {filename}: {err}")
            continue

        method_names.append(name[f"case{i}"])
        clean_data.append(data["clean"][round_index])
        backdoor_data.append(data["backdoor"][round_index])

    # Built from the methods that actually loaded, so bar positions, heights and
    # tick labels have the same length.
    x_axis = np.arange(len(method_names))

    plt.bar(x_axis-0.2, clean_data, 0.4, label="clean")
    plt.bar(x_axis+0.2, backdoor_data, 0.4, label="backdoor")

    plt.xticks(x_axis, method_names)
    plt.xlabel('Methods')
    plt.ylabel('Accuracy')
    plt.title("Last Round Accuracy")
    plt.grid()
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

    plt.show()

In [None]:
# 2. Last round accuracy

"""
    This cell is to run the second experiment: accuracy on the last round before onboarding
"""

# Result file name for the active dataset; the per-dataset round counts fill the
# R / UR / PR fields of the template.
path = "{dataset}_C5_BS128_R{num_round}_UR{num_unlearn_round}_PR{num_post_training_round}_E1_LR0.01.pkl".format(
    dataset=configs['dataset'],
    **configs[configs['dataset']],
)

show_last_round_result_before_onboarding(path, methods=[1, 2, 3, 4, 5])

In [None]:
def show_numerical_result(path, methods=[1, 2, 3, 4], dataset = "mnist"):
    """Print, per method, clean/backdoor accuracies at fixed rounds as a LaTeX row.

    For every method two lines are printed: ``"<method name> clean"`` and a
    ``" & "``-separated list of ``clean & backdoor`` pairs taken at seven
    positions (with R / UR / PR read from ``configs[dataset]``):

    * index ``R - 1`` of the learning-phase series,
    * offsets ``0``, ``UR - 1``, ``UR``, ``UR + PR - 1`` and ``UR + PR`` counted
      from the first round after the learning phase,
    * the very last entry of the method's series.

    Case 1 stores the whole run in one series, so its offsets are shifted by
    ``R``; every other case takes the learning-phase entry from the ``case0``
    baseline file and the remaining entries from its own series.

    Args:
        path: result file name without the ``case{i}_`` prefix.
        methods: case numbers to report. Methods whose file cannot be loaded
            are reported and skipped.
        dataset: key into ``configs`` that selects the round counts.
    """
    baseline = load_gen(folder + f"case0_{path}")

    setting = configs[dataset]
    num_round = setting['num_round']
    num_unlearn = setting['num_unlearn_round']
    num_post = setting['num_post_training_round']

    # Offsets relative to the first round after the learning phase.
    phase_offsets = [0, num_unlearn - 1, num_unlearn, num_unlearn + num_post - 1, num_unlearn + num_post]

    for i in methods:
        filename = f"case{i}_{path}"
        try:
            data = load_gen(folder + filename)
        except Exception as err:
            # Best effort: a missing/unreadable result only drops that method.
            print(f"Skipping {filename}: {err}")
            continue

        case = f"case{i}"
        print(f"{name[case]} clean")

        if i == 1:
            # i=1: Retrain — one series covers the learning phase as well.
            checkpoints = [(data, num_round - 1)]
            checkpoints += [(data, num_round + offset) for offset in phase_offsets]
        else:
            # Other cases: learning phase comes from the case0 baseline.
            checkpoints = [(baseline, num_round - 1)]
            checkpoints += [(data, offset) for offset in phase_offsets]
        checkpoints.append((data, -1))

        res_str = " & ".join(
            f"{series['clean'][index]} & {series['backdoor'][index]}"
            for series, index in checkpoints
        )

        print(res_str)
        

In [None]:
# Result file name for the active dataset; the per-dataset round counts fill the
# R / UR / PR fields of the template.
path = "{dataset}_C5_BS128_R{num_round}_UR{num_unlearn_round}_PR{num_post_training_round}_E1_LR0.01.pkl".format(
    dataset=configs['dataset'],
    **configs[configs['dataset']],
)
show_numerical_result(path, methods=[1, 2, 3, 4, 5], dataset = configs['dataset'])

In [None]:
def load_time(filename, type="acc"):
    """Return the ``"time"`` entry of a pickled result file.

    Args:
        filename: path of the pickle file to read.
        type: accepted for signature parity with ``load_gen``; it is not used.

    Note: the file is read with ``pickle.load``; only open files you trust.
    """
    with open(filename, 'rb') as handle:
        record = pickle.load(handle)
    return record["time"]



def show_time(path, methods):
    """Bar chart of each method's recorded time relative to case 1 (Retrain).

    For every method ``i`` the ``"time"`` value of ``case{i}_{path}`` is loaded
    from ``folder`` and divided by the value loaded for case 1.

    Args:
        path: result file name without the ``case{i}_`` prefix.
        methods: case numbers to include; must contain 1, whose time is the
            reference. Its position in the list does not matter. Methods whose
            file cannot be loaded have their file name printed and are skipped
            (they get neither a bar nor a label).

    Raises:
        ValueError: if the case 1 time could not be loaded, since no ratio can
            be computed without it.
    """
    labels = []
    times = []
    retrain_time = None

    for i in methods:
        filename = f"case{i}_{path}"
        try:
            elapsed = load_time(folder + filename)
        except Exception:
            # Best effort: report the file that could not be read and move on.
            print(filename)
            continue

        if i == 1:
            retrain_time = elapsed

        # Labels are collected together with the values so both lists always
        # have the same length, even when some methods were skipped.
        labels.append(name[f"case{i}"])
        times.append(elapsed)

    if retrain_time is None:
        raise ValueError(
            "show_time needs the case 1 (Retrain) time as reference, but it was "
            "not loaded; include 1 in `methods` and check that its file exists."
        )

    factors = [elapsed / retrain_time for elapsed in times]

    plt.bar(labels, factors)
    plt.ylabel('Unit')
    plt.grid()
    plt.show()

In [None]:
# 5. Unlearning time

"""
    This cell is to run the fifth experiment: measuring unlearning time
"""

# Result file name for the active dataset; the per-dataset round counts fill the
# R / UR / PR fields of the template.
path = "{dataset}_C5_BS128_R{num_round}_UR{num_unlearn_round}_PR{num_post_training_round}_E1_LR0.01.pkl".format(
    dataset=configs['dataset'],
    **configs[configs['dataset']],
)
show_time(path, methods=[1, 2, 3, 4, 5])

In [None]:
def show_time_detail(path, methods):
    """Print each method's recorded time and its speed-up over case 1 (Retrain).

    For every method ``i`` the ``"time"`` value of ``case{i}_{path}`` is loaded
    from ``folder``. One line per loaded method is printed in the form
    ``"<name>: <time>(<retrain_time / time>x)"``.

    Args:
        path: result file name without the ``case{i}_`` prefix.
        methods: case numbers to report, in any order. Methods whose file
            cannot be loaded have their file name printed and are skipped. If
            the case 1 time is not available, the times are printed without
            the speed-up factor.
    """
    loaded = []
    retrain_time = None

    for i in methods:
        filename = f"case{i}_{path}"
        try:
            elapsed = load_time(folder + filename)
        except Exception:
            # Best effort: report the file that could not be read and move on.
            print(filename)
            continue

        if i == 1:
            retrain_time = elapsed

        # Look the label up by case number; indexing a list built from
        # `methods` with i-1 is only right when methods == [1, 2, ...].
        loaded.append((name[f"case{i}"], elapsed))

    for label, elapsed in loaded:
        if retrain_time is None:
            print(f"{label}: {elapsed:.2f}")
        else:
            print(f"{label}: {elapsed:.2f}({(retrain_time / elapsed):.2f}x)")


In [None]:
# 5. Unlearning time

"""
    This cell is to run the fifth experiment: measuring unlearning time
"""

# Result file name for the active dataset; the per-dataset round counts fill the
# R / UR / PR fields of the template.
path = "{dataset}_C5_BS128_R{num_round}_UR{num_unlearn_round}_PR{num_post_training_round}_E1_LR0.01.pkl".format(
    dataset=configs['dataset'],
    **configs[configs['dataset']],
)
show_time_detail(path, methods=[1, 2, 3, 4, 5])

In [None]:
# Settings passed to get_model / get_loaders and used to build model checkpoint
# paths; the round counts mirror the active entry of `configs`.
args = dict(
    dataset=configs['dataset'],
    num_clients=5,
    batch_size=128,
    num_rounds=configs[configs['dataset']]['num_round'],
    num_unlearn_rounds=configs[configs['dataset']]['num_unlearn_round'],
    num_post_training_rounds=configs[configs['dataset']]['num_post_training_round'],
    local_epochs=1,
    lr=0.01,
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    poisoned_percent=0.9,
)

In [None]:
def load_model(path):
    """Build a model with ``get_model`` and load the state dict stored at ``path``.

    Args:
        path: file holding a state dict saved with ``torch.save``.

    Returns:
        The model returned by ``get_model(args, plotting=True)`` with the
        stored weights loaded.
    """
    model = get_model(args, plotting=True)
    # Remap the stored tensors to the configured device so that checkpoints
    # written on a GPU can still be opened when only the CPU is available.
    state_dict = torch.load(path, map_location=args["device"])
    model.load_state_dict(state_dict)

    return model

In [None]:
# load the baseline model after learning phase
case = 2

# Checkpoint of case 1 at round index num_rounds - 1; all fields except the round
# index are filled straight from `args`.
path = "../results/models/case1/case1_{dataset}_C{num_clients}_BS{batch_size}_R{num_rounds}_UR{num_unlearn_rounds}_PR{num_post_training_rounds}_E{local_epochs}_LR{lr}_round{last_round}.pt".format(
    last_round=args['num_rounds'] - 1,
    **args,
)
baseline_model = load_model(path)

# path2 = f"../results/models/case{case}/case{case}_{args['dataset']}_C{args['num_clients']}_BS{args['batch_size']}_R{args['num_rounds']}_UR{args['num_unlearn_rounds']}_PR{args['num_post_training_rounds']}_E{args['local_epochs']}_LR{args['lr']}_round{args['num_rounds']}.pt"
# model2 = load_model(path2)

In [None]:
def plot_diff(X, Y, title):
    """Plot a per-round difference curve.

    Args:
        X: x-axis values (rounds); must be non-empty.
        Y: one tensor-like value per entry of ``X``; each is converted with
            ``.cpu().numpy()`` before plotting.
        title: figure title.
    """
    Y = [y.cpu().numpy() for y in Y]

    plt.plot(X, Y)

    plt.xlabel('Rounds')
    plt.ylabel('Difference')

    # Aim for about ten ticks, but never let the step drop to 0: with fewer
    # than ten points `len(X) // 10` is 0, which np.arange rejects.
    tick_step = max(1, len(X) // 10)
    plt.xticks(np.arange(min(X), max(X)+1, tick_step))

    plt.title(title)

    plt.show()

In [None]:
def compare_prediction(model1, model2, data_loader):
    """Cosine similarity between the class predictions of two models.

    Both models are put in eval mode and run over every batch of
    ``data_loader`` without gradient tracking. The arg-max class index of each
    sample is collected per model, and the cosine similarity between the two
    resulting vectors is returned.

    Args:
        model1: first model.
        model2: second model.
        data_loader: iterable yielding ``(data, target)`` pairs; the targets
            are not needed and are ignored.

    Returns:
        The tensor produced by ``torch.nn.CosineSimilarity(dim=0, eps=1e-9)``.

    NOTE(review): the similarity is computed on class indices, so it is not an
    agreement rate — confirm this is the intended metric.
    """
    model1.eval()
    model2.eval()

    # Start from an empty float tensor so an empty loader still yields two
    # (empty) vectors; batches are concatenated once at the end instead of
    # re-allocating the full vector on every iteration.
    predictions1 = [torch.tensor([])]
    predictions2 = [torch.tensor([])]

    with torch.no_grad():
        for data, _ in data_loader:
            data = data.to(args["device"])

            predictions1.append(model1(data).argmax(dim=1).cpu().float())
            predictions2.append(model2(data).argmax(dim=1).cpu().float())

    cos = torch.nn.CosineSimilarity(dim=0, eps=1e-9)
    return cos(torch.cat(predictions1), torch.cat(predictions2))



In [None]:
from utils.dataloader import get_loaders
train_loaders, test_loader, test_loader_poison = get_loaders(args, plotting=True)


# Indexed by case number; cases 0 and 1 are not plotted here, hence the padding.
markers = ["", "", "^", "s", "<", "o", "v"]
colors = ["", "", "b", "orange", "g", "r", "k"]

for case in [2,3,4,5]:
    X = []
    Y = []

    # One checkpoint per round index from num_rounds up to (but excluding)
    # num_rounds + num_unlearn_rounds + num_post_training_rounds.
    for i in range(args['num_rounds'], args['num_rounds'] + args['num_unlearn_rounds'] + args['num_post_training_rounds']):
        path = f"../results/models/case{case}/case{case}_{args['dataset']}_C{args['num_clients']}_BS{args['batch_size']}_R{args['num_rounds']}_UR{args['num_unlearn_rounds']}_PR{args['num_post_training_rounds']}_E{args['local_epochs']}_LR{args['lr']}_round{i}.pt"
        unlearned_model = load_model(path)

        # Prediction similarity between this checkpoint and the baseline model.
        cos_sim = compare_prediction(unlearned_model, baseline_model, test_loader)
        X.append(i)
        Y.append(cos_sim)


    Y = [y.cpu().numpy() for y in Y]

    case_name = f"case{case}"

    plt.plot(X, Y, marker = markers[case], markevery= 10, color = colors[case], label=f"{name[case_name]}")


# Aim for about ten ticks, but never let the step drop to 0: with fewer than ten
# rounds `len(X) // 10` is 0, which np.arange rejects.
plt.xticks(np.arange(min(X), max(X)+1, max(1, len(X) // 10)))

plt.xlabel('Rounds')
plt.ylabel('Cosine Similarity')

plt.grid()
plt.legend(loc='best')


# Make sure the output folder exists before saving the figure.
os.makedirs(f"plot/{configs['dataset']}", exist_ok=True)
plt.savefig(f"plot/{configs['dataset']}/Cosine_Similarity.png", dpi=1200, bbox_inches='tight')
plt.savefig(f"plot/{configs['dataset']}/Cosine_Similarity.pdf", dpi=1200, bbox_inches='tight')

plt.show()