In [None]:
import re
import os
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import Image
import numpy as np
import json
import seaborn as sns
sns.set(style="whitegrid")

In [None]:
# The methods you used in your experiments.
# Each name selects the results folder `mnist_<method>` when experiment output is read.
methods = ["SGD", "DPSGD", "DPSGD_noise", "DPSGD_clip"]

# Line colour (hex) used for each method in the plots below.
# NOTE(review): "DPSGD_noise5" and "DPSGD_clip" share the colour "#e5c494", so the two
# are indistinguishable if plotted together -- confirm whether this is intended.
methods_to_colours = {
    "SGD" : "#66c2a5",
    "DPSGD" : "#fc8d62",
    "DPSGD_n3" : "#8da0cb",
    "DPSGD_noise" : "#e78ac3",
    "DPSGD_noise5" : "#e5c494",
    "DPSGD_noise10" : "#b3b3b3",
    "DPSGD_clip" : "#e5c494",
    "DPSGDGA" : "#ffd92f",
    "DPSGDG" : "#3633FF"
}

# The types of attacks: attack name -> norm ("linf" or "l2").
# The norm decides which gamma list from the experiment config
# (`gammas_linf` or `gammas_l2`) applies to the attack.
attacks = {
    "pgd" : "linf",
    "fgsm" : "linf", 
    "pgdl2" : "l2",
    "deepfooll2" : "l2",
    "deepfoollinf" : "linf",
#     "boundary" : "l2",
}

The following function reads the robustness results of each method. To produce these results, run the experiments with `--config valid_metrics='accuracy','robustness_succ'` and then pass the path of the experiment results to `read_robustness_per_method`. For example, suppose you have the following folder, which contains the results for different methods:
```
├──FoolBox_exps:
    ├──mnist_DPSGD,
    ├──mnist_DPSGD_clip,
    ├──mnist_DPSGDA,
    ├──mnist_DPSGD_n3,
    ├──mnist_DPSGD_noise,
    ├──mnist_SGD
```
Then you can parse all the results and plot them
```
df_ra = read_robustness_per_method(path_to_FoolBox_exps, root_dir=None, methods=methods)
plot_epoch_robustness_per_gamma(df=df_ra, epoch=-1, methods=methods) # You can also specify a particular attack
plot_robustness_per_epoch_per_gamma(df=df_ra, methods=methods, attacks=["deepfooll2", "deepfoollinf"])
plot_epoch_attack_norm_per_gamma(df=df_ra, attack="DeepFool", methods=methods, epoch=-1)
```

In [None]:
# pass attacks as dict with atk : norm

# if robustness_acc only on for test_metrics: returns dictionary, method : list of length 1
# if robustness_acc only on for valid_metrics: returns dictionary, method : list of length max_epochs + 1 or max_epochs
# (depending on whether an evaluation is also run before training starts)
# if robustness_acc on for both valid_metrics and test_metrics: returns dictionary, method : list of length max_epochs + 2 or max_epochs + 1

def read_robustness_per_method(dir_name, root_dir=None, methods=methods, attacks=attacks):
    """Parse the adversarial-robustness results logged for each method.

    For every method, reads ``mnist_<method>/config.json`` (for the ``gammas_l2`` and
    ``gammas_linf`` lists) and ``mnist_<method>/stdout`` (for the logged results)
    below ``dir_name``.

    Parameters
    ----------
    dir_name : str
        Folder containing one ``mnist_<method>`` sub-folder per method.
    root_dir : str or None
        Optional prefix joined in front of ``dir_name``.
    methods : list of str
        Method names to read.
    attacks : dict
        Attack name -> norm ("linf" or "l2"). Logged attacks that are not in this
        dict are ignored. Any norm other than "linf" uses ``gammas_l2``.

    Returns
    -------
    dict
        method -> list of DataFrames, one per "Validating"/"Testing" log line that
        mentions ``robustness_succ`` (in log order). Each frame is indexed by
        (attack, gamma) and has the columns "acc", "l2", "linf", "l2_miss",
        "linf_miss". "acc" is ``1 - logged success rate``.

    Raises
    ------
    ValueError
        If a result line cannot be parsed, or appears before any
        "Validating"/"Testing" header line.
    """
    columns = ["acc", "l2", "linf", "l2_miss", "linf_miss"]
    # Compiled once instead of once per log line.
    header_re = re.compile(r".robustness_succ")
    result_re = re.compile(
        r'adv robust succ on (.*): \[(.*)\], l2 norm: \[(.*)\], linfty norm: \[(.*)\], '
        r'l2 norm miss: \[(.*)\], linfty norm miss: \[(.*)\]'
    )

    def str_to_list(s, r=5):
        # "0.1, 0.2" -> [0.1, 0.2], rounded to r decimals
        return [round(float(x), r) for x in s.replace(" ", "").split(",")]

    base_dir = dir_name if root_dir is None else os.path.join(root_dir, dir_name)

    robustness_per_method = {}
    for method in methods:
        exp_dir = os.path.join(base_dir, f"mnist_{method}")
        output_path = os.path.join(exp_dir, "stdout")
        config_path = os.path.join(exp_dir, "config.json")

        # `with` guarantees the handle is closed even if the JSON is malformed.
        with open(config_path) as config_file:
            config = {k.lower(): v for k, v in json.load(config_file).items()}

        gammas_l2 = config["gammas_l2"]
        gammas_linf = config["gammas_linf"]

        method_robustness_per_epoch = []

        with open(output_path, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                # A header line opens a new (empty) result frame for this evaluation.
                if header_re.search(line) and line.startswith(("Validating", "Testing")):
                    index = pd.MultiIndex.from_tuples([], names=("attack", "gamma"))
                    method_robustness_per_epoch.append(pd.DataFrame(columns=columns, index=index))

                if not line.startswith("adv robust succ on"):
                    continue

                metric = result_re.match(line)
                if metric is None:
                    raise ValueError(
                        f"{output_path}:{line_no}: cannot parse robustness line: {line.rstrip()!r}"
                    )

                attack = metric.group(1)
                if attack not in attacks:
                    continue

                if not method_robustness_per_epoch:
                    raise ValueError(
                        f"{output_path}:{line_no}: robustness result for {attack!r} appears "
                        "before any 'Validating'/'Testing' header line"
                    )

                (acc, l2_norm, linf_norm, l2_norm_miss, linf_norm_miss) = (
                    str_to_list(metric.group(i)) for i in range(2, 6 + 1)
                )

                # The attack's norm decides which gamma list the logged values line up with.
                gammas = gammas_linf if attacks[attack] == "linf" else gammas_l2

                current = method_robustness_per_epoch[-1]
                for g, gamma in enumerate(gammas):
                    # convert from adversarial success to adversarial accuracy for consistency
                    current.loc[(attack, gamma), :] = [
                        1 - acc[g], l2_norm[g], linf_norm[g], l2_norm_miss[g], linf_norm_miss[g]
                    ]

        robustness_per_method[method] = method_robustness_per_epoch
    return robustness_per_method

In [None]:
# input: df = LeNet, dict: method : df
# columns= ["acc", "l2", "linf", "l2_miss", "linf_miss"]
# index = ["attack", "gamma"]
def plot_epoch_robustness_per_gamma(df=None, methods=methods, attacks=attacks, epoch=-1):
    """Plot adversarial accuracy against perturbation norm, one panel per attack.

    Parameters
    ----------
    df : dict
        method -> list of per-evaluation DataFrames indexed by (attack, gamma) with
        columns "acc", "l2", "linf", "l2_miss", "linf_miss" (the output of
        ``read_robustness_per_method``). If omitted, the notebook global
        ``ra_LeNet`` is used when it exists.
    methods : list of str
        Methods to draw; each gets its colour from ``methods_to_colours``.
    attacks : dict
        Attack name -> norm ("linf" or "l2"). The x values come from the column named
        after the norm, except for "deepfooll2"/"deepfoollinf", which use
        "l2_miss"/"linf_miss".
    epoch : int
        Index into each method's list of frames (default: the last one).

    Raises
    ------
    ValueError
        If ``df`` is not given and no ``ra_LeNet`` global exists.
    """
    if df is None:
        # The old default referenced the global `ra_LeNet` at definition time, which
        # raised NameError on a fresh kernel. Resolve it lazily instead.
        df = globals().get("ra_LeNet")
        if df is None:
            raise ValueError("df is required: pass the output of read_robustness_per_method")

    # squeeze=False keeps `axs` indexable even when there is a single attack.
    fig, axs = plt.subplots(1, len(attacks), figsize=(20,5), sharey=True, dpi=75, squeeze=False)
    axs = axs[0]

    for i, (attack, norm) in enumerate(attacks.items()):
        col = norm
        if attack == "deepfooll2": col = "l2_miss"
        elif attack == "deepfoollinf": col = "linf_miss"

        for method in methods:
            df_epoch = df[method][epoch]
            # check attack exists for method at this epoch
            if attack not in df_epoch.index:
                continue
            df_attack = df_epoch.loc[attack]
            x = list(df_attack[col])
            y = list(df_attack["acc"])
            axs[i].plot(x, y, "o-", c=methods_to_colours[method], alpha=0.7, label=method)

        axs[i].set_xlabel(f"{norm} norm")
        axs[i].set_title(attack)
        if i == 0:
            axs[i].set_ylabel("Adversarial accuracy")
            axs[i].legend()

    fig.suptitle("Adv acc on LeNet, varying adv strength")
    fig.tight_layout()

In [None]:
def plot_robustness_per_epoch_per_gamma(df=None, methods=methods, attacks=attacks, column="acc"):
    """Plot one robustness column over the logged evaluations, per attack and gamma.

    The figure is a grid with one column per attack and one row per gamma. The
    gammas of each attack are taken from the first evaluation of ``methods[0]``.

    Parameters
    ----------
    df : dict
        method -> list of per-evaluation DataFrames indexed by (attack, gamma)
        (the output of ``read_robustness_per_method``). If omitted, the notebook
        global ``ra_LeNet`` is used when it exists.
    methods : list of str
        Methods to draw; each gets its colour from ``methods_to_colours``.
    attacks : iterable of str
        Attack names (a dict keyed by attack name works too).
    column : str
        Column of the frames to plot (default "acc").

    Raises
    ------
    ValueError
        If ``df`` is not given and no ``ra_LeNet`` global exists.
    """
    if df is None:
        # The old default referenced the global `ra_LeNet` at definition time, which
        # raised NameError on a fresh kernel. Resolve it lazily instead.
        df = globals().get("ra_LeNet")
        if df is None:
            raise ValueError("df is required: pass the output of read_robustness_per_method")

    gammas_per_attack = [df[methods[0]][0].loc[attack].index.tolist() for attack in attacks]
    n = max(len(gammas) for gammas in gammas_per_attack)
    m = len(attacks)

    # TODO: make figsize depend on n,m
    # squeeze=False keeps `axs` 2-D even for a single attack or a single gamma.
    fig, axs = plt.subplots(n, m, figsize=(8,20), dpi=100, sharex=True, squeeze=False)

    for i, attack in enumerate(attacks):
        gammas = gammas_per_attack[i]
        last_row = len(gammas) - 1
        for k, gamma in enumerate(gammas):
            ax = axs[k, i]

            for method in methods:
                data = [epoch[column].loc[attack, gamma] for epoch in df[method]]
                ax.plot(data, label=method, c=methods_to_colours[method], alpha=0.7)

            if k == 0:
                ax.set_title(attack)

            ax.set_ylabel(f"Adv. {column} @ {gamma}")
            if k == last_row:
                # only the bottom panel of each attack gets the legend and x label
                ax.legend()
                ax.set_xlabel("Epochs")

    # NOTE: might plot final robustness twice, if it is evaluated in both valid_metrics and test_metrics
    fig.suptitle(f"{column} over training")
    fig.tight_layout()

In [None]:
# input: df = LeNet, dict: method : df
# columns= ["acc", "l2", "linf", "l2_miss", "linf_miss"]
# index = ["attack", "gamma"]
def plot_epoch_attack_norm_per_gamma(df=None, attack="DeepFool", methods=methods, epoch=-1):
    """Plot the "<norm>_miss" perturbation norm and adversarial accuracy against gamma.

    One panel is drawn per attack. The left axis shows the "<norm>_miss" column
    (solid lines), a twin right axis shows "acc" (dashed lines, fixed to [0, 1]).

    Parameters
    ----------
    df : dict
        method -> list of per-evaluation DataFrames indexed by (attack, gamma)
        (the output of ``read_robustness_per_method``). If omitted, the notebook
        global ``ra_LeNet`` is used when it exists.
    attack : str
        "DeepFool" plots both "deepfooll2" and "deepfoollinf"; any other value is
        used as a single attack name.
    methods : list of str
        Methods to draw; each gets its colour from ``methods_to_colours``.
    epoch : int
        Index into each method's list of frames (default: the last one).

    Raises
    ------
    ValueError
        If ``df`` is not given and no ``ra_LeNet`` global exists.
    """
    if df is None:
        # The old default referenced the global `ra_LeNet` at definition time, which
        # raised NameError on a fresh kernel. Resolve it lazily instead.
        df = globals().get("ra_LeNet")
        if df is None:
            raise ValueError("df is required: pass the output of read_robustness_per_method")

    attack_names = ["deepfooll2", "deepfoollinf"] if attack == "DeepFool" else [attack]

    # One panel per attack (previously always two, leaving one empty for a single
    # attack). squeeze=False keeps `axs` indexable for a single panel.
    n_panels = len(attack_names)
    fig, axs = plt.subplots(1, n_panels, figsize=(5 * n_panels, 5), dpi=75, squeeze=False)
    axs = axs[0]
    axs2 = [ax.twinx() for ax in axs]
    for ax in axs2:
        ax.set_ylim((0,1))
        ax.set_ylabel("Adv acc")
        ax.grid(False)

    for i, iattack in enumerate(attack_names):
        # Norm comes from the notebook-level `attacks` dict. Attacks missing from it
        # (e.g. the boundary attack) fall back to "l2", as before.
        norm = attacks.get(iattack, "l2")
        col = f"{norm}_miss"

        for method in methods:
            df_method = df[method][epoch].loc[iattack]
            axs[i].plot(df_method[col], "o-", c=methods_to_colours[method], label=method)
            axs2[i].plot(df_method["acc"], "o--", c=methods_to_colours[method], alpha=0.5, label=method)

        axs[i].set_xlabel("gamma")
        axs[i].set_title(iattack)
        axs[i].set_ylabel(f"Perturbation norm {norm}")
        axs[i].legend()

    fig.suptitle(f"Distance to decision boundary ({attack}), LeNet, varying adv strength")
    fig.tight_layout()

The following function reads and plots the accuracy or loss metric for each method.

In [None]:
def plot_metric_per_method(metric, dir_name, root_dir=None, methods=methods):
    """Read and plot one metric per method.

    ``metric == "accuracy"`` parses every "Validation accuracy: <value>" line of
    ``mnist_<method>/stdout``. ``metric == "loss"`` reads the ``train_loss`` column
    of ``mnist_<method>/train_loss_per_epochs.csv``. Files are looked up below
    ``dir_name`` (prefixed with ``root_dir`` when given).

    Returns a dict method -> list of values, or the string
    "Invalid metric <metric>" for any other ``metric``.
    """
    def result_file(method_name, file_name):
        # Path of one result file inside the method's experiment folder.
        if root_dir is None:
            return os.path.join(dir_name, f"mnist_{method_name}", file_name)
        return os.path.join(root_dir, dir_name, f"mnist_{method_name}", file_name)

    # ACCURACY (evaluated on validation = test set)
    if metric == "accuracy":
        accuracy_by_method = {}
        for name in methods:
            values = []
            with open(result_file(name, "stdout"), 'r') as log:
                for row in log:
                    if not re.match(r'Validation\saccuracy.*', row):
                        continue
                    raw_value = re.match(r'Validation\saccuracy:\s(.*)', row).group(1)
                    values.append(round(float(raw_value), 4))
            accuracy_by_method[name] = values

        # Plot only after every log has been read successfully.
        for name in methods:
            plt.plot(accuracy_by_method[name], label=name, c=methods_to_colours[name], alpha=0.7)
        plt.legend()
        plt.title("Validation accuracy")
        plt.xlabel("Epochs")
        plt.ylabel("Accuracy")
        return accuracy_by_method

    # LOSS (evaluated on training set)
    if metric == "loss":
        loss_by_method = {}
        for name in methods:
            csv_path = result_file(name, "train_loss_per_epochs.csv")
            loss_by_method[name] = pd.read_csv(csv_path).train_loss.tolist()

        for name in methods:
            plt.plot(loss_by_method[name], label=name, c=methods_to_colours[name], alpha=0.5)
        plt.legend()
        plt.title("Train loss")
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        return loss_by_method

    return (f"Invalid metric {metric}")


The following functions only read different CSV files and plot them.

In [None]:
def plot_max_mean_gradient(dir_name, methods):
    """Plot the per-epoch max and average gradient norms of each method side by side.

    Reads the "max_grads" column of ``mnist_<method>/max_grad_norms_per_epochs.csv``
    and the "ave_grads" column of ``mnist_<method>/avg_grad_norms_per_epochs.csv``
    below ``dir_name``. Only the left (max) panel gets a legend.
    """
    fig, (max_ax, avg_ax) = plt.subplots(1, 2, figsize=(10,5), dpi=75)

    for name in methods:
        run_dir = os.path.join(dir_name, f"mnist_{name}")
        max_series = pd.read_csv(os.path.join(run_dir, "max_grad_norms_per_epochs.csv"))["max_grads"]
        avg_series = pd.read_csv(os.path.join(run_dir, "avg_grad_norms_per_epochs.csv"))["ave_grads"]

        line_style = dict(label=name, c=methods_to_colours[name], alpha=0.7)
        max_ax.plot(max_series, **line_style)
        avg_ax.plot(avg_series, **line_style)

    max_ax.set_ylabel("max grad")
    max_ax.set_xlabel("epoch")
    max_ax.set_title("Max Gradient")

    avg_ax.set_ylabel("avg grad")
    avg_ax.set_xlabel("epoch")
    avg_ax.set_title("Average Gradient")

    max_ax.legend()

In [None]:
def plot_norm_of_avg_grad(dir_name, methods):
    """Plot the norm of the average gradient per epoch for each method.

    Reads ``mnist_<method>/norm_avg_grad_per_epochs.csv`` below ``dir_name``.
    """
    csv_name = "norm_avg_grad_per_epochs.csv"
    # NOTE(review): the column is spelled "gorm_avg_grad" -- presumably this matches
    # the header written by the training code; confirm before renaming.
    column = "gorm_avg_grad"

    for name in methods:
        csv_path = os.path.join(dir_name, f"mnist_{name}", csv_name)
        series = pd.read_csv(csv_path)[column]
        plt.plot(series, label=name, c=methods_to_colours[name], alpha=0.7)

    plt.ylabel("grad norm")
    plt.xlabel("epoch")
    plt.title("Norm of Average Gradient")
    plt.legend()

### Hessian, grad values over training

At some iteration $i$, we have a batch $B$, and for some $x \in B$ we compute $\nabla_x \ell(f_\theta(x), y)$ and $\nabla_x^2 \ell(f_\theta(x), y)$. The grad term is then $\|\nabla_x \ell(f_\theta(x), y)\|_2$ and the Hessian term is $\lambda_{\max}\left(\nabla_x^2 \ell(f_\theta(x), y)\right)$. For each epoch, on the last batch of the training set, we compute the average of the grad term and of the Hessian term, and similarly their maximum.

The paper arXiv:2012.07985 computes the Hessian (for the final model) on 100 random test samples and shows the 15 largest eigenvalues (presumably averaged over the 100 samples). They observe that the largest DPSGD eigenvalues are up to 100x larger than the largest SGD eigenvalues.

In [None]:
def get_exp_adv_loss(dir_name, root_dir=None, methods=methods):
    """Load ``expected_adversarial_loss_per_epochs.csv`` for each method.

    The file is read from ``mnist_<method>/`` below ``dir_name`` (prefixed with
    ``root_dir`` when given). The "batch" column is dropped and "epoch" becomes
    the index.

    Returns a dict method -> DataFrame.
    """
    csv_name = "expected_adversarial_loss_per_epochs.csv"

    frames = {}
    for name in methods:
        path_parts = [dir_name, f"mnist_{name}", csv_name]
        if root_dir is not None:
            path_parts.insert(0, root_dir)

        # NOTE: assuming eval only happens for one batch in each epoch
        frames[name] = (
            pd.read_csv(os.path.join(*path_parts))
            .drop(columns="batch")
            .set_index("epoch")
        )

    return frames


In [None]:
def plot_grad_hessians(df, methods=methods):
    """Scatter every column of each method's frame over its index, one panel per column.

    ``df`` maps method -> DataFrame (as returned by ``get_exp_adv_loss``). The
    figure has four panels, and column ``i`` of every method is drawn in panel ``i``.
    Only the first panel carries the legend.
    """
    # Four fixed panels, i.e. the frames are expected to have four columns.
    fig, axs = plt.subplots(1,4,figsize=(18,5),dpi=75, sharex=True)

    for name in methods:
        frame = df[name]
        for panel, col_name in enumerate(frame.columns):
            ax = axs[panel]
            ax.set_title(col_name)
            ax.set_ylabel(col_name)
            ax.set_xlabel("Epochs")
            ax.plot(frame[col_name], 'o', c=methods_to_colours[name], alpha=0.8, label=name)
            # A per-method line of best fit (degree-1 np.polyfit over the index) was
            # sketched here earlier but is intentionally not drawn.
            if panel == 0:
                ax.legend()

    fig.suptitle("Hessian,grad values over training;")
    fig.tight_layout()

In [None]:
# Load the statistics stored in expected_adversarial_loss_per_epochs.csv for every
# method and plot them. Replace "path to directory" with the experiment folder.
hg = get_exp_adv_loss("path to directory", root_dir="", methods=methods)
# Plot the frames that were just loaded (`df` was never defined in this notebook).
plot_grad_hessians(hg, methods)

In [None]:
def get_exp_max_lambda(dir_name, root_dir=None, methods=methods, seeds=range(3)):
    """Load the per-epoch max eigenvalue of every seed for each method.

    For each method and seed, reads
    ``<seed>/mnist_<method>/params_max_eigenval_per_epochs.csv`` below ``dir_name``
    (prefixed with ``root_dir`` when given) and keeps its "max_eigenval" column
    indexed by "epoch".

    Parameters
    ----------
    dir_name : str
        Folder containing one sub-folder per seed.
    root_dir : str or None
        Optional prefix joined in front of ``dir_name``.
    methods : list of str
        Method names to read.
    seeds : iterable of int
        Seed sub-folders to read (default: 0, 1, 2).

    Returns
    -------
    dict
        method -> DataFrame indexed by epoch with one column per seed
        ("seed_0", "seed_1", ...).
    """
    csv_name = "params_max_eigenval_per_epochs.csv"
    base_dir = dir_name if root_dir is None else os.path.join(root_dir, dir_name)

    exp_max_lambda = {}
    for method in methods:
        per_seed = {}
        for seed in seeds:
            csv_path = os.path.join(base_dir, str(seed), f"mnist_{method}", csv_name)
            # Index by epoch *before* selecting the column. The previous version
            # selected the column first and then called set_index on the Series.
            # NOTE: assuming eval only happens for one batch in each epoch
            per_seed[f"seed_{seed}"] = pd.read_csv(csv_path).set_index("epoch")["max_eigenval"]
        # Keep every seed as its own column instead of only the last one read.
        exp_max_lambda[method] = pd.DataFrame(per_seed)

    return exp_max_lambda

In [None]:
def plot_param_lambda(df=None, methods=methods):
    """Plot every column of each method's max-eigenvalue frame over its index.

    Parameters
    ----------
    df : dict
        method -> DataFrame (as returned by ``get_exp_max_lambda``). If omitted,
        the notebook global ``lambda_LeNet`` is used when it exists.
    methods : list of str
        Methods to draw; each gets its colour from ``methods_to_colours``.

    Raises
    ------
    ValueError
        If ``df`` is not given and no ``lambda_LeNet`` global exists.
    """
    if df is None:
        # The old default referenced the global `lambda_LeNet` at definition time,
        # which raised NameError on a fresh kernel. Resolve it lazily instead.
        df = globals().get("lambda_LeNet")
        if df is None:
            raise ValueError("df is required: pass the output of get_exp_max_lambda")

    for method in methods:
        for i, column in enumerate(df[method].columns):
            x = df[method][column].index.values
            y = df[method][column].values
            # Label only the first line of each method so the legend has one entry
            # per method rather than one per column.
            label = method if i == 0 else "_nolegend_"
            plt.plot(x, y, label=label, c=methods_to_colours[method], alpha=0.5)

    plt.legend()
    plt.title("Parameters' max eigenvalues over training; LeNet")
    plt.xlabel("Epochs")