In [2]:
import pandas as pd
from pathlib import Path
import json
import logging
from tqdm import tqdm
import yaml
import numpy as np
from math import isnan
from deckard.layers.utils import deckard_nones as nones

from deckard.layers.compile import parse_results



import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from paretoset import paretoset


In [11]:


# Path to the compiled attack results (relative to the working directory)
data_file = "mnist/reports/attack.csv"

# Load the results table and preview its first rows
df = pd.read_csv(filepath_or_buffer=data_file)
df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'mnist/reports/attack.csv'

In [9]:

# Distinct experiment settings found in the results, each sorted ascending
layers = sorted(int(n_layers) for n_layers in df.model_layers.unique())
epochs = df["epochs"].unique()
attacks = df.atk_gen.unique()
defenses = df.def_gen.unique()
epochs.sort()
attacks.sort()
defenses.sort()

# One "count" line followed by one "values" line per setting
print(
    "".join(
        [
            f"Number of models: {len(layers)}\n",
            f"Layers: {layers}\n",
            f"Number of epochs: {len(epochs)}\n",
            f"Epochs: {epochs}\n",
            f"Number of attacks: {len(attacks)}\n",
            f"Attacks: {attacks}\n",
            f"Number of defenses: {len(defenses)}\n",
            f"Defenses: {defenses}\n",
        ]
    )
)

AttributeError: 'DataFrame' object has no attribute 'model_layers'

In [14]:
# Role of each results column in the analysis below:
#   "diff"        -> experimental setting; used as a group-by key
#   "max" / "min" -> score column; imputed and averaged per group
# Each key is listed exactly once (a repeated key in a dict literal is silently
# collapsed by Python and only hides typos).
sense_dict = {
    "model_layers": "diff",
    "accuracy": "max",
    "data.sample.random_state": "diff",
    "epochs": "diff",
    "atk_gen": "diff",
    "def_gen": "diff",
    "def_param": "diff",
    "atk_param": "diff",
    "adv_fit_time": "min",
    "adv_accuracy": "min",
    "predict_time": "min",
    "train_time": "min",
    "attack.attack_size": "diff",
}

# Name of the default score column
scorer = "accuracy"


def average_across_random_states(df, scorer, sense_dict):
    """Add a ``mean_<scorer>`` column holding the per-group mean of ``scorer``.

    Rows are grouped by every key of ``sense_dict`` whose value is ``"diff"``,
    after the ``"data.sample.random_state"`` key has been removed, so rows that
    differ only in random state share one mean.

    Side effects: ``"data.sample.random_state"`` is popped from the caller's
    ``sense_dict`` and the new column is written into ``df`` in place.

    Args:
        df: Results table containing ``scorer`` and all grouping columns.
        scorer: Name of the score column to average.
        sense_dict: Mapping of column name to ``"diff"``/``"max"``/``"min"``.

    Returns:
        The same ``df`` object, with the ``mean_<scorer>`` column added.
    """
    sense_dict.pop("data.sample.random_state", None)
    grouping_columns = [name for name, sense in sense_dict.items() if sense == "diff"]
    print(f"Grouping by {grouping_columns} for {scorer}")
    grouped_scores = df.groupby(grouping_columns)[scorer]
    df[f"mean_{scorer}"] = grouped_scores.transform("mean")
    return df


def drop_poorly_merged_columns(df):
    """Remove ``<name>.1`` columns whose un-suffixed ``<name>`` column also exists.

    Membership is checked against the column index as it was on entry, so the
    result does not depend on the order in which columns are removed.

    Args:
        df: Table whose column labels are strings.

    Returns:
        ``df`` itself when nothing matched, otherwise a new frame without the
        suffixed duplicates.
    """
    original_columns = df.columns
    for name in original_columns:
        if not name.endswith(".1"):
            continue
        if name[:-2] not in original_columns:
            continue
        df = df.drop(columns=name)
    return df


def find_pareto_set_for_graph(df, sense_dict):
    """Impute missing scores and add a ``mean_<scorer>`` column per scorer.

    For every key of ``sense_dict`` marked ``"max"`` or ``"min"``:

    1. Missing values are filled with the mean of rows sharing the same
       non-attack settings, then any remaining gaps with the column mean.
    2. ``average_across_random_states`` adds the ``mean_<scorer>`` column.
    3. The ``sense_dict`` entry is renamed from ``<scorer>`` to
       ``mean_<scorer>``, keeping its sense.

    Despite the name, no Pareto filtering is applied: that step is commented
    out at the end of the body.

    Side effects: ``sense_dict`` is rewritten in place (scorer keys renamed,
    and ``"data.sample.random_state"`` removed by the averaging helper), and
    the score columns of ``df`` are overwritten with their imputed values.

    Args:
        df: Results table containing every column named in ``sense_dict``.
        sense_dict: Mapping of column name to ``"diff"``/``"max"``/``"min"``.

    Returns:
        The results table with imputed scores and the added mean columns.
    """
    scorers = [key for key, sense in sense_dict.items() if sense in ("max", "min")]
    group_list = [key for key, sense in sense_dict.items() if sense == "diff"]

    # Attack-related settings are excluded from the imputation groups. The
    # prefix test covers both the dotted and the underscored spelling for
    # "attack" as it already did for "adv" (the original checked "attack_"
    # twice and therefore never matched "attack.").
    attack_columns = ("atk_gen", "atk_value", "atk_param")
    attack_prefixes = ("attack.", "attack_", "adv.", "adv_")
    group_list_wo_attack = [
        group
        for group in group_list
        if group not in attack_columns and not group.startswith(attack_prefixes)
    ]

    for scorer in scorers:
        group_means = df.groupby(group_list_wo_attack)[scorer].transform("mean")
        scores = df[scorer].fillna(group_means)
        # Groups made up entirely of missing values fall back to the column mean
        df[scorer] = scores.fillna(scores.mean())
        df = average_across_random_states(df, scorer, sense_dict)
        # Rename the entry; the new key is appended at the end of the dict
        sense_dict[f"mean_{scorer}"] = sense_dict.pop(scorer)
    # Pareto filtering is currently disabled:
    # sub_df = df[[*sense_dict.keys()]]
    # bools = paretoset(sub_df, list(sense_dict.values()))
    # df = df[bools]
    return df


# Impute missing scores and attach the mean_<scorer> columns; the call also
# rewrites sense_dict in place.
df = find_pareto_set_for_graph(df=df, sense_dict=sense_dict)


def drop_col_if_no_variance(df):
    """Return a copy of ``df`` without columns that hold a single distinct value.

    A column is dropped when ``nunique()`` is exactly 1. ``nunique`` ignores
    missing values, so an all-NaN column (0 distinct values) is kept.

    The drop is performed once, after all columns have been inspected, so a
    frame with no columns is returned unchanged instead of raising.

    Args:
        df: Table to filter; it is not modified.

    Returns:
        A new frame containing only the columns with more than one (or zero)
        distinct non-missing values.
    """
    constant_columns = [col for col in df.columns if df[col].nunique() == 1]
    return df.drop(columns=constant_columns)


# Remove "<name>.1" columns that duplicate an existing "<name>" column
df = drop_poorly_merged_columns(df=df)

df

KeyError: 'def_param'

In [None]:
# Adversarial log loss against training epochs, coloured by model_layers
sns.lineplot(data=df, x="model.trainer.nb_epoch", y="adv_log_loss", hue="model_layers")

In [None]:
from lifelines import (
    CoxPHFitter,
    KaplanMeierFitter,
    NelsonAalenFitter,
    AalenAdditiveFitter,
    WeibullAFTFitter,
    LogNormalAFTFitter,
    LogLogisticAFTFitter,
    PiecewiseExponentialRegressionFitter,
)


# Registry of survival-model classes, keyed by the short name used when
# fitting. Values are classes, not instances: fit_aft_model instantiates one
# per call. Commented-out entries are imported above but currently disabled.
model_dict = {
    "cox": CoxPHFitter,
    # "kaplan_meier" : KaplanMeierFitter,
    # "nelson_aalen" : NelsonAalenFitter,
    # "aalen_additive" : AalenAdditiveFitter,
    "weibull": WeibullAFTFitter,
    "log_normal": LogNormalAFTFitter,
    "log_logistic": LogLogisticAFTFitter,
    # "piecewise_exponential" : PiecewiseExponentialRegressionFitter,
}


def fit_aft_model(df, sense_dict, model_name):
    """Fit one registered survival model and report how well it fits.

    The model class is looked up in ``model_dict`` and fitted with
    ``mean_adv_fit_time`` as the duration column and ``adv_failures`` as the
    event column. The summary, the concordance index and the measured and
    predicted median attack times are printed; only the log-likelihood is
    returned as the score.

    The stratification columns (``atk_gen``, ``def_gen``) are removed before
    fitting when present. They are dropped with ``errors="ignore"`` because
    callers may already have excluded them, in which case an unconditional
    drop would raise ``KeyError``.

    Args:
        df: Table with ``mean_adv_fit_time``, ``adv_failures``,
            ``attack.attack_size`` and the covariate columns. Not modified.
        sense_dict: Unused; kept so existing callers keep working.
        model_name: Key into ``model_dict``.

    Returns:
        Tuple ``(model, plot, score)``: the fitted model, the object returned
        by ``model.plot()``, and the log-likelihood score.
    """
    stratify = ["atk_gen", "def_gen"]
    # drop() returns a new frame, so the caller's table is left untouched
    subset_df = df.drop(columns=stratify, errors="ignore")

    model = model_dict[model_name]()
    model.fit(subset_df, duration_col="mean_adv_fit_time", event_col="adv_failures")
    model.print_summary()
    plot = model.plot()

    concordance = model.score(subset_df, scoring_method="concordance_index")
    print(f"Concordance index: {concordance}")

    measured_median = np.median(
        subset_df.mean_adv_fit_time
        / subset_df["attack.attack_size"]
        * ((1 - subset_df.adv_failures) / 100)
    )
    print("Measured median attack time:", measured_median)

    # NOTE(review): `ancillary=` is passed for every model type; confirm that
    # CoxPHFitter.predict_median accepts it in the installed lifelines version.
    modelled_median = np.median(model.predict_median(subset_df, ancillary=subset_df))
    print("Predicted median attack time:", modelled_median)

    score = model.score(subset_df, scoring_method="log_likelihood")
    return model, plot, score


# Per-model results, keyed by model name
models, scores, plots = {}, {}, {}

# Stratification columns are kept out of the regression frame
stratify = ["atk_gen", "def_gen"]
subset_cols = [column for column in sense_dict if column not in stratify]

# Regression frame: the selected columns, with mean adversarial accuracy
# replaced by a failure count (1 - accuracy) * attack size
aft_df = (
    df[subset_cols]
    .assign(adv_failures=(1 - df["mean_adv_accuracy"]) * df["attack.attack_size"])
    .drop(columns="mean_adv_accuracy")
)

# Same mapping as sense_dict, with the accuracy entry renamed to adv_failures
new_sense_dict = {
    column: sense
    for column, sense in sense_dict.items()
    if column != "mean_adv_accuracy"
}
new_sense_dict["adv_failures"] = sense_dict["mean_adv_accuracy"]

for model_name in model_dict:
    print(f"Fitting {model_name} model")
    model, plot, score = fit_aft_model(aft_df, new_sense_dict, model_name)
    models[model_name] = model
    scores[model_name] = score
    plots[model_name] = plot
    # Show the figure for this model, then clear it before the next fit
    plt.xscale("linear")
    plt.show()
    plt.gcf().clear()

# scores = pd.DataFrame.from_dict(scores, orient='index', columns=['score'])

# covariates = [k for k,v in sense_dict.items() if v == 'diff']
# values = [np.array(df[k].unique()) for k in covariates]
# print(f"Number of covariates: {len(covariates)}")
# print(f"Number of values: {len(values)}")
# print(f"Values: \n{[value.tolist() for value in values]}")
# for i in range(len(covariates)):
#     if covariates[i] in stratify:
#         continue
#     else:
#         print(f"Plotting {covariates[i]} with values {values[i]}")
#         graph = model.plot_partial_effects_on_outcome(covariates = covariates[i], values =values[i], cmap='coolwarm', figsize=(10, 10))
#         print(type(graph))

In [None]:
# Pick the fitted Weibull model out of the registry filled by the fitting loop
model = models["weibull"]
# df is passed both as the covariate table and as the ancillary table.
# NOTE(review): the models were fitted on aft_df, not df; confirm that
# predicting on the full results table is intended.
expectations = model.predict_expectation(df, ancillary=df)
survival_function = model.predict_survival_function(df, ancillary=df)

In [None]:
# `scores` is a plain dict of {model name: log-likelihood} and has no `.T`.
# Wrap it in a one-column frame first, then transpose so that each model
# becomes a column.
pd.Series(scores, name="score").to_frame().T

In [None]:
# Display the processed results table
df