# Generalizability experiments
Prerequisites:
* Training of shadow models

## CIFAR
Training of adversaries

In [27]:
import pandas as pd
from cifar_functions import cifar_adversary

def load_and_concat(dist, run_dirs=("cifar_new200", "cifar_new201", "cifar_new202", "cifar_new100")):
    """Load and stack the shadow-model training outputs for one distribution.

    Reads ``<run_dir>/data/shadow_model_outputs/<dist>/train.csv`` for every
    entry of ``run_dirs`` and concatenates the frames row-wise in that order.

    Args:
        dist: Distribution value; formatted verbatim into the sub-directory
            name (e.g. ``0.1`` -> ``.../shadow_model_outputs/0.1/train.csv``).
        run_dirs: Top-level directories of the individual shadow-model runs.
            Defaults to the four CIFAR runs that used to be hard-coded, so
            ``load_and_concat(dist)`` behaves exactly as before.

    Returns:
        pandas.DataFrame holding the rows of all runs. The per-file row
        labels are kept (no ``ignore_index``), so index values repeat.

    Raises:
        FileNotFoundError: if an expected CSV file is missing (``pd.read_csv``).
        ValueError: if ``run_dirs`` is empty (``pd.concat``).
    """
    frames = [
        pd.read_csv(f"{run_dir}/data/shadow_model_outputs/{dist}/train.csv")
        for run_dir in run_dirs
    ]
    return pd.concat(frames)

# Distribution values; each one names a sub-directory of the shadow-model outputs.
dists = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# Nine full datasets in one list: one concatenated frame per entry of `dists`, same order.
all_train = list(map(load_and_concat, dists))
# All nine frames stacked into a single training frame.
adv_train = pd.concat(all_train)


In [None]:
# Persist the stacked adversary training data (all distributions) without the row index.
adv_train.to_csv("cifar_700_adv_train.csv", index=False)

In [28]:
# Five overlapping training sets, one per adversary. Each set takes the same
# positional row window out of every per-distribution frame in `all_train`.
training_sets = [
    pd.concat([d[lo:hi] for d in all_train])
    for lo, hi in [(0, 400), (100, 500), (200, 600), (300, 700)]
]
# The fifth set wraps around: rows 500-699 followed by rows 0-199.
training_sets.append(
    pd.concat([
        pd.concat([d[500:700] for d in all_train]),
        pd.concat([d[:200] for d in all_train]),
    ])
)

In [29]:
# Adversary test data: the test outputs of every distribution, stacked row-wise.
adv_test = pd.concat(
    [pd.read_csv(f"cifar_new_new/data/shadow_model_outputs/{dist}/test.csv") for dist in dists]
)
# Split into the feature columns and the target column "y".
adv_X_test = adv_test.drop(columns=["y"])
adv_y_test = adv_test["y"]

In [30]:
# Train one adversary on the training set selected by `adv_no` and keep the best attempt.
import keras

# Index into `training_sets` (valid values 0-4). The loader cell further down expects
# saved models for all five indices, so this cell is presumably re-run once per value.
adv_no = 4

curr_adv_train = training_sets[adv_no]

# Split into the target column "y" and the feature columns.
adv_y = curr_adv_train["y"]
adv_X = curr_adv_train.drop(columns=["y"])

# Weights file written by ModelCheckpoint during each training attempt.
checkpoint_filepath = f"cifar/models/manual_tuning_checkpoints_adv{adv_no}/keras.weights.h5"

# Best validation R² so far; an attempt must beat this (initially 0.5) before its model is saved.
currentmax = 0.5
for i in range(100): # up to 100 fresh training attempts; stops early once R² > 0.64 (see below)
    # New, untrained adversary for every attempt; input width = number of feature columns.
    manual_adversary = cifar_adversary((adv_X.shape[1],))
    history = manual_adversary.fit(
        adv_X,
        adv_y,
        epochs=200,
        # NOTE(review): the test set doubles as validation data, so early stopping and
        # model selection both see it; the reported R² is not an independent estimate.
        validation_data=(adv_X_test, adv_y_test),
        batch_size=18,
        verbose=0,
        callbacks=[
            # Stop when validation R² has not improved for 20 epochs.
            keras.callbacks.EarlyStopping('val_r2_score', mode='max', patience=20, verbose=1),
            # Write the weights whenever validation R² improves (best epoch only).
            keras.callbacks.ModelCheckpoint(
                filepath=checkpoint_filepath,
                save_weights_only=True,
                monitor='val_r2_score',
                mode='max',
                save_best_only=True)
        ])
    # Best validation R² reached during this attempt.
    newmax = max(history.history['val_r2_score'])
    print(newmax)
    if newmax > currentmax:
        print(f"new max r2: {newmax}")
        currentmax = newmax
        # Restore the checkpointed best-epoch weights before saving the full model.
        manual_adversary.load_weights(checkpoint_filepath)
        manual_adversary.save(f"cifar/models/cifar_adv{adv_no}.keras")
        # Good enough: stop retrying once validation R² exceeds 0.64.
        if newmax > 0.64:
            break

Epoch 66: early stopping
0.6663440465927124
new max r2: 0.6663440465927124


In [31]:
# load all adversaries:

from cifar_functions import cifar_adversary

# Build five adversary networks (input width 45360), then restore the weights
# that the training cell saved for each adversary index.
adversaries = [cifar_adversary((45360,)) for _ in range(5)]
for adv_no, adv in enumerate(adversaries):
    adv.load_weights(f"cifar/models/cifar_adv{adv_no}.keras")

  trackable.load_own_variables(weights_store.get(inner_path))


## Test if defense works for multiple adversaries

In [None]:
from cifar_functions import get_cifar_input_set, get_distributed_cifar_sets
# Presumably one CIFAR dataset per value in `dists` (helper from cifar_functions) — TODO confirm.
# NOTE(review): `distributed_datasets` is not referenced by the cells visible here; check whether it is still needed.
distributed_datasets = get_distributed_cifar_sets(distributions=dists)

In [32]:
from cifar_functions import get_cifar_input_set
# Input set every CIFAR target model is run on (passed to model.predict further below).
model_input = get_cifar_input_set()

In [23]:
import keras
from common.functions import cifar_adversary

# Single adversary restored from an earlier checkpoint file.
# NOTE(review): `adversary` is not used by the visible evaluation cells (they iterate over
# `adversaries`), and this cell imports cifar_adversary from common.functions while the
# other cells import it from cifar_functions — confirm which module is current.
adversary = cifar_adversary((45360,))
adversary.load_weights("cifar/models/cifar-adv_0.64_test_r2.keras")

  trackable.load_own_variables(weights_store.get(inner_path))


In [78]:
# read target models from disk
from common.functions import get_defending_lucasnet_model, compile_categorical_model, ensure_path_exists, compile_lucasnet, get_lucasnet_model

# Lambda values that appear in the stored target-model file names.
lambdas = [0.0, 0.15]

def load_models(lambda_):
    """Load the stored CIFAR target models for one lambda value.

    For every distribution 0.1 ... 0.9 and every run 0 ... 4 a LucasNet model
    (10 classes, 32x32x3 input) is built, compiled and filled with the weights
    from ``cifar/models/defense/cifardef-ds<d>-l<lambda_>-run<i>.keras``.

    Returns:
        dict mapping ``str(distribution)`` to the list of its five models.
    """
    def _restore(d, run):
        # Build and compile a fresh network, then load the weights of this run.
        net = get_lucasnet_model(
            num_classes=10,
            input_shape=(32, 32, 3))
        net = compile_lucasnet(net)
        net.load_weights(f"cifar/models/defense/cifardef-ds{d}-l{lambda_}-run{run}.keras")
        return net

    distributions = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    return {str(d): [_restore(d, run) for run in range(5)] for d in distributions}

# dict containing all target models with different lambdas:
# target_models[str(lambda)][str(distribution)] -> list of the five loaded models
target_models = {str(l): load_models(l) for l in lambdas}

  trackable.load_own_variables(weights_store.get(inner_path))


In [None]:
from keras.layers import Flatten
import tensorflow as tf

def get_formatted_model_output(model, m_input, verbose=0):
    """Run ``model`` on ``m_input`` and pack its predictions into one adversary input row.

    The last column of the prediction matrix is dropped and the remaining
    values are laid out row by row in a single row.

    Args:
        model: object with a Keras-style ``predict(x, verbose=...)`` method
            that returns a 2-D array.
        m_input: input forwarded unchanged to ``model.predict``.
        verbose: verbosity forwarded to ``model.predict``. Defaults to ``0``
            (silent), matching the UTKFace variant of this helper, so that
            predicting with many target models does not flood the cell output.

    Returns:
        Tensor of shape ``(1, n_rows * (n_columns - 1))``.
    """
    output = model.predict(m_input, verbose=verbose)
    # Drop the last output column.
    # NOTE(review): presumably redundant because the outputs sum to 1 — confirm.
    output = output[:, :-1]
    output = Flatten()(output)
    # reshape as model input: a single row containing every remaining value
    return tf.reshape(output, (1, -1))

# Adversary-ready outputs of every target model:
# model_outputs[lambda][distribution] -> list with one formatted output per model
model_outputs = {}
for l, models_by_dist in target_models.items():
    model_outputs[l] = {}
    for d, dist_models in models_by_dist.items():
        model_outputs[l][d] = [get_formatted_model_output(m, model_input) for m in dist_models]

In [104]:
# Adversary predictions for all target models (every lambda), plus pooled R² per lambda
import numpy as np
from sklearn.metrics import r2_score

# all_adv_out[lambda][dist] -> list (one entry per adversary) of lists (one scalar per target model)
all_adv_out = {}
for l in model_outputs.keys():
    all_adv_out[l] = {}
    y_pred = [] # for r2 calculation
    y_true = [] # for r2 calculation
    print(f"{l}: [")
    for dist in model_outputs[l].keys():
        model_outputs_d = model_outputs[l][dist]
        # Every adversary predicts one scalar for each target model of this distribution.
        adv_out = [[a(x).numpy().flatten()[0] for x in model_outputs_d] for a in adversaries]
        flat_adv_out = np.concatenate(adv_out)
        y_pred = np.concatenate([y_pred, flat_adv_out])
        # The true value of every prediction is the distribution key of the target model.
        y_true = np.concatenate([y_true, np.repeat(float(dist), len(flat_adv_out))])
        # Mean prediction over all adversaries and models of this distribution, rounded for display.
        rounded_num = round(float(np.mean([np.mean(lis) for lis in adv_out])),2)
        print(f"{rounded_num},")
        all_adv_out[l][dist] = adv_out
    print("]")
    # R² pooled over all adversaries, target models and distributions of this lambda.
    print(f"r2 for l={l}: {round(r2_score(y_true, y_pred),2)}")

0.0: [
0.4,
0.38,
0.32,
0.34,
0.56,
0.61,
0.66,
0.73,
0.72,
]
r2 for l=0.0: 0.64
0.15: [
0.49,
0.44,
0.39,
0.44,
0.56,
0.52,
0.56,
0.59,
0.62,
]
r2 for l=0.15: 0.32


In [123]:
def _stack_adv_outputs(outputs_by_dist):
    """Flatten {dist: [[one scalar per model] per adversary]} into a long two-column frame."""
    dist_column = np.concatenate([
        np.repeat(float(dist_key), len(per_adv) * len(per_adv[0]))
        for dist_key, per_adv in outputs_by_dist.items()
    ])
    adv_out_column = np.concatenate(np.concatenate(list(outputs_by_dist.values())))
    return pd.DataFrame({'dist': dist_column, 'adv_out': adv_out_column})

# One row per (distribution, adversary, target model) prediction, for each lambda.
l = '0.0'
cifar_l0_allvalues = _stack_adv_outputs(all_adv_out[l])

l = '0.15'
cifar_l015_allvalues = _stack_adv_outputs(all_adv_out[l])

cifar_l0_allvalues.to_csv("generalize_results/cifar_l0_allvalues.csv", index=False)
cifar_l015_allvalues.to_csv("generalize_results/cifar_l015_allvalues.csv", index=False)

In [None]:
# next steps:
# line plots as in paper
# box plots for all_adv_out

## UTKFace

In [60]:
import pandas as pd
from utk_functions import utk_adversary

def load_and_concat(dist, run_dirs=(
        "utkface_moremodels100",
        "utkface_moremodels101",
        "utkface_moremodels50",
        "utkface_moremodels51",
        "utkface_moremodels51_",
        "utkface_moremodels50_",
        "utkface_moremodels53_",
        "utkface_moremodels54_",
)):
    """Load and stack the UTKFace shadow-model training outputs for one distribution.

    Reads ``<run_dir>/data/shadow_model_outputs/<dist>/train.csv`` for every
    entry of ``run_dirs`` and concatenates the frames row-wise in that order.

    Args:
        dist: Distribution value; formatted verbatim into the sub-directory name.
        run_dirs: Top-level directories of the individual shadow-model runs.
            Defaults to the eight UTKFace runs that used to be hard-coded, so
            ``load_and_concat(dist)`` behaves exactly as before.

    Returns:
        pandas.DataFrame holding the rows of all runs. The per-file row
        labels are kept (no ``ignore_index``), so index values repeat.

    Raises:
        FileNotFoundError: if an expected CSV file is missing (``pd.read_csv``).
        ValueError: if ``run_dirs`` is empty (``pd.concat``).
    """
    frames = [
        pd.read_csv(f"{run_dir}/data/shadow_model_outputs/{dist}/train.csv")
        for run_dir in run_dirs
    ]
    return pd.concat(frames)

# Distribution values; each one names a sub-directory of the shadow-model outputs.
dists = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# Nine full datasets in one list: one concatenated frame per entry of `dists`, same order.
all_train_utk = list(map(load_and_concat, dists))
# All nine frames stacked into a single training frame.
adv_train_utk = pd.concat(all_train_utk)

In [61]:
# Five overlapping training sets, one per adversary. Each set takes the same
# positional row window out of every per-distribution frame in `all_train_utk`.
training_sets_utk = [
    pd.concat([d[lo:hi] for d in all_train_utk])
    for lo, hi in [(0, 200), (100, 300), (200, 400), (300, 500)]
]
# The fifth set wraps around: rows 400-499 followed by rows 0-99.
training_sets_utk.append(
    pd.concat([
        pd.concat([d[400:500] for d in all_train_utk]),
        pd.concat([d[:100] for d in all_train_utk]),
    ])
)

In [63]:
# Adversary test data for UTKFace: the test outputs of every distribution, stacked row-wise.
# Note: this rebinds adv_test / adv_X_test / adv_y_test, which the CIFAR section also uses.
adv_test = pd.concat(
    [pd.read_csv(f"utkface/data/shadow_model_outputs/{dist}/test.csv") for dist in dists]
)
# Split into the feature columns and the target column "y".
adv_X_test = adv_test.drop(columns=["y"])
adv_y_test = adv_test["y"]

In [None]:
# Train one UTKFace adversary on the training set selected by `adv_no` and keep the best attempt.
import keras

# Index into `training_sets_utk` (valid values 0-4). The loader cell further down expects
# saved models for all five indices, so this cell is presumably re-run once per value.
adv_no = 0

curr_adv_train = training_sets_utk[adv_no]

# Split into the target column "y" and the feature columns.
adv_y = curr_adv_train["y"]
adv_X = curr_adv_train.drop(columns=["y"])

# Weights file written by ModelCheckpoint during each training attempt.
checkpoint_filepath = f"utkface/models/manual_tuning_checkpoints_adv{adv_no}/keras.weights.h5"

# Best validation R² so far; an attempt must beat this (initially 0.5) before its model is saved.
currentmax = 0.5
for i in range(100): # up to 100 fresh training attempts; stops early once R² > 0.64 (see below)
    # New, untrained adversary for every attempt.
    manual_adversary = utk_adversary()
    history = manual_adversary.fit(
        adv_X,
        adv_y,
        epochs=200,
        # NOTE(review): the test set doubles as validation data, so early stopping and
        # model selection both see it; the reported R² is not an independent estimate.
        validation_data=(adv_X_test, adv_y_test),
        batch_size=18,
        verbose=0,
        callbacks=[
            # Stop when validation R² has not improved for 20 epochs.
            keras.callbacks.EarlyStopping('val_r2_score', mode='max', patience=20, verbose=1),
            # Write the weights whenever validation R² improves (best epoch only).
            keras.callbacks.ModelCheckpoint(
                filepath=checkpoint_filepath,
                save_weights_only=True,
                monitor='val_r2_score',
                mode='max',
                save_best_only=True)
        ])
    # Best validation R² reached during this attempt.
    newmax = max(history.history['val_r2_score'])
    print(newmax)
    if newmax > currentmax:
        print(f"new max r2: {newmax}")
        currentmax = newmax
        # Restore the checkpointed best-epoch weights before saving the full model.
        manual_adversary.load_weights(checkpoint_filepath)
        manual_adversary.save(f"utkface/models/utkface_adv{adv_no}.keras")
        # Good enough: stop retrying once validation R² exceeds 0.64.
        if newmax > 0.64:
            break

In [88]:
# load all adversaries:

from utk_functions import utk_adversary
from sklearn.metrics import r2_score

# Restore the five saved UTKFace adversaries and report each one's R² on the test data.
adversaries_utk = []

for adv_no in range(5):
    adv = utk_adversary()
    adv.load_weights(f"utkface/models/utkface_adv{adv_no}.keras")
    adversaries_utk.append(adv)
    # Sanity check: uses whatever adv_X_test / adv_y_test currently hold; these names are
    # also assigned in the CIFAR section, so the UTKFace test-data cell must have run last.
    adv_pred = adv(adv_X_test)
    print(f"r2: {r2_score(adv_y_test, adv_pred)}")


  trackable.load_own_variables(weights_store.get(inner_path))
  trackable.load_own_variables(weights_store.get(inner_path))
  trackable.load_own_variables(weights_store.get(inner_path))


r2: 0.5504196469266827
r2: 0.6131827399415866
r2: 0.6727928927241413
r2: 0.6233553449984546
r2: 0.5992339254179848


  trackable.load_own_variables(weights_store.get(inner_path))
  trackable.load_own_variables(weights_store.get(inner_path))


In [126]:
# read target models from disk
from common.functions import get_defending_lucasnet_model, compile_categorical_model, ensure_path_exists, compile_lucasnet, get_lucasnet_model

# Lambda values that appear in the stored target-model file names.
lambdas = [0.0, 0.15]

def load_models(lambda_):
    """Load the stored UTKFace target models for one lambda value.

    For every distribution 0.1 ... 0.9 and every run 0 ... 4 a LucasNet model
    (2 classes, 64x64x3 input) is built, compiled and filled with the weights
    from ``utkface/models/defense/utkdef-v2-ds<d>-l<lambda_>-run<i>.keras``.

    Returns:
        dict mapping ``str(distribution)`` to the list of its five models.
    """
    def _restore(d, run):
        # Build and compile a fresh network, then load the weights of this run.
        net = get_lucasnet_model(
            num_classes=2,
            input_shape=(64, 64, 3))
        net = compile_lucasnet(net)
        net.load_weights(f"utkface/models/defense/utkdef-v2-ds{d}-l{lambda_}-run{run}.keras")
        return net

    distributions = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    return {str(d): [_restore(d, run) for run in range(5)] for d in distributions}

# dict containing all target models with different lambdas:
# target_models_utk[str(lambda)][str(distribution)] -> list of the five loaded models
target_models_utk = {str(l): load_models(l) for l in lambdas}

In [76]:
# input dataset for utk target/shadow models
from utk_functions import get_lbfw_dataset

# Input set every UTKFace target model is run on (passed to model.predict further below).
model_input_utk = get_lbfw_dataset()

In [127]:
# get adversary outputs for loaded models
from keras.layers import Flatten
import tensorflow as tf

def get_formatted_model_output(model, m_input):
    """Run ``model`` silently on ``m_input`` and pack the predictions into one row.

    The last column of the prediction matrix is dropped; the remaining values
    are returned as a tensor of shape ``(1, n_rows * (n_columns - 1))``.
    """
    predictions = model.predict(m_input, verbose=0)
    # Keep every column except the last one.
    kept = predictions[:, 0:predictions.shape[1] - 1]
    flat = Flatten()(kept)
    n_rows, n_cols = flat.shape[0], flat.shape[1]
    # reshape as model input: a single row containing every remaining value
    return tf.reshape(flat, (1, n_rows * n_cols))

# Adversary-ready outputs of every UTKFace target model:
# model_outputs_utk[lambda][distribution] -> list with one formatted output per model
model_outputs_utk = {}
for l, models_by_dist in target_models_utk.items():
    print(f"lambda {l}...")
    per_dist = model_outputs_utk[l] = {}
    for d, dist_models in models_by_dist.items():
        print(f"dist {d}...")
        per_dist[d] = [get_formatted_model_output(m, model_input_utk) for m in dist_models]

lambda 0.0...
dist 0.1...
dist 0.2...
dist 0.3...
dist 0.4...
dist 0.5...
dist 0.6...
dist 0.7...
dist 0.8...
dist 0.9...
lambda 0.15...
dist 0.1...
dist 0.2...
dist 0.3...
dist 0.4...
dist 0.5...
dist 0.6...
dist 0.7...
dist 0.8...
dist 0.9...


In [128]:
# Mean adversary outputs for all UTKFace target models (every lambda) and pooled R² per lambda
import numpy as np
from sklearn.metrics import r2_score

# all_adv_out_utk[lambda][dist] -> list (one entry per adversary) of lists (one scalar per target model)
all_adv_out_utk = {}
for l in model_outputs_utk.keys():
    all_adv_out_utk[l] = {}
    y_pred = [] # for r2 calculation
    y_true = [] # for r2 calculation
    print(f"{l}: [")
    for dist in model_outputs_utk[l].keys():
        model_outputs_d = model_outputs_utk[l][dist]
        # Every adversary predicts one scalar for each target model of this distribution.
        adv_out = [[a(x).numpy().flatten()[0] for x in model_outputs_d] for a in adversaries_utk]
        flat_adv_out = np.concatenate(adv_out)
        y_pred = np.concatenate([y_pred, flat_adv_out])
        # The true value of every prediction is the distribution key of the target model.
        y_true = np.concatenate([y_true, np.repeat(float(dist), len(flat_adv_out))])
        # Mean prediction over all adversaries and models of this distribution, rounded for display.
        rounded_num = round(float(np.mean([np.mean(lis) for lis in adv_out])),2)
        print(f"{rounded_num},")
        all_adv_out_utk[l][dist] = adv_out
    print("]")
    # R² pooled over all adversaries, target models and distributions of this lambda.
    print(f"r2 for l={l}: {round(r2_score(y_true, y_pred),2)}")

0.0: [
0.22,
0.4,
0.43,
0.51,
0.64,
0.65,
0.65,
0.65,
0.68,
]
r2 for l=0.0: 0.58
0.15: [
0.35,
0.44,
0.47,
0.55,
0.5,
0.63,
0.57,
0.56,
0.56,
]
r2 for l=0.15: 0.3


In [129]:
def _stack_adv_outputs(outputs_by_dist):
    """Flatten {dist: [[one scalar per model] per adversary]} into a long two-column frame."""
    dist_column = np.concatenate([
        np.repeat(float(dist_key), len(per_adv) * len(per_adv[0]))
        for dist_key, per_adv in outputs_by_dist.items()
    ])
    adv_out_column = np.concatenate(np.concatenate(list(outputs_by_dist.values())))
    return pd.DataFrame({'dist': dist_column, 'adv_out': adv_out_column})

# One row per (distribution, adversary, target model) prediction, for each lambda.
l = '0.0'
utk_l0_allvalues = _stack_adv_outputs(all_adv_out_utk[l])

l = '0.15'
utk_l015_allvalues = _stack_adv_outputs(all_adv_out_utk[l])

utk_l0_allvalues.to_csv("generalize_results/utk_l0_allvalues.csv", index=False)
utk_l015_allvalues.to_csv("generalize_results/utk_l015_allvalues.csv", index=False)

In [130]:
def _mean_per_dist(outputs_by_dist):
    """Mean adversary output over all adversaries and models, one value per distribution."""
    return [np.mean(np.concatenate(per_adv)) for per_adv in outputs_by_dist.values()]

# Mean adversary prediction per distribution for both datasets, lambda = 0.0.
l = "0.0"
generalize_eval_l0 = {
        'dist': [float(k) for k in all_adv_out_utk[l].keys()],
        'utk': _mean_per_dist(all_adv_out_utk[l]),
        'cifar': _mean_per_dist(all_adv_out[l]),
    }
generalize_l0_mean_eval = pd.DataFrame(generalize_eval_l0)
generalize_l0_mean_eval.to_csv("generalize_results/l0_mean_eval.csv", index=False)

# Same table for lambda = 0.15.
l = "0.15"
generalize_eval_l015 = {
        'dist': [float(k) for k in all_adv_out_utk[l].keys()],
        'utk': _mean_per_dist(all_adv_out_utk[l]),
        'cifar': _mean_per_dist(all_adv_out[l]),
    }
generalize_l015_mean_eval = pd.DataFrame(generalize_eval_l015)
generalize_l015_mean_eval.to_csv("generalize_results/l015_mean_eval.csv", index=False)
generalize_l015_mean_eval

Unnamed: 0,dist,utk,cifar
0,0.1,0.353522,0.492574
1,0.2,0.436791,0.444326
2,0.3,0.465461,0.387219
3,0.4,0.548344,0.441666
4,0.5,0.50377,0.556148
5,0.6,0.62993,0.517414
6,0.7,0.570404,0.559133
7,0.8,0.56211,0.594531
8,0.9,0.555236,0.621457


In [131]:
# Inspection only: raw CIFAR adversary outputs, all_adv_out[lambda][dist] -> one list per adversary.
all_adv_out

{'0.0': {'0.1': [[0.41709867, 0.60043234, 0.35545146, 0.27601588, 0.3426215],
   [0.46966153, 0.6329003, 0.39861685, 0.25013536, 0.3542658],
   [0.49893987, 0.49440652, 0.28670442, 0.21139379, 0.33899432],
   [0.42764646, 0.62307864, 0.41895276, 0.26168662, 0.39963275],
   [0.39296868, 0.56541306, 0.3943174, 0.31268916, 0.36011234]],
  '0.2': [[0.38033068, 0.3270929, 0.43093956, 0.42365128, 0.5655265],
   [0.361848, 0.25013536, 0.33959484, 0.3446923, 0.63099885],
   [0.3032317, 0.20660022, 0.20987827, 0.24907345, 0.5974672],
   [0.34972006, 0.28524798, 0.35499674, 0.34626204, 0.57554275],
   [0.3658196, 0.31268916, 0.34801164, 0.3929782, 0.5272546]],
  '0.3': [[0.23820373, 0.2985903, 0.2806291, 0.3005887, 0.41895986],
   [0.25013536, 0.3313409, 0.25013536, 0.30037558, 0.43450934],
   [0.22375247, 0.34128582, 0.24677208, 0.2392799, 0.4561655],
   [0.2631281, 0.33181077, 0.27829844, 0.3032027, 0.42931753],
   [0.31268916, 0.31268916, 0.31268916, 0.33352715, 0.40382534]],
  '0.4': [[0.335

In [95]:
# Inspection only: raw UTKFace adversary outputs for lambda 0.0.
# NOTE(review): the stored output shows 3 values per adversary, but load_models loads 5 runs
# per distribution — the output looks stale; re-run this cell.
all_adv_out_utk["0.0"]

{'0.1': [[0.3084762, 0.27633095, 0.24316682],
  [0.24988714, 0.16888705, 0.21883193],
  [0.08921543, 0.090263635, 0.28139067],
  [0.31348324, 0.14992428, 0.289155],
  [0.22977118, 0.24823597, 0.3136106]],
 '0.2': [[0.33635777, 0.37512988, 0.47727278],
  [0.3839995, 0.2693316, 0.42928937],
  [0.3802551, 0.34340328, 0.35620573],
  [0.31561184, 0.24428654, 0.37329054],
  [0.464601, 0.2652412, 0.2856149]],
 '0.3': [[0.6180749, 0.32135588, 0.48070198],
  [0.6126029, 0.3072423, 0.36353388],
  [0.43287396, 0.27499452, 0.37989628],
  [0.5793829, 0.26358032, 0.35962105],
  [0.58886755, 0.61893797, 0.4348889]],
 '0.4': [[0.534367, 0.5878189, 0.561129],
  [0.55153054, 0.53622454, 0.5910246],
  [0.36387262, 0.44820794, 0.4590885],
  [0.46398544, 0.45878506, 0.4059229],
  [0.5302571, 0.46459925, 0.64828664]],
 '0.5': [[0.74136996, 0.71456134, 0.7165613],
  [0.7227728, 0.7362656, 0.5709695],
  [0.7270825, 0.59608984, 0.62502563],
  [0.6335664, 0.71008825, 0.5683923],
  [0.6907783, 0.70344114, 0.5868

In [90]:
# Inspection only: raw UTKFace adversary outputs for every lambda.
# NOTE(review): the stored output shows 3 values per adversary, but load_models loads 5 runs
# per distribution — the output looks stale; re-run this cell.
all_adv_out_utk

{'0.0': {'0.1': [[0.3084762, 0.27633095, 0.24316682],
   [0.24988714, 0.16888705, 0.21883193],
   [0.08921543, 0.090263635, 0.28139067],
   [0.31348324, 0.14992428, 0.289155],
   [0.22977118, 0.24823597, 0.3136106]],
  '0.2': [[0.33635777, 0.37512988, 0.47727278],
   [0.3839995, 0.2693316, 0.42928937],
   [0.3802551, 0.34340328, 0.35620573],
   [0.31561184, 0.24428654, 0.37329054],
   [0.464601, 0.2652412, 0.2856149]],
  '0.3': [[0.6180749, 0.32135588, 0.48070198],
   [0.6126029, 0.3072423, 0.36353388],
   [0.43287396, 0.27499452, 0.37989628],
   [0.5793829, 0.26358032, 0.35962105],
   [0.58886755, 0.61893797, 0.4348889]],
  '0.4': [[0.534367, 0.5878189, 0.561129],
   [0.55153054, 0.53622454, 0.5910246],
   [0.36387262, 0.44820794, 0.4590885],
   [0.46398544, 0.45878506, 0.4059229],
   [0.5302571, 0.46459925, 0.64828664]],
  '0.5': [[0.74136996, 0.71456134, 0.7165613],
   [0.7227728, 0.7362656, 0.5709695],
   [0.7270825, 0.59608984, 0.62502563],
   [0.6335664, 0.71008825, 0.5683923],
 