In [335]:
from cifar_functions import data_train_test_cifar

X_train, X_test, Y_train, Y_test, sensitive, sensitive_t = data_train_test_cifar()

In [336]:
print("hello")

hello


In [378]:
import operator
import numpy as np
import pandas as pd
import os
from keras.layers import RandomFlip, Conv2D, GroupNormalization, MaxPooling2D, Dense, Flatten
from keras.losses import CategoricalCrossentropy
from keras.optimizers import Adam
from keras import Sequential, Input
from keras.src.saving.saving_api import load_model, save_model
from keras.utils import set_random_seed
import keras
import tensorflow as tf

class DatasetWithForcedDistribution:
    def __init__(self, sensitive_attribute_name, distribution, X_train, X_test, y_train, y_test, sensitive, sensitive_t):
        self.sensitive_attribute_name = sensitive_attribute_name
        self.distribution = distribution
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test
        self.sensitive = sensitive
        self.sensitive_t = sensitive_t

def ensure_path_exists(path):
    if not os.path.exists(path):
        os.makedirs(path)

def drop_index(df, idx):
    return df.reset_index().drop(index=idx).drop(columns=["index"])

def find_indices_to_drop(sensitive, target_distribution):
    length = len(sensitive)
    indices_to_drop = []
    def current_dist(sensitive_value=1):
        if sensitive_value == 1:
            return (pd.Series(sensitive).value_counts()[1] - len(indices_to_drop)) / (length - len(indices_to_drop))
        else:
            return (pd.Series(sensitive).value_counts()[1]) / (length - len(indices_to_drop))

    if current_dist() > target_distribution:
        comp = operator.gt
        sensitive_value_to_delete = 1
    else:
        comp = operator.lt
        sensitive_value_to_delete = 0

    i = 0
    while comp(current_dist(sensitive_value_to_delete), target_distribution):
        if i >= length:
            raise ValueError("Unable to reach target distribution. Not enough entries with the sensitive value to delete.")
        if sensitive[i] == sensitive_value_to_delete:
            diff = abs(current_dist(sensitive_value_to_delete) - target_distribution)
            if diff > 0.1:
                indices_to_drop.extend([i + j for j in range(10) if i + j < length])
                i += 10
            else:
                indices_to_drop.append(i)
                i += 1
        else:
            i += 1

    return indices_to_drop

def get_lucasnet_sequence(num_classes, input_shape):
    groups = 32
    return [
        Input(shape=input_shape),
        RandomFlip("horizontal", seed=42),
        Conv2D(filters=32, kernel_size=(3, 3), strides=1, padding="same", activation="relu"),
        GroupNormalization(groups=groups),
        MaxPooling2D(2, 2),
        Conv2D(filters=32, kernel_size=(3, 3), strides=1, padding="same", activation="relu"),
        GroupNormalization(groups=groups),
        MaxPooling2D(2, 2),
        Conv2D(filters=64, kernel_size=(3, 3), strides=1, padding="same", activation="relu"),
        GroupNormalization(groups=groups),
        MaxPooling2D(2, 2),
        Flatten(),
        Dense(512, activation="relu"),
        GroupNormalization(groups=groups),
        Dense(num_classes, activation="softmax"),
    ]

def get_lucasnet_model(num_classes, input_shape):
    return Sequential(get_lucasnet_sequence(num_classes, input_shape))

def compile_lucasnet(model):
    model.compile(optimizer=Adam(), loss=CategoricalCrossentropy(), metrics=['accuracy'])
    return model

def fit_lucasnet(X_train, y_train, X_test, y_test, batch_size=32, epochs=5, verbose=0, input_shape=(64, 64, 3), num_classes=2):
    model = get_lucasnet_model(num_classes, input_shape)
    model = compile_lucasnet(model)
    history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=None if X_test is None else (X_test, y_test), verbose=verbose)
    return model, history

def train_and_generate_output(X_train, y_train, shadow_input, load_model_path, save_model_path, model_no, input_shape, num_classes):
    if os.path.isfile(f"{load_model_path}{model_no}.keras"):
        print(f"Loading model {model_no}")
        shadow_model = load_model(f"{load_model_path}{model_no}.keras")
    else:
        set_random_seed(model_no)
        shadow_model, _ = fit_lucasnet(X_train, y_train, X_test=None, y_test=None, input_shape=input_shape, num_classes=num_classes)
        if save_model_path is not None:
            shadow_model.save(f"{save_model_path}{model_no}.keras")
    output = np.array(shadow_model.predict(shadow_input, verbose=0)).astype(np.float16)
    return output[:, 0:output.shape[1]-1]

def generate_shadow_model_outputs(dataset, shadow_input, load_model_path, save_model_path, n_shadow_models=100, use_test_data=False, input_shape=(64, 64, 3), num_classes=2):
    if use_test_data:
        X = dataset.X_test
        y = dataset.y_test
    else:
        X = dataset.X_train
        y = dataset.y_train

    outputs = [train_and_generate_output(X, y, shadow_input, load_model_path, save_model_path, i, input_shape, num_classes) for i in range(n_shadow_models)]
    outputs = np.array([o.flatten() for o in outputs])
    return outputs

def train_shadow_models(test_run, n_shadow_models, distributed_datasets, model_input, input_shape, num_classes, base_path, save_models=True):
    for ds in distributed_datasets:
        print(f"now generating {ds.distribution}...")
        load_model_path = f"{base_path}/models/shadow_models/{str(ds.distribution)}/{'test' if test_run else 'train'}/"
        if save_models:
            save_model_path = f"{base_path}/models/shadow_models/{str(ds.distribution)}/{'test' if test_run else 'train'}/"
            ensure_path_exists(save_model_path)
        else:
            save_model_path = None
        outputs = generate_shadow_model_outputs(ds, model_input, load_model_path, save_model_path, n_shadow_models=n_shadow_models, use_test_data=test_run, input_shape=input_shape, num_classes=num_classes)
        adv_df = pd.DataFrame(outputs)
        adv_df["y"] = np.repeat(ds.distribution, n_shadow_models)
        save_data_path = f"{base_path}/data/shadow_model_outputs/{str(ds.distribution)}/"
        ensure_path_exists(save_data_path)
        adv_df.to_csv(f"{save_data_path}{'test' if test_run else 'train'}.csv", index=False)

class DefendingModel(keras.Sequential):
    def __init__(self, pia_adversary, adv_input, sensitive, training_lambda, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.pia_adversary = pia_adversary
        self.adv_input = adv_input
        self.sensitive = sensitive
        self.training_lambda = training_lambda
        self.adversary_metric = keras.metrics.Mean(name='adversary_prediction')
        self.p_rule_metric = keras.metrics.Mean(name='p_rule')

        # Initialize TensorFlow variables for the fairness adversary
        self.init_fairness_adversary()
    
    @staticmethod
    def p_rule(y_pred, z_values, threshold=0.5):
        y_z_1 = y_pred[z_values == 1] > threshold if threshold else y_pred[z_values == 1]
        y_z_0 = y_pred[z_values == 0] > threshold if threshold else y_pred[z_values == 0]
        odds = y_z_1.mean() / y_z_0.mean()
        return np.min([odds, 1/odds])

    def init_fairness_adversary(self):
        adv = keras.Sequential([
            Input(shape=(10,)),
            Dense(5, activation='relu'),
            Dense(2, activation='softmax')
        ])
        adv.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        self.adversary = adv

    def train_fairness_adversary(self, y_pred_for_adv):
        with tf.GradientTape() as tape:
            # Forward pass
            adv_pred = self.adversary(y_pred_for_adv)
            # Compute the loss value
            adv_loss = tf.keras.losses.BinaryCrossentropy()(self.sensitive, adv_pred)

        # Compute gradients
        trainable_vars = self.adversary.trainable_variables
        gradients = tape.gradient(adv_loss, trainable_vars)

        # Update weights
        self.adversary.optimizer.apply_gradients(zip(gradients, trainable_vars))

    #def train_pia_adversary(
    
    @tf.function
    def train_step(self, data):
        x, y = data

        y_pred_for_adv = self(x, training=False)

        # Train the fairness adversary
        self.train_fairness_adversary(y_pred_for_adv)
        
        with tf.GradientTape() as tape:
            y_pred_for_adv = self(x, training=False)
            #y_pred_for_adv = tf.expand_dims(tf.cast(y_pred_for_adv, dtype=tf.float32), axis=1)
            
            adv_pred = self.adversary(y_pred_for_adv, training=False)
            adv_loss = tf.keras.losses.BinaryCrossentropy()(self.sensitive, adv_pred)

            y_pred = self(x, training=True)
            loss_train = self.compute_loss(x, y, y_pred)
            combined_loss = loss_train * (1-self.training_lambda) + self.training_lambda * adv_loss

        gradients = tape.gradient(combined_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        metrics = self.compute_metrics(x, y, y_pred, sample_weight=None)
        
        # check pia adversary output
        y_pred_for_adv = self(self.adv_input, training=False)
        # last column of prediction is redundant
        num_columns = y_pred_for_adv.shape[1]-1
        y_pred_for_adv = y_pred_for_adv[:, 0:num_columns]
        y_pred_for_adv = Flatten()(y_pred_for_adv)
        # reshape as model input
        my_x = tf.reshape(y_pred_for_adv, (1, y_pred_for_adv.shape[0]*y_pred_for_adv.shape[1]))
        
        metrics.update({'adversary_prediction': self.pia_adversary(my_x),
                        'p_rule': self.p_rule(self(x, training=False), self.sensitive)})
        return metrics

    def save_inner_model(self, filepath):
        seq = keras.Sequential(self.layers)
        seq = compile_categorical_model(seq)
        save_model(seq, filepath)


def get_fairness_lucasnet_model(pia_adversary, adversary_input, sensitive, training_lambda, num_classes, input_shape):
    return DefendingModel(pia_adversary, adversary_input, sensitive, training_lambda, get_lucasnet_sequence(num_classes, input_shape))

def compile_categorical_model(model):
    model.compile(optimizer=Adam(), loss=CategoricalCrossentropy(), metrics=['accuracy'])
    return model


In [379]:
sensitive_categorical = tf.keras.utils.to_categorical(sensitive)
model = get_fairness_lucasnet_model(
    pia_adv,
    input_set,
    sensitive=sensitive_categorical,
    training_lambda=0.0,
    num_classes=10,
    input_shape=(32, 32, 3))
model = compile_categorical_model(model)

model_history = model.fit(
    X_train,
    Y_train,
    epochs=100,
    validation_data=(X_test, Y_test),
    batch_size=30000,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5)]
)
    

Epoch 1/100


ValueError: in user code:

    File "/tmp/ipykernel_613361/3455634472.py", line 217, in train_step  *
        metrics.update({'adversary_prediction': self.pia_adversary(my_x),
    File "/tmp/ipykernel_613361/1057371444.py", line 153, in p_rule  *
        y_z_1 = y_pred[z_values == 1] > threshold if threshold else y_pred[z_values == 1]

    ValueError: Shapes (30000, 10) and (30000, 2) are incompatible


In [369]:
# cifar adv
def create_and_compile(input_shape):
    manual_adversary = keras.Sequential()
    manual_adversary.add(keras.Input(shape=input_shape))
    manual_adversary.add(keras.layers.Dense(30, activation='relu'))#, kernel_regularizer=regularizers.l2(0.01)))
    manual_adversary.add(keras.layers.Dropout(0.05))
    manual_adversary.add(keras.layers.Dense(8, activation='relu'))#, kernel_regularizer=regularizers.l2(0.04)))
    manual_adversary.add(keras.layers.Dense(1))
    manual_adversary.compile(optimizer=keras.optimizers.Adam(), loss=keras.losses.MeanSquaredError(), metrics=[keras.metrics.R2Score()])
    return manual_adversary

In [348]:
pia_adv = create_and_compile((45360,))

In [354]:
pia_adv.load_weights("cifar/models/cifar_adv_v2_0.59_test_r2.keras")

In [357]:
from cifar_functions import get_cifar_input_set

input_set = get_cifar_input_set()

In [364]:
pd.array(sensitive).value_counts().iloc[0] / len(sensitive)

0.6

In [None]:
model_l0 = model

In [328]:
model_l0 = model

In [331]:
pred_0 = model_l0(X_train)

In [332]:
pred_01 = model_l01(X_train)

In [322]:
adv_pred = model.adversary(np.reshape(pred, (30000, 10)))

In [323]:
adv_pred

<tf.Tensor: shape=(30000, 2), dtype=float32, numpy=
array([[0.50858974, 0.4914103 ],
       [0.5120358 , 0.4879643 ],
       [0.5096619 , 0.4903381 ],
       ...,
       [0.51186997, 0.48812997],
       [0.51150024, 0.48849967],
       [0.50139654, 0.4986034 ]], dtype=float32)>

In [316]:
sensitive

array([ True,  True, False, ..., False,  True,  True])

In [317]:
def p_rule(y_pred, z_values, threshold=0.5):
    y_z_1 = y_pred[z_values == 1] > threshold if threshold else y_pred[z_values == 1]
    y_z_0 = y_pred[z_values == 0] > threshold if threshold else y_pred[z_values == 0]
    odds = y_z_1.mean() / y_z_0.mean()
    return np.min([odds, 1/odds]) * 100

In [377]:
p_rule(pred_0.numpy(), sensitive) # lambda = 0.0

51.655925528105975

In [380]:
p_rule(pred_01.numpy(), sensitive) # lambda = 0.1

65.73178142383023

In [373]:
pred_01.shape

TensorShape([30000, 10])

In [374]:
sensitive.shape

(30000,)

In [381]:
pred_01[sensitive == 1] > 0.5# if threshold else y_pred[sensitive == 1]

<tf.Tensor: shape=(18000, 10), dtype=bool, numpy=
array([[False, False, False, ..., False, False, False],
       [False,  True, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])>

In [384]:
for p in pred_01[:10]:
    # return index of element with highest value in p:
    print(np.argmax(p))

5
1
9
3
1
2
9
0
1
6
