<a href="https://colab.research.google.com/github/mmfara/Adversarial-Debiasing-Enhanced/blob/main/AL%2B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Modified AdversarialDebiasing transformer, built on the AIF360 Transformer base class.

import numpy as np
import pandas as pd

try:
    import tensorflow.compat.v1 as tf
    tf.disable_v2_behavior()
except ImportError as error:
    from logging import warning
    warning("{}: AdversarialDebiasing will be unavailable. To install, run:\n"
            "pip install 'aif360[AdversarialDebiasing]'".format(error))

from aif360.algorithms import Transformer

class AdversarialDebiasing(Transformer):
    """Binary classifier trained jointly with an adversary on a protected attribute.

    The classifier is a one-hidden-layer network. When ``debias`` is true, an
    adversary tries to predict the protected attribute from the classifier's
    output (and the true label), and the classifier's gradients are altered so
    that its updates do not help the adversary.

    The model is built with the TensorFlow 1.x graph API (``tf.compat.v1``) and
    runs inside the session supplied by the caller.
    """

    def __init__(self,
                 unprivileged_groups,
                 privileged_groups,
                 scope_name,
                 sess,
                 seed=None,
                 adversary_loss_weight=0.1,
                 num_epochs=50,
                 batch_size=128,
                 classifier_num_hidden_units=200,
                 debias=True):
        """Store the configuration; the graph itself is built in :meth:`fit`.

        Args:
            unprivileged_groups: List of dicts whose keys name the protected
                attribute (values describe the unprivileged group(s)).
            privileged_groups: List of dicts whose keys name the protected
                attribute (values describe the privileged group(s)).
            scope_name: TensorFlow variable scope under which all variables of
                this model are created.
            sess: TensorFlow session used for training and prediction.
            seed: Optional seed for NumPy's global RNG; the per-op TensorFlow
                seeds are drawn from that RNG in :meth:`fit`.
            adversary_loss_weight: Weight of the adversary's gradient that is
                subtracted from the classifier's gradient.
            num_epochs: Number of passes over the training data.
            batch_size: Mini-batch size for training and prediction.
            classifier_num_hidden_units: Width of the classifier hidden layer.
            debias: If false, only the plain classifier is trained.

        Raises:
            ValueError: If the group dicts reference anything other than
                exactly one protected attribute name.
        """
        super(AdversarialDebiasing, self).__init__(
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)

        self.scope_name = scope_name
        self.seed = seed

        self.unprivileged_groups = unprivileged_groups
        self.privileged_groups = privileged_groups

        # Several group dicts (i.e. several attribute values) are allowed, but
        # they must all refer to the same single protected attribute.
        protected_attrs = set()
        for group in (self.unprivileged_groups + self.privileged_groups):
            protected_attrs.update(group.keys())

        if len(protected_attrs) != 1:
            raise ValueError("Only one protected attribute can be handled at a time.")

        self.protected_attribute_name = next(iter(protected_attrs))

        self.sess = sess
        self.adversary_loss_weight = adversary_loss_weight
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.classifier_num_hidden_units = classifier_num_hidden_units
        self.debias = debias

        # Graph handles; populated by fit().
        self.features_dim = None
        self.features_ph = None
        self.protected_attributes_ph = None
        self.true_labels_ph = None
        self.keep_prob = None
        self.pred_labels = None

    def _classifier_model(self, features, features_dim, keep_prob):
        """Build the classifier: dense -> ReLU -> dropout -> dense -> sigmoid.

        Args:
            features: Input tensor with ``features_dim`` columns.
            features_dim: Number of input features.
            keep_prob: Dropout keep probability (tensor or float).

        Returns:
            Tuple ``(pred_label, pred_logit)``: the sigmoid output and the
            pre-sigmoid logit, each with a single column.
        """
        with tf.variable_scope("classifier_model"):
            W1 = tf.get_variable('W1', [features_dim, self.classifier_num_hidden_units],
                                 initializer=tf.initializers.glorot_uniform(seed=self.seed1))
            b1 = tf.Variable(tf.zeros(shape=[self.classifier_num_hidden_units]), name='b1')
            h1 = tf.nn.relu(tf.matmul(features, W1) + b1)
            h1 = tf.nn.dropout(h1, keep_prob=keep_prob, seed=self.seed2)

            W2 = tf.get_variable('W2', [self.classifier_num_hidden_units, 1],
                                 initializer=tf.initializers.glorot_uniform(seed=self.seed3))
            b2 = tf.Variable(tf.zeros(shape=[1]), name='b2')

            pred_logit = tf.matmul(h1, W2) + b2
            pred_label = tf.sigmoid(pred_logit)

        return pred_label, pred_logit

    def _adversary_model(self, pred_logits, true_labels, num_classes):
        """Build the adversary: a linear softmax over protected-attribute classes.

        The adversary sees ``s = sigmoid(pred_logits)`` together with
        ``s * y`` and ``s * (1 - y)`` where ``y`` is the true label.

        Args:
            pred_logits: Classifier logits (one column).
            true_labels: True labels in {0, 1} (one column).
            num_classes: Number of distinct protected-attribute classes.

        Returns:
            Tuple ``(preds, logits)``: softmax probabilities and raw logits,
            each with ``num_classes`` columns.
        """
        with tf.variable_scope("adversary_model"):
            s = tf.sigmoid(pred_logits)
            concat_input = tf.concat([s, s * true_labels, s * (1.0 - true_labels)], axis=1)
            input_dim = concat_input.shape[1]

            W = tf.get_variable('W_adv', [input_dim, num_classes],
                                initializer=tf.initializers.glorot_uniform(seed=self.seed4))
            b = tf.Variable(tf.zeros(shape=[num_classes]), name='b_adv')

            logits = tf.matmul(concat_input, W) + b
            preds = tf.nn.softmax(logits)

        return preds, logits

    def fit(self, dataset):
        """Build the graph and train the classifier (and adversary if enabled).

        Args:
            dataset: Object exposing ``features``, ``labels``,
                ``protected_attributes``, ``protected_attribute_names``,
                ``favorable_label`` and ``unfavorable_label``.

        Returns:
            ``self``, to allow call chaining.

        Raises:
            RuntimeError: If TensorFlow is running in eager mode.
            ValueError: If the features contain NaN or Inf values.
        """
        if tf.executing_eagerly():
            raise RuntimeError("AdversarialDebiasing does not work in eager execution mode.")

        if np.isnan(dataset.features).any() or np.isinf(dataset.features).any():
            raise ValueError("Features contain NaN or Inf values. Please clean the data.")

        if self.seed is not None:
            np.random.seed(self.seed)
        # Independent seeds for the three initializers and the dropout op.
        ii32 = np.iinfo(np.int32)
        self.seed1, self.seed2, self.seed3, self.seed4 = np.random.randint(
            ii32.min, ii32.max, size=4)

        # Always map favorable -> 1.0 and unfavorable -> 0.0. Skipping the
        # mapping when the raw labels already happen to be {0, 1} would invert
        # the meaning for datasets whose favorable label is 0, because
        # predict() maps a predicted 1 back to ``favorable_label``.
        # Masks are taken from the untouched ``dataset.labels`` so the two
        # assignments cannot interfere with each other.
        temp_labels = dataset.labels.copy()
        temp_labels[(dataset.labels == dataset.favorable_label).ravel(), 0] = 1.0
        temp_labels[(dataset.labels == dataset.unfavorable_label).ravel(), 0] = 0.0

        # Encode the protected attribute as class indices 0..num_classes-1;
        # the sparse softmax cross-entropy requires targets in that range, and
        # raw attribute values (e.g. {1, 2}) are not guaranteed to satisfy it.
        prot_col = dataset.protected_attribute_names.index(self.protected_attribute_name)
        unique_prot_attr, prot_class_ids = np.unique(
            dataset.protected_attributes[:, prot_col], return_inverse=True)
        prot_class_ids = np.asarray(prot_class_ids).ravel().astype(np.int32)
        num_classes = len(unique_prot_attr)

        with tf.variable_scope(self.scope_name):
            num_train_samples, self.features_dim = np.shape(dataset.features)

            self.features_ph = tf.placeholder(tf.float32, shape=[None, self.features_dim])
            self.protected_attributes_ph = tf.placeholder(tf.int32, shape=[None])
            self.true_labels_ph = tf.placeholder(tf.float32, shape=[None, 1])
            self.keep_prob = tf.placeholder(tf.float32)

            self.pred_labels, pred_logits = self._classifier_model(
                self.features_ph, self.features_dim, self.keep_prob)
            pred_labels_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=self.true_labels_ph, logits=pred_logits))

            if self.debias:
                pred_protected_attributes_labels, pred_protected_attributes_logits = \
                    self._adversary_model(pred_logits, self.true_labels_ph, num_classes)
                pred_protected_attributes_loss = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        labels=self.protected_attributes_ph,
                        logits=pred_protected_attributes_logits))

            # Learning rate: 1e-4, multiplied by 0.96 every 1000 classifier steps.
            global_step = tf.Variable(0, trainable=False)
            starter_learning_rate = 0.0001
            learning_rate = tf.train.exponential_decay(
                starter_learning_rate, global_step, 1000, 0.96, staircase=True)
            classifier_opt = tf.train.AdamOptimizer(learning_rate)
            if self.debias:
                adversary_opt = tf.train.AdamOptimizer(learning_rate)

            classifier_vars = [var for var in tf.trainable_variables(scope=self.scope_name)
                               if 'classifier_model' in var.name]
            if self.debias:
                adversary_vars = [var for var in tf.trainable_variables(scope=self.scope_name)
                                  if 'adversary_model' in var.name]
                # Gradient of the adversary loss w.r.t. the *classifier* weights.
                adversary_grads = {
                    var: grad for (grad, var) in adversary_opt.compute_gradients(
                        pred_protected_attributes_loss, var_list=classifier_vars)}

            def normalize(x):
                # The tiny constant avoids division by zero for a zero gradient.
                return x / (tf.norm(x) + np.finfo(np.float32).tiny)

            classifier_grads = []
            for (grad, var) in classifier_opt.compute_gradients(
                    pred_labels_loss, var_list=classifier_vars):
                if self.debias:
                    unit_adv_grad = normalize(adversary_grads[var])
                    # Remove the component of the classifier gradient that lies
                    # along the adversary gradient ...
                    grad -= tf.reduce_sum(grad * unit_adv_grad) * unit_adv_grad
                    # ... and additionally step against the adversary gradient.
                    grad -= self.adversary_loss_weight * adversary_grads[var]
                grad = tf.clip_by_value(grad, -5.0, 5.0)  # Gradient clipping
                classifier_grads.append((grad, var))

            classifier_minimizer = classifier_opt.apply_gradients(
                classifier_grads, global_step=global_step)
            if self.debias:
                # The adversary update ops are created under a control
                # dependency on the classifier update.
                with tf.control_dependencies([classifier_minimizer]):
                    adversary_minimizer = adversary_opt.minimize(
                        pred_protected_attributes_loss, var_list=adversary_vars)

            self.sess.run(tf.global_variables_initializer())
            self.sess.run(tf.local_variables_initializer())

            # Only full batches are used per epoch; a dataset smaller than one
            # batch is trained as a single (short) batch instead of being
            # skipped entirely.
            num_batches = num_train_samples // self.batch_size
            if num_batches == 0 and num_train_samples > 0:
                num_batches = 1

            for epoch in range(self.num_epochs):
                shuffled_ids = np.random.choice(
                    num_train_samples, num_train_samples, replace=False)
                for i in range(num_batches):
                    batch_ids = shuffled_ids[self.batch_size * i: self.batch_size * (i + 1)]

                    feed_dict = {
                        self.features_ph: dataset.features[batch_ids],
                        self.true_labels_ph: np.reshape(temp_labels[batch_ids], [-1, 1]),
                        self.protected_attributes_ph: prot_class_ids[batch_ids],
                        self.keep_prob: 0.8
                    }

                    if self.debias:
                        _, _, loss_cls, loss_adv = self.sess.run([
                            classifier_minimizer, adversary_minimizer,
                            pred_labels_loss, pred_protected_attributes_loss
                        ], feed_dict=feed_dict)
                        if i % 200 == 0:
                            print(f"epoch {epoch}; iter: {i}; classifier loss: {loss_cls}; adversarial loss: {loss_adv}")
                    else:
                        _, loss_cls = self.sess.run([
                            classifier_minimizer,
                            pred_labels_loss
                        ], feed_dict=feed_dict)
                        if i % 200 == 0:
                            print(f"epoch {epoch}; iter: {i}; classifier loss: {loss_cls}")
        return self

    def predict(self, dataset):
        """Score ``dataset`` with the trained classifier.

        Args:
            dataset: Object exposing ``features``, ``favorable_label``,
                ``unfavorable_label`` and ``copy(deepcopy=True)``.

        Returns:
            A deep copy of ``dataset`` whose ``scores`` hold the classifier's
            sigmoid outputs (shape ``(n, 1)``) and whose ``labels`` hold
            ``favorable_label`` where the score exceeds 0.5 and
            ``unfavorable_label`` otherwise.

        Raises:
            RuntimeError: If called before :meth:`fit`.
        """
        if self.features_ph is None or self.pred_labels is None:
            raise RuntimeError(
                "AdversarialDebiasing.predict() called before fit(); "
                "the model graph has not been built.")

        if self.seed is not None:
            np.random.seed(self.seed)

        num_test_samples, _ = np.shape(dataset.features)
        pred_labels = []

        for start in range(0, num_test_samples, self.batch_size):
            end = min(start + self.batch_size, num_test_samples)
            # Only the features and the dropout keep probability are fed: the
            # prediction tensor does not depend on the true-label placeholder.
            feed_dict = {
                self.features_ph: dataset.features[start:end],
                self.keep_prob: 1.0  # disable dropout at inference time
            }
            batch_scores = self.sess.run(self.pred_labels, feed_dict=feed_dict)[:, 0]
            pred_labels.extend(batch_scores.tolist())

        scores = np.array(pred_labels, dtype=np.float64).reshape(-1, 1)

        dataset_new = dataset.copy(deepcopy=True)
        dataset_new.scores = scores
        # Threshold at 0.5 and translate back to the dataset's own label values.
        dataset_new.labels = np.where(
            scores > 0.5,
            dataset.favorable_label,
            dataset.unfavorable_label).astype(np.float64)
        return dataset_new
