In [4]:
import os
import pandas as pd
import numpy as np
from itertools import combinations

In [6]:
# Dataset to audit; selects the sub-directory of `data_path` that is read below.
dataset = 'german_credit'
# Root directory holding one folder per dataset. The default is the original
# author's location; set ML_AUDITS_DATA_DIR to run the notebook elsewhere
# without editing this cell.
data_path = os.environ.get('ML_AUDITS_DATA_DIR', '/home/dhasade/audits/ml-audits/data')
# Later cells select label rows with `labels.loc[<index taken from features>]`,
# so both files must yield the same row index (the default RangeIndex here).
features = pd.read_csv(os.path.join(data_path, dataset, "features.csv"))
labels = pd.read_csv(os.path.join(data_path, dataset, "labels.csv"))

In [7]:
# Protected-attribute columns of `features`, one per agent: the agent with
# index i is associated with protected_attributes[i].
# Swap in the commented-out line when working with the synthetic dataset.
# protected_attributes = ["a0", "a1"] # Synthetic data
protected_attributes = ["sex", "age"] # German credit data

In [8]:
# Number of agents: one per protected attribute (agent i <-> protected_attributes[i]).
n = len(protected_attributes)
# Denominator of every probability below: the size of the full dataset.
n_total = len(features)

# Both result dictionaries share the same nesting:
#   result[base_agent][k][agent_comb_str][binary_string][base_val]
# where
#   base_agent      : index of the agent whose attribute defines the base split
#   k               : number of collaborating agents (1 .. n-1)
#   agent_comb_str  : collaborator indices concatenated into one string
#                     (unambiguous only while every index is a single digit)
#   binary_string   : one '0'/'1' character per collaborator attribute
#   base_val        : value (0 or 1) of the base agent's attribute
# all_probs holds the fraction of ALL rows that fall in the subgroup (a joint
# frequency); all_ys holds the mean label inside the subgroup.
# Subgroups are built with equality filters against 0 and 1, so rows whose
# attribute holds any other value are left out of every subgroup.
all_probs = dict()
all_ys = dict()

for base_agent in range(n): # Base agent
    print(f'Working on base agent {base_agent}')
    base_attr = protected_attributes[base_agent]
    possible_collaborators = [i for i in range(n) if i != base_agent]
    all_probs[base_agent] = dict()
    all_ys[base_agent] = dict()

    # Split the data once per base agent. Boolean indexing already returns a
    # new frame, so no explicit copy of `features` is needed.
    X_0 = features[features[base_attr] == 0]
    y_0 = labels.loc[X_0.index]

    X_1 = features[features[base_attr] == 1]
    y_1 = labels.loc[X_1.index]

    for k in range(1, n): # Number of collaborators, 1 to n-1

        print(f'Working on k={k}')
        all_probs[base_agent][k] = dict()
        all_ys[base_agent][k] = dict()

        agent_combinations_list = list(combinations(possible_collaborators, k))

        for agent_combination in agent_combinations_list:
            agent_comb_str = ''.join([str(elem) for elem in agent_combination])

            all_probs[base_agent][k][agent_comb_str] = dict()
            all_ys[base_agent][k][agent_comb_str] = dict()

            # Every 0/1 assignment of the k collaborator attributes.
            total_strings = 2 ** (k)
            binary_strings = [format(i, f'0{k}b') for i in range(total_strings)]

            attrs = [protected_attributes[i] for i in agent_combination]
            print(f'Working on {attrs}')
            for binary_string in binary_strings:

                all_probs[base_agent][k][agent_comb_str][binary_string] = dict()
                all_ys[base_agent][k][agent_comb_str][binary_string] = dict()

                # (column, required value) for each collaborator attribute.
                pairs = [(attrs[i], int(binary_string[i])) for i in range(k)]

                # Same computation for both values of the base attribute.
                for base_val, X_base, y_base in ((0, X_0, y_0), (1, X_1, y_1)):
                    # Keep only the rows of the base subgroup that also match
                    # the collaborators' binary string.
                    X_temp = X_base
                    for attr, val in pairs:
                        X_temp = X_temp[X_temp[attr] == val]
                    y_tmp = y_base.loc[X_temp.index]
                    assert len(X_temp) == len(y_tmp), f'Length mismatch ==> X: {len(X_temp)}, y: {len(y_tmp)}'

                    all_probs[base_agent][k][agent_comb_str][binary_string][base_val] = len(X_temp) / n_total
                    # `.item()` needs the mean to be a single value, i.e. `labels`
                    # must have exactly one column.
                    all_ys[base_agent][k][agent_comb_str][binary_string][base_val] = y_tmp.mean().item()


Working on base agent 0
Working on k=1
Working on ['age']
Working on base agent 1
Working on k=1
Working on ['sex']


In [10]:
# Show the nested subgroup frequencies for base agent 1, i.e. the agent tied to
# protected_attributes[1]; keys are k -> collaborator string -> binary string
# -> base attribute value.
all_probs[1]

{1: {'0': {'0': {0: 0.105, 1: 0.205}, '1': {0: 0.085, 1: 0.605}}}}