<a href="https://colab.research.google.com/github/qahtanaa/OnSubGroupFairness/blob/main/German_close_smote_total_ratio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [115]:
%reset -f

In [116]:
!pip install aif360



In [117]:
import numpy as np
import pandas as pd
from aif360.metrics import utils
from scipy.sparse import issparse
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
from aif360.metrics import BinaryLabelDatasetMetric
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import random
from sklearn.neighbors import NearestNeighbors
from sympy import Symbol
from sympy.solvers import solve

In [118]:
# remember to import raw_dataset_german
df = pd.read_csv('/content/raw_dataset_german.csv')

In [119]:
#so now I have the German dataframe, I want to change the age and personal_status
def convert_age_to_category(age):
    if age >= 25:
        return 1 #'Adult'
    else:
        return 0 #'Youth'

def convert_sex(sex):
  if sex == 'male':
    return 1 #male
  else:
    return 0 #female

df['age'] = df['age'].apply(convert_age_to_category)
df['personal_status'] = df['personal_status'].apply(convert_sex)

print(df.head())

  status  month credit_history purpose  credit_amount savings employment  \
0    A11      6            A34     A43           1169     A65        A75   
1    A12     48            A32     A43           5951     A61        A73   
2    A14     12            A34     A46           2096     A61        A74   
3    A11     42            A32     A42           7882     A61        A74   
4    A11     24            A33     A40           4870     A61        A73   

   investment_as_income_percentage  personal_status other_debtors  ...  \
0                                4                1          A101  ...   
1                                2                0          A101  ...   
2                                2                1          A101  ...   
3                                2                1          A103  ...   
4                                3                1          A101  ...   

   property age  installment_plans housing number_of_credits  skill_level  \
0      A121   1      



---

---

DATA PREPARATION

In [120]:
class DataPreparation():
  """
  ........
  """
  def __init__(self, df, sensitive, label, priv, unpriv, fav, unfav, categorical = [] ):
      """
      Construct all necessary attributes for the data preparation.

      df : (pandas DataFrame) containing the data
      sensitive : (list(str)) specifying the column names of all sensitive features
      label : (str) specifying the label column
      priv : (list(dicts)) representation of the privileged groups
      unpriv : (list(dicts)) representation of the unprivileged groups
      fav : (str/int/..) value representing the favorable label
      unfav : (str/int/..) value representing the unfavorable label
      categorical : (list(str)) (optional) specifying column names of categorical features
      """
      self.df = df
      self.sensitive = sensitive
      self.label = label
      self.priv = priv
      self.unpriv = unpriv
      self.fav = fav
      self.unfav = unfav
      self.categorical = categorical


  def detect_missing_values(self):
      """
      Detect rows with missing values and remove them from the DataFrame.
      """
      initial_rows = len(self.df)
      self.df = self.df.dropna()
      removed_rows = initial_rows - len(self.df)

      if removed_rows > 0:
          print(f"Detected {removed_rows} rows with missing values. Removed them.")
      else:
          print("No missing values detected.") #pass

  def binary_label(self):
      """
      Check that the decision label is made of two values, change it as a binary representation
      where favorable label = 1, unfavorable label = 0.
      """
      number_label_values = self.df[self.label].nunique()
      if number_label_values == 2:
        print(f"The '{self.label}' column has only two unique values.")
        self.df[self.label].replace([self.unfav, self.fav], [0, 1], inplace=True)
      else:
        print(f"The '{self.label}' column does not have exactly two unique values, as it should.")

  def find_categorical_attributes(self):
      """
      find the categorical attributes
      """
      self.attribute_types = {}

      for column in self.df.columns:
          # Skip the 'Group' column
          if column == 'Group':
              continue
          # if the column has already been classified as categorical by the user, leave it cat
          elif column in self.categorical:
              self.attribute_types[column] = 'Categorical'
              continue
          # if the column has only two distinguished values, consider it categorical
          elif self.df[column].nunique() == 2:
            self.attribute_types[column] = 'Categorical'
            continue
          else: #if it's not the group column, or if it's not been classified as categorical by the user, check every value of the column
            num_float = 0
            num_text = 0
            thresh = 0.99
            num_att_in_column = len(self.df[column])

            for value in self.df[column]:  # Accessing all values in the column
              # Attempt to convert the value to a float
              try:
                float(value)
                num_float += 1
                continue  # Move to the next value
              except ValueError:
                pass  # If it's not a float, continue to the next check
              # If it's not an integer or a float, consider it as text
              num_text += 1

            # now see if it's categorical or numerical
            if num_float / num_att_in_column > thresh:
              self.attribute_types[column] = 'Numerical'
              continue
            else:
              self.attribute_types[column] = 'Categorical'

      #encode the categorical attributes
      # Initialize cat_features list, to have if boolean
      self.cat_features = []
      # Iterate over attribute names and determine whether they are categorical or numerical
      for attr in self.attribute_types:
          self.cat_features.append(self.attribute_types[attr] == 'Categorical')

      encoder_dict = dict()
      #self.df = self.df.drop('Group', axis=1)
      self.columns_categorical = self.df.columns[self.cat_features]

      # for each categorical column, encode it
      for column in self.columns_categorical:
          le = LabelEncoder()
          self.df[column] = le.fit_transform(self.df[column].values)
          mapping = dict(zip(le.classes_, range(len(le.classes_))))
          encoder_dict[column] = mapping
      #print(X)

      # Invert the boolean values in the list using list comprehension
      self.numerical_features = [not feature for feature in self.cat_features]
      # Use the inverted list to select numerical columns
      self.columns_numerical = self.df.columns[self.numerical_features]

      # int columns to float, otherwise gower has problems
      for column in self.columns_numerical:
          self.df[column] = self.df[column].astype(float)

      return self.attribute_types, self.cat_features, self.numerical_features


  def create_group_column(self):
      """
      Create a 'Group' column in the DataFrame based on protected attributes, privileged/unprivileged conditions, and label.
      """
      group_combinations = pd.MultiIndex.from_product([self.df[sensitive].unique() for sensitive in self.sensitive] + [self.df[self.label].unique()], names=self.sensitive + [self.label])
      #print(group_combinations)
      print(list(enumerate(group_combinations)))
      # Create a mapping between group combinations and their corresponding numbers
      group_mapping = {group: idx for idx, group in enumerate(group_combinations)}
      reverse_group_mapping = {idx: group for group, idx in group_mapping.items()}  # Create reverse mapping
      # Apply the mapping to create a new column in the DataFrame
      self.df['Group'] = pd.MultiIndex.from_frame(self.df[self.sensitive + [self.label]]).map(group_mapping)

      return reverse_group_mapping #enumerate(group_combinations)


  def train_test_split(self):
      #stratific based on the group
      X = self.df.loc[:, self.df.columns != self.label]
      y = self.df[self.label]

      self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, shuffle = True, stratify = self.df['Group']) #, random_state=42
      #print(self.X_train, self.y_train, self.X_test, self.y_test)
      #print(type(self.y_train))
      return self.X_train, self.y_train, self.X_test, self.y_test

  def standardization_numerical(self):
      # Select only the numerical columns
      train_dataset_numerical = self.X_train[self.columns_numerical]
      test_dataset_numerical = self.X_test[self.columns_numerical]

      # Create a StandardScaler instance and fit it to the training dataset
      scaler = StandardScaler().fit(train_dataset_numerical)

      # Transform the numerical features in the training and test datasets using the trained scaler
      train_dataset_scaled_numerical = scaler.transform(train_dataset_numerical)
      test_dataset_scaled_numerical = scaler.transform(test_dataset_numerical)

      # Update the training and test datasets with the scaled numerical features
      self.X_train[self.columns_numerical] = train_dataset_scaled_numerical
      self.X_test[self.columns_numerical] = test_dataset_scaled_numerical

      #concatenate X_train with y_train, and X_test with y_test
      self.X_train = pd.concat([self.X_train, self.y_train], axis=1)
      self.X_test = pd.concat([self.X_test, self.y_test], axis=1)
      return self.X_train, self.X_test

  def prepare(self):
      """
      Perform all preprocessing steps.
      """
      self.detect_missing_values()
      self.binary_label()
      self.find_categorical_attributes()
      self.create_group_column()
      self.train_test_split()
      self.standardization_numerical()
      return self

In [121]:
#use the DataPreparation class to preprocess the dataframe
data_prep = DataPreparation(df, ['personal_status','age'], 'credit', [1, 1], [0, 0], 1, 2)
data_prep.prepare()

# use this reset index to have indices from 0 to len(df)
data_prep.df = data_prep.df.reset_index(drop=True)
print(data_prep.df)

No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
     status  month  credit_history  purpose  credit_amount  savings  \
0         0    6.0               4        4         1169.0        4   
1         1   48.0               2        4         5951.0        0   
2         3   12.0               4        7         2096.0        0   
3         0   42.0               2        3         7882.0        0   
4         0   24.0               3        0         4870.0        0   
..      ...    ...             ...      ...            ...      ...   
995       3   12.0               2        3         1736.0        0   
996       0   30.0               2        1         3857.0        0   
997       3   12.0               2        4          804.0        0   
998       0   45.0               2        4         1845.0        0   
999       1   

In [122]:
X_train, X_test = data_prep.X_train, data_prep.X_test

In [123]:
# Count instances from each group in X_train
group_counts_train = X_train['Group'].value_counts().sort_index()

# Count instances from each group in X_test
group_counts_test = X_test['Group'].value_counts().sort_index()

print("Group counts in X_train:")
print(group_counts_train)

print("\nGroup counts in X_test:")
print(group_counts_test)

Group counts in X_train:
Group
0    321
1    116
2     28
3     18
4    107
5     51
6     34
7     25
Name: count, dtype: int64

Group counts in X_test:
Group
0    138
1     50
2     12
3      7
4     46
5     22
6     14
7     11
Name: count, dtype: int64


In [124]:
total_ratio = (X_train['credit'] == 1).sum() / (X_train['credit'] == 0).sum()

In [125]:
attribute_types = data_prep.attribute_types
print(attribute_types)

{'status': 'Categorical', 'month': 'Numerical', 'credit_history': 'Categorical', 'purpose': 'Categorical', 'credit_amount': 'Numerical', 'savings': 'Categorical', 'employment': 'Categorical', 'investment_as_income_percentage': 'Numerical', 'personal_status': 'Categorical', 'other_debtors': 'Categorical', 'residence_since': 'Numerical', 'property': 'Categorical', 'age': 'Categorical', 'installment_plans': 'Categorical', 'housing': 'Categorical', 'number_of_credits': 'Numerical', 'skill_level': 'Categorical', 'people_liable_for': 'Categorical', 'telephone': 'Categorical', 'foreign_worker': 'Categorical', 'credit': 'Categorical'}


In [126]:
cat_features = data_prep.cat_features
numerical_features = data_prep.numerical_features
reverse_group_mapping = data_prep.create_group_column()
theoretical_num_groups = len(reverse_group_mapping)

[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]




---


---

DISTANCES \ only Gower, for now

In [127]:
!pip install gower
import gower



In [128]:
def gower_distance(df_train, cat_features_boolean, n=5):
    """
    ndary: ndarray, that if the dataframe X precedentemente converted to numpy, it has all columns beside the Group one (yes, it also has
          the label column)
    cat_features: array of T/F where True = the attribute is categorical, False = the attribute is numerical
    n = number or closest neighbors
    """
    X = df_train
    cat_features = cat_features_boolean
    # n+1 because in the matrix with indices, at the first position there's the point itself
    n = n + 1

    # compute the matrix of the distances using Gower
    matrix_dist_gower = gower.gower_matrix(X, cat_features=cat_features)

    # compute the matrix with top-n neighbors and the distance values
    num_rows_gower = X.shape[0]

    # Initialize a list to store the results
    matrix_neighbors_gower = []

    # Mapping between original indices and indices between 0 and 699
    index_mapping = {original_index: i for i, original_index in enumerate(X.index)}
    print(index_mapping)
    reverse_index_mapping = {v: k for k, v in index_mapping.items()}
    print(reverse_index_mapping)

    # Loop through each row
    for i in range(num_rows_gower):
        top_n_neighbors_gower = gower.gower_topn(X.iloc[i:i + 1, :], X.iloc[:, :], cat_features=cat_features, n=n)
        #mapped_neighbors = [{'index': index_mapping[item['index']], 'values': item['values']} for item in top_n_neighbors_gower]
        #print(top_n_neighbors_gower)
        matrix_neighbors_gower.append(top_n_neighbors_gower)

    # Convert the list of dictionaries to a matrix
    matrix_neighbors_gower_index = np.array([item['index'] for item in matrix_neighbors_gower])
    matrix_neighbors_gower_values = np.array([item['values'] for item in matrix_neighbors_gower])
    print(matrix_neighbors_gower_index)

    # Initialize a list to store the mapped indices
    mapped_matrix_neighbors_gower_index = []

    # Loop through each row
    for row in matrix_neighbors_gower_index:
        # Map the indices in the current row to their original values
        mapped_row = np.array([reverse_index_mapping[idx] for idx in row])
        #print(mapped_row)
        # Append the mapped row to the list
        mapped_matrix_neighbors_gower_index.append(mapped_row)

    # Convert the list of mapped rows to a NumPy array
    mapped_matrix_neighbors_gower_index = np.array(mapped_matrix_neighbors_gower_index)
    #print(mapped_matrix_neighbors_gower_index)

    return mapped_matrix_neighbors_gower_index, matrix_neighbors_gower_values


In [129]:
## Save the 'Group' column from X_train
subgroup_column_train = X_train['Group']
subgroup_column_test = X_test['Group']

# Drop the 'Group' column from X_train
X_train = X_train.drop(columns=['Group'])
X_test = X_test.drop(columns=['Group'])

In [130]:
#NOTE: I DO NOT NEED CLOSEST_VALUES_GOWER, I DO NOT NEED TO SAVE TWICE THE MATRIX WITH INDICES (ONE TIME WITH 6 COLUMNS THE SECOND TIME WITH 5 COLUMNS)
# Now you can compute distances or perform any other operations on X_train without the 'Group' column
closest_index_gower, closest_values_gower = gower_distance(X_train, cat_features, 5)
print(closest_index_gower,'\n', closest_values_gower)  #'\n', closest_values_gower)

{188: 0, 292: 1, 282: 2, 40: 3, 45: 4, 307: 5, 989: 6, 58: 7, 384: 8, 142: 9, 703: 10, 667: 11, 186: 12, 366: 13, 698: 14, 785: 15, 693: 16, 619: 17, 145: 18, 546: 19, 591: 20, 261: 21, 228: 22, 147: 23, 368: 24, 210: 25, 84: 26, 397: 27, 861: 28, 486: 29, 27: 30, 932: 31, 499: 32, 858: 33, 197: 34, 840: 35, 345: 36, 42: 37, 649: 38, 657: 39, 63: 40, 484: 41, 747: 42, 511: 43, 306: 44, 95: 45, 51: 46, 89: 47, 802: 48, 225: 49, 866: 50, 181: 51, 782: 52, 38: 53, 943: 54, 722: 55, 521: 56, 192: 57, 791: 58, 363: 59, 512: 60, 539: 61, 947: 62, 633: 63, 630: 64, 931: 65, 927: 66, 697: 67, 871: 68, 867: 69, 503: 70, 154: 71, 189: 72, 623: 73, 226: 74, 873: 75, 692: 76, 236: 77, 61: 78, 800: 79, 349: 80, 67: 81, 715: 82, 229: 83, 707: 84, 909: 85, 964: 86, 981: 87, 17: 88, 393: 89, 553: 90, 46: 91, 864: 92, 13: 93, 614: 94, 594: 95, 463: 96, 544: 97, 429: 98, 711: 99, 991: 100, 29: 101, 930: 102, 491: 103, 243: 104, 289: 105, 33: 106, 833: 107, 190: 108, 163: 109, 48: 110, 387: 111, 108: 112



---





---


---

OVERSAMPLE \
NO taxonomy

In [131]:
def custom_smote_notax(X_train, closest_index_gower, cat_features, pu_ix, nu_ix, group_column_train):
    """
    X_train is the training dataset preprocessed, subgroup_column_train is a column containing the subgroup of each
    instance in X_train
    """
    cat_attr_ix = [i for i, value in enumerate(cat_features) if value]
    # Separate minority and majority class points
    #X0 = X_train[group_column_train == 0].values
    #X1 = X_train[group_column_train == 1].values
    X2_df = X_train[group_column_train == pu_ix]
    X2 = X2_df.values
    X3_df = X_train[group_column_train == nu_ix]
    X3 = X3_df.values
    #print(X2_df)
    #PP = len(X0)
    #NP = len(X1)
    PU = len(X2)
    NU = len(X3)

    if (PU / NU) > total_ratio:
        oversampling_target =  (PU / total_ratio) - NU # Oversample NU
        print(oversampling_target)
        os_df = X3_df
        os_ix = nu_ix
    else:
        oversampling_target = (total_ratio * NU)  - PU # Oversample PU
        print(oversampling_target)
        os_df = X2_df
        os_ix = pu_ix
    print(os_ix, 'os_ix')

    #os_df = X2_df
    #os_ix = pu_ix

    # Initialize synthetic samples list
    synthetic_samples = []

    while len(synthetic_samples) < oversampling_target:
        #this is the index of the point
        idx_A = random.choice(os_df.index)
        #print(idx_A)
        point_A = os_df.loc[idx_A]
        #print(point_A, 'punto A')

        nearest_neighbors = np.where(closest_index_gower[:, 0] == idx_A)[0][0]
        nearest_neighbors_indices = closest_index_gower[nearest_neighbors, 1:]
        #print(nearest_neighbors_indices, 'NN ind')

        same_subgroup_indices = [idx for idx in nearest_neighbors_indices if group_column_train.loc[idx] == os_ix]
        #print(same_subgroup_indices)
        if len(same_subgroup_indices) == 0:
            continue

        idx_B = np.random.choice(same_subgroup_indices)
        #print(idx_B, 'indice B')
        point_B = X_train.loc[idx_B]
        #print(point_B, 'point B')

        synthetic_point = np.zeros(len(point_A))
        for i in range(len(point_A)):
            if i in cat_attr_ix:
                neighbor_values = [X_train.loc[idx, X_train.columns[i]] for idx in same_subgroup_indices]
                #print(neighbor_values, 'neig values')
                synthetic_point[i] = max(set(neighbor_values), key=neighbor_values.count)
                #print(synthetic_point[i])
            else:
                epsilon = 1e-10
                alpha = np.random.rand() * (1 - 2 * epsilon) + epsilon
                synthetic_point[i] = point_A[i] + alpha * (point_B[i] - point_A[i])

        synthetic_samples.append(synthetic_point)

    synthetic_samples = np.array(synthetic_samples)

    return synthetic_samples, len(synthetic_samples), os_ix


In [132]:
def oversample_groups(X_train, closest_index_gower, cat_features, custom_smote, group_pairs, group_column_train):
    synthetic_samples = []
    synthetic_samples_group = []
    oversample_method = custom_smote
    subgroup_column_train = group_column_train

    for pu, nu in group_pairs:
        synthetic_points, synthetic_count, os_ix = oversample_method(X_train, closest_index_gower, cat_features, pu, nu, subgroup_column_train)
        # Add a column of 'pu' values to synthetic_points
        pu_column = np.full((len(synthetic_points), 1), os_ix)
        synthetic_samples.append(synthetic_points)
        synthetic_samples_group.append(pu_column)
        print(f"Oversampling for groups {pu} (positive) and {nu} (negative): Added {synthetic_count} synthetic samples in group {os_ix}.")

    # Concatenate all synthetic samples into a single matrix
    synthetic_samples_matrix = np.vstack(synthetic_samples)
    synthetic_samples_group = np.vstack(synthetic_samples_group).flatten()

    return synthetic_samples_matrix, synthetic_samples_group

In [133]:
group_pairs = [(0, 1), (2, 3), (4, 5), (6, 7)] #(0, 1),

#matrix containing all the new synthetic samples, columns containing their subgroup class label
synthetic_samples_matrix_notax, synthetic_samples_group_notax = oversample_groups(X_train, closest_index_gower, cat_features, custom_smote_notax, group_pairs, subgroup_column_train)

21.571428571428555
1 os_ix
Oversampling for groups 0 (positive) and 1 (negative): Added 22 synthetic samples in group 1.
14.0
2 os_ix
Oversampling for groups 2 (positive) and 3 (negative): Added 14 synthetic samples in group 2.
12.000000000000014
4 os_ix
Oversampling for groups 4 (positive) and 5 (negative): Added 13 synthetic samples in group 4.
24.333333333333336
6 os_ix
Oversampling for groups 6 (positive) and 7 (negative): Added 25 synthetic samples in group 6.


In [134]:
X_train_resampled_notax = pd.concat([X_train, pd.DataFrame(synthetic_samples_matrix_notax, columns=X_train.columns)], ignore_index=True)
#closest_index_gower_resampled, closest_values_gower_resampled = gower_distance(X_train_resampled_notax, cat_features, 5)
#print(closest_index_gower_resampled, 'closest index gower resampled')
subgroup_column_resampled_notax = pd.concat([subgroup_column_train, pd.Series(synthetic_samples_group_notax)], ignore_index=True)
#taxonomy_df_resampled = compute_taxonomy_df(X_train_resampled_notax, subgroup_column_resampled_notax, closest_index_gower_resampled)
#print(taxonomy_df_resampled, 'taxonomy resampled')



---


---

OVERSAMPLE \
taxonomy

In [135]:
def custom_smote_tax(X_train, closest_index_gower, cat_features, pu_ix, nu_ix, group_column_train, s=0.1, b=0.8, r=0.1, o=0):
    """
    X_train is the training dataset preprocessed, subgroup_column_train is a column containing the subgroup of each
    instance in X_train
    """
    cat_attr_ix = [i for i, value in enumerate(cat_features) if value]

    #[0, 0.4, 0.6] [0, 0.5, 0.5] [0.33,0.33,0.33] [0.2, 0.7, 0.1] [0.1, 0.8, 0.1]
    # Separate minority and majority class points
    #X0 = X_train[group_column_train == 0].values
    #X1 = X_train[group_column_train == 1].values
    X2_df = X_train[group_column_train == pu_ix]
    X2 = X2_df.values
    X3_df = X_train[group_column_train == nu_ix]
    X3 = X3_df.values

    #PP = len(X0)
    #NP = len(X1)
    PU = len(X2)
    NU = len(X3)

    if (PU / NU) > total_ratio:
        oversampling_target =  (PU / total_ratio) - NU # Oversample NU
        #print(oversampling_target)
        os_df = X3_df
        os_ix = nu_ix
    else:
        oversampling_target = (total_ratio * NU)  - PU # Oversample PU
        #print(oversampling_target)
        os_df = X2_df
        os_ix = pu_ix
    #print(os_ix, 'os_ix')


    #ratio as many as the most priviledged group
    #oversampling_target = (NU * PP) / NP - PU

    #os_df = X2_df
    #os_ix = pu_ix
    # Calculate weights for taxonomy
    weights = calculate_weights_taxonomy(os_df, closest_index_gower, X_train, group_column_train, s, b, r, o)
    #normalize so that the probabilities sum to one
    weights /= np.sum(weights)

    # Initialize synthetic samples list
    synthetic_samples = []

    while len(synthetic_samples) < oversampling_target:
        #this is the index of the point
        idx_A = np.random.choice(os_df.index, p=weights)
        #print(idx_A)
        point_A = os_df.loc[idx_A]
        #print(point_A, 'punto A')
        nearest_neighbors = np.where(closest_index_gower[:, 0] == idx_A)[0][0]
        nearest_neighbors_indices = closest_index_gower[nearest_neighbors, 1:]
        same_subgroup_indices = [idx for idx in nearest_neighbors_indices if group_column_train.loc[idx] == os_ix]
        #print(same_subgroup_indices)

        if len(same_subgroup_indices) == 0:
            continue

        idx_B = np.random.choice(same_subgroup_indices)
        #print(idx_B, 'indice B')
        point_B = X_train.loc[idx_B]
        #print(point_B, 'point B')

        synthetic_point = np.zeros(len(point_A))
        for i in range(len(point_A)):
            if i in cat_attr_ix:
                neighbor_values = [X_train.loc[idx, X_train.columns[i]] for idx in same_subgroup_indices]
                #print(neighbor_values, 'neig values')
                synthetic_point[i] = max(set(neighbor_values), key=neighbor_values.count)
                #print(synthetic_point[i])
            else:
                epsilon = 1e-10
                alpha = np.random.rand() * (1 - 2 * epsilon) + epsilon
                synthetic_point[i] = point_A[i] + alpha * (point_B[i] - point_A[i])

        synthetic_samples.append(synthetic_point)

    synthetic_samples = np.array(synthetic_samples)

    return synthetic_samples, len(synthetic_samples), os_ix

def calculate_weights_taxonomy(X2_df, closest_index_gower, X_train, group_column_train, s, b, r, o):
    weights = np.zeros(len(X2_df))

    for ix in range(0, len(X2_df)):
        #print(X2_df, 'X2_df')
        idx_complete_df = X2_df.index[ix]
        #print(idx_complete_df, 'indice nel dataset completo')
        nearest_neighbors = np.where(closest_index_gower[:,0] == idx_complete_df)[0][0]
        nearest_neighbors_indices = closest_index_gower[nearest_neighbors, 1:]
        #print(nearest_neighbors_indices, 'NN speriamo')

        same_subgroup_indices = [idx for idx in nearest_neighbors_indices if group_column_train.loc[idx] == group_column_train.loc[idx_complete_df]]
        #print(same_subgroup_indices)

        if len(same_subgroup_indices) == 5 or len(same_subgroup_indices) == 4:
            weights[ix] = s
        elif len(same_subgroup_indices) == 3 or len(same_subgroup_indices) == 2:
            weights[ix] = b
        elif len(same_subgroup_indices) == 1:
            solo_neighbor = same_subgroup_indices[0]
            #print(solo_neighbor, 'solo neigh indx')
            solo_neighbor_row = np.where(closest_index_gower[:, 0] == solo_neighbor)[0][0]
            #print(solo_neighbor_row, 'rowwww')
            neighbors_of_solo_neighbor = closest_index_gower[solo_neighbor_row, 1:]
            #print(neighbors_of_solo_neighbor, 'neigh solo')


            same_class_neighbors = sum(
                1 for neighbor_index in neighbors_of_solo_neighbor if group_column_train.loc[neighbor_index] == group_column_train.loc[idx_complete_df]
            )
            #print(same_class_neighbors, 'same class nei')

            if same_class_neighbors == 0 or same_class_neighbors == 1:
                weights[ix] = r
            else:
                weights[ix] = b
        elif len(same_subgroup_indices) == 0:
            weights[ix] = o
        else:
            weights[ix] = b

    return weights

In [136]:
group_pairs = [(0, 1), (2, 3), (4, 5), (6, 7)] #(0, 1),

#matrix containing all the new synthetic samples, columns containing their subgroup class label
synthetic_samples_matrix_tax, synthetic_samples_group_tax = oversample_groups(X_train, closest_index_gower, cat_features, custom_smote_tax, group_pairs, subgroup_column_train)

Oversampling for groups 0 (positive) and 1 (negative): Added 22 synthetic samples in group 1.
Oversampling for groups 2 (positive) and 3 (negative): Added 14 synthetic samples in group 2.
Oversampling for groups 4 (positive) and 5 (negative): Added 13 synthetic samples in group 4.
Oversampling for groups 6 (positive) and 7 (negative): Added 25 synthetic samples in group 6.


In [137]:
X_train_resampled_tax = pd.concat([X_train, pd.DataFrame(synthetic_samples_matrix_tax, columns=X_train.columns)], ignore_index=True)
#closest_index_gower_resampled, closest_values_gower_resampled = gower_distance(X_train_resampled_tax, cat_features, 5)
#print(closest_index_gower_resampled)
subgroup_column_resampled_tax = pd.concat([subgroup_column_train, pd.Series(synthetic_samples_group_tax)], ignore_index=True)
#taxonomy_df_resampled = compute_taxonomy_df(X_train_resampled_tax, subgroup_column_resampled_tax, closest_index_gower_resampled)
#print(taxonomy_df_resampled, 'taxonomy resampled')



---



---

OVERSAMPLE - USING SMOTENC

In [138]:
def custom_smotenc(X_train, closest_index_gower, cat_features, pu_ix, nu_ix, group_column_train):
    """
    X_train is the training dataset preprocessed, subgroup_column_train is a column containing the subgroup of each
    instance in X_train
    """
    cat_attr_ix = [i for i, value in enumerate(cat_features) if value]
    X2_df = X_train[group_column_train == pu_ix]
    X2 = X2_df.values
    X2_df = X2_df.reset_index(drop=True)  # Reset indices

    X3_df = X_train[group_column_train == nu_ix]
    X3 = X3_df.values
    X3_df = X3_df.reset_index(drop=True)  # Reset indices
    PU = len(X2)
    NU = len(X3)

    if (PU / NU) > total_ratio:
        oversampling_target =  (PU / total_ratio) - NU # Oversample NU
        os_df = X3_df
        os_ix = nu_ix
    else:
        oversampling_target = (total_ratio * NU)  - PU # Oversample PU
        os_df = X2_df
        os_ix = pu_ix

    synthetic_samples = []

    while len(synthetic_samples) < oversampling_target:
        idx_A = random.choice(os_df.index)
        print(idx_A, 'indice A')
        point_A = os_df.loc[idx_A]
        #print(point_A, 'punto A')

        # Compute 5 nearest neighbors of point_A
        nn_model = NearestNeighbors(n_neighbors=6)  # 6 to include itself
        nn_model.fit(os_df)
        dists, nn_indices = nn_model.kneighbors([point_A])
        #print(dists, nn_indices, 'distanze e indici')

        # Exclude the first column (index 0) from both arrays
        dists = dists[:, 1:]
        nn_indices = nn_indices[:, 1:]

        # Select a random point between the 5 nearest neighbors
        idx_B = random.choice(nn_indices[0][1:])
        print(idx_B, 'indice B')
        point_B = os_df.loc[idx_B]


        synthetic_point = np.zeros(len(point_A))
        for i in range(len(point_A)):
            if i in cat_attr_ix:  # Categorical feature
                neighbor_values = [os_df.iloc[idx][i] for idx in nn_indices[0][1:]]
                synthetic_point[i] = max(set(neighbor_values), key=neighbor_values.count)
            else:  # Numerical feature
                alpha = np.random.rand()
                synthetic_point[i] = point_A[i] + alpha * (point_B[i] - point_A[i])

        synthetic_samples.append(synthetic_point)

    synthetic_samples = np.array(synthetic_samples)

    return synthetic_samples, len(synthetic_samples), os_ix


In [139]:
group_pairs = [(0, 1), (2, 3), (4, 5), (6, 7)] #(0, 1),

#matrix containing all the new synthetic samples, columns containing their subgroup class label
synthetic_samples_matrix_smotenc, synthetic_samples_group_smotenc = oversample_groups(X_train, closest_index_gower, cat_features, custom_smotenc, group_pairs, subgroup_column_train)

113 indice A
22 indice B
110 indice A
16 indice B
32 indice A
70 indice B
51 indice A
113 indice B
9 indice A
43 indice B
36 indice A
69 indice B
76 indice A
97 indice B
5 indice A
115 indice B
100 indice A
2 indice B
6 indice A
101 indice B
87 indice A
34 indice B
83 indice A
56 indice B
92 indice A
113 indice B
100 indice A
72 indice B
29 indice A
37 indice B
29 indice A




37 indice B
81 indice A
101 indice B
59 indice A
22 indice B
54 indice A
0 indice B
21 indice A
23 indice B
103 indice A
44 indice B
28 indice A
2 indice B
Oversampling for groups 0 (positive) and 1 (negative): Added 22 synthetic samples in group 1.
13 indice A
14 indice B
22 indice A
7 indice B
10 indice A
14 indice B
4 indice A
5 indice B
5 indice A
10 indice B
16 indice A
9 indice B
1 indice A
22 indice B
16 indice A
9 indice B
8 indice A
27 indice B
16 indice A
14 indice B
26 indice A




10 indice B
13 indice A
17 indice B
11 indice A
9 indice B
20 indice A
23 indice B
Oversampling for groups 2 (positive) and 3 (negative): Added 14 synthetic samples in group 2.
101 indice A
14 indice B
83 indice A
34 indice B
52 indice A
48 indice B
65 indice A
8 indice B
23 indice A
45 indice B
1 indice A
44 indice B
93 indice A
40 indice B
13 indice A
31 indice B
62 indice A
8 indice B
94 indice A




6 indice B
39 indice A
80 indice B
74 indice A
30 indice B
32 indice A
20 indice B
Oversampling for groups 4 (positive) and 5 (negative): Added 13 synthetic samples in group 4.
13 indice A
6 indice B
27 indice A
7 indice B




6 indice A
16 indice B
20 indice A
31 indice B
9 indice A
15 indice B
22 indice A
17 indice B
30 indice A
25 indice B
3 indice A
18 indice B
5 indice A
31 indice B
31 indice A
3 indice B
6 indice A
29 indice B
16 indice A
6 indice B
11 indice A




19 indice B
20 indice A
3 indice B
21 indice A
31 indice B
3 indice A
20 indice B
1 indice A
15 indice B
30 indice A
21 indice B
10 indice A
13 indice B
7 indice A
2 indice B
4 indice A
31 indice B
2 indice A
29 indice B
7 indice A
31 indice B
19 indice A
9 indice B
16 indice A
6 indice B




Oversampling for groups 6 (positive) and 7 (negative): Added 25 synthetic samples in group 6.


In [140]:
X_train_resampled_smotenc = pd.concat([X_train, pd.DataFrame(synthetic_samples_matrix_smotenc, columns=X_train.columns)], ignore_index=True)
#closest_index_gower_resampled, closest_values_gower_resampled = gower_distance(X_train_resampled_tax, cat_features, 5)
#print(closest_index_gower_resampled)
subgroup_column_resampled_smotenc = pd.concat([subgroup_column_train, pd.Series(synthetic_samples_group_smotenc)], ignore_index=True)
#taxonomy_df_resampled = compute_taxonomy_df(X_train_resampled_tax, subgroup_column_resampled_tax, closest_index_gower_resampled)
#print(taxonomy_df_resampled, 'taxonomy resampled')



---



---

TRAIN A CLASSIFIER (LOGREG)

In [141]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import f1_score
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.metrics import utils
from sklearn.metrics import balanced_accuracy_score, f1_score, precision_score, recall_score
from aif360.algorithms.preprocessing import *
from aif360.algorithms.preprocessing.optim_preproc_helpers import distortion_functions, opt_tools
from aif360.algorithms.inprocessing import *
from aif360.algorithms.postprocessing import *
import tensorflow
import math

In [142]:
def evaluate(test_data, pred, priv_group, unpriv_group):
    """
    Return the performance and fairness results based on the original data and the predictions.

    test_data : (BinaryLabelDataset) ground truth labels
    pred : (BinaryLabelDataset) predicted labels
    priv_group : (list(dict)) representation of the privileged group
    unpriv_group : (list(dict)) representation of the unprivileged group
    """
    #Evaluation of the model
    cm = ClassificationMetric(test_data, pred, unprivileged_groups=unpriv_group, privileged_groups=priv_group)
    dm = BinaryLabelDatasetMetric(pred, unprivileged_groups=unpriv_group, privileged_groups=priv_group)

    # priv_cond = utils.compute_boolean_conditioning_vector(
    #                         test_data.protected_attributes,
    #                         test_data.protected_attribute_names,
    #                         condition=priv_group)
    # unpriv_cond = utils.compute_boolean_conditioning_vector(
    #                         test_data.protected_attributes,
    #                         test_data.protected_attribute_names,
    #                         condition=unpriv_group)
    #print((priv_cond), (unpriv_cond))
    #print(priv_group)
    # privs_orig = test_data.labels[priv_cond]
    # unprivs_orig = test_data.labels[unpriv_cond]
    # privs_pred = pred.labels[priv_cond]
    # unprivs_pred = pred.labels[unpriv_cond]

    # Concatenate privs_orig and unprivs_orig
    # all_orig_labels = np.concatenate([privs_orig, unprivs_orig])

    # # Concatenate privs_pred and unprivs_pred
    # all_pred_labels = np.concatenate([privs_pred, unprivs_pred])
    # print(len(privs_orig), len(unprivs_orig), len(privs_pred), len(unprivs_pred))

    measure_scores = {
        'b_acc': balanced_accuracy_score(test_data.labels, pred.labels),
        'acc' : cm.accuracy(),
        'precision': precision_score(test_data.labels, pred.labels),
        'recall': recall_score(test_data.labels, pred.labels),
        'f1': f1_score(test_data.labels, pred.labels),
        'SP' : dm.disparate_impact(),
        'AOD' : cm.average_odds_difference(),
        'TPR': (cm.true_positive_rate_difference()),
        'FPR': (cm.false_positive_rate_difference()),
        'C'  : dm.consistency()[0],
        'TNR': (cm.true_negative_rate(privileged=True)-cm.true_negative_rate(privileged=False)),
        'FNR': cm.false_negative_rate_difference(),
        'PPV': (cm.positive_predictive_value(privileged=True)-cm.positive_predictive_value(privileged=False)),
        'FDR': cm.false_discovery_rate_difference(),
        'confmat' : cm.binary_confusion_matrix()
    }

    measure_names = ['b_acc', 'acc', 'precision', 'recall', 'f1', 'SP', 'AOD', 'TPR', 'FPR', 'C', 'TNR', 'FNR', 'PPV', 'FDR', 'confmat']
    results = [measure_scores[elem] for elem in measure_names]

    return results


In [143]:
def evaluate_model_performance(X_train, X_test, privileged_groups, unprivileged_groups, weights=None):
    # Define the protected attributes
    protected_attributes = ['personal_status', 'age']

    # If weights is not provided, create an array of ones with the same length as X_train
    if weights is None:
        weights = np.ones(len(X_train))

    # Define the label and favorable/unfavorable labels
    label_name = 'credit'
    favorable_label = 1
    unfavorable_label = 0

    # Create BinaryLabelDataset for training and testing data
    binary_ds_train = BinaryLabelDataset(df=X_train, label_names=[label_name],
                                         protected_attribute_names=protected_attributes,
                                         favorable_label=favorable_label, unfavorable_label=unfavorable_label)
    binary_ds_test = BinaryLabelDataset(df=X_test, label_names=[label_name],
                                        protected_attribute_names=protected_attributes,
                                        favorable_label=favorable_label, unfavorable_label=unfavorable_label)

    # Calculate metrics for original dataset
    metric_orig_train = BinaryLabelDatasetMetric(binary_ds_train,
                                                 unprivileged_groups=unprivileged_groups,
                                                 privileged_groups=privileged_groups)
    print("Disparate impact (of original labels) = %f" % metric_orig_train.disparate_impact())
    print("Difference in statistical parity (of original labels) = %f"
          % metric_orig_train.statistical_parity_difference())
    print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())

     # Calculate metrics for original dataset
    metric_orig_test = BinaryLabelDatasetMetric(binary_ds_test,
                                                 unprivileged_groups=unprivileged_groups,
                                                 privileged_groups=privileged_groups)
    print("Disparate impact (of original labels) (test) = %f" % metric_orig_test.disparate_impact())
    print("Difference in statistical parity (of original labels) (test) = %f"
          % metric_orig_test.statistical_parity_difference())
    print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())

    # Train a logistic regression model
    logreg = LogisticRegression()
    logreg.fit(X_train.drop(columns= [label_name]), X_train[label_name], sample_weight=weights) #columns= [label_name] + protected_attributes

    # Test the model on the test dataset
    result_LR = logreg.predict(X_test.drop(columns= [label_name])) #columns= [label_name] + protected_attributes

    # Display the confusion matrix
    print(confusion_matrix(result_LR, X_test[label_name]))

    # Create BinaryLabelDataset for predicted test data
    X_test_with_label_predict = pd.concat([X_test.drop(columns=[label_name]), pd.Series(result_LR, name=label_name,
                                                                                       index=X_test.index)], axis=1)
    binary_ds_test_pred = BinaryLabelDataset(df=X_test_with_label_predict, label_names=[label_name],
                                             protected_attribute_names=protected_attributes,
                                             favorable_label=favorable_label, unfavorable_label=unfavorable_label)

    # Evaluate model performance using evaluate method
    results = evaluate(binary_ds_test, binary_ds_test_pred, priv_group=privileged_groups, unpriv_group=unprivileged_groups)

    return results




---



---

Dataset original

In [144]:
# Define privileged and unprivileged groups
privileged_groups = [{'personal_status': 1, 'age': 1}]
unprivileged_groups = [{'personal_status': 0, 'age': 0}]

# Evaluate model performance
results_orig = evaluate_model_performance(X_train, X_test, privileged_groups, unprivileged_groups)

Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.752571
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.729333
[[ 43  24]
 [ 47 186]]


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())




---



---

Dataset resampled no tax

In [145]:
# Evaluate model performance
results_notax = evaluate_model_performance(X_train_resampled_notax, X_test, privileged_groups, unprivileged_groups)

Disparate impact (of original labels) = 1.004339
Difference in statistical parity (of original labels) = 0.003035
Individual fairness metric (consistency) = 0.770026
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.729333
[[ 37  20]
 [ 53 190]]


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())




---



---

Dataset resampled tax

In [146]:
# Evaluate model performance
#[0, 0.4, 0.6] [0, 0.5, 0.5] [0.33,0.33,0.33] [0.2, 0.7, 0.1] [0.1, 0.8, 0.1]
results_tax = evaluate_model_performance(X_train_resampled_tax, X_test, privileged_groups, unprivileged_groups)

Disparate impact (of original labels) = 1.004339
Difference in statistical parity (of original labels) = 0.003035
Individual fairness metric (consistency) = 0.772610
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.729333
[[ 37  21]
 [ 53 189]]


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())




---



---

Dataset resampled SMOTENC


In [147]:
# Evaluate model performance
results_smotenc = evaluate_model_performance(X_train_resampled_smotenc, X_test, privileged_groups, unprivileged_groups)

Disparate impact (of original labels) = 1.004339
Difference in statistical parity (of original labels) = 0.003035
Individual fairness metric (consistency) = 0.762791
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.729333
[[ 35  22]
 [ 55 188]]


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())





---


---


---

OTHER MITIGATION ALGORITHMS - FAIR-SMOTE, REWEIGHING, GERRYFAIR, REMEDY



---


---


---

Fair-SMOTE

In [148]:
!git clone https://github.com/joymallyac/Fair-SMOTE.git

fatal: destination path 'Fair-SMOTE' already exists and is not an empty directory.


In [149]:
import sys
# Define the new list of directories
new_path = ['/content/Fair-SMOTE']  # Adjust the path as needed

# Replace sys.path with the new list
sys.path = new_path
print(sys.path)

from Generate_Samples import generate_samples
from Measure import measure_final_score, calculate_recall, calculate_far, calculate_precision, calculate_accuracy
from SMOTE import smote

['/content/Fair-SMOTE']


In [150]:
def oversample_fair_smote(X_train, group_counts_train, protected_attribute1, protected_attribute2):
    # Extracting group counts
    zero_zero_zero = group_counts_train[7]
    zero_zero_one = group_counts_train[6]
    zero_one_zero = group_counts_train[5]
    zero_one_one = group_counts_train[4]
    one_zero_zero = group_counts_train[3]
    one_zero_one = group_counts_train[2]
    one_one_zero = group_counts_train[1]
    one_one_one = group_counts_train[0]

    # Finding maximum
    maximum = max(zero_zero_zero, zero_zero_one, zero_one_zero, zero_one_one, one_zero_zero, one_zero_one, one_one_zero, one_one_one)
    print(f"Maximum count: {maximum}")

    # Printing which group has maximum count
    if maximum == zero_zero_zero:
        print("zero_zero_zero is maximum")
    elif maximum == zero_zero_one:
        print("zero_zero_one is maximum")
    elif maximum == zero_one_zero:
        print("zero_one_zero is maximum")
    elif maximum == zero_one_one:
        print("zero_one_one is maximum")
    elif maximum == one_zero_zero:
        print("one_zero_zero is maximum")
    elif maximum == one_zero_one:
        print("one_zero_one is maximum")
    elif maximum == one_one_zero:
        print("one_one_zero is maximum")
    elif maximum == one_one_one:
        print("one_one_one is maximum")

    # Calculating number of samples to be increased for each group
    zero_zero_zero_to_be_increased = maximum - zero_zero_zero
    zero_zero_one_to_be_increased = maximum - zero_zero_one
    zero_one_zero_to_be_increased = maximum - zero_one_zero
    zero_one_one_to_be_increased = maximum - zero_one_one
    one_zero_zero_to_be_increased = maximum - one_zero_zero
    one_zero_one_to_be_increased = maximum - one_zero_one
    one_one_zero_to_be_increased = maximum - one_one_zero
    one_one_one_to_be_increased = maximum - one_one_one

    print(f"Counts to be increased for each group:")
    print(f"zero_zero_zero: {zero_zero_zero_to_be_increased}")
    print(f"zero_zero_one: {zero_zero_one_to_be_increased}")
    print(f"zero_one_zero: {zero_one_zero_to_be_increased}")
    print(f"zero_one_one: {zero_one_one_to_be_increased}")
    print(f"one_zero_zero: {one_zero_zero_to_be_increased}")
    print(f"one_zero_one: {one_zero_one_to_be_increased}")
    print(f"one_one_zero: {one_one_zero_to_be_increased}")
    print(f"one_one_one: {one_one_one_to_be_increased}")

    # Filtering dataframes based on class and protected attributes
    df_zero_zero_zero = X_train[(X_train['credit'] == 0) & (X_train[protected_attribute1] == 0) & (X_train[protected_attribute2] == 0)]
    df_zero_zero_one = X_train[(X_train['credit'] == 1) & (X_train[protected_attribute1] == 0) & (X_train[protected_attribute2] == 0)]
    df_zero_one_zero = X_train[(X_train['credit'] == 0) & (X_train[protected_attribute1] == 0) & (X_train[protected_attribute2] == 1)]
    df_zero_one_one = X_train[(X_train['credit'] == 1) & (X_train[protected_attribute1] == 0) & (X_train[protected_attribute2] == 1)]
    df_one_zero_zero = X_train[(X_train['credit'] == 0) & (X_train[protected_attribute1] == 1) & (X_train[protected_attribute2] == 0)]
    df_one_zero_one = X_train[(X_train['credit'] == 1) & (X_train[protected_attribute1] == 1) & (X_train[protected_attribute2] == 0)]
    df_one_one_zero = X_train[(X_train['credit'] == 0) & (X_train[protected_attribute1] == 1) & (X_train[protected_attribute2] == 1)]
    df_one_one_one = X_train[(X_train['credit'] == 1) & (X_train[protected_attribute1] == 1) & (X_train[protected_attribute2] == 1)]

    df_zero_zero_zero['personal_status'] = df_zero_zero_zero['personal_status'].astype(str)
    df_zero_zero_zero['age'] = df_zero_zero_zero['age'].astype(str)

    df_zero_zero_one['personal_status'] = df_zero_zero_one['personal_status'].astype(str)
    df_zero_zero_one['age'] = df_zero_zero_one['age'].astype(str)

    df_zero_one_zero['personal_status'] = df_zero_one_zero['personal_status'].astype(str)
    df_zero_one_zero['age'] = df_zero_one_zero['age'].astype(str)

    df_zero_one_one['personal_status'] = df_zero_one_one['personal_status'].astype(str)
    df_zero_one_one['age'] = df_zero_one_one['age'].astype(str)

    df_one_zero_zero['personal_status'] = df_one_zero_zero['personal_status'].astype(str)
    df_one_zero_zero['age'] = df_one_zero_zero['age'].astype(str)

    df_one_zero_one['personal_status'] = df_one_zero_one['personal_status'].astype(str)
    df_one_zero_one['age'] = df_one_zero_one['age'].astype(str)

    df_one_one_zero['personal_status'] = df_one_one_zero['personal_status'].astype(str)
    df_one_one_zero['age'] = df_one_one_zero['age'].astype(str)

    df_one_one_one['personal_status'] = df_one_one_one['personal_status'].astype(str)
    df_one_one_one['age'] = df_one_one_one['age'].astype(str)

    #print(df_zero_zero_zero.type)
    # Generating samples for each group
    df_zero_zero_zero = generate_samples(zero_zero_zero_to_be_increased, df_zero_zero_zero, 'Germann')
    df_zero_zero_one = generate_samples(zero_zero_one_to_be_increased, df_zero_zero_one, 'Germann')
    df_zero_one_zero = generate_samples(zero_one_zero_to_be_increased, df_zero_one_zero, 'Germann')
    df_zero_one_one = generate_samples(zero_one_one_to_be_increased, df_zero_one_one, 'Germann')
    df_one_zero_zero = generate_samples(one_zero_zero_to_be_increased, df_one_zero_zero, 'Germann')
    df_one_zero_one = generate_samples(one_zero_one_to_be_increased, df_one_zero_one, 'Germann')
    df_one_one_zero = generate_samples(one_one_zero_to_be_increased, df_one_one_zero, 'Germann')
    df_one_one_one = generate_samples(one_one_one_to_be_increased, df_one_one_one, 'Germann')

    # Concatenating dataframes
    X_train_resampled_fair_smote = pd.concat([df_zero_zero_zero, df_zero_zero_one, df_zero_one_zero, df_zero_one_one,
                                              df_one_zero_zero, df_one_zero_one, df_one_one_zero, df_one_one_one])
    X_train_resampled_fair_smote.columns = X_train.columns

    return X_train_resampled_fair_smote



In [151]:
protected_attribute1 = 'personal_status'
protected_attribute2 = 'age'

In [152]:
X_train_resampled_fair_smote = oversample_fair_smote(X_train, group_counts_train, protected_attribute1, protected_attribute2)

Maximum count: 321
one_one_one is maximum
Counts to be increased for each group:
zero_zero_zero: 296
zero_zero_one: 287
zero_one_zero: 270
zero_one_one: 214
one_zero_zero: 303
one_zero_one: 293
one_one_zero: 205
one_one_one: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['personal_status'] = df_zero_zero_zero['personal_status'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['age'] = df_zero_zero_zero['age'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_one['personal_status'] = df_zero_zero_on

In [153]:
# Evaluate model performance
results_fair_smote = evaluate_model_performance(X_train_resampled_fair_smote, X_test, privileged_groups, unprivileged_groups)

Disparate impact (of original labels) = 1.000000
Difference in statistical parity (of original labels) = 0.000000


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Individual fairness metric (consistency) = 0.914097
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.729333
[[ 49  44]
 [ 41 166]]




---



---



---

REWEIGHING - https://github.com/Trusted-AI/AIF360/blob/main/examples/demo_reweighing_preproc.ipynb

In [154]:
!git clone https://github.com/IBM/AIF360.git

fatal: destination path 'AIF360' already exists and is not an empty directory.


In [155]:
# import sys

In [156]:
#from aif360.algorithms.preprocessing.reweighing import Reweighing
new_path = ['/content']
sys.path = new_path
from reweighing_cust import Reweighing
# from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
#         import load_preproc_data_adult, load_preproc_data_german, load_preproc_data_compas

In [157]:
class CustomDataFrame(pd.DataFrame):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.protected_attribute_names = ['personal_status', 'age']
        self.protected_attributes = np.column_stack([self['personal_status'], self['age']])
        self.labels = self['credit'].values.reshape(-1, 1)
        self.favorable_label = 1
        self.unfavorable_label = 0
        self.instance_weights = np.ones(len(self))
        self.privileged_protected_attributes = [np.array([1.]), np.array([1.])]
        self.unprivileged_protected_attributes = [np.array([0.]), np.array([0.])]
        self.feature_names = self.columns[self.columns != 'credit'].tolist()

X_train = CustomDataFrame(X_train)

  self.protected_attribute_names = ['personal_status', 'age']
  self.protected_attributes = np.column_stack([self['personal_status'], self['age']])
  self.labels = self['credit'].values.reshape(-1, 1)
  self.instance_weights = np.ones(len(self))
  self.privileged_protected_attributes = [np.array([1.]), np.array([1.])]
  self.unprivileged_protected_attributes = [np.array([0.]), np.array([0.])]
  self.feature_names = self.columns[self.columns != 'credit'].tolist()


In [158]:
RW = Reweighing(unprivileged_groups=unprivileged_groups,
               privileged_groups=privileged_groups)

RW.fit(X_train)
dataset_transf_train = RW.transform(X_train)

In [159]:
results_reweighing = evaluate_model_performance(X_train, X_test, privileged_groups, unprivileged_groups, weights=X_train.instance_weights)

Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.752571
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.729333


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[[ 41  23]
 [ 49 187]]




---



---



---

GERRY FAIR

In [160]:
import copy
from aif360.algorithms.inprocessing.gerryfair import heatmap
from aif360.algorithms.inprocessing.gerryfair.clean import array_to_tuple
from aif360.algorithms.inprocessing.gerryfair.learner import Learner
from aif360.algorithms.inprocessing.gerryfair.auditor import *
from aif360.algorithms.inprocessing.gerryfair.classifier_history import ClassifierHistory
from aif360.algorithms import Transformer

In [161]:
classifier = GerryFairClassifier(C=10, printflag=True, heatmapflag=False, max_iters=100, gamma=0.01, fairness_def='FP')
# NOTE : fairness definition = FP, so it decreases the values of FPR. FN works bad, SP doesn't work
# even though FPR is small, SP is not that close to 1
protected_attributes = ['personal_status', 'age']
binary_ds_train = BinaryLabelDataset(df=X_train, label_names=['credit'],
                                    protected_attribute_names=protected_attributes,
                                    favorable_label=1, unfavorable_label=0)
binary_ds_test =  BinaryLabelDataset(df=X_test, label_names=['credit'],
                                    protected_attribute_names=protected_attributes,
                                    favorable_label=1, unfavorable_label=0)

# Fit the classifier on the dataset
classifier.fit(binary_ds_train, early_termination=False)

# Make predictions on the test set
binary_ds_test_pred = classifier.predict(binary_ds_test)

# Evaluate model performance using evaluate method
results_gerryF = evaluate(binary_ds_test, binary_ds_test_pred, priv_group=privileged_groups, unpriv_group=unprivileged_groups)

iteration: 1, error: 0.22714285714285715, fairness violation: 0.013496598639455785, violated group size: 0.10857142857142857
iteration: 2, error: 0.3292857142857143, fairness violation: 0.00906122448979592, violated group size: 0.10857142857142857
iteration: 3, error: 0.32761904761904764, fairness violation: 0.008553287981859425, violated group size: 0.1657142857142857
iteration: 4, error: 0.3167857142857143, fairness violation: 0.008486394557823133, violated group size: 0.1657142857142857
iteration: 5, error: 0.30742857142857144, fairness violation: 0.008897959183673476, violated group size: 0.1657142857142857
iteration: 6, error: 0.298095238095238, fairness violation: 0.0094421768707483, violated group size: 0.13428571428571429
iteration: 7, error: 0.28938775510204084, fairness violation: 0.009675413022351814, violated group size: 0.1657142857142857
iteration: 8, error: 0.2817857142857143, fairness violation: 0.010090136054421772, violated group size: 0.13428571428571429
iteration: 9



---



---



---

REMEDY

In [162]:
import sys
sys.path.append('/content')  # Add the directory containing remedy_cust.py to the Python path
from remedy_cust import *

In [163]:
#columns_all = ["List of all of the columns in the dataset except y_label"]
columns_all = X_train.drop(columns=['credit'])

#compas_y = "column name of y_label"
label_y = 'credit'

#columns_compas = ["List of all of the protected attributes in the dataset"]
columns_protected = ['personal_status', 'age']

In [164]:
#names = ["Output of get_temp function, all of the attributes for given group"]
#temp2 = ["Output of get_temp function, sum count by group"]
temp2, names = get_temp(X_train, columns_protected, label_y)
print(temp2, names)
#temp2 counts the number of instances for each combination of sensitive attr and label (like my group count)
#names is the list of sensit attr
unfair_group, unfair_names, skew_candidates, unfair_dict = get_unfair_group(columns_protected, [])
#all empty beside skew candidates that is the list of sensitive attr
print(unfair_group, unfair_names, skew_candidates, unfair_dict)
#all_names is a dict that has 0:[], 1:sens attr1, 2:sens attr2, 3:sens attr1, sens attr2. So for each key (number) it
#associates all possible combinations of sens attr
all_names = candidate_groups(skew_candidates, unfair_dict, columns_protected, unfair_names)
#name values is a dict with sens attrib: possible values
names_values = name_val_dict(X_train, names)

all_names_lst = list(all_names.keys())[1:] # CHANGED HERE
all_names_lst.reverse()
#all_names_lst is a list of the keys of the dict all_names, so numbers [3,2,1]
all_names_lst

['personal_status', 'age', 'credit']
   personal_status  age  credit  cnt
0                0    0       0   25
1                0    0       1   34
2                0    1       0   51
3                0    1       1  107
4                1    0       0   18
5                1    0       1   28
6                1    1       0  116
7                1    1       1  321 ['personal_status', 'age']
[] [] ['personal_status', 'age'] {}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0


[3, 2, 1]

In [165]:
#get all of the candidate groups possible with the combos and names
filter_count = 30
#copy of the dataset
new_train_data = copy.deepcopy(X_train)
print(new_train_data.shape[0])

#iterate over all the names to get the temp2 df for each name
for a in all_names_lst:
  print("?????/////")
  print(a)
  #temp2 counts the number of instances for each combination of sensitive attr and label (like my group count)
  #names is the list of sensit attr
  temp2, names = get_temp(new_train_data, all_names[a], label_y)
  print(temp2, '\n', names, 'temp2,names \n')
  #temp is a df with the entire columns, where the columns are in the first iteration all sens attr and the label
  # in the second iteration are one sens attrib and the label
  # temp_g counts the instances considering only the sens attrib (first iteration both sens attr, then only one at the time)
  temp, temp_g = get_temp_g(new_train_data, names, label_y)
  print(temp,'\n', temp_g, 'temp,temp_g \n')
  temp_g = temp_g[temp_g['cnt'] > filter_count]
  #lst_of_counts is a list of df, the first df have first sens attr and the credit + count, the second is second sens attr
  #and the credit + count
  lst_of_counts = compute_lst_of_counts(temp, names, label_y)
  print(lst_of_counts, 'listof counts \n')

  need_pos, need_neg = compute_problematic_opt(temp2, temp_g, names, label_y, lst_of_counts)
  print("The sets of need pos and neg are")
  print(need_pos)
  print(need_neg)
  new_train_data['skewed'] = 0
  new_train_data["diff"] = 0
  print("started duplication")
  new_train_data = naive_duplicate(new_train_data, temp2, names, need_pos, need_neg, label_y)
  print(new_train_data.shape[0])
  print("label y ", new_train_data[label_y].value_counts())
#new_train_x = pd.DataFrame(new_train_data, columns = columns_all)
new_train_label = pd.DataFrame(new_train_data, columns = [label_y])
new_train_label = new_train_label[label_y]
new_train_label = new_train_label.astype('int')

700
?????/////
3
['personal_status', 'age', 'credit']
   personal_status  age  credit  cnt
0                0    0       0   25
1                0    0       1   34
2                0    1       0   51
3                0    1       1  107
4                1    0       0   18
5                1    0       1   28
6                1    1       0  116
7                1    1       1  321 
 ['personal_status', 'age'] temp2,names 

     personal_status  age  credit  cnt
188                1    0       0    0
292                0    1       1    0
282                1    1       1    0
40                 1    1       1    0
45                 0    1       1    0
..               ...  ...     ...  ...
803                1    1       1    0
252                0    0       0    0
473                1    1       1    0
314                1    1       1    0
458                0    1       1    0

[700 rows x 4 columns] 
    personal_status  age  cnt
0                0    0   59
1                0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0


names  ['personal_status', 'age'] [1, 0]
1.9565217391304348 321 116 48.0666666666667
Adding 48 negative records
names  ['personal_status', 'age'] [1, 1]
801
label y  credit
1    543
0    258
Name: count, dtype: int64
?????/////
2


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0


['age', 'credit']
   age  credit  cnt
0    0       0   43
1    0       1   94
2    1       0  215
3    1       1  449 
 ['age'] temp2,names 

     age  credit  cnt
0      0       0    0
1      1       1    0
2      1       1    0
3      1       1    0
4      1       1    0
..   ...     ...  ...
796    1       0    0
797    1       0    0
798    1       0    0
799    1       0    0
800    1       0    0

[801 rows x 3 columns] 
    age  cnt
0    0  137
1    1  664 temp,temp_g 

[credit
0    258
1    543
Name: cnt, dtype: int64] listof counts 

The sets of need pos and neg are
[]
[]
started duplication
801
label y  credit
1    543
0    258
Name: count, dtype: int64
?????/////
1
['personal_status', 'credit']
   personal_status  credit  cnt
0                0       0   76
1                0       1  177
2                1       0  182
3                1       1  366 
 ['personal_status'] temp2,names 

     personal_status  credit  cnt
0                  1       0    0
1                  0 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0


In [166]:
X_train_resampled_remedy = new_train_data.drop(columns= ['skewed', 'diff'])

In [167]:
# Evaluate model performance
results_remedy = evaluate_model_performance(X_train_resampled_remedy, X_test, privileged_groups, unprivileged_groups)

Disparate impact (of original labels) = 1.000463
Difference in statistical parity (of original labels) = 0.000306
Individual fairness metric (consistency) = 0.759800
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.729333
[[ 40  27]
 [ 50 183]]


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())




---





---



---

COMPARE

In [171]:
# Define the arrays
arrays = [results_orig, results_notax, results_tax, results_smotenc, results_fair_smote, results_reweighing, results_gerryF, results_remedy]

# Define the headers
headers = ['b_acc', 'acc', 'prec', 'recall', 'f1', 'SP', 'AOD', 'TPR', 'FPR', 'C', 'TNR', 'FNR', 'PPV', 'FDR']

# Transpose the arrays
arrays_transposed = np.transpose(arrays)

print('\t \t none ,wo taxo, w taxo, smonc, fairsm, reweigh, gerryF; remed')
# Print the values with corresponding headers
for header, row in zip(headers, arrays_transposed):
    row_values = ['{:.4f}'.format(value) for value in row]
    print(header +'\t'+ '\t' + '\t'.join(row_values))

print(results_orig[-1])
print(results_notax[-1])
print(results_tax[-1])
print(results_reweighing[-1])
print(results_fair_smote[-1])

	 	 none ,wo taxo, w taxo, smonc, fairsm, reweigh, gerryF; remed
b_acc		0.6817	0.6579	0.6556	0.6421	0.6675	0.6730	0.6619	0.6579
acc		0.7633	0.7567	0.7533	0.7433	0.7167	0.7600	0.7667	0.7433
prec		0.7983	0.7819	0.7810	0.7737	0.8019	0.7924	0.7823	0.7854
recall		0.8857	0.9048	0.9000	0.8952	0.7905	0.8905	0.9238	0.8714
f1		0.8397	0.8389	0.8363	0.8300	0.7962	0.8386	0.8472	0.8262
SP		0.6972	1.1178	1.1766	1.1766	1.3383	1.0231	0.7520	1.1817
AOD		-0.1998	0.1616	0.2171	0.2107	0.2920	0.0807	-0.1670	0.2033
TPR		-0.0414	0.1159	0.1159	0.1232	0.1967	0.1159	-0.0776	0.1594
FPR		-0.3582	0.2073	0.3182	0.2982	0.3873	0.0455	-0.2564	0.2473
C		0.8620	0.8767	0.8687	0.8813	0.8307	0.8693	0.8807	0.8560
TNR		-0.3582	0.2073	0.3182	0.2982	0.3873	0.0455	-0.2564	0.2473
FNR		0.0414	-0.1159	-0.1159	-0.1232	-0.1967	-0.1159	0.0776	-0.1594
PPV		-0.0360	0.1880	0.2212	0.2144	0.2369	0.1299	0.0563	0.1922
FDR		-0.0360	0.1880	0.2212	0.2144	0.2369	0.1299	0.0562	0.1922
{'TP': 186.0, 'FP': 47.0, 'TN': 43.0, 'FN': 24.0}
{'TP': 190.0,

In [172]:
class ModelEvaluator:
    def __init__(self, df, protected_attributes, label_name, privileged, unprivileged, fav, unfav, num_iterations=10, oversampling=None):
        self.df = df
        self.protected_attributes = protected_attributes
        self.label_name = label_name
        self.privileged = privileged
        self.unprivileged = unprivileged
        self.fav = fav
        self.unfav = unfav
        self.num_iterations = num_iterations
        self.oversampling = oversampling

    def evaluate_model_performance_mean(self):
        results_list = []

        for _ in range(self.num_iterations):
            # Prepare data
            data_prep = DataPreparation(self.df, self.protected_attributes, self.label_name,
                                        self.privileged, self.unprivileged, self.fav, self.unfav)
            data_prep.prepare()
            data_prep.df = data_prep.df.reset_index(drop=True)
            X_train, X_test = data_prep.X_train, data_prep.X_test

            cat_features = data_prep.cat_features
            numerical_features = data_prep.numerical_features

            # Count instances from each group in X_train
            group_counts_train = X_train['Group'].value_counts().sort_index()

            # Save the 'Group' column from X_train
            subgroup_column_train = X_train['Group']
            subgroup_column_test = X_test['Group']

            # Drop the 'Group' column from X_train
            X_train = X_train.drop(columns=['Group'])
            X_test = X_test.drop(columns=['Group'])

            total_ratio = (X_train['credit'] == 1).sum() / (X_train['credit'] == 0).sum()

            # Define privileged and unprivileged groups
            # privileged_groups = [{attr: self.favorable_label[i] for i, attr in enumerate(self.protected_attributes)}]
            # unprivileged_groups = [{attr: self.unfavorable_label[i] for i, attr in enumerate(self.protected_attributes)}]
            privileged_groups = [{'personal_status': 1, 'age': 1}]
            unprivileged_groups = [{'personal_status': 0, 'age': 0}]

            if self.oversampling is None:
                results = evaluate_model_performance(X_train, X_test, privileged_groups, unprivileged_groups)
                results_list.append(results[:14])

            elif self.oversampling == 'custom_smote_notax':
                # Compute distances
                closest_index_gower, closest_values_gower = gower_distance(X_train, cat_features, 5)

                # Oversample
                group_pairs = [(0, 1), (2, 3), (4, 5), (6, 7)]
                synthetic_samples_matrix_notax, synthetic_samples_group_notax = oversample_groups(X_train, closest_index_gower,
                                                                                                  cat_features, custom_smote_notax,
                                                                                                  group_pairs, subgroup_column_train)
                # Create resampled matrix
                X_train_resampled_notax = pd.concat([X_train, pd.DataFrame(synthetic_samples_matrix_notax, columns=X_train.columns)],
                                                    ignore_index=True)
                subgroup_column_resampled_notax = pd.concat([subgroup_column_train, pd.Series(synthetic_samples_group_notax)],
                                                            ignore_index=True)
                # Evaluate model performance
                results = evaluate_model_performance(X_train_resampled_notax, X_test, privileged_groups, unprivileged_groups)
                results_list.append(results[:14])

            elif self.oversampling == 'custom_smote_tax':
                # Compute distances
                closest_index_gower, closest_values_gower = gower_distance(X_train, cat_features, 5)

                # Oversample
                group_pairs = [(0, 1), (2, 3), (4, 5), (6, 7)]
                synthetic_samples_matrix_tax, synthetic_samples_group_tax = oversample_groups(X_train, closest_index_gower,
                                                                                              cat_features, custom_smote_tax,
                                                                                                  group_pairs, subgroup_column_train)
                # Create resampled matrix
                X_train_resampled_tax = pd.concat([X_train, pd.DataFrame(synthetic_samples_matrix_tax, columns=X_train.columns)],
                                                    ignore_index=True)
                subgroup_column_resampled_tax = pd.concat([subgroup_column_train, pd.Series(synthetic_samples_group_tax)],
                                                            ignore_index=True)
                # Evaluate model performance
                results = evaluate_model_performance(X_train_resampled_tax, X_test, privileged_groups, unprivileged_groups)
                results_list.append(results[:14])

            elif self.oversampling == 'custom_smotenc':
                closest_index_gower = [1,2]
                # Oversample
                group_pairs = [(0, 1), (2, 3), (4, 5), (6, 7)]
                synthetic_samples_matrix_smotenc, synthetic_samples_group_smotenc = oversample_groups(X_train, closest_index_gower,
                                                                                              cat_features, custom_smotenc,
                                                                                                  group_pairs, subgroup_column_train)
                # Create resampled matrix
                X_train_resampled_smotenc = pd.concat([X_train, pd.DataFrame(synthetic_samples_matrix_smotenc, columns=X_train.columns)],
                                                    ignore_index=True)
                subgroup_column_resampled_smotenc = pd.concat([subgroup_column_train, pd.Series(synthetic_samples_group_smotenc)],
                                                            ignore_index=True)
                # Evaluate model performance
                results = evaluate_model_performance(X_train_resampled_smotenc, X_test, privileged_groups, unprivileged_groups)
                results_list.append(results[:14])

            elif self.oversampling == 'Fair-SMOTE':
                X_train_resampled_fair_smote = oversample_fair_smote(X_train, group_counts_train,
                                                                     protected_attribute1 = 'personal_status',
                                                                     protected_attribute2= 'age')
                # Evaluate model performance
                results = evaluate_model_performance(X_train_resampled_fair_smote, X_test, privileged_groups, unprivileged_groups)
                results_list.append(results[:14])

            elif self.oversampling == 'Reweighing':
                X_train = CustomDataFrame(X_train)
                RW = Reweighing(unprivileged_groups=unprivileged_groups,
                                privileged_groups=privileged_groups)
                RW.fit(X_train)
                dataset_transf_train = RW.transform(X_train)

                # Evaluate model performance
                results = evaluate_model_performance(X_train, X_test, privileged_groups, unprivileged_groups, weights=X_train.instance_weights)
                results_list.append(results[:14])

            elif self.oversampling == 'GerryFair':
                classifier = GerryFairClassifier(C=10, printflag=True, heatmapflag=False, max_iters=100, gamma=0.01, fairness_def='FP')
                # NOTE : fairness definition = FP, so it decreases the values of FPR. FN works bad, SP doesn't work
                # even though FPR is small, SP is not that close to 1
                # protected_attributes = ['personal_status', 'age']
                binary_ds_train = BinaryLabelDataset(df=X_train, label_names=['credit'],
                                                    protected_attribute_names=protected_attributes,
                                                    favorable_label=1, unfavorable_label=0)
                binary_ds_test =  BinaryLabelDataset(df=X_test, label_names=['credit'],
                                                    protected_attribute_names=protected_attributes,
                                                    favorable_label=1, unfavorable_label=0)
                # Fit the classifier on the dataset
                classifier.fit(binary_ds_train, early_termination=True)
                binary_ds_test_pred = classifier.predict(binary_ds_test)

                # Evaluate model performance using evaluate method
                results = evaluate(binary_ds_test, binary_ds_test_pred, priv_group=privileged_groups, unpriv_group=unprivileged_groups)
                results_list.append(results[:14])

            elif self.oversampling == 'Remedy':
                columns_all = X_train.drop(columns=['credit']) #["List of all of the columns in the dataset except y_label"]
                label_y = 'credit' #"column name of y_label"
                columns_protected = ['personal_status', 'age'] #["List of all of the protected attributes in the dataset"]
                temp2, names = get_temp(X_train, columns_protected, label_y)
                unfair_group, unfair_names, skew_candidates, unfair_dict = get_unfair_group(columns_protected, [])
                print(unfair_group, unfair_names, skew_candidates, unfair_dict)
                all_names = candidate_groups(skew_candidates, unfair_dict, columns_protected, unfair_names)
                names_values = name_val_dict(X_train, names)

                all_names_lst = list(all_names.keys())[1:] # CHANGED HERE
                all_names_lst.reverse()
                #get all of the candidate groups possible with the combos and names
                filter_count = 30
                new_train_data = copy.deepcopy(X_train)
                #iterate over all the names to get the temp2 df for each name
                for a in all_names_lst:
                  temp2, names = get_temp(new_train_data, all_names[a], label_y)
                  temp, temp_g = get_temp_g(new_train_data, names, label_y)
                  temp_g = temp_g[temp_g['cnt'] > filter_count]
                  lst_of_counts = compute_lst_of_counts(temp, names, label_y)
                  need_pos, need_neg = compute_problematic_opt(temp2, temp_g, names, label_y, lst_of_counts)
                  new_train_data['skewed'] = 0
                  new_train_data["diff"] = 0
                  new_train_data = naive_duplicate(new_train_data, temp2, names, need_pos, need_neg, label_y)
                #new_train_x = pd.DataFrame(new_train_data, columns = columns_all)
                new_train_label = pd.DataFrame(new_train_data, columns = [label_y])
                new_train_label = new_train_label[label_y]
                new_train_label = new_train_label.astype('int')

                X_train_resampled_remedy = new_train_data.drop(columns= ['skewed', 'diff'])

                results = evaluate_model_performance(X_train_resampled_remedy, X_test, privileged_groups, unprivileged_groups)
                results_list.append(results[:14])

        # Compute the mean of the evaluation results across iterations
        mean_results = np.mean(results_list, axis=0)

        return mean_results

In [173]:
model_evaluator_orig = ModelEvaluator(df, ['personal_status', 'age'], 'credit', [1, 1], [0, 0], 1,2, num_iterations=10)
results_orig_mean = model_evaluator_orig.evaluate_model_performance_mean()

model_evaluator_notax = ModelEvaluator(df, ['personal_status', 'age'], 'credit', [1, 1], [0, 0], 1,2, num_iterations=10, oversampling = 'custom_smote_notax')
results_notax_mean = model_evaluator_notax.evaluate_model_performance_mean()

model_evaluator_tax = ModelEvaluator(df, ['personal_status', 'age'], 'credit', [1, 1], [0, 0], 1,2, num_iterations=10, oversampling = 'custom_smote_tax')
results_tax_mean = model_evaluator_tax.evaluate_model_performance_mean()

model_evaluator_smotenc = ModelEvaluator(df, ['personal_status', 'age'], 'credit', [1, 1], [0, 0], 1,2, num_iterations=10, oversampling = 'custom_smotenc')
results_smotenc_mean = model_evaluator_smotenc.evaluate_model_performance_mean()

model_evaluator_fair_smote = ModelEvaluator(df, ['personal_status', 'age'], 'credit', [1, 1], [0, 0], 1,2, num_iterations=10, oversampling = 'Fair-SMOTE')
results_fair_smote_mean = model_evaluator_fair_smote.evaluate_model_performance_mean()

model_evaluator_reweighing = ModelEvaluator(df, ['personal_status', 'age'], 'credit', [1, 1], [0, 0], 1,2, num_iterations=10, oversampling = 'Reweighing')
results_reweighing_mean = model_evaluator_reweighing.evaluate_model_performance_mean()

model_evaluator_gerryfair = ModelEvaluator(df, ['personal_status', 'age'], 'credit', [1, 1], [0, 0], 1,2, num_iterations=10, oversampling = 'GerryFair')
results_gerryfair_mean = model_evaluator_gerryfair.evaluate_model_performance_mean()

model_evaluator_remedy = ModelEvaluator(df, ['personal_status', 'age'], 'credit', [1, 1], [0, 0], 1,2, num_iterations=10, oversampling = 'Remedy')
results_remedy_mean = model_evaluator_remedy.evaluate_model_performance_mean()

No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.742571
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.752667
[[ 43  25]
 [ 47 185]]
No missing values detected.
The 'credit' column has only two unique values.


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.761714
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.730667
[[ 46  28]
 [ 44 182]]
No missing values detected.
The 'credit' column has only two unique values.


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.757143
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.727333
[[ 48  24]
 [ 42 186]]
No missing values detected.
The 'credit' column has only two unique values.


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.745143
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.744000
[[ 45  19]
 [ 45 191]]
No missing values detected.
The 'credit' column has only two unique values.


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.754857
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.740667
[[ 39  22]
 [ 51 188]]
No missing values detected.
The 'credit' column has only two unique values.


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.745714
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.757333
[[ 44  19]
 [ 46 191]]
No missing values detected.
The 'credit' column has only two unique values.


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.751143
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.748667
[[ 41  27]
 [ 49 183]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())
  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.747714
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.752000
[[ 47  25]
 [ 43 185]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.745143
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.756000
[[ 46  24]
 [ 44 186]]
No missing values detected.
The 'credit

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())
  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())


Individual fairness metric (consistency) = 0.753714
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.730000
[[ 37  11]
 [ 53 199]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]


  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


{779: 0, 698: 1, 145: 2, 766: 3, 938: 4, 156: 5, 477: 6, 78: 7, 581: 8, 582: 9, 772: 10, 663: 11, 18: 12, 656: 13, 457: 14, 246: 15, 62: 16, 447: 17, 357: 18, 973: 19, 748: 20, 117: 21, 970: 22, 943: 23, 103: 24, 200: 25, 398: 26, 109: 27, 300: 28, 829: 29, 95: 30, 215: 31, 443: 32, 903: 33, 65: 34, 769: 35, 563: 36, 158: 37, 754: 38, 868: 39, 837: 40, 342: 41, 17: 42, 176: 43, 666: 44, 811: 45, 417: 46, 265: 47, 533: 48, 163: 49, 23: 50, 958: 51, 578: 52, 281: 53, 847: 54, 80: 55, 626: 56, 472: 57, 175: 58, 450: 59, 900: 60, 639: 61, 48: 62, 442: 63, 15: 64, 920: 65, 818: 66, 266: 67, 124: 68, 814: 69, 904: 70, 968: 71, 471: 72, 548: 73, 756: 74, 848: 75, 229: 76, 244: 77, 653: 78, 264: 79, 172: 80, 42: 81, 369: 82, 352: 83, 324: 84, 89: 85, 874: 86, 969: 87, 676: 88, 910: 89, 937: 90, 516: 91, 51: 92, 940: 93, 546: 94, 346: 95, 589: 96, 678: 97, 880: 98, 556: 99, 794: 100, 134: 101, 344: 102, 26: 103, 298: 104, 886: 105, 88: 106, 834: 107, 268: 108, 608: 109, 978: 110, 602: 111, 207:

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{149: 0, 428: 1, 143: 2, 547: 3, 404: 4, 395: 5, 715: 6, 830: 7, 926: 8, 422: 9, 243: 10, 935: 11, 461: 12, 234: 13, 880: 14, 356: 15, 811: 16, 941: 17, 499: 18, 388: 19, 429: 20, 864: 21, 167: 22, 164: 23, 959: 24, 983: 25, 274: 26, 452: 27, 779: 28, 602: 29, 691: 30, 757: 31, 637: 32, 873: 33, 326: 34, 313: 35, 835: 36, 590: 37, 288: 38, 670: 39, 394: 40, 660: 41, 447: 42, 796: 43, 718: 44, 71: 45, 458: 46, 66: 47, 125: 48, 211: 49, 170: 50, 829: 51, 990: 52, 120: 53, 430: 54, 384: 55, 489: 56, 349: 57, 530: 58, 686: 59, 707: 60, 28: 61, 78: 62, 882: 63, 869: 64, 744: 65, 789: 66, 305: 67, 728: 68, 542: 69, 666: 70, 774: 71, 180: 72, 182: 73, 490: 74, 250: 75, 943: 76, 816: 77, 402: 78, 992: 79, 405: 80, 183: 81, 862: 82, 891: 83, 652: 84, 215: 85, 314: 86, 697: 87, 75: 88, 929: 89,

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Individual fairness metric (consistency) = 0.763566
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.750000
[[ 34  13]
 [ 56 197]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{345: 0, 658: 1, 428: 2, 272: 3, 3: 4, 720: 5, 390: 6, 455: 7, 844: 8, 260: 9, 865: 10, 283: 11, 453: 12, 221: 13, 945: 14, 326: 15, 683: 16, 935: 17, 854: 18, 647: 19, 71: 20, 645: 21, 411: 22, 156: 23, 186: 24, 232: 25, 376: 26, 286: 27, 793: 28, 569: 29, 809: 30, 109: 31, 923: 32, 237: 33, 714: 34, 329: 35, 783: 36, 693: 37, 433: 38, 398: 39, 26: 40, 862: 41, 276: 42, 957: 43, 61: 44, 115: 45, 152: 46, 743: 47, 107: 48, 971: 49, 497: 50, 265: 51, 776: 52, 117: 53, 678: 54, 670: 55, 694: 56, 212: 57, 988: 58, 72: 59, 712: 60

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{96: 0, 557: 1, 824: 2, 791: 3, 771: 4, 203: 5, 927: 6, 891: 7, 893: 8, 990: 9, 376: 10, 996: 11, 941: 12, 378: 13, 36: 14, 574: 15, 543: 16, 946: 17, 496: 18, 320: 19, 390: 20, 803: 21, 225: 22, 565: 23, 768: 24, 541: 25, 624: 26, 724: 27, 854: 28, 143: 29, 454: 30, 43: 31, 256: 32, 88: 33, 892: 34, 265: 35, 602: 36, 915: 37, 542: 38, 140: 39, 857: 40, 794: 41, 155: 42, 632: 43, 889: 44, 179: 45, 871: 46, 460: 47, 76: 48, 657: 49, 845: 50, 741: 51, 373: 52, 667: 53, 469: 54, 669: 55, 56: 56, 9: 57, 375: 58, 864: 59, 316: 60, 439: 61, 733: 62, 521: 63, 842: 64, 869: 65, 266: 66, 286: 67, 813: 68, 438: 69, 91: 70, 965: 71, 811: 72, 87: 73, 653: 74, 426: 75, 500: 76, 311: 77, 67: 78, 334: 79, 776: 80, 134: 81, 808: 82, 220: 83, 431: 84, 988: 85, 954: 86, 480: 87, 199: 88, 677: 89, 481: 

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{152: 0, 806: 1, 141: 2, 675: 3, 237: 4, 855: 5, 50: 6, 868: 7, 904: 8, 707: 9, 22: 10, 49: 11, 908: 12, 563: 13, 60: 14, 260: 15, 418: 16, 578: 17, 959: 18, 473: 19, 799: 20, 350: 21, 930: 22, 643: 23, 540: 24, 24: 25, 648: 26, 2: 27, 888: 28, 27: 29, 715: 30, 41: 31, 554: 32, 609: 33, 90: 34, 607: 35, 485: 36, 252: 37, 765: 38, 367: 39, 972: 40, 827: 41, 499: 42, 604: 43, 564: 44, 321: 45, 853: 46, 785: 47, 525: 48, 764: 49, 782: 50, 329: 51, 382: 52, 662: 53, 12: 54, 679: 55, 796: 56, 140: 57, 333: 58, 343: 59, 979: 60, 332: 61, 309: 62, 58: 63, 369: 64, 918: 65, 72: 66, 694: 67, 518: 68, 210: 69, 356: 70, 745: 71, 845: 72, 632: 73, 328: 74, 866: 75, 292: 76, 451: 77, 374: 78, 249: 79, 407: 80, 207: 81, 193: 82, 597: 83, 945: 84, 261: 85, 48: 86, 64: 87, 334: 88, 455: 89, 477: 90, 

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{963: 0, 525: 1, 999: 2, 410: 3, 757: 4, 478: 5, 432: 6, 733: 7, 255: 8, 22: 9, 166: 10, 646: 11, 386: 12, 904: 13, 103: 14, 687: 15, 686: 16, 115: 17, 216: 18, 227: 19, 365: 20, 282: 21, 629: 22, 415: 23, 370: 24, 983: 25, 706: 26, 207: 27, 425: 28, 939: 29, 548: 30, 671: 31, 919: 32, 515: 33, 833: 34, 656: 35, 284: 36, 265: 37, 517: 38, 159: 39, 801: 40, 100: 41, 990: 42, 43: 43, 520: 44, 729: 45, 393: 46, 433: 47, 447: 48, 333: 49, 406: 50, 218: 51, 176: 52, 717: 53, 140: 54, 773: 55, 485: 56, 213: 57, 739: 58, 87: 59, 677: 60, 994: 61, 587: 62, 630: 63, 439: 64, 575: 65, 346: 66, 357: 67, 450: 68, 44: 69, 735: 70, 845: 71, 476: 72, 110: 73, 181: 74, 220: 75, 700: 76, 523: 77, 61: 78, 118: 79, 578: 80, 177: 81, 70: 82, 645: 83, 470: 84, 615: 85, 407: 86, 655: 87, 318: 88, 694: 89, 

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{851: 0, 496: 1, 318: 2, 299: 3, 752: 4, 633: 5, 103: 6, 866: 7, 371: 8, 331: 9, 153: 10, 152: 11, 608: 12, 875: 13, 682: 14, 433: 15, 139: 16, 268: 17, 903: 18, 176: 19, 263: 20, 570: 21, 229: 22, 551: 23, 19: 24, 531: 25, 347: 26, 824: 27, 620: 28, 579: 29, 792: 30, 796: 31, 904: 32, 465: 33, 845: 34, 609: 35, 109: 36, 138: 37, 540: 38, 233: 39, 71: 40, 115: 41, 565: 42, 827: 43, 269: 44, 190: 45, 57: 46, 820: 47, 766: 48, 72: 49, 736: 50, 30: 51, 623: 52, 84: 53, 68: 54, 778: 55, 187: 56, 578: 57, 270: 58, 479: 59, 136: 60, 672: 61, 195: 62, 688: 63, 850: 64, 412: 65, 220: 66, 512: 67, 75: 68, 275: 69, 811: 70, 893: 71, 576: 72, 822: 73, 708: 74, 837: 75, 529: 76, 965: 77, 737: 78, 709: 79, 987: 80, 11: 81, 189: 82, 192: 83, 480: 84, 900: 85, 146: 86, 253: 87, 502: 88, 255: 89, 761

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{969: 0, 353: 1, 572: 2, 207: 3, 890: 4, 940: 5, 16: 6, 646: 7, 299: 8, 938: 9, 976: 10, 913: 11, 289: 12, 781: 13, 540: 14, 752: 15, 334: 16, 876: 17, 457: 18, 536: 19, 439: 20, 232: 21, 595: 22, 297: 23, 893: 24, 733: 25, 659: 26, 383: 27, 188: 28, 869: 29, 335: 30, 889: 31, 639: 32, 631: 33, 968: 34, 622: 35, 395: 36, 527: 37, 775: 38, 945: 39, 643: 40, 426: 41, 428: 42, 902: 43, 484: 44, 895: 45, 642: 46, 724: 47, 307: 48, 35: 49, 296: 50, 822: 51, 748: 52, 26: 53, 942: 54, 150: 55, 359: 56, 500: 57, 101: 58, 850: 59, 977: 60, 8: 61, 660: 62, 915: 63, 554: 64, 786: 65, 158: 66, 773: 67, 474: 68, 737: 69, 309: 70, 946: 71, 193: 72, 887: 73, 939: 74, 157: 75, 882: 76, 394: 77, 198: 78, 21: 79, 925: 80, 22: 81, 549: 82, 955: 83, 480: 84, 449: 85, 45: 86, 552: 87, 791: 88, 605: 89, 73

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{917: 0, 499: 1, 285: 2, 649: 3, 949: 4, 121: 5, 983: 6, 209: 7, 140: 8, 389: 9, 636: 10, 173: 11, 30: 12, 581: 13, 101: 14, 454: 15, 423: 16, 718: 17, 972: 18, 792: 19, 236: 20, 514: 21, 937: 22, 778: 23, 94: 24, 455: 25, 911: 26, 816: 27, 184: 28, 296: 29, 125: 30, 474: 31, 803: 32, 733: 33, 575: 34, 502: 35, 322: 36, 113: 37, 224: 38, 313: 39, 843: 40, 851: 41, 232: 42, 522: 43, 807: 44, 841: 45, 651: 46, 849: 47, 162: 48, 795: 49, 403: 50, 82: 51, 618: 52, 677: 53, 130: 54, 318: 55, 820: 56, 132: 57, 105: 58, 50: 59, 648: 60, 783: 61, 825: 62, 273: 63, 214: 64, 409: 65, 776: 66, 996: 67, 914: 68, 21: 69, 640: 70, 468: 71, 295: 72, 492: 73, 191: 74, 250: 75, 72: 76, 906: 77, 238: 78, 992: 79, 45: 80, 899: 81, 595: 82, 407: 83, 309: 84, 920: 85, 307: 86, 747: 87, 788: 88, 452: 89, 9

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Individual fairness metric (consistency) = 0.764083
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.732667
[[ 38  23]
 [ 52 187]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{380: 0, 452: 1, 555: 2, 660: 3, 58: 4, 368: 5, 657: 6, 414: 7, 627: 8, 607: 9, 232: 10, 327: 11, 292: 12, 227: 13, 403: 14, 905: 15, 654: 16, 760: 17, 567: 18, 162: 19, 94: 20, 865: 21, 358: 22, 73: 23, 294: 24, 593: 25, 214: 26, 287: 27, 465: 28, 268: 29, 897: 30, 286: 31, 318: 32, 672: 33, 822: 34, 856: 35, 708: 36, 192: 37, 52: 38, 430: 39, 104: 40, 263: 41, 926: 42, 622: 43, 436: 44, 989: 45, 26: 46, 356: 47, 619: 48, 398: 49, 803: 50, 587: 51, 397: 52, 478: 53, 189: 54, 944: 55, 150: 56, 709: 57, 448: 58, 886: 59, 543: 6

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{344: 0, 516: 1, 14: 2, 694: 3, 858: 4, 477: 5, 909: 6, 163: 7, 110: 8, 399: 9, 797: 10, 9: 11, 644: 12, 594: 13, 200: 14, 124: 15, 470: 16, 79: 17, 755: 18, 731: 19, 960: 20, 219: 21, 842: 22, 277: 23, 448: 24, 315: 25, 528: 26, 502: 27, 856: 28, 600: 29, 342: 30, 155: 31, 267: 32, 231: 33, 569: 34, 119: 35, 58: 36, 187: 37, 580: 38, 833: 39, 264: 40, 658: 41, 903: 42, 828: 43, 565: 44, 711: 45, 303: 46, 633: 47, 947: 48, 650: 49, 587: 50, 182: 51, 380: 52, 882: 53, 249: 54, 90: 55, 262: 56, 636: 57, 844: 58, 150: 59, 748: 60, 215: 61, 948: 62, 588: 63, 945: 64, 16: 65, 82: 66, 765: 67, 736: 68, 243: 69, 274: 70, 294: 71, 415: 72, 834: 73, 98: 74, 383: 75, 208: 76, 487: 77, 895: 78, 614: 79, 499: 80, 891: 81, 615: 82, 767: 83, 404: 84, 81: 85, 703: 86, 45: 87, 923: 88, 829: 89, 527: 

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[[ 39  22]
 [ 51 188]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{756: 0, 502: 1, 869: 2, 698: 3, 935: 4, 428: 5, 531: 6, 651: 7, 655: 8, 865: 9, 298: 10, 188: 11, 784: 12, 702: 13, 327: 14, 90: 15, 838: 16, 712: 17, 198: 18, 503: 19, 481: 20, 888: 21, 757: 22, 561: 23, 507: 24, 621: 25, 284: 26, 613: 27, 311: 28, 684: 29, 341: 30, 695: 31, 795: 32, 145: 33, 942: 34, 235: 35, 720: 36, 995: 37, 877: 38, 366: 39, 673: 40, 370: 41, 247: 42, 619: 43, 381: 44, 138: 45, 763: 46, 661: 47, 836: 48, 745: 49, 423: 50, 859: 51, 69: 52, 375: 53, 96: 54, 573: 55, 595: 56, 358: 57, 686: 58, 762: 59, 753: 60, 348: 61, 585: 62, 569: 63, 434: 64, 352: 65, 567: 66, 442: 67, 868: 68, 574: 69, 229: 70, 679: 71, 732: 72, 890: 73, 823: 74, 389: 75, 580: 76, 827: 77, 985: 78, 556: 79, 500: 80, 37: 81, 984: 82, 337: 83, 448: 84, 373: 85, 499: 86, 96

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())



No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{454: 0, 581: 1, 33: 2, 646: 3, 67: 4, 173: 5, 416: 6, 442: 7, 174: 8, 91: 9, 701: 10, 744: 11, 481: 12, 666: 13, 822: 14, 317: 15, 359: 16, 370: 17, 882: 18, 730: 19, 696: 20, 420: 21, 563: 22, 627: 23, 118: 24, 983: 25, 756: 26, 476: 27, 216: 28, 11: 29, 492: 30, 562: 31, 543: 32, 241: 33, 742: 34, 243: 35, 376: 36, 870: 37, 619: 38, 758: 39, 601: 40, 909: 41, 455: 42, 80: 43, 766: 44, 399: 45, 474: 46, 648: 47, 237: 48, 473: 49, 574: 50, 815: 51, 451: 52, 732: 53, 552: 54, 933: 55, 615: 56, 393: 57, 190: 58, 886: 59, 879: 60, 676: 61, 785: 62, 554: 63, 371: 64, 537: 65, 637: 66, 635: 67, 257: 68, 776: 69, 952: 70, 228: 71, 291: 72, 360: 73, 353: 74, 662: 75, 579: 76, 335: 77, 765: 78, 330: 79, 352: 80, 366: 81, 778: 82, 689: 83, 598: 84, 942: 85, 194: 86, 211: 87, 187: 88, 568: 89

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[[ 38  24]
 [ 52 186]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{894: 0, 199: 1, 300: 2, 146: 3, 676: 4, 331: 5, 31: 6, 51: 7, 598: 8, 416: 9, 415: 10, 381: 11, 19: 12, 611: 13, 973: 14, 196: 15, 654: 16, 652: 17, 538: 18, 81: 19, 29: 20, 290: 21, 599: 22, 342: 23, 982: 24, 363: 25, 481: 26, 720: 27, 734: 28, 801: 29, 738: 30, 790: 31, 721: 32, 816: 33, 910: 34, 735: 35, 366: 36, 617: 37, 855: 38, 459: 39, 942: 40, 461: 41, 829: 42, 698: 43, 514: 44, 788: 45, 74: 46, 887: 47, 609: 48, 59: 49, 641: 50, 417: 51, 433: 52, 884: 53, 665: 54, 36: 55, 947: 56, 79: 57, 760: 58, 974: 59, 766: 60, 567: 61, 990: 62, 638: 63, 107: 64, 264: 65, 922: 66, 941: 67, 836: 68, 885: 69, 858: 70, 777: 71, 954: 72, 623: 73, 241: 74, 255: 75, 436: 76, 503: 77, 865: 78, 7: 79, 555: 80, 357: 81, 998: 82, 696: 83, 496: 84, 159: 85, 627: 86, 312: 87, 

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[[ 39  19]
 [ 51 191]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{713: 0, 57: 1, 175: 2, 753: 3, 778: 4, 931: 5, 678: 6, 720: 7, 952: 8, 399: 9, 604: 10, 868: 11, 97: 12, 239: 13, 118: 14, 764: 15, 851: 16, 742: 17, 971: 18, 594: 19, 108: 20, 921: 21, 44: 22, 895: 23, 279: 24, 893: 25, 890: 26, 696: 27, 639: 28, 32: 29, 672: 30, 156: 31, 549: 32, 928: 33, 361: 34, 591: 35, 230: 36, 86: 37, 695: 38, 900: 39, 298: 40, 659: 41, 330: 42, 750: 43, 56: 44, 734: 45, 842: 46, 874: 47, 843: 48, 269: 49, 424: 50, 920: 51, 872: 52, 685: 53, 232: 54, 54: 55, 589: 56, 50: 57, 7: 58, 259: 59, 415: 60, 561: 61, 323: 62, 655: 63, 145: 64, 350: 65, 565: 66, 875: 67, 524: 68, 472: 69, 620: 70, 687: 71, 799: 72, 974: 73, 829: 74, 382: 75, 117: 76, 543: 77, 765: 78, 398: 79, 793: 80, 240: 81, 857: 82, 174: 83, 0: 84, 646: 85, 76: 86, 445: 87, 81

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[[ 38  21]
 [ 52 189]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{13: 0, 728: 1, 191: 2, 195: 3, 594: 4, 755: 5, 412: 6, 500: 7, 786: 8, 194: 9, 536: 10, 974: 11, 101: 12, 932: 13, 317: 14, 121: 15, 644: 16, 859: 17, 849: 18, 192: 19, 61: 20, 770: 21, 779: 22, 631: 23, 222: 24, 973: 25, 785: 26, 270: 27, 216: 28, 168: 29, 640: 30, 49: 31, 703: 32, 577: 33, 885: 34, 527: 35, 614: 36, 7: 37, 505: 38, 800: 39, 817: 40, 74: 41, 267: 42, 599: 43, 387: 44, 471: 45, 19: 46, 766: 47, 821: 48, 445: 49, 600: 50, 287: 51, 477: 52, 699: 53, 911: 54, 448: 55, 561: 56, 321: 57, 580: 58, 389: 59, 904: 60, 686: 61, 550: 62, 483: 63, 740: 64, 296: 65, 345: 66, 630: 67, 260: 68, 469: 69, 962: 70, 702: 71, 353: 72, 708: 73, 835: 74, 982: 75, 189: 76, 380: 77, 902: 78, 16: 79, 559: 80, 847: 81, 46: 82, 494: 83, 395: 84, 261: 85, 733: 86, 502: 87

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[[ 42  22]
 [ 48 188]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{403: 0, 203: 1, 208: 2, 568: 3, 902: 4, 5: 5, 620: 6, 84: 7, 805: 8, 204: 9, 740: 10, 890: 11, 29: 12, 971: 13, 625: 14, 457: 15, 312: 16, 732: 17, 206: 18, 241: 19, 822: 20, 730: 21, 336: 22, 517: 23, 296: 24, 944: 25, 636: 26, 578: 27, 40: 28, 773: 29, 812: 30, 803: 31, 852: 32, 662: 33, 447: 34, 353: 35, 899: 36, 748: 37, 43: 38, 103: 39, 806: 40, 558: 41, 339: 42, 774: 43, 932: 44, 820: 45, 81: 46, 370: 47, 298: 48, 235: 49, 911: 50, 51: 51, 950: 52, 580: 53, 163: 54, 684: 55, 835: 56, 680: 57, 367: 58, 251: 59, 785: 60, 900: 61, 859: 62, 365: 63, 564: 64, 672: 65, 27: 66, 398: 67, 769: 68, 801: 69, 254: 70, 940: 71, 63: 72, 807: 73, 782: 74, 501: 75, 95: 76, 571: 77, 80: 78, 700: 79, 227: 80, 191: 81, 606: 82, 994: 83, 863: 84, 603: 85, 591: 86, 754: 87, 2

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{710: 0, 850: 1, 474: 2, 769: 3, 156: 4, 846: 5, 519: 6, 830: 7, 661: 8, 61: 9, 473: 10, 81: 11, 801: 12, 477: 13, 408: 14, 885: 15, 265: 16, 630: 17, 313: 18, 135: 19, 31: 20, 500: 21, 585: 22, 421: 23, 283: 24, 322: 25, 524: 26, 905: 27, 233: 28, 154: 29, 171: 30, 521: 31, 635: 32, 237: 33, 251: 34, 932: 35, 647: 36, 851: 37, 744: 38, 76: 39, 894: 40, 663: 41, 816: 42, 371: 43, 502: 44, 869: 45, 66: 46, 565: 47, 358: 48, 605: 49, 776: 50, 292: 51, 817: 52, 999: 53, 482: 54, 124: 55, 824: 56, 948: 57, 903: 58, 388: 59, 766: 60, 99: 61, 299: 62, 871: 63, 270: 64, 17: 65, 148: 66, 106: 67, 743: 68, 331: 69, 190: 70, 437: 71, 775: 72, 36: 73, 448: 74, 50: 75, 698: 76, 829: 77, 586: 78, 41: 79, 504: 80, 2: 81, 393: 82, 130: 83, 411: 84, 200: 85, 789: 86, 515: 87, 528: 88, 674: 89, 522: 9

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[[ 33  19]
 [ 57 191]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{709: 0, 137: 1, 99: 2, 70: 3, 724: 4, 190: 5, 717: 6, 293: 7, 80: 8, 113: 9, 227: 10, 310: 11, 671: 12, 370: 13, 646: 14, 783: 15, 493: 16, 872: 17, 757: 18, 48: 19, 115: 20, 748: 21, 327: 22, 109: 23, 544: 24, 710: 25, 878: 26, 688: 27, 72: 28, 902: 29, 811: 30, 198: 31, 672: 32, 315: 33, 309: 34, 53: 35, 758: 36, 560: 37, 699: 38, 194: 39, 729: 40, 369: 41, 132: 42, 247: 43, 329: 44, 331: 45, 447: 46, 459: 47, 804: 48, 490: 49, 889: 50, 381: 51, 838: 52, 746: 53, 979: 54, 966: 55, 56: 56, 751: 57, 257: 58, 430: 59, 880: 60, 840: 61, 903: 62, 167: 63, 857: 64, 425: 65, 118: 66, 210: 67, 359: 68, 996: 69, 815: 70, 62: 71, 73: 72, 300: 73, 337: 74, 485: 75, 715: 76, 696: 77, 987: 78, 216: 79, 107: 80, 21: 81, 263: 82, 829: 83, 592: 84, 352: 85, 487: 86, 764: 87,

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Individual fairness metric (consistency) (test) = 0.723333
[[ 44  32]
 [ 46 178]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
{290: 0, 265: 1, 258: 2, 394: 3, 88: 4, 369: 5, 570: 6, 560: 7, 282: 8, 3: 9, 68: 10, 430: 11, 188: 12, 185: 13, 177: 14, 248: 15, 223: 16, 58: 17, 596: 18, 772: 19, 112: 20, 759: 21, 609: 22, 252: 23, 121: 24, 728: 25, 132: 26, 694: 27, 575: 28, 977: 29, 142: 30, 699: 31, 840: 32, 410: 33, 812: 34, 663: 35, 110: 36, 204: 37, 250: 38, 227: 39, 632: 40, 495: 41, 14: 42, 498: 43, 205: 44, 828: 45, 700: 46, 303: 47, 197: 48, 435: 49, 411: 50, 653: 51, 259: 52, 341: 53, 745: 54, 964: 55, 965: 56, 47: 57, 33: 58, 975: 59, 202: 60, 881: 61, 344: 62, 57: 63, 4: 64, 154: 65, 959: 66, 274: 67, 751: 68, 291: 69, 517: 70, 797: 71, 816: 72, 254: 73, 590: 74, 183: 75, 937: 76, 521: 77, 726: 78, 519: 79, 340: 80, 313:

  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
87 indice A
109 indice B
67 indice A
26 indice B
86 indice A
67 indice B
38 indice A
108 indice B
108 indice A
13 indice B
112 indice A
46 indice B
32 indice A
84 indice B
105 indice A
95 indice B
99 indice A
73 indice B
68 indice A
36 indice B
3 indice A
39 indice B




77 indice A
13 indice B
38 indice A
73 indice B
15 indice A
4 indice B
34 indice A
14 indice B
7 indice A
20 indice B
49 indice A
32 indice B
80 indice A
28 indice B
92 indice A
22 indice B
72 indice A
46 indice B
91 indice A
16 indice B
65 indice A
4 indice B
Oversampling for groups 0 (positive) and 1 (negative): Added 22 synthetic samples in group 1.
2 indice A
9 indice B
16 indice A
26 indice B
8 indice A
21 indice B
0 indice A
5 indice B
7 indice A
3 indice B
15



 indice A
6 indice B
16 indice A
26 indice B
22 indice A
0 indice B
27 indice A
9 indice B
2 indice A
9 indice B
21 indice A
1 indice B
27 indice A
9 indice B
4 indice A
10 indice B
20 indice A
16 indice B
Oversampling for groups 2 (positive) and 3 (negative): Added 14 synthetic samples in group 2.
81 indice A
49 indice B
74 indice A
53 indice B
44 indice A
0 indice B
26 indice A
89 indice B
39 indice A
80



 indice B
59 indice A
100 indice B
90 indice A
11 indice B
74 indice A
53 indice B
58 indice A
91 indice B
2 indice A
100 indice B
5 indice A
24 indice B
6 indice A
53 indice B
63 indice A
38 indice B
Oversampling for groups 4 (positive) and 5 (negative): Added 13 synthetic samples in group 4.
31 indice A
13 indice B
19 indice A
21 indice B
9 indice A
15 indice B
18 indice A
4 indice B
14 indice A
24 indice B
30 indice A
3 indice B
29 indice A
10 indice B
3 indice A




32 indice B
24 indice A
32 indice B
21 indice A
13 indice B
30 indice A
24 indice B
12 indice A
23 indice B
8 indice A
6 indice B
14 indice A
6 indice B
10 indice A
28 indice B
0 indice A
14 indice B
4 indice A
3 indice B
19 indice A
24 indice B
21 indice A
29 indice B
30 indice A
32 indice B
1 indice A
32 indice B
4 indice A
29 indice B
22 indice A
9 indice B
12 indice A
7 indice B
14 indice A
24 indice B


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Oversampling for groups 6 (positive) and 7 (negative): Added 25 synthetic samples in group 6.
Disparate impact (of original labels) = 1.004339
Difference in statistical parity (of original labels) = 0.003035
Individual fairness metric (consistency) = 0.774419
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.738667
[[ 42  30]
 [ 48 180]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
97 indice A
5 indice B
3 indice A
92 indice B
0 indice A
19 indice B
102 indice A
99 indice B
109 indice A
68 indice B
104 indice A
107 indice B
63 indice A
94 indice B
16 indice A
41 indice B
111 indice A
51 indice B
54 indice A
48 indice B
102 indice A
13 indice B
66 indice A
24 indice B
86 indice A
80 indice B
114 indic



4 indice A
78 indice B
1 indice A
114 indice B
54 indice A
75 indice B
85 indice A
103 indice B
Oversampling for groups 0 (positive) and 1 (negative): Added 22 synthetic samples in group 1.
12 indice A
3 indice B
13 indice A
17 indice B
4 indice A
12 indice B
9 indice A
17 indice B
9 indice A
17 indice B
1 indice A
17 indice B
13 indice A
17 indice B
0 indice A
3 indice B
6 indice A
23 indice B
25 indice A
1 indice B
19 indice A
23



 indice B
20 indice A
11 indice B
5 indice A
23 indice B
20 indice A
12 indice B
Oversampling for groups 2 (positive) and 3 (negative): Added 14 synthetic samples in group 2.
51 indice A
15 indice B
40 indice A
30 indice B
4 indice A
43 indice B
40 indice A
86 indice B
30 indice A
61 indice B
40 indice A
30 indice B
22 indice A
49 indice B
48 indice A
9 indice B
36 indice A
84 indice B
35 indice A
57 indice B
98 indice A
46 indice B
100 indice A
36 indice B
11 indice A
15 indice B




Oversampling for groups 4 (positive) and 5 (negative): Added 13 synthetic samples in group 4.
11 indice A
4 indice B
28 indice A
5 indice B
5 indice A
20 indice B
22 indice A
6 indice B
0 indice A
1 indice B
26 indice A
2 indice B
28 indice A
9 indice B
4 indice A
2 indice B
18 indice A
2 indice B
18 indice A
2 indice B
13 indice A
18 indice B
22 indice A
12 indice B
17 indice A
5 indice B
15 indice A
12 indice B
15 indice A
14 indice B
33 indice A
2 indice B
1 indice A
27 indice B




30 indice A
5 indice B
5 indice A
7 indice B
8 indice A
2 indice B
20 indice A
5 indice B
22 indice A
6 indice B
32 indice A
9 indice B
14 indice A
28 indice B
14 indice A
13 indice B
Oversampling for groups 6 (positive) and 7 (negative): Added 25 synthetic samples in group 6.
Disparate impact (of original labels) = 1.004339
Difference in statistical parity (of original labels) = 0.003035
Individual fairness metric (consistency) = 0.749871
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.763333
[[ 41  14]
 [ 49 196]]


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
12 indice A
24 indice B
70 indice A
24 indice B
0 indice A
20 indice B
8 indice A
113 indice B
101 indice A
27 indice B
63 indice A
105 indice B
106 indice A
52 indice B
82 indice A
21 indice B
76 indice A
23 indice B
111 indice A
96 indice B




114 indice A
54 indice B
68 indice A
50 indice B
38 indice A
2 indice B
72 indice A
69 indice B
81 indice A
76 indice B
9 indice A
29 indice B
89 indice A
72 indice B
68 indice A
20 indice B
30 indice A
97 indice B
10 indice A
114 indice B
89 indice A
79 indice B
30 indice A
20 indice B
Oversampling for groups 0 (positive) and 1 (negative): Added 22 synthetic samples in group 1.
16 indice A
21 indice B
16 indice A
2 indice B
7 indice A
16 indice B
26 indice A
20 indice B




27 indice A
0 indice B
23 indice A
16 indice B
7 indice A
16 indice B
17 indice A
13 indice B
3 indice A
19 indice B
7 indice A
16 indice B
5 indice A
4 indice B
7 indice A
17 indice B
18 indice A
13 indice B
0 indice A
16 indice B
Oversampling for groups 2 (positive) and 3 (negative): Added 14 synthetic samples in group 2.
63 indice A
14 indice B
59 indice A
21 indice B
54 indice A
49 indice B
77 indice A
38 indice B
2 indice A
98 indice B
2 indice A
104 indice B
69 indice A
36 indice B
74 indice A
58 



indice B
36 indice A
67 indice B
96 indice A
56 indice B
104 indice A
65 indice B
77 indice A
47 indice B
97 indice A
98 indice B
Oversampling for groups 4 (positive) and 5 (negative): Added 13 synthetic samples in group 4.
11 indice A
3 indice B
0 indice A
12 indice B
29 indice A
20 indice B
20 indice A
6 indice B
25 indice A
6 indice B
30 indice A
25 indice B
24 indice A
11 indice B
16 indice A
29 indice B
12 indice A
4 indice B
24 indice A
11 indice B
30 indice A
25 indice B




1 indice A
33 indice B
23 indice A
6 indice B
15 indice A
22 indice B
30 indice A
32 indice B
29 indice A
20 indice B
1 indice A
22 indice B
8 indice A
16 indice B
17 indice A
5 indice B
12 indice A
20 indice B
15 indice A
22 indice B
26 indice A
3 indice B
30 indice A
25 indice B
9 indice A
8 indice B
23 indice A
14 indice B
Oversampling for groups 6 (positive) and 7 (negative): Added 25 synthetic samples in group 6.


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Disparate impact (of original labels) = 1.004339
Difference in statistical parity (of original labels) = 0.003035
Individual fairness metric (consistency) = 0.750904
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.763333
[[ 42  17]
 [ 48 193]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
30 indice A
35 indice B




70 indice A
15 indice B
13 indice A
10 indice B
95 indice A
33 indice B
76 indice A
49 indice B
5 indice A
30 indice B
60 indice A
105 indice B
66 indice A
20 indice B
66 indice A
20 indice B
94 indice A
63 indice B
87 indice A
14 indice B
70 indice A
15 indice B
24 indice A
89 indice B
115 indice A
0 indice B
76 indice A
35 indice B
33 indice A
95 indice B
40 indice A
77 indice B




78 indice A
27 indice B
47 indice A
48 indice B
84 indice A
6 indice B
55 indice A
22 indice B
53 indice A
63 indice B
Oversampling for groups 0 (positive) and 1 (negative): Added 22 synthetic samples in group 1.
27 indice A
0 indice B
23 indice A
6 indice B
16 indice A
5 indice B
13 indice A
17 indice B
19 indice A
11 indice B
14 indice A
12 indice B
26 indice A
14 indice B
17 indice A
19 indice B
24 indice A
14 indice B
24 indice A
14 indice B
25 indice A
16 indice B
25 indice A
17 indice B




23 indice A
9 indice B
15 indice A
14 indice B
Oversampling for groups 2 (positive) and 3 (negative): Added 14 synthetic samples in group 2.
84 indice A
15 indice B
5 indice A
37 indice B
92 indice A
36 indice B
57 indice A
10 indice B
6 indice A
92 indice B
49 indice A
4 indice B
104 indice A
39 indice B
48 indice A
76 indice B
69 indice A
95 indice B
3 indice A
54 indice B
47 indice A
45 indice B
68 indice A
0 indice B
70 indice A
62 indice B
Oversampling for groups 4 (positive) and 5 (negative): Added 13 synthetic samples in group 4.
8 indice A
10 indice B
13 indice A
23 indice B




10 indice A
25 indice B
31 indice A
6 indice B
3 indice A
28 indice B
3 indice A
25 indice B
24 indice A
0 indice B
27 indice A
22 indice B
25 indice A
9 indice B
14 indice A
6 indice B
27 indice A
22 indice B
20 indice A
6 indice B
6 indice A
31 indice B
19 indice A
17 indice B
10 indice A
25 indice B
6 indice A
20 indice B
1 indice A
2 indice B
28 indice A
30 indice B
20 indice A
22 indice B
8 indice A
9 indice B


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


23 indice A
13 indice B
30 indice A
4 indice B
13 indice A
16 indice B
19 indice A
26 indice B
19 indice A
26 indice B
Oversampling for groups 6 (positive) and 7 (negative): Added 25 synthetic samples in group 6.
Disparate impact (of original labels) = 1.004339
Difference in statistical parity (of original labels) = 0.003035
Individual fairness metric (consistency) = 0.760207
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.760000
[[ 40  22]
 [ 50 188]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
39 indice A
49 indice B
3 indice A
102 indice B
36 indice A
53 indice B
45 indice A
31 indice B
115 indice A
54 indice B
14 indice A
35 indice B
97 indice A
28 indice B
57 indice A
111 indice B
54 indice A



 indice A
78 indice B
78 indice A
61 indice B
10 indice A
108 indice B
85 indice A
74 indice B
77 indice A
15 indice B




29 indice A
22 indice B
108 indice A
82 indice B
38 indice A




107 indice B
107 indice A
61 indice B
99 indice A
39 indice B
28 indice A
37 indice B
91 indice A




110 indice B
110 indice A
111 indice B
Oversampling for groups 0 (positive) and 1 (negative): Added 22 synthetic samples in group 1.
10 indice A
11 indice B
22 indice A
11 indice B
9 indice A
11 indice B
25 indice A
18 indice B
23 indice A
9 indice B
18 indice A
7 indice B
24 indice A
11 indice B
4 indice A
15 indice B
21 indice A
23 indice B
13 indice A
17 indice B
9 indice A
27 indice B
22 indice A
12 indice B
10 indice A
16 indice B
2 indice A
17 indice B
Oversampling for groups 2 (positive) and 3 (negative): Added 14 synthetic samples in group 2.
70 indice A
5 indice B
62 indice A
22 indice B
24 indice A
20 indice B
62 indice A
22 indice B
48 indice A
86 indice B
85 indice A
106 indice B
61 indice A
78 indice B
85 indice A
45 indice B
101 indice A
86 indice B
40 indice A
14 indice B
100 indice A
97 indice B




44 indice A
87 indice B
52 indice A
67 indice B
Oversampling for groups 4 (positive) and 5 (negative): Added 13 synthetic samples in group 4.
6 indice A
2 indice B
3 indice A
16 indice B
14 indice A
12 indice B
13 indice A
23 indice B
33 indice A
3 indice B
13 indice A
0 indice B
4 indice A
29 indice B
19 indice A
14 indice B
19 indice A
7 indice B
14 indice A
12 indice B
30 indice A
16 indice B
17 indice A
21 indice B
29 indice A
27 indice B
3 indice A
31 indice B
33 indice A




32 indice B
17 indice A
6 indice B
22 indice A
29 indice B
16 indice A
23 indice B
16 indice A
10 indice B
26 indice A
2 indice B
2 indice A
5 indice B
25 indice A
24 indice B
6 indice A
25 indice B
13 indice A
0 indice B
30 indice A
16 indice B
Oversampling for groups 6 (positive) and 7 (negative): Added 25 synthetic samples in group 6.
Disparate impact (of original labels) = 1.004339
Difference in statistical parity (of original labels) = 0.003035
Individual fairness metric (consistency) = 0.764599
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.728667


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[[ 37  26]
 [ 53 184]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
60 indice A
55 indice B
53 indice A
54 indice B
39 indice A




57 indice B
69 indice A
23 indice B
12 indice A
74 indice B
78 indice A
60 indice B
46 indice A
60 indice B
26 indice A
94 indice B
22 indice A
82 indice B
90 indice A
113 indice B
80 indice A
59 indice B
83 indice A
2 indice B
16 indice A
39 



indice B
52 indice A
38 indice B
92 indice A
89 indice B
11 indice A




95 indice B
73 indice A
22 indice B
0 indice A




33 indice B
68 indice A
47 indice B
6 indice A
50 indice B
83 indice A
51 indice B
41 indice A
106 indice B
Oversampling for groups 0 (positive) and 1 (negative): Added 22 synthetic samples in group 1.
26 indice A
0 indice B
9 indice A
4 indice B
21 indice A
3 indice B
26 indice A
25 indice B




24 indice A
0 indice B
7 indice A
13 indice B
5 indice A
22 indice B
8 indice A
26 indice B
1 indice A
19 indice B
13 indice A




27 indice B
18 indice A
25 indice B
1 indice A
21 indice B
5 indice A
20 indice B
5 indice A
20



 indice B
Oversampling for groups 2 (positive) and 3 (negative): Added 14 synthetic samples in group 2.
15 indice A
34 indice B
41 indice A
30 indice B
40 indice A
23 indice B
72 indice A
96 indice B
48 indice A
23 indice B
67 indice A
51 indice B
83 indice A
101 indice B
33 indice A
69 indice B
79 indice A
63 indice B
69 indice A
33 indice B
68 indice A
3 indice B




94 indice A
78 indice B
61 indice A
103 indice B
Oversampling for groups 4 (positive) and 5 (negative): Added 13 synthetic samples in group 4.
20 indice A
25 indice B
30 indice A
13 indice B
13 indice A
21 indice B
3 indice A
25 indice B
9 indice A
24 indice B
6 indice A




27 indice B
3 indice A
25 indice B
30 indice A
13 indice B
7 indice A
21 indice B
24 indice A
17 indice B
19 indice A
33 indice B
19 indice A
33 indice B
9 indice A
5 indice B
32 indice A
16 indice B
16 indice A
22 indice B




24 indice A
17 indice B
10 indice A
7 indice B
5 indice A
28 indice B
24 indice A
9 indice B
6 indice A
30 indice B
18 indice A
10 indice B
6 indice A
13 indice B
0 indice A
5 indice B




32 indice A
6 indice B
2 indice A
4 indice B
Oversampling for groups 6 (positive) and 7 (negative): Added 25 synthetic samples in group 6.
Disparate impact (of original labels) = 1.004339
Difference in statistical parity (of original labels) = 0.003035
Individual fairness metric (consistency) = 0.776744
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.716667


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[[ 35  26]
 [ 55 184]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
15 indice A
30 indice B
27 indice A
26 indice B
5 indice A
37 indice B
45 indice A




86 indice B
88 indice A
110 indice B
102 indice A
71 indice B
69 indice A
30 indice B
115 indice A
57 indice B
2 indice A
37 indice B
79 indice A
98 indice B
99 indice A
20 indice B
75 indice A
21 indice B
80 indice A
16 indice B
71 indice A
98 indice B
21 indice A
77 indice B
65 indice A
20 indice B
37 indice A
46 indice B
75 indice A
21 indice B




82 indice A
20 indice B
50 indice A
97 indice B
21 indice A
68 indice B
24 indice A
86 indice B
Oversampling for groups 0 (positive) and 1 (negative): Added 22 synthetic samples in group 1.
12 indice A
14 indice B
10 indice A
1 indice B
19 indice A
10 indice B
0 indice A
19 indice B
5 indice A
15 indice B
22 indice A
9 indice B
5 indice A
21 indice B
21 indice A
23 indice B
14 indice A
1 indice B
7 indice A
9 indice B




15 indice A
14 indice B
23 indice A
9 indice B
22 indice A
9 indice B
8 indice A
14 indice B
Oversampling for groups 2 (positive) and 3 (negative): Added 14 synthetic samples in group 2.
36 indice A
82 indice B
57 indice A
31 indice B
30 indice A
61 indice B
90 indice A
17 indice B
54 indice A
84 indice B
35 indice A
29 indice B
84 indice A
47 indice B
40 indice A
70 indice B
4 indice A
72 indice B
50 indice A
6 indice B




41 indice A
2 indice B
40 indice A
9 indice B
22 indice A
18 indice B
Oversampling for groups 4 (positive) and 5 (negative): Added 13 synthetic samples in group 4.
10 indice A
16 indice B
13 indice A
22 indice B
25 indice A
16 indice B
7 indice A
31 indice B
15 indice A
4 indice B
24 indice A
12 indice B
29 indice A
25 indice B
33 indice A
1 indice B
24 indice A
23 indice B
4 indice A
24 indice B
26 indice A
21 indice B




9 indice A
19 indice B
22 indice A
11 indice B
18 indice A
3 indice B
11 indice A
24 indice B
25 indice A
32 indice B
30 indice A
17 indice B
14 indice A
16 indice B
17 indice A
30 indice B
28 indice A
16 indice B
24 indice A
12 indice B
9 indice A
20 indice B
14 indice A
13 indice B
12 indice A
25 indice B


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


1 indice A
29 indice B
Oversampling for groups 6 (positive) and 7 (negative): Added 25 synthetic samples in group 6.
Disparate impact (of original labels) = 1.004339
Difference in statistical parity (of original labels) = 0.003035
Individual fairness metric (consistency) = 0.767700
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.762000
[[ 41  13]
 [ 49 197]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
59 indice A
103 indice B
108 indice A
41 indice B
72 indice A
69 indice B
25 indice A
63 indice B
14 indice A
1 indice B
40 indice A
15 indice B
73 indice A
112 indice B
42 indice A
55 indice B
72 indice A
80 indice B
51 indice A
73 indice B
35 indice A
17 indice B
44 indice A
38 indice B
98 indice A




60 indice B
39 indice A
15 indice B
84 indice A
113 indice B
95 indice A
25 indice B
6 indice A
54 indice B
58 indice A
88 indice B
64 indice A
6 indice B
45 indice A
3 indice B
Oversampling for groups 0 (positive) and 1 (negative): Added 22 synthetic samples in group 1.
27 indice A
26 indice B
17 indice A
11 indice B
10 indice A
16 indice B
15 indice A
20 indice B
16 indice A
23 indice B
1 indice A
11 indice B
27 indice A
20 indice B
15 indice A
6 indice B




7 indice A
18 indice B
3 indice A
4 indice B
13 indice A
0 indice B
16 indice A
7 indice B
17 indice A
6 indice B
1 indice A
11 indice B
Oversampling for groups 2 (positive) and 3 (negative): Added 14 synthetic samples in group 2.
6 indice A
99 indice B
102 indice A
52 indice B
9 indice A
104 indice B
0 indice A
31 indice B
78 indice A
75 indice B
27 indice A
100 indice B
0 indice A
31 indice B
84 indice A
56 indice B
85 indice A
37 indice B
4 indice A
73 indice B
23



 indice A
2 indice B
31 indice A
69 indice B
33 indice A
97 indice B
Oversampling for groups 4 (positive) and 5 (negative): Added 13 synthetic samples in group 4.
16 indice A
32 indice B
32 indice A
24 indice B
31 indice A
33 indice B
32 indice A
11 indice B
15 indice A
22 indice B
12 indice A
32 indice B
10 indice A
0 indice B
20 indice A
6 indice B
15 indice A
14 indice B
33 indice A
23 indice B
24 indice A
16 indice B
26 indice A
33 indice B
26 indice A
33 



indice B
17 indice A
0 indice B
15 indice A
22 indice B
28 indice A
11 indice B
33 indice A
31 indice B
3 indice A
9 indice B
4 indice A
12 indice B
23 indice A
32 indice B
32 indice A
22 indice B
29 indice A
33 indice B
4 indice A
11 indice B
21 indice A
25 indice B
1 indice A
30 indice B
Oversampling for groups 6 (positive) and 7 (negative): Added 25 synthetic samples in group 6.
Disparate impact (of original labels) = 1.004339
Difference in statistical parity (of original labels) = 0.003035


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Individual fairness metric (consistency) = 0.776486
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.732667
[[ 35  20]
 [ 55 190]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
19 indice A
68 indice B
102 indice A
75 indice B
19 indice A
55 indice B
9 indice A




32 indice B
20 indice A
10 indice B
84 indice A
31 indice B
104 indice A
84 indice B
93 indice A
68 indice B
1 indice A
23 indice B
56 indice A
53 indice B
28 indice A
31 indice B
32 indice A
52 indice B
79 indice A
33 indice B
60 indice A
34 indice B
60 indice A
14 indice B
34 indice A
0 indice B
77 indice A
85 indice B
45 indice A
55 indice B
105 indice A




12 indice B
68 indice A
54 indice B
87 indice A
98 indice B
81 indice A
55 indice B
Oversampling for groups 0 (positive) and 1 (negative): Added 22 synthetic samples in group 1.
17 indice A
19 indice B
8 indice A
24 indice B
22 indice A
11 indice B
12 indice A
11 indice B
0 indice A
11 indice B
12 indice A
7 indice B
12 indice A
5 indice B
12 indice A
14 indice B
19 indice A
11 indice B
17 indice A
7 indice B
25 indice A
22 indice B
4 indice A
7 indice B
24



 indice A
4 indice B
20 indice A
5 indice B
Oversampling for groups 2 (positive) and 3 (negative): Added 14 synthetic samples in group 2.
21 indice A
59 indice B
32 indice A
39 indice B
74 indice A
17 indice B
18 indice A
70 indice B
39 indice A
46 indice B
20 indice A
24 indice B
52 indice A
53 indice B
23 indice A
3 indice B
41 indice A
89 indice B
45 indice A
100 indice B
72 indice A
48 indice B
84 indice A
38 indice B
28 indice A
17 indice B
Oversampling for groups 4 (positive) and 5 (negative): Added 13 synthetic samples in group 4.
0 indice A
1 indice B




19 indice A
30 indice B
1 indice A
15 indice B
25 indice A
3 indice B
20 indice A
27 indice B
13 indice A
31 indice B
5 indice A
0 indice B
0 indice A
1 indice B
12 indice A
19 indice B
33 indice A
3 indice B
33 indice A
30 indice B
6 indice A
4 indice B
30 indice A
9 indice B
33 indice A
30 indice B
33 indice A
30 indice B
10 indice A
16 indice B
21 indice A


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


24 indice B
5 indice A
15 indice B
20 indice A
27 indice B
0 indice A
5 indice B
11 indice A
23 indice B
16 indice A
10 indice B
4 indice A
20 indice B
28 indice A
24 indice B
14 indice A
19 indice B
Oversampling for groups 6 (positive) and 7 (negative): Added 25 synthetic samples in group 6.
Disparate impact (of original labels) = 1.004339
Difference in statistical parity (of original labels) = 0.003035
Individual fairness metric (consistency) = 0.773643
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.726000
[[ 40  26]
 [ 50 184]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
62 indice A
29 indice B
59 indice A
100 indice B
55 indice A
89 indice B
93 indice A
40 indice B
42 indice A
51 indice B
83 



93 indice A
71 indice B
4 indice A
3 indice B
23 indice A
65 indice B
47 indice A
3 indice B
109 indice A
52 indice B
64 indice A
43 indice B
6 indice A
5 indice B
99 indice A
1 indice B
74 indice A
84 indice B
9 indice A
96 indice B
112 indice A
13 indice B
Oversampling for groups 0 (positive) and 1 (negative): Added 22 synthetic samples in group 1.
26 indice A
1 indice B
25 indice A
21 indice B
18 indice A
4 indice B
23 indice A
21 indice B
20 indice A
8 indice B




15 indice A
12 indice B
10 indice A
27 indice B
12 indice A
15 indice B
0 indice A
2 indice B
1 indice A
27 indice B
18 indice A
4 indice B
2 indice A
23 indice B
4 indice A
23 indice B
20 indice A
1 indice B
Oversampling for groups 2 (positive) and 3 (negative): Added 14 synthetic samples in group 2.
36 indice A
89 indice B
60 indice A
86 indice B
74 indice A
47 indice B
71 indice A
17 indice B
82 indice A
46 indice B
9 indice A
64 indice B
77 indice A




75 indice B
20 indice A
91 indice B
5 indice A
63 indice B
80 indice A
21 indice B
60 indice A
86 indice B
98 indice A
81 indice B
39 indice A
60 indice B
Oversampling for groups 4 (positive) and 5 (negative): Added 13 synthetic samples in group 4.
32 indice A
21 indice B
2 indice A
25 indice B
7 indice A
14 indice B
31 indice A
18 indice B
31 indice A
26 indice B
1 indice A
22 indice B
17 indice A
1 indice B
30 indice A
7 indice B
28 indice A
7 indice B
6 indice A
28 indice B




25 indice A
28 indice B
17 indice A
31 indice B
2 indice A
32 indice B
16 indice A
22 indice B
25 indice A
32 indice B
18 indice A
12 indice B
28 indice A
3 indice B
9 indice A
4 indice B
30 indice A
19 indice B
15 indice A
28 indice B
30 indice A
7 indice B
24 indice A
0 indice B
20 indice A
13 indice B
8 indice A
32 indice B
30 indice A
6 indice B
Oversampling for groups 6 (positive) and 7 (negative): Added 25 synthetic samples in group 6.


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Disparate impact (of original labels) = 1.004339
Difference in statistical parity (of original labels) = 0.003035
Individual fairness metric (consistency) = 0.774419
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.720000
[[ 35  23]
 [ 55 187]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Maximum count: 321
one_one_one is maximum
Counts to be increased for each group:
zero_zero_zero: 296
zero_zero_one: 287
zero_one_zero: 270
zero_one_one: 214
one_zero_zero: 303
one_zero_one: 293
one_one_zero: 205
one_one_one: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['personal_status'] = df_zero_zero_zero['personal_status'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['age'] = df_zero_zero_zero['age'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_one['personal_status'] = df_zero_zero_on

Disparate impact (of original labels) = 1.000000
Difference in statistical parity (of original labels) = 0.000000


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Individual fairness metric (consistency) = 0.915498
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.733333
[[ 55  40]
 [ 35 170]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Maximum count: 321
one_one_one is maximum
Counts to be increased for each group:
zero_zero_zero: 296
zero_zero_one: 287
zero_one_zero: 270
zero_one_one: 214
one_zero_zero: 303
one_zero_one: 293
one_one_zero: 205
one_one_one: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['personal_status'] = df_zero_zero_zero['personal_status'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['age'] = df_zero_zero_zero['age'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_one['personal_status'] = df_zero_zero_on

Disparate impact (of original labels) = 1.000000
Difference in statistical parity (of original labels) = 0.000000


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['personal_status'] = df_zero_zero_zero['personal_status'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['age'] = df_zero_zero_zero['age'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See th

Individual fairness metric (consistency) = 0.913240
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.749333
[[ 46  34]
 [ 44 176]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Maximum count: 321
one_one_one is maximum
Counts to be increased for each group:
zero_zero_zero: 296
zero_zero_one: 287
zero_one_zero: 270
zero_one_one: 214
one_zero_zero: 303
one_zero_one: 293
one_one_zero: 205
one_one_one: 0




Disparate impact (of original labels) = 1.000000
Difference in statistical parity (of original labels) = 0.000000
Individual fairness metric (consistency) = 0.914486
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.774000


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['personal_status'] = df_zero_zero_zero['personal_status'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['age'] = df_zero_zero_zero['age'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See th

[[ 41  31]
 [ 49 179]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Maximum count: 321
one_one_one is maximum
Counts to be increased for each group:
zero_zero_zero: 296
zero_zero_one: 287
zero_one_zero: 270
zero_one_one: 214
one_zero_zero: 303
one_zero_one: 293
one_one_zero: 205
one_one_one: 0




Disparate impact (of original labels) = 1.000000
Difference in statistical parity (of original labels) = 0.000000


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['personal_status'] = df_zero_zero_zero['personal_status'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['age'] = df_zero_zero_zero['age'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See th

Individual fairness metric (consistency) = 0.919938
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.728667
[[ 50  60]
 [ 40 150]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Maximum count: 321
one_one_one is maximum
Counts to be increased for each group:
zero_zero_zero: 296
zero_zero_one: 287
zero_one_zero: 270
zero_one_one: 214
one_zero_zero: 303
one_zero_one: 293
one_one_zero: 205
one_one_one: 0




Disparate impact (of original labels) = 1.000000
Difference in statistical parity (of original labels) = 0.000000


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['personal_status'] = df_zero_zero_zero['personal_status'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['age'] = df_zero_zero_zero['age'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See th

Individual fairness metric (consistency) = 0.913785
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.758667
[[ 49  46]
 [ 41 164]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Maximum count: 321
one_one_one is maximum
Counts to be increased for each group:
zero_zero_zero: 296
zero_zero_one: 287
zero_one_zero: 270
zero_one_one: 214
one_zero_zero: 303
one_zero_one: 293
one_one_zero: 205
one_one_one: 0




Disparate impact (of original labels) = 1.000000
Difference in statistical parity (of original labels) = 0.000000


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['personal_status'] = df_zero_zero_zero['personal_status'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['age'] = df_zero_zero_zero['age'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See th

Individual fairness metric (consistency) = 0.920327
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.741333
[[ 54  42]
 [ 36 168]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Maximum count: 321
one_one_one is maximum
Counts to be increased for each group:
zero_zero_zero: 296
zero_zero_one: 287
zero_one_zero: 270
zero_one_one: 214
one_zero_zero: 303
one_zero_one: 293
one_one_zero: 205
one_one_one: 0




Disparate impact (of original labels) = 1.000000
Difference in statistical parity (of original labels) = 0.000000


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Individual fairness metric (consistency) = 0.916822
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.742667
[[ 46  35]
 [ 44 175]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Maximum count: 321
one_one_one is maximum
Counts to be increased for each group:
zero_zero_zero: 296
zero_zero_one: 287
zero_one_zero: 270
zero_one_one: 214
one_zero_zero: 303
one_zero_one: 293
one_one_zero: 205
one_one_one: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['personal_status'] = df_zero_zero_zero['personal_status'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['age'] = df_zero_zero_zero['age'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_one['personal_status'] = df_zero_zero_on

Disparate impact (of original labels) = 1.000000
Difference in statistical parity (of original labels) = 0.000000


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['personal_status'] = df_zero_zero_zero['personal_status'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['age'] = df_zero_zero_zero['age'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See th

Individual fairness metric (consistency) = 0.923131
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.728667
[[ 47  33]
 [ 43 177]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Maximum count: 321
one_one_one is maximum
Counts to be increased for each group:
zero_zero_zero: 296
zero_zero_one: 287
zero_one_zero: 270
zero_one_one: 214
one_zero_zero: 303
one_zero_one: 293
one_one_zero: 205
one_one_one: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_one_zero_zero['personal_status'] = df_one_zero_zero['personal_status'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_one_zero_zero['age'] = df_one_zero_zero['age'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_one_zero_one['personal_status'] = df_one_zero_one['per

Disparate impact (of original labels) = 1.000000
Difference in statistical parity (of original labels) = 0.000000


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['personal_status'] = df_zero_zero_zero['personal_status'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_zero_zero_zero['age'] = df_zero_zero_zero['age'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See th

Individual fairness metric (consistency) = 0.919704
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.754000
[[ 47  40]
 [ 43 170]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Maximum count: 321
one_one_one is maximum
Counts to be increased for each group:
zero_zero_zero: 296
zero_zero_one: 287
zero_one_zero: 270
zero_one_one: 214
one_zero_zero: 303
one_zero_one: 293
one_one_zero: 205
one_one_one: 0




Disparate impact (of original labels) = 1.000000
Difference in statistical parity (of original labels) = 0.000000


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Individual fairness metric (consistency) = 0.917991
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.735333
[[ 37  38]
 [ 53 172]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]


  self.protected_attribute_names = ['personal_status', 'age']
  self.protected_attributes = np.column_stack([self['personal_status'], self['age']])
  self.labels = self['credit'].values.reshape(-1, 1)
  self.instance_weights = np.ones(len(self))
  self.privileged_protected_attributes = [np.array([1.]), np.array([1.])]
  self.unprivileged_protected_attributes = [np.array([0.]), np.array([0.])]
  self.feature_names = self.columns[self.columns != 'credit'].tolist()
  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.752000
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.730667
[[ 38  27]
 [ 52 183]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]


  self.protected_attribute_names = ['personal_status', 'age']
  self.protected_attributes = np.column_stack([self['personal_status'], self['age']])
  self.labels = self['credit'].values.reshape(-1, 1)
  self.instance_weights = np.ones(len(self))
  self.privileged_protected_attributes = [np.array([1.]), np.array([1.])]
  self.unprivileged_protected_attributes = [np.array([0.]), np.array([0.])]
  self.feature_names = self.columns[self.columns != 'credit'].tolist()
  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.756571
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.733333
[[ 48  29]
 [ 42 181]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]


  self.protected_attribute_names = ['personal_status', 'age']
  self.protected_attributes = np.column_stack([self['personal_status'], self['age']])
  self.labels = self['credit'].values.reshape(-1, 1)
  self.instance_weights = np.ones(len(self))
  self.privileged_protected_attributes = [np.array([1.]), np.array([1.])]
  self.unprivileged_protected_attributes = [np.array([0.]), np.array([0.])]
  self.feature_names = self.columns[self.columns != 'credit'].tolist()
  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.756000
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.780667
[[ 43  23]
 [ 47 187]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.749429
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.744000
[[ 42  20]
 [ 48 190]]
No missing values detected.
The 'credit

  self.protected_attribute_names = ['personal_status', 'age']
  self.protected_attributes = np.column_stack([self['personal_status'], self['age']])
  self.labels = self['credit'].values.reshape(-1, 1)
  self.instance_weights = np.ones(len(self))
  self.privileged_protected_attributes = [np.array([1.]), np.array([1.])]
  self.unprivileged_protected_attributes = [np.array([0.]), np.array([0.])]
  self.feature_names = self.columns[self.columns != 'credit'].tolist()
  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.750571
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.746667
[[ 47  19]
 [ 43 191]]
No missing values detected.
The 'credit' column has only two unique values.


  self.protected_attribute_names = ['personal_status', 'age']
  self.protected_attributes = np.column_stack([self['personal_status'], self['age']])
  self.labels = self['credit'].values.reshape(-1, 1)
  self.instance_weights = np.ones(len(self))
  self.privileged_protected_attributes = [np.array([1.]), np.array([1.])]
  self.unprivileged_protected_attributes = [np.array([0.]), np.array([0.])]
  self.feature_names = self.columns[self.columns != 'credit'].tolist()
  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.750000
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.738000
[[ 42  27]
 [ 48 183]]
No missing values detected.
The 'credit' column has only two unique values.


  self.protected_attribute_names = ['personal_status', 'age']
  self.protected_attributes = np.column_stack([self['personal_status'], self['age']])
  self.labels = self['credit'].values.reshape(-1, 1)
  self.instance_weights = np.ones(len(self))
  self.privileged_protected_attributes = [np.array([1.]), np.array([1.])]
  self.unprivileged_protected_attributes = [np.array([0.]), np.array([0.])]
  self.feature_names = self.columns[self.columns != 'credit'].tolist()
  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.736857
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.738000
[[ 48  22]
 [ 42 188]]


  self.protected_attribute_names = ['personal_status', 'age']
  self.protected_attributes = np.column_stack([self['personal_status'], self['age']])
  self.labels = self['credit'].values.reshape(-1, 1)
  self.instance_weights = np.ones(len(self))
  self.privileged_protected_attributes = [np.array([1.]), np.array([1.])]
  self.unprivileged_protected_attributes = [np.array([0.]), np.array([0.])]
  self.feature_names = self.columns[self.columns != 'credit'].tolist()
  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.733429
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.753333


  self.protected_attribute_names = ['personal_status', 'age']
  self.protected_attributes = np.column_stack([self['personal_status'], self['age']])
  self.labels = self['credit'].values.reshape(-1, 1)
  self.instance_weights = np.ones(len(self))
  self.privileged_protected_attributes = [np.array([1.]), np.array([1.])]
  self.unprivileged_protected_attributes = [np.array([0.]), np.array([0.])]
  self.feature_names = self.columns[self.columns != 'credit'].tolist()
  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[[ 45  27]
 [ 45 183]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]


  self.protected_attribute_names = ['personal_status', 'age']
  self.protected_attributes = np.column_stack([self['personal_status'], self['age']])
  self.labels = self['credit'].values.reshape(-1, 1)
  self.instance_weights = np.ones(len(self))
  self.privileged_protected_attributes = [np.array([1.]), np.array([1.])]
  self.unprivileged_protected_attributes = [np.array([0.]), np.array([0.])]
  self.feature_names = self.columns[self.columns != 'credit'].tolist()
  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.741429
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.738000
[[ 41  30]
 [ 49 180]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
Disparate impact (of original labels) = 0.784519
Difference in statistical parity (of original labels) = -0.158283
Individual fairness metric (consistency) = 0.745714
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.751333


  self.protected_attribute_names = ['personal_status', 'age']
  self.protected_attributes = np.column_stack([self['personal_status'], self['age']])
  self.labels = self['credit'].values.reshape(-1, 1)
  self.instance_weights = np.ones(len(self))
  self.privileged_protected_attributes = [np.array([1.]), np.array([1.])]
  self.unprivileged_protected_attributes = [np.array([0.]), np.array([0.])]
  self.feature_names = self.columns[self.columns != 'credit'].tolist()
  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[[ 40  23]
 [ 50 187]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
iteration: 1, error: 0.23, fairness violation: 0.012748299319727896, violated group size: 0.13428571428571429
iteration: 2, error: 0.26571428571428574, fairness violation: 0.0063741496598639525, violated group size: 0.13428571428571429
iteration: 3, error: 0.27714285714285714, fairness violation: 0.0042494331065759585, violated group size: 0.13428571428571429
iteration: 4, error: 0.28285714285714286, fairness violation: 0.003187074829931984, violated group size: 0.1657142857142857
iteration: 5, error: 0.28628571428571425, fairness violation: 0.0025496598639456057, violated group size: 0.1657142857142857
iteration: 6, error: 0.2885714285714286, fairness violation: 0.002124716553288008, violated group size: 0.1657142857142857
iteration: 7, error: 0.29, fairness vi

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentat

2.517730496453901 28 18 17.3191489361702
2.517730496453901 28 18 17.3191489361702
Adding 17 positive records
names  ['personal_status', 'age'] [1, 0]
1.9565217391304348 321 116 48.0666666666667
Adding 48 negative records
names  ['personal_status', 'age'] [1, 1]
['age', 'credit']
['personal_status', 'credit']
Disparate impact (of original labels) = 1.000463
Difference in statistical parity (of original labels) = 0.000306
Individual fairness metric (consistency) = 0.768789
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.738667
[[ 38  20]
 [ 52 190]]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
['personal_status', 'age', 'credit']
[] [] ['personal_status', 'age'] {}
['personal_status', 'age', 'credit']
1.9565217391304348 34 25 14.9130434782609
1.9565217391304348 34 25 14.9130434782609
Adding 15 positive records
names  ['personal_status', 'age'] [0, 0]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0


2.517730496453901 107 51 21.4042553191489
2.517730496453901 107 51 21.4042553191489
Adding 21 positive records
names  ['personal_status', 'age'] [0, 1]
2.517730496453901 28 18 17.3191489361702
2.517730496453901 28 18 17.3191489361702
Adding 17 positive records
names  ['personal_status', 'age'] [1, 0]
1.9565217391304348 321 116 48.0666666666667
Adding 48 negative records
names  ['personal_status', 'age'] [1, 1]
['age', 'credit']
['personal_status', 'credit']
Disparate impact (of original labels) = 1.000463
Difference in statistical parity (of original labels) = 0.000306


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentat

Individual fairness metric (consistency) = 0.774782
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.701333
[[ 43  41]
 [ 47 169]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
['personal_status', 'age', 'credit']
[] [] ['personal_status', 'age'] {}
['personal_status', 'age', 'credit']
1.9565217391304348 34 25 14.9130434782609
1.9565217391304348 34 25 14.9130434782609
Adding 15 positive records
names  ['personal_status', 'age'] [0, 0]
2.517730496453901 107 51 21.4042553191489
2.517730496453901 107 51 21.4042553191489
Adding 21 positive records
names  ['personal_status', 'age'] [0, 1]
2.517730496453901 28 18 17.3191489361702
2.517730496453901 28 18 17.3191489361702
Adding 17 positive records
names  ['

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentat

Disparate impact (of original labels) = 1.000463
Difference in statistical parity (of original labels) = 0.000306
Individual fairness metric (consistency) = 0.776529
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.731333
[[ 38  26]
 [ 52 184]]
No missing values detected.
The 'credit' column has only two unique values.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
['personal_status', 'age', 'credit']
[] [] ['personal_status', 'age'] {}
['personal_status', 'age', 'credit']
1.9565217391304348 34 25 14.9130434782609
1.9565217391304348 34 25 14.9130434782609
Adding 15 positive records
names  ['personal_status', 'age'] [0, 0]
2.517730496453901 107 51 21.4042553191489
2.517730496453901 107 51 21.4042553191489
Adding 21 positive records
names  ['personal_status', 'age'] [0, 1]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0


2.517730496453901 28 18 17.3191489361702
2.517730496453901 28 18 17.3191489361702
Adding 17 positive records
names  ['personal_status', 'age'] [1, 0]
1.9565217391304348 321 116 48.0666666666667
Adding 48 negative records
names  ['personal_status', 'age'] [1, 1]
['age', 'credit']
['personal_status', 'credit']
Disparate impact (of original labels) = 1.000463
Difference in statistical parity (of original labels) = 0.000306
Individual fairness metric (consistency) = 0.772035
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.746667
[[ 44  32]
 [ 46 178]]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentat

No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
['personal_status', 'age', 'credit']
[] [] ['personal_status', 'age'] {}
['personal_status', 'age', 'credit']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentat

1.9565217391304348 34 25 14.9130434782609
1.9565217391304348 34 25 14.9130434782609
Adding 15 positive records
names  ['personal_status', 'age'] [0, 0]
2.517730496453901 107 51 21.4042553191489
2.517730496453901 107 51 21.4042553191489
Adding 21 positive records
names  ['personal_status', 'age'] [0, 1]
2.517730496453901 28 18 17.3191489361702
2.517730496453901 28 18 17.3191489361702
Adding 17 positive records
names  ['personal_status', 'age'] [1, 0]
1.9565217391304348 321 116 48.0666666666667
Adding 48 negative records
names  ['personal_status', 'age'] [1, 1]
['age', 'credit']
['personal_status', 'credit']
Disparate impact (of original labels) = 1.000463
Difference in statistical parity (of original labels) = 0.000306


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Individual fairness metric (consistency) = 0.766042
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.726000
[[ 39  21]
 [ 51 189]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
['personal_status', 'age', 'credit']
[] [] ['personal_status', 'age'] {}
['personal_status', 'age', 'credit']
1.9565217391304348 34 25 14.9130434782609
1.9565217391304348 34 25 14.9130434782609
Adding 15 positive records
names  ['personal_status', 'age'] [0, 0]
2.517730496453901 107 51 21.4042553191489
2.517730496453901 107 51 21.4042553191489
Adding 21 positive records
names  ['personal_status', 'age'] [0, 1]
2.517730496453901 28 18 17.3191489361702
2.517730496453901 28 18 17.3191489361702
Adding 17 positive records
names  ['

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentat

['personal_status', 'credit']
Disparate impact (of original labels) = 1.000463
Difference in statistical parity (of original labels) = 0.000306
Individual fairness metric (consistency) = 0.762547
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.715333
[[ 43  38]
 [ 47 172]]


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
['personal_status', 'age', 'credit']
[] [] ['personal_status', 'age'] {}
['personal_status', 'age', 'credit']
1.9565217391304348 34 25 14.9130434782609
1.9565217391304348 34 25 14.9130434782609
Adding 15 positive records
names  ['personal_status', 'age'] [0, 0]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0


2.517730496453901 107 51 21.4042553191489
2.517730496453901 107 51 21.4042553191489
Adding 21 positive records
names  ['personal_status', 'age'] [0, 1]
2.517730496453901 28 18 17.3191489361702
2.517730496453901 28 18 17.3191489361702
Adding 17 positive records
names  ['personal_status', 'age'] [1, 0]
1.9565217391304348 321 116 48.0666666666667
Adding 48 negative records
names  ['personal_status', 'age'] [1, 1]
['age', 'credit']
['personal_status', 'credit']
Disparate impact (of original labels) = 1.000463
Difference in statistical parity (of original labels) = 0.000306
Individual fairness metric (consistency) = 0.755056
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.747333


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentat

[[ 42  22]
 [ 48 188]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
['personal_status', 'age', 'credit']
[] [] ['personal_status', 'age'] {}
['personal_status', 'age', 'credit']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentat

1.9565217391304348 34 25 14.9130434782609
1.9565217391304348 34 25 14.9130434782609
Adding 15 positive records
names  ['personal_status', 'age'] [0, 0]
2.517730496453901 107 51 21.4042553191489
2.517730496453901 107 51 21.4042553191489
Adding 21 positive records
names  ['personal_status', 'age'] [0, 1]
2.517730496453901 28 18 17.3191489361702
2.517730496453901 28 18 17.3191489361702
Adding 17 positive records
names  ['personal_status', 'age'] [1, 0]
1.9565217391304348 321 116 48.0666666666667
Adding 48 negative records
names  ['personal_status', 'age'] [1, 1]
['age', 'credit']
['personal_status', 'credit']
Disparate impact (of original labels) = 1.000463
Difference in statistical parity (of original labels) = 0.000306


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


Individual fairness metric (consistency) = 0.763546
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.738667
[[ 39  28]
 [ 51 182]]
No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
['personal_status', 'age', 'credit']
[] [] ['personal_status', 'age'] {}
['personal_status', 'age', 'credit']
1.9565217391304348 34 25 14.9130434782609
1.9565217391304348 34 25 14.9130434782609
Adding 15 positive records
names  ['personal_status', 'age'] [0, 0]
2.517730496453901 107 51 21.4042553191489
2.517730496453901 107 51 21.4042553191489
Adding 21 positive records
names  ['personal_status', 'age'] [0, 1]
2.517730496453901 28 18 17.3191489361702
2.517730496453901 28 18 17.3191489361702
Adding 17 positive records
names  ['

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentat

['personal_status', 'credit']
Disparate impact (of original labels) = 1.000463
Difference in statistical parity (of original labels) = 0.000306
Individual fairness metric (consistency) = 0.750062
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.766000
[[ 43  26]
 [ 47 184]]


  print("Individual fairness metric (consistency) = %f" % metric_orig_train.consistency())
  print("Individual fairness metric (consistency) (test) = %f" % metric_orig_test.consistency())


No missing values detected.
The 'credit' column has only two unique values.
[(0, (1, 1, 1)), (1, (1, 1, 0)), (2, (1, 0, 1)), (3, (1, 0, 0)), (4, (0, 1, 1)), (5, (0, 1, 0)), (6, (0, 0, 1)), (7, (0, 0, 0))]
['personal_status', 'age', 'credit']
[] [] ['personal_status', 'age'] {}
['personal_status', 'age', 'credit']
1.9565217391304348 34 25 14.9130434782609
1.9565217391304348 34 25 14.9130434782609
Adding 15 positive records
names  ['personal_status', 'age'] [0, 0]
2.517730496453901 107 51 21.4042553191489
2.517730496453901 107 51 21.4042553191489
Adding 21 positive records


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0


names  ['personal_status', 'age'] [0, 1]
2.517730496453901 28 18 17.3191489361702
2.517730496453901 28 18 17.3191489361702
Adding 17 positive records
names  ['personal_status', 'age'] [1, 0]
1.9565217391304348 321 116 48.0666666666667
Adding 48 negative records
names  ['personal_status', 'age'] [1, 1]
['age', 'credit']
['personal_status', 'credit']
Disparate impact (of original labels) = 1.000463
Difference in statistical parity (of original labels) = 0.000306
Individual fairness metric (consistency) = 0.760549
Disparate impact (of original labels) (test) = 0.762899
Difference in statistical parity (of original labels) (test) = -0.174043
Individual fairness metric (consistency) (test) = 0.733333


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['cnt'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentat

[[ 44  28]
 [ 46 182]]


In [174]:
# Define the arrays
arrays = [results_orig_mean, results_notax_mean, results_tax_mean, results_smotenc_mean, results_fair_smote_mean, results_reweighing_mean, results_gerryfair_mean, results_remedy_mean]

# Define the headers
headers = ['b_acc', 'acc', 'prec', 'recall', 'f1', 'SP', 'AOD', 'TPR', 'FPR', 'C', 'TNR', 'FNR', 'PPV', 'FDR']

# Transpose the arrays
arrays_transposed = np.transpose(arrays)

print('\t \t none; wo taxo; w taxo; smonc; fairsm; reweigh; gerryF; remed')
# Print the values with corresponding headers
for header, row in zip(headers, arrays_transposed):
    row_values = ['{:.4f}'.format(value) for value in row]
    print(header +'\t'+ '\t' + '\t'.join(row_values))

	 	 none; wo taxo; w taxo; smonc; fairsm; reweigh; gerryF; remed
b_acc		0.6889	0.6564	0.6684	0.6639	0.6672	0.6823	0.6860	0.6623
acc		0.7707	0.7483	0.7567	0.7570	0.7243	0.7623	0.7443	0.7437
prec		0.8019	0.7831	0.7900	0.7862	0.7994	0.7991	0.8145	0.7887
recall		0.8933	0.8862	0.8890	0.8967	0.8100	0.8824	0.8319	0.8657
f1		0.8450	0.8312	0.8364	0.8377	0.8041	0.8386	0.8172	0.8252
SP		0.6889	1.0301	1.0080	0.9629	1.1662	0.8926	0.7110	1.0120
AOD		-0.2109	0.0858	0.0660	0.0376	0.1952	-0.0288	-0.1652	0.0767
TPR		-0.1146	0.0610	0.0374	-0.0407	0.0893	0.0230	-0.0994	0.0341
FPR		-0.3073	0.1105	0.0945	0.1158	0.3011	-0.0805	-0.2311	0.1193
C		0.8611	0.8684	0.8741	0.8743	0.8312	0.8601	0.8570	0.8561
TNR		-0.3073	0.1105	0.0945	0.1158	0.3011	-0.0805	-0.2311	0.1193
FNR		0.1146	-0.0610	-0.0374	0.0407	-0.0893	-0.0230	0.0994	-0.0341
PPV		0.0133	0.1682	0.1689	0.2017	0.2237	0.0949	0.0367	0.1781
FDR		0.0133	0.1682	0.1689	0.2017	0.2237	0.0949	0.0367	0.1781


In [175]:
counts_remedy = X_train_resampled_remedy.groupby(['personal_status', 'age', 'credit']).size()

# Iterate over the groups and print the results
for (personal_status, age, credit), count in counts_remedy.items():
    print(f"Personal Status: {personal_status}, Age: {age}, Credit: {credit}, Count: {count}")

Personal Status: 0, Age: 0, Credit: 0, Count: 25
Personal Status: 0, Age: 0, Credit: 1, Count: 49
Personal Status: 0, Age: 1, Credit: 0, Count: 51
Personal Status: 0, Age: 1, Credit: 1, Count: 128
Personal Status: 1, Age: 0, Credit: 0, Count: 18
Personal Status: 1, Age: 0, Credit: 1, Count: 45
Personal Status: 1, Age: 1, Credit: 0, Count: 164
Personal Status: 1, Age: 1, Credit: 1, Count: 321
