In [1]:
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d
from statsmodels.distributions.empirical_distribution import ECDF
from fairness import *
import shutil
import time
from sklearn.metrics import roc_curve, auc

# ---- Quantile Function Class ----
class EQF:
    """Empirical quantile function of a one-dimensional sample.

    The sorted sample is placed on an evenly spaced grid over [0, 1] and
    linearly interpolated, so calling the object with a level in [0, 1]
    returns the corresponding interpolated sample value.
    """

    def __init__(self, sample_data):
        self._calculate_eqf(sample_data)

    def _calculate_eqf(self, sample_data):
        """Build the interpolator and remember the sample's extreme values."""
        ordered = np.sort(sample_data)
        # One grid node per observation: 0 maps to the minimum, 1 to the maximum.
        levels = np.linspace(0, 1, num=len(sample_data))
        self.interpolater = interp1d(levels, ordered)
        self.min_val, self.max_val = ordered[0], ordered[-1]

    def __call__(self, value_):
        """Evaluate the quantile function at ``value_``.

        When the interpolator rejects the input with a ValueError, the input
        is compared with the stored sample extremes: 0.0 is returned if it is
        below the sample minimum, 1.0 if it is above the sample maximum, and
        a ValueError is raised otherwise.
        """
        try:
            result = self.interpolater(value_)
        except ValueError:
            below = value_ < self.min_val
            if below:
                return 0.0
            above = value_ > self.max_val
            if above:
                return 1.0
            raise ValueError('Error with input value')
        return result

# ---- Fairness Objects Calculation ----
def get_fairness_objects(sensitive_vector, predictions_s_0, predictions_s_1):
    """
    Build the per-group distribution objects used by the fairness repair.

    Parameters:
    - sensitive_vector: Array of group indicators; entries equal to 0.0 are
      counted as the non-sensitive group.
    - predictions_s_0: Predictions for the non-sensitive group.
    - predictions_s_1: Predictions for the sensitive group.

    Returns:
    - pi_dict: Share of each group in sensitive_vector.
    - ecdf_dict: ECDF object fitted on each group's flattened predictions.
    - eqf_dict: EQF object fitted on each group's flattened predictions.

    All three dictionaries use the keys 'p_non_sensitive' and 'p_sensitive'.
    """
    flattened = (
        ('p_non_sensitive', predictions_s_0),
        ('p_sensitive', predictions_s_1),
    )

    # Empirical CDFs first, then quantile functions, each on the flattened scores
    ecdf_dict = {key: ECDF(scores.reshape(-1,)) for key, scores in flattened}
    eqf_dict = {key: EQF(scores.reshape(-1,)) for key, scores in flattened}

    # Group shares: the sensitive share is the complement of the non-sensitive one
    total = sensitive_vector.shape[0]
    share_non_sensitive = sensitive_vector[sensitive_vector == 0.0].shape[0] / total
    pi_dict = {
        'p_non_sensitive': share_non_sensitive,
        'p_sensitive': 1 - share_non_sensitive,
    }

    return pi_dict, ecdf_dict, eqf_dict

# ---- Data Reading Function ----
def read_data(path_base, model, task, sens_dict):
    """
    Load the train, validation and test splits for one task/model pair.

    The train and validation splits are concatenated (in that order) and
    returned together as the training set.

    Parameters:
    - path_base: Base path for the data.
    - model: Model name.
    - task: Task name.
    - sens_dict: Sensitive attribute dictionary, forwarded to make_sens_vector.

    Returns:
    - TRAIN: [data frame, score frame, sensitive vector] for train + validation.
    - TEST: [data frame, score frame, sensitive vector] for the test split.
    """
    data_dir = f'{path_base}/DATA_VLDB/{task}'
    score_dir = f'{path_base}/VLDB_RES/{task}_{model}'

    def _load_split(split):
        # Read the records, then the model scores, then derive the sensitive vector
        frame = pd.read_csv(f'{data_dir}/{split}.csv')
        scores = pd.read_csv(f'{score_dir}/score_{split}.csv')
        return frame, scores, make_sens_vector(frame, task, sens_dict)

    train_frame, train_scores, train_sens = _load_split('train')
    valid_frame, valid_scores, valid_sens = _load_split('valid')

    # Validation rows are appended after the training rows (indices are not reset)
    TRAIN = [
        pd.concat([train_frame, valid_frame]),
        pd.concat([train_scores, valid_scores]),
        np.concatenate((train_sens, valid_sens)),
    ]

    TEST = list(_load_split('test'))

    return TRAIN, TEST

# ---- Fairness Estimation Function ----
def get_fair_estimation(p_dict, ecdf_dict, eqf_dict, predictions_nonsensitive, predictions_sensitive, jitter=0.0001):
    """
    Estimate fair probabilities for both sensitive and non-sensitive groups.

    Each prediction is first mapped through the ECDF of its own group and the
    resulting level is then pushed through the quantile functions of both
    groups; the output is the average of the two quantiles weighted by the
    group proportions.

    Parameters:
    - p_dict: Proportions of sensitive and non-sensitive groups.
    - ecdf_dict: ECDF objects for both groups.
    - eqf_dict: EQF objects for both groups.
    - predictions_nonsensitive: Predictions for non-sensitive group.
    - predictions_sensitive: Predictions for sensitive group.
    - jitter: Accepted for backward compatibility only. No noise is added to
      the inputs or outputs by this function.

    Returns:
    - vals_1: Calibrated values for non-sensitive group.
    - vals_2: Calibrated values for sensitive group.

    All three dictionaries are read with the keys 'p_non_sensitive' and
    'p_sensitive'.
    """
    # The previous implementation reseeded NumPy's global RNG from the wall
    # clock and sampled a jitter matrix that was never used. That changed the
    # random state of every caller without affecting the result, so both
    # steps have been dropped.

    def _repair(predictions, own_group):
        # Level of each prediction within its own group's distribution
        levels = ecdf_dict[own_group](predictions)
        # zeros_like keeps the shape and dtype of the input predictions
        repaired = np.zeros_like(predictions)
        repaired += p_dict['p_non_sensitive'] * eqf_dict['p_non_sensitive'](levels)
        repaired += p_dict['p_sensitive'] * eqf_dict['p_sensitive'](levels)
        return repaired

    vals_1 = _repair(predictions_nonsensitive, 'p_non_sensitive')
    vals_2 = _repair(predictions_sensitive, 'p_sensitive')

    return vals_1, vals_2

# ---- Fairness Metrics Calculation ----
def _stats_(sens_array, prob_array, y_array):
    """
    Compute threshold-averaged group-fairness gaps and the overall AUC.

    For each of 500 evenly spaced thresholds in [0, 1], a score strictly above
    the threshold counts as a positive prediction. The positive rate, true
    positive rate and false positive rate are computed separately for the
    group with sens_array == 1 ("minor") and the group with sens_array == 0
    ("major"), and the absolute gaps between the groups are averaged over all
    thresholds.

    Parameters:
    - sens_array: Sensitive attribute array (1 = minor group, 0 = major group).
    - prob_array: Probability predictions array.
    - y_array: Ground truth labels array (0/1).

    Returns:
    - DSP_EOD: 100 * mean over thresholds of |TPR gap| + |FPR gap|.
    - DSP_EO: 100 * mean over thresholds of |TPR gap|.
    - DSP_DP: 100 * mean over thresholds of |positive-rate gap|.
    - auc_all: 100 * ROC AUC over all data.
    - Frac: The threshold grid that was evaluated.

    Raises:
    - ValueError: If a group is empty, contains labels other than 0/1, or
      lacks either positives or negatives (TPR or FPR would be undefined).
    """
    sens_values = np.asarray(sens_array)
    score_values = np.asarray(prob_array)
    label_values = np.asarray(y_array)

    def _prepare_group(mask, group_name):
        # Pre-compute everything that does not depend on the threshold
        group_scores = score_values[mask]
        group_labels = label_values[mask]
        positives = group_labels == 1
        negatives = group_labels == 0
        n_pos = np.count_nonzero(positives)
        n_neg = np.count_nonzero(negatives)
        if n_pos + n_neg != group_labels.size:
            raise ValueError(f'{group_name} group contains labels other than 0/1')
        if n_pos == 0 or n_neg == 0:
            raise ValueError(
                f'{group_name} group needs at least one positive and one '
                f'negative label (got {n_pos} positive, {n_neg} negative)')
        return group_scores, positives, negatives, n_pos, n_neg

    def _rates(group, theta):
        # Returns (positive rate, TPR, FPR) of one group at one threshold
        group_scores, positives, negatives, n_pos, n_neg = group
        predicted = group_scores > theta
        tp = np.count_nonzero(predicted & positives)
        fp = np.count_nonzero(predicted & negatives)
        return (tp + fp) / (n_pos + n_neg), tp / n_pos, fp / n_neg

    minor = _prepare_group(sens_values == 1, 'minor')
    major = _prepare_group(sens_values == 0, 'major')

    # Calculate overall AUC
    fpr_curve, tpr_curve, _ = roc_curve(y_array, prob_array)
    auc_all = 100 * auc(fpr_curve, tpr_curve)

    N = 500
    thresholds = np.linspace(0, 1, N)

    DP, EO, EOD = [], [], []
    for theta in thresholds:
        pr_minor, tpr_minor, fpr_minor = _rates(minor, theta)
        pr_major, tpr_major, fpr_major = _rates(major, theta)

        tpr_gap = np.abs(tpr_major - tpr_minor)
        # The FPR gap must compare the two groups. Previously the minority FPR
        # was overwritten and the term was always |fpr - fpr| == 0.
        fpr_gap = np.abs(fpr_major - fpr_minor)

        EOD.append(tpr_gap + fpr_gap)
        EO.append(tpr_gap)
        DP.append(np.abs(pr_major - pr_minor))

    DSP_EOD = 100 * np.average(EOD)
    DSP_EO = 100 * np.average(EO)
    DSP_DP = 100 * np.average(DP)
    Frac = thresholds

    return DSP_EOD, DSP_EO, DSP_DP, auc_all, Frac



In [4]:
import numpy as np
import pandas as pd
from fairness import *
import pickle


# os.getcwd() is used below; import it here so this cell does not rely on
# `from fairness import *` happening to expose `os`.
import os


def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE: pickle.load can execute arbitrary code. These files must come
    # from a trusted source.
    with open(path, 'rb') as file:
        return pickle.load(file)


def _split_score_columns(scores):
    """Return (probability column, label column) of a score frame.

    A column with more than two distinct values is taken as the probability
    column; any other column is taken as the label column. If several columns
    match, the last one wins.

    Raises ValueError when either kind of column is missing, instead of
    silently reusing values left over from a previous task/model.
    """
    prob, label = None, None
    for column in scores.columns:
        if len(np.unique(scores[column])) > 2:
            prob = scores[column]
        else:
            label = scores[column]
    if prob is None or label is None:
        raise ValueError(
            'could not identify both a probability column and a label column '
            f'among {list(scores.columns)}')
    return prob, label


def _repair_scores(prob, sens, threshold, jitter=0):
    """Repair ``prob`` separately above and below ``threshold``.

    Fairness objects are fitted on the scores strictly above the threshold
    and on the scores at or below it. Every score is then mapped through the
    objects of its own side, using its own group (sens == 1 is the sensitive
    group). If any repaired value falls outside [0, 1], the whole vector is
    min-max rescaled.

    Returns a pandas Series aligned with ``prob``.
    """
    sens = np.asarray(sens)
    noisy = np.asarray(prob + np.random.uniform(-jitter, jitter, prob.shape))

    fitted = {}
    for side, mask in (('pos', noisy > threshold), ('neg', noisy <= threshold)):
        side_scores = noisy[mask]
        side_sens = sens[mask]
        fitted[side] = get_fairness_objects(
            side_sens,
            side_scores[side_sens == 0],  # non-sensitive = 0
            side_scores[side_sens == 1],  # sensitive = 1
        )

    # Scores are repaired one at a time on purpose: EQF falls back to a scalar
    # comparison for out-of-range levels, which only works for single values.
    empty = np.array([]).reshape(-1,)
    repaired = []
    for idx, pnt in enumerate(prob):
        point = np.array([pnt]).reshape(-1,)
        if sens[idx] == 1:
            in_0, in_1 = empty, point
        else:
            in_0, in_1 = point, empty

        p_side, e_side, q_side = fitted['pos'] if pnt > threshold else fitted['neg']
        fair_nonsensitive, fair_sensitive = get_fair_estimation(
            p_side, e_side, q_side, in_0, in_1, jitter=jitter)

        if len(fair_sensitive) == 0:
            repaired.append(fair_nonsensitive[0])
        else:
            repaired.append(fair_sensitive[0])

    calibrated = pd.Series(repaired, index=prob.index, name='calibrated')
    if ((calibrated < 0) | (calibrated > 1)).any():
        calibrated = calibrated - calibrated.min()
        calibrated = calibrated / calibrated.max()
    return calibrated


sens_attr_dict_valid = _load_pickle('saved_params/sens_attr_dict_valid.pkl')
sens_attr_dict_train = _load_pickle('saved_params/sens_attr_dict_train.pkl')
sens_attr_dict_test = _load_pickle('saved_params/sens_attr_dict_test.pkl')

RES = {}
plots_res = {}
result_dict = {}
path_base = os.getcwd()

tasks = [ 'Fodors-Zagat','DBLP-GoogleScholar', 'iTunes-Amazon', 'Walmart-Amazon', 'Amazon-Google','Beer','DBLP-ACM']
models = ['deepmatcher', 'DITTO', 'EMTransformer', 'HierGAT','HierMatcher']

# tasks= ['iTunes-Amazon']
# models = ['HierGAT']

# No noise is added to the scores before fitting; kept as a named knob.
jitter = 0

# (task, model) -> list of (threshold, error repr) for thresholds that could
# not be evaluated on the validation data. Inspect this after the run.
skipped_thresholds = {}

for task in tasks:
    RES[task] = {}
    plots_res[task] = {}

    # Inputs that depend only on the task are loaded once per task
    df_test = pd.read_csv(path_base +'/DATA_VLDB/'+ task  + '/test.csv')
    sens_test = sens_attr_dict_test[task]
    sens_valid = np.concatenate((
        sens_attr_dict_train[task],
        sens_attr_dict_valid[task]))

    for model in models:
        RES[task][model] = {
            'EOD_min':{},
            'EO_min':{},
            }

        plots_res[task][model] = {
            'EOD_min':{},
            'EO_min':{},
            }

        # Train and validation scores together form the tuning set
        score_dir = path_base+'/VLDB_RES/'+task+'_'+model
        scores_valid = pd.concat([
            pd.read_csv(score_dir+'/score_train.csv'),
            pd.read_csv(score_dir+'/score_valid.csv')])
        prob_valid, y_valid = _split_score_columns(scores_valid)

        scores_test = pd.read_csv(score_dir+'/score_test.csv')
        prob_test, y_test = _split_score_columns(scores_test)

        # Sweep the split threshold once. With jitter == 0 the sweep is
        # deterministic, so both selection criteria can share its results.
        sweep = []
        skipped_thresholds[(task, model)] = []
        for TH in np.linspace(0,1,20):
            try:
                calibrated = _repair_scores(prob_valid, sens_valid, TH, jitter)
                DSP_EOD, DSP_EO, DSP_DP, auc_all, Frac = _stats_(sens_valid, calibrated, y_valid)
            except Exception as err:
                # Best effort: a threshold that leaves a side or group empty
                # cannot be fitted. Record why and move on. KeyboardInterrupt
                # and SystemExit are no longer swallowed.
                skipped_thresholds[(task, model)].append((TH, repr(err)))
                continue
            sweep.append((TH, DSP_EO, DSP_EOD))

        for rpt in ['EOD','EO']:
            if rpt == 'EO':
                K = 'EO_min'
                tmp_all = [eo for _, eo, _ in sweep]
            else:
                K = 'EOD_min'
                tmp_all = [eod for _, _, eod in sweep]
            frac = [th for th, _, _ in sweep]

            if not tmp_all:
                # Nothing to select from; leave the empty placeholder in RES
                print('No usable threshold for', task, model, rpt, '- skipped')
                continue

            # NOTE(review): np.argmin returns the first NaN if the metric list
            # contains one - confirm whether NaN entries should be excluded.
            TH = frac[np.argmin(tmp_all)]

            print('Before:', TH)
            DSP_EOD, DSP_EO, DSP_DP, auc_all, Frac = _stats_(sens_test,prob_test, y_test)
            print(task, model, round(DSP_DP,2), round(DSP_EO,2), round(DSP_EOD,2), auc_all)

            # The fairness objects are re-fitted on the test scores themselves,
            # using the threshold selected on the tuning set.
            calibrated = _repair_scores(prob_test, sens_test, TH, jitter)
            DSP_EOD2, DSP_EO2, DSP_DP2, auc_all2, Frac = _stats_(sens_test, calibrated, y_test)

            print('After: ', rpt)
            print(task, model, round(DSP_DP2,2), '| eo',round(DSP_EO2,2), '| eod', round(DSP_EOD2,2), auc_all2)

            print()

            RES[task][model][K] = {
                'before':{
                    'DP':DSP_DP,
                    'EO':DSP_EO,
                    'EOD': DSP_EOD,
                    'auc':auc_all
                },
                'after':{
                    'gamma':TH,
                    'DP':DSP_DP2,
                    'EO':DSP_EO2,
                    'EOD': DSP_EOD2,
                    'auc':auc_all2
                }
            }

            plots_res[task][model][K] = {'TH':frac, 'tmp_all':tmp_all}

        print('-------')


Before: 0.21052631578947367
Fodors-Zagat deepmatcher 2.86 5.56 5.81 100.0
After:  EOD
Fodors-Zagat deepmatcher 2.52 | eo 1.0 | eod 1.14 100.0

Before: 0.21052631578947367
Fodors-Zagat deepmatcher 2.86 5.56 5.81 100.0
After:  EO
Fodors-Zagat deepmatcher 2.52 | eo 1.0 | eod 1.14 100.0

-------
Before: 0.7894736842105263
Fodors-Zagat DITTO 3.11 8.02 8.63 99.97278170930866
After:  EOD
Fodors-Zagat DITTO 1.0 | eo 6.38 | eod 7.79 99.93195427327164

Before: 0.7894736842105263
Fodors-Zagat DITTO 3.11 8.02 8.63 99.97278170930866
After:  EO
Fodors-Zagat DITTO 1.0 | eo 6.38 | eod 7.79 99.93195427327164

-------
Before: 0.05263157894736842
Fodors-Zagat EMTransformer 3.13 0.0 0.09 100.0
After:  EOD
Fodors-Zagat EMTransformer 3.67 | eo 0.0 | eod 0.7 100.0

Before: 0.05263157894736842
Fodors-Zagat EMTransformer 3.13 0.0 0.09 100.0
After:  EO
Fodors-Zagat EMTransformer 3.67 | eo 0.0 | eod 0.7 100.0

-------
Before: 0.05263157894736842
Fodors-Zagat HierGAT 2.3 7.02 7.02 100.0
After:  EOD
Fodors-Zagat H

In [5]:
def _dump_to_pickle(payload, target):
    # Serialise one result container into a pickle file in the working directory
    with open(target, 'wb') as file:
        pickle.dump(payload, file)


# Metrics before/after repair for every task, model and selection criterion
_dump_to_pickle(RES, 'repair_label_wise_result.pkl')

# Threshold sweep curves kept for plotting
_dump_to_pickle(plots_res, 'repair_label_wise_result_plot.pkl')

