#Imports and Dataset processing

In [1]:
import pandas as pd
import time
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn import metrics
import copy
from sympy import Symbol
from sympy.solvers import solve
pd.options.mode.chained_assignment = None
import csv
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.simplefilter('ignore')

Import Dataset

In [2]:

url = "https://raw.githubusercontent.com/niceIrene/remedy/main/datasets/bar_pass_new.csv"
data = pd.read_csv(url)
data = data.drop(columns=["gender"])
data

Unnamed: 0,decile1b,decile3,decile1,sex,lsat,ugpa,grad,bar1,fulltime,fam_inc,age,race1,tier
0,3.0,2.0,3.0,2.0,0,0,0,0,1.0,1.0,0,0,3.0
1,7.0,5.0,7.0,1.0,1,0,0,1,1.0,2.0,0,1,1.0
2,2.0,2.0,3.0,1.0,1,0,0,1,1.0,4.0,0,0,2.0
3,5.0,3.0,5.0,1.0,1,0,0,1,1.0,4.0,0,0,5.0
4,5.0,5.0,5.0,1.0,2,0,0,0,1.0,3.0,0,0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4500,9.0,6.0,9.0,2.0,1,1,0,1,1.0,2.0,2,2,1.0
4501,1.0,1.0,1.0,2.0,1,1,0,1,1.0,3.0,2,4,2.0
4502,3.0,1.0,3.0,2.0,1,1,0,1,1.0,2.0,0,2,1.0
4503,2.0,1.0,3.0,2.0,1,1,0,1,1.0,3.0,1,2,1.0


In [3]:
# get training and testing set

# protected attributes
columns_compas = ["sex", "age", "race1", "fam_inc"]

# all columns of dataset
columns_all = ['decile1b', 'decile3', 'decile1', 'sex', 'lsat', 'ugpa', 'grad', 'fulltime', 'fam_inc', 'race1', 'tier', 'age']
compas_y = 'bar1'
def split_train_test(data,test_ratio):
    np.random.seed(42)
    shuffled_indices = np.random.permutation(len(data))
    test_set_size = int(len(data) * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices],data.iloc[test_indices]

def get_train_test(data, split, list_cols, y_label):
  all_list = copy.deepcopy(list_cols)
  all_list.append(y_label)
  data = pd.DataFrame(data, columns = all_list)
  train_set,test_set = split_train_test(data,split)
  print(len(train_set), "train +", len(test_set), "test")
  train_x = pd.DataFrame(train_set, columns = list_cols)
  train_label = train_set[y_label]
  test_x = pd.DataFrame(test_set, columns = list_cols)
  test_label = test_set[y_label]
  return train_x, test_x, train_label, test_label, train_set, test_set

In [4]:
train_x, test_x, train_label, test_label, train_set, test_set  = get_train_test(data, 0.3, columns_all, compas_y)

3154 train + 1351 test


In [5]:
def fpr_onegroup(true, predict):
    fp = 0
    tn = 0
    for i in range(len(true)):
        if (true[i] == 0 and predict[i] == 1):
            fp += 1
        if(true[i] == 0 and predict[i] == 0):
            tn += 1
    return fp/(fp+tn)


def fnr_onegroup(true, predict):
    fn = 0
    tp = 0
    for i in range(len(true)):
        if (true[i] == 1 and predict[i] == 0):
            fn += 1
        if(true[i] == 1 and predict[i] == 1):
            tp += 1
    return fn/(fn+tp)

Initialize ML models of Choice

In [6]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, make_scorer


filter_count = 30



scoring = make_scorer(accuracy_score)


# #####################
param_gridlg = {
    'penalty': ['l1', 'l2'],
    'C': [0.01, 0.1, 1, 10, 100]
}
logreg = LogisticRegression(random_state=42, max_iter=1000)
gridlg = GridSearchCV(logreg, param_grid=param_gridlg, scoring=scoring, cv=5)
# #####################
param_griddt = {
    'max_depth': [2, 5, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
dt = DecisionTreeClassifier(random_state=42)
griddt = GridSearchCV(dt, param_grid=param_griddt, scoring=scoring, cv=5)

# #####################
param_gridrf = {'criterion': ['gini', 'entropy'], 'max_depth': [10, 20, 30, 40, 50, 100], 'random_state':[17]}
# param_gridrf = {
#     'max_depth': [10, 20, 30],
#     'min_samples_split': [2, 5, 10],
#     'min_samples_leaf': [1, 2, 4],
# }
rf = RandomForestClassifier(random_state=42)
gridrf = GridSearchCV(rf, param_grid=param_gridrf, scoring=scoring, cv=5)




# Divexplorer

In [7]:
!pip install DivExplorer==0.1.1



In [8]:
def fairness_score_computation(d, metrics):
    sum_of_score = 0
    for idx, row in d.iterrows():
      sum_of_score += row['support'] * row[metrics]
    return sum_of_score

In [9]:
def get_test_predict(gridalg, train_x, alg_name):
  gridalg.fit(train_x, train_label)
  print("best {}".format(alg_name), gridalg.best_score_)
  test_predict = gridalg.predict(test_x)
  data_all = pd.concat([train_x,test_x])
  data_predict = gridalg.predict(data_all)
  test_predict = gridalg.predict(test_x)
  data['predicted'] = data_predict
  test_set['predicted'] = test_predict

In [10]:

# # for SVC
clf = SVC(kernel='rbf', C=6.0, gamma = 100, random_state =42)
# clf.fit(train_x, train_label)
# test_predict = clf.predict(test_x)
# test_set['predicted'] = test_predict

In [11]:
with open('Adult_results.csv', 'w', newline='') as file:
  writer = csv.writer(file)
  writer.writerow(["Dataset","Remedy", "Algorithm","d_fpr","d_fnr", "d_acc", "model_acc"])
  # writer.writerow(["Adult", "Original", "DT", dfpr, dfnr, dacc, accuracy])

# For entire dataset


In [12]:
import itertools
def get_unfair_group(list_parse, entire = 1):
  unfair_group = []
  unfair_dict = {}
  names = []
  for col in columns_compas:
    found = False
    for item in list_parse:
      attr_given = item.split("=")[0]
      if col == attr_given:
        unfair_group.append(int(item.split("=")[1]))
        names.append(attr_given)
        unfair_dict[attr_given] = int(item.split("=")[1])
        found = True
  # if use the entire dataset
  if entire:
    return unfair_group, names, columns_compas, unfair_dict
        # break
    # if found == False:
    #   unfair_group.append(-1)
  return unfair_group, names, list(set(columns_compas).symmetric_difference(set(names))), unfair_dict
def candidate_groups(skew_candidates, unfair_dict, ordering, names):
  candidate_combos = []
  candidate_ind = {}
  num = 0
  for i in range(len(skew_candidates)+1):
    temp_candidate = list(itertools.combinations(skew_candidates, i))
    for tc in temp_candidate:
      candidate_ind[num] = list(tc)
      num += 1
  return candidate_ind

def name_val_dict(train_set,names):
  names_values = {}
  for n in names:
    names_values[n] = list(train_set[n].unique())
  return names_values



In [13]:
def get_temp(train_set, names, y_label):
  names2 = copy.deepcopy(names)
  names2.append(y_label)
  temp = train_set[names2]
  temp['cnt'] = 0
  temp2 = temp.groupby(names2)['cnt'].count().reset_index()
  temp2['cnt'].sum()
  return temp2, names
temp2, names = get_temp(train_set, columns_compas, compas_y)
temp2

Unnamed: 0,sex,age,race1,fam_inc,bar1,cnt
0,1.0,0,0,1.0,0,4
1,1.0,0,0,1.0,1,4
2,1.0,0,0,2.0,0,20
3,1.0,0,0,2.0,1,17
4,1.0,0,0,3.0,0,140
...,...,...,...,...,...,...
220,2.0,4,0,1.0,0,1
221,2.0,4,0,2.0,0,1
222,2.0,4,0,3.0,1,1
223,2.0,4,0,4.0,1,1


In [14]:
def get_temp_g(train_set, names, y_label):
  names2 = copy.deepcopy(names)
  names2.append(y_label)
  temp = train_set[names2]
  temp['cnt'] = 0
  temp_g = temp.groupby(names)['cnt'].count().reset_index()
  return temp, temp_g

In [15]:
unfair_group, unfair_names, skew_candidates, unfair_dict = get_unfair_group([])
print(unfair_group, unfair_names, skew_candidates, unfair_dict)
all_names = candidate_groups(skew_candidates, unfair_dict, columns_compas, unfair_names)
names_values = name_val_dict(train_set, names)

all_names_lst = list(all_names.keys())[1:] # CHANGED HERE
all_names_lst.reverse()
all_names_lst

[] [] ['sex', 'age', 'race1', 'fam_inc'] {}


[15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]

# Helper Functions

## General Helper Functions


In [16]:
def get_one_degree_neighbors(temp2, names, group_lst):
    result = []
    for i in range(len(group_lst)):
        d = copy.copy(temp2)
        for k in range(len(group_lst)):
            if k != i:
                d = d[d[names[k]] == group_lst[k]]
            else:
                d = d[d[names[k]] != group_lst[k]]
        # print(d)
        result.append(d)
    return result

In [17]:
# compute the pos/neg ration of this neighbor
def compute_neighbors(group_lst, result):
    # compute the ratio of positive and negative records
    start2 = time.time()
    pos = 0
    neg = 0
    for r in result:
        total  = r['cnt'].sum()
        r = r[r[compas_y] == 1]
        pos += r['cnt'].sum()
        neg += total - r['cnt'].sum()
    if(neg == 0):
        return (pos, neg, -1)
    end2 = time.time()
    # print("The time to compute the neighbor counts for " +  str(group_lst) +" is " + str(end2-start2))
    return(pos, neg, pos/neg)

In [18]:
def compute_diff_add_and_remove(group_lst, temp2, need_positive_or_negative, label, names):
    d = copy.copy(temp2)
    for i in range(len(group_lst)):
        d = d[d[names[i]] == group_lst[i]]
        # print(len(d))
    total =  d['cnt'].sum()
    # Total here was 0: here, errors when this is commented out
    if total == 0:
      return -1
    d = d[d[label] == 1]
    pos = d['cnt'].sum()
    # print(d, group_lst)
    neg = total - pos
    result = get_one_degree_neighbors(temp2,names, group_lst)
    neighbors = compute_neighbors(group_lst, result)
    if(need_positive_or_negative == 1):
        # need pos
        x = Symbol('x')
        try:
          diff = solve((pos + x)/ (neg - x) - neighbors[2])[0]
        except:
          return -1

    else:
        #need negative
        x = Symbol('x')
        try:
          diff = solve((pos - x)/ (neg + x) - neighbors[2])[0]
        except:
          return -1
    return diff

In [19]:
def compute_diff_add(group_lst, temp2, names, label_y, need_positive_or_negative):
    d = copy.copy(temp2)

    for i in range(len(group_lst)):
        d = d[d[names[i]] == group_lst[i]]
    total =  d['cnt'].sum()
    d = d[d[label_y] == 1]
    pos = d['cnt'].sum()
    neg = total - pos
    result = get_one_degree_neighbors(temp2, names, group_lst)
    neighbors = compute_neighbors(group_lst, result)
    if(need_positive_or_negative == 1):

        x = Symbol('x')
        try:
          diff = solve((pos + x)/ neg -  neighbors[2])[0]
        except:
          return -1

        print(neighbors[2], pos, neg, diff)
    else:
        #need negative
        x = Symbol('x')
        try:
          diff = solve(pos/ (neg + x) -  neighbors[2])[0]
        except:
          return -1
    print(neighbors[2], pos, neg, diff)
    return diff

def compute_diff_remove(group_lst, temp2, names, label_y, need_positive_or_negative):
    d = copy.copy(temp2)
    for i in range(len(group_lst)):
        d = d[d[names[i]] == group_lst[i]]
    total =  d['cnt'].sum()
    d = d[d[label_y] == 1]
    pos = d['cnt'].sum()
    neg = total - pos
    result = get_one_degree_neighbors(temp2, names, group_lst)
    neighbors = compute_neighbors(group_lst, result)
    if(need_positive_or_negative == 1):
        # need pos, remove some neg
        x = Symbol('x')
        try:
          diff = solve( pos/ (neg - x) -  neighbors[2])[0]
        except:
          return -1
        print(neighbors[2], pos, neg, diff)
    else:
        #need negative
        x = Symbol('x')
        try:
          diff = solve((pos -x )/ neg -  neighbors[2])[0]
        except:
          return -1
        print(neighbors[2], pos, neg, diff)
    return diff


In [20]:
from divexplorer.FP_DivergenceExplorer import FP_DivergenceExplorer

def div_results(db, remedy, algo):
  columns_compas.extend([compas_y, "predicted"])

  df = pd.DataFrame(test_set, columns = columns_compas)

  columns_compas.remove(compas_y)
  columns_compas.remove('predicted')
  class_map={'N': 0, 'P': 1}

  min_sup=0.1



  fp_diver=FP_DivergenceExplorer(df,compas_y, "predicted", class_map=class_map)
  FP_fm=fp_diver.getFrequentPatternDivergence(min_support=min_sup, metrics=["d_fpr", "d_fnr", "d_accuracy"])
  from divexplorer.FP_Divergence import FP_Divergence
  fp_divergence_fpr=FP_Divergence(FP_fm, "d_fpr")
  fp_divergence_fnr=FP_Divergence(FP_fm, "d_fnr")
  fp_divergence_acc=FP_Divergence(FP_fm, "d_accuracy")

  INFO_VIZ=["support", "itemsets",  fp_divergence_fpr.metric, fp_divergence_fpr.t_value_col]
  INFO_VIZ2=["support", "itemsets",  fp_divergence_fnr.metric, fp_divergence_fnr.t_value_col]
  INFO_VIZ3=["support", "itemsets",  fp_divergence_acc.metric, fp_divergence_acc.t_value_col]

  K=200
  # summerization
  eps=0.01



  d = fp_divergence_fpr.getDivergence(th_redundancy=eps)[INFO_VIZ]
  d2 = fp_divergence_fnr.getDivergence(th_redundancy=eps)[INFO_VIZ2]
  d3 = fp_divergence_acc.getDivergence(th_redundancy=eps)[INFO_VIZ3]


  d= d[d['d_fpr'] > 0]
  d2= d2[d2['d_fnr'] > 0]
  d3= d3[d3['d_accuracy'] > 0]

  dfpr = fairness_score_computation(d, 'd_fpr')
  dfnr = fairness_score_computation(d2, 'd_fnr')
  dacc = fairness_score_computation(d3, 'd_accuracy')
  print(dfpr)
  print(dfnr)
  print(dacc)
  accuracy = accuracy_score(test_label, test_set['predicted'])
  print("accuracy is " , accuracy)
  writelist = [db,remedy,algo, dfpr, dfnr, dacc, accuracy]
  with open('Adult_results.csv', 'a', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(writelist)
  print(writelist)
  return d,d2,d3

## Optimized Helper Function


In [21]:
# helper function for optimized
def compute_neighbors_opt(group_lst,lst_of_counts, pos, neg):
    #start2 = time.time()
    times = len(group_lst)
    pos_cnt = 0
    neg_cnt = 0
    for i in range(times):
        df_groupby = lst_of_counts[i]
        temp_group_lst_pos = copy.copy(group_lst)
        temp_group_lst_neg = copy.copy(group_lst)
        del temp_group_lst_pos[i]
        del temp_group_lst_neg[i]
        # count positive
        temp_group_lst_pos.append(1)
        group_tuple_pos = tuple(temp_group_lst_pos)
        if group_tuple_pos in df_groupby.keys():
            pos_cnt += df_groupby[group_tuple_pos]
        else:
            pos_cnt += 0
        # count negative
        temp_group_lst_neg.append(0)
        group_tuple_neg = tuple(temp_group_lst_neg)
        if group_tuple_neg in df_groupby.keys():
            neg_cnt += df_groupby[group_tuple_neg]
        else:
            neg_cnt += 0
    pos_val = pos_cnt - times* pos
    neg_val = neg_cnt - times* neg
    #end2 = time.time()
    #print("The time to compute the neighbor counts for " +  str(group_lst) +" is " + str(end2-start2))
    if neg_val == -1 or (neg_val == 0 and pos_val == 0):
        return (pos_val, neg_val, -1)
    if pos_val == 0 or neg_val == 0:
        return (pos_val, neg_val, 0)
    # print("here", pos_val, neg_val, pos_val/neg_val)

    return (pos_val, neg_val, pos_val/neg_val)

  and should_run_async(code)


In [22]:
# get the list of neighbors
def get_one_degree_neighbors_opt(group_lst):
    start1 = time.time()
    result = []
    for i in range(len(group_lst)):
        d = copy.copy(group_lst)
        d[i] = 'x'
        result.append(d)
    end1 = time.time()
    return result

  and should_run_async(code)


In [23]:
def determine_problematic_opt(group_lst, names, temp2, lst_of_counts, label, threshold= 0.3):
    #0: ok group, 1: need negative records, 2: need positive records
    d = copy.copy(temp2)
    for i in range(len(group_lst)):
        d = d[d[names[i]] == group_lst[i]]
    total =  d['cnt'].sum()
    d = d[d[label] == 1]
    pos = d['cnt'].sum()
    neg = total - pos
    neighbors = compute_neighbors_opt(group_lst,lst_of_counts, pos, neg)
    if(neighbors[2] == -1):
        # there is no neighbors
        return 0
    if(total > 30):
        # need to be large enough, need to adjust with different datasets.
        if neg == 0:
            if (pos > neighbors[2]):
                return 1
            if(pos <= neighbors[2]):
                return 0
        if (pos/(neg) - neighbors[2] > threshold):
            # too many positive records
            return 1
        if (neighbors[2] - pos/(neg) > threshold):
            return 2
    return 0

  and should_run_async(code)


In [24]:
def compute_problematic_opt(temp2, temp_g, names, label, lst_of_counts):
    need_pos = []
    need_neg = []
    for index, row in temp_g.iterrows():
        group_lst = []
        for n in names:
            group_lst.append(row[n])
        problematic = determine_problematic_opt(group_lst, names, temp2, lst_of_counts,label)
#         #print(problematic)
        if(problematic == 1):
            if group_lst not in need_neg:
                need_neg.append(group_lst)
        if(problematic == 2):
            if group_lst not in need_pos:
                need_pos.append(group_lst)
    return need_pos, need_neg

  and should_run_async(code)


In [25]:
# build the list of X00
def compute_lst_of_counts(temp, names, label):
    # get the list of group-by attributes
    lst_of_counts = []
    for i in range(len(names)):
        grp_names = copy.copy(names)
        del grp_names[i]
        grp_names.append(label)
        temp_df = temp.groupby(grp_names)['cnt'].count()
        lst_of_counts.append(temp_df)
    return lst_of_counts

def get_tuple(group_lst):
    return tuple(group_lst)


def get_temp_g(train_set, names, y_label):
  names2 = copy.deepcopy(names)
  names2.append(y_label)
  temp = train_set[names2]
  temp['cnt'] = 0
  temp_g = temp.groupby(names)['cnt'].count().reset_index()
  return temp, temp_g

  and should_run_async(code)


# Preferential Sampling

In [26]:
from sklearn.naive_bayes import MultinomialNB
def pref_sampling_opt(train_set, cols_given, label, need_pos, need_neg):
    if len(need_pos)+ len(need_neg) > 0:
        temp_train_x = pd.DataFrame(train_set, columns = columns_all)
        temp_train_label = pd.DataFrame(train_set, columns = [label])
        temp_train_label = temp_train_label[label]
        temp_train_label = temp_train_label.astype('int')
        mnb = MultinomialNB()
        mnb = mnb.fit(temp_train_x, temp_train_label)
        probs = mnb.predict_proba(temp_train_x)[:,0]
        train_set["prob"] = abs(probs - 0.5)
        # get the set of
    new_train_set = pd.DataFrame(columns = list(train_set.columns))
    updated_pos = 0
    for i in need_pos:
        # needs to updated more positive records
        # print(i)
        temp_df = copy.deepcopy(train_set)
        for n in range(len(i)):
          temp_df = temp_df[temp_df[cols_given[n]] == i[n]]
        # update the skew and diff
        idx = list(temp_df.index)
        train_set.loc[idx, 'skewed'] = 1
        idx_pos = list(temp_df[(getattr(temp_df, label) == 1)].index)
        if(len(idx_pos) == 0):
          # if there is no positive
          idx_neg = list(temp_df[(getattr(temp_df, label) == 0)].index)
          neg_ranked = train_set.loc[idx_neg].sort_values(by="prob", ascending=True)
          new_train_set = pd.concat([new_train_set, neg_ranked], ignore_index=True)
          continue
        idx_neg = list(temp_df[(getattr(temp_df, label) == 0)].index)
        pos_ranked = train_set.loc[idx_pos].sort_values(by="prob", ascending=True)
        neg_ranked = train_set.loc[idx_neg].sort_values(by="prob", ascending=True)
        diff = compute_diff_add_and_remove(i, temp2,  1, compas_y, names)
        if diff == -1:
          new_train_set = pd.concat([new_train_set, pos_ranked], ignore_index=True)
          new_train_set = pd.concat([new_train_set, neg_ranked], ignore_index=True)
          continue
        train_set.loc[idx, 'diff'] = int(diff)
        cnt = int(train_set.loc[idx_pos[0]]["diff"])
        updated_pos += cnt * 2
        # add more records when there are not enough available records
        new_train_set = pd.concat([new_train_set, pos_ranked], ignore_index=True)
        temp_cnt = cnt
        if len(pos_ranked) >= temp_cnt:
            new_train_set = pd.concat([new_train_set,pos_ranked[0:cnt]], ignore_index=True)
        else:
            while(temp_cnt > 0 ):
                new_train_set = pd.concat([new_train_set,pos_ranked[0:temp_cnt]], ignore_index=True)
            # duplicate the dataframe
                temp_cnt = temp_cnt - len(pos_ranked)
        # duplicate the top cnt records from the pos
        # remove the top cnt records from the neg
        if cnt == 0:
          new_train_set = pd.concat([new_train_set, neg_ranked], ignore_index=True)
          # print("+++++++++++++++++++++++++++++++++++++++++++++")
          # print(i)
          # print(len(pos_ranked)+cnt)
          # print(len(neg_ranked))
        else:
          new_train_set = pd.concat([new_train_set, neg_ranked[cnt-1:-1]], ignore_index=True)
          # print("+++++++++++++++++++++++++++++++++++++++++++++")
          # print(i)
          # print(len(pos_ranked)+cnt)
          # print(len(neg_ranked[cnt-1:-1]))
    print("updated {} positive records".format(str(updated_pos)))
    updated_neg = 0
    # adding more records to the need_neg set
    for i in need_neg:
        # print(i)
        # list of idx belongs to this group
        temp_df = copy.deepcopy(train_set)
        for n in range(len(i)):
          temp_df = temp_df[temp_df[cols_given[n]] == i[n]]
        # update the skew and diff
        idx = list(temp_df.index)
        train_set.loc[idx, 'skewed'] = 1
        idx_pos = list(temp_df[(getattr(temp_df, label) == 1)].index)
        idx_neg = list(temp_df[(getattr(temp_df, label) == 0)].index)
        if(len(idx_neg) == 0):
          pos_ranked = train_set.loc[idx_pos].sort_values(by="prob", ascending=True)
          new_train_set = pd.concat([new_train_set, pos_ranked], ignore_index=True)
          continue
        pos_ranked = train_set.loc[idx_pos].sort_values(by="prob", ascending=True)
        neg_ranked = train_set.loc[idx_neg].sort_values(by="prob", ascending=True)
        diff = compute_diff_add_and_remove(i, temp2, 0, compas_y, names)
        if diff == -1:
          new_train_set = pd.concat([new_train_set, neg_ranked], ignore_index=True)
          new_train_set = pd.concat([new_train_set, pos_ranked], ignore_index=True)
          continue
        train_set.loc[idx, 'diff'] = int(diff)
        cnt = int(train_set.loc[idx_pos[0]]["diff"])
        updated_neg += cnt * 2
        # add more records when there are not enough available records
        new_train_set = pd.concat([new_train_set, neg_ranked], ignore_index=True)
        temp_cnt = cnt
        if len(neg_ranked) >= temp_cnt:
            new_train_set = pd.concat([new_train_set,neg_ranked[0:cnt]], ignore_index=True)
        else:
            while(temp_cnt > 0 ):
                new_train_set = pd.concat([new_train_set,neg_ranked[0:temp_cnt]], ignore_index=True)
            # duplicate the dataframe
                temp_cnt = temp_cnt - len(neg_ranked)
        # duplicate the top cnt records from the pos
        # remove the top cnt records from the neg
        if cnt ==0:
          new_train_set = pd.concat([new_train_set, pos_ranked], ignore_index=True)
          # print("+++++++++++++++++++++++++++++++++++++++++++++")
          # print(i)
          # print(len(neg_ranked)+cnt)
          # print(len(pos_ranked))
        else:
          new_train_set = pd.concat([new_train_set, pos_ranked[cnt-1:-1]], ignore_index=True)
          # print("+++++++++++++++++++++++++++++++++++++++++++++")
          # print(i)
          # print(len(neg_ranked)+cnt)
          # print(len(pos_ranked[cnt-1:-1]))
        #print(len(new_train_set[new_train_set['income'] == 1]), len(new_train_set[new_train_set['income'] == 0]))
        # print(train_set.loc[idx_neg])
    print("updated {} negative records".format(str(updated_neg)))
    # add the other irrelavant items:
    idx_irr = list(train_set[train_set['skewed'] == 0].index)
    irr_df = train_set.loc[idx_irr]
    new_train_set = pd.concat([new_train_set, irr_df], ignore_index=True)
    print("The new dataset contains {} rows.".format(str(len(new_train_set))))
    new_train_set.reset_index()
    return new_train_set



  and should_run_async(code)


In [27]:


def find_top(all_names):
  all_names_lst_top = []
  for all in range(len(all_names)):
    if len(all_names[all]) == 1: # CHANGED HERE
      all_names_lst_top.append(all)
  return all_names_lst_top

  and should_run_async(code)


## Run Algorithm Lattice


In [28]:

#get all of the candidate groups possible with the combos and names

new_train_data = copy.deepcopy(train_set)

#iterate over all the names to get the temp2 df for each name
for a in all_names_lst:
# for a in [all_names_lst[0]]: # leaf
# for a in [all_names_lst[-1:x]]: # top
  print("/////////////")
  print(a)
  temp2, names = get_temp(new_train_data, all_names[a], compas_y)
  temp, temp_g = get_temp_g(new_train_data, names, compas_y)
  temp_g = temp_g[temp_g['cnt'] > filter_count]
  lst_of_counts = compute_lst_of_counts(temp, names, compas_y)
  # start = time.time()
  need_pos, need_neg = compute_problematic_opt(temp2, temp_g, names, compas_y, lst_of_counts)
  # end = time.time()
  # excute_time = end - start
  # print("The time to compute unfair group is {}".format(str(excute_time)))
  print("The sets of need pos and neg are")
  print(need_pos)
  print(need_neg)
  new_train_data['skewed'] = 0
  new_train_data["diff"] = 0
  print("started pref sampling")
  new_train_data = pref_sampling_opt(new_train_data, names, compas_y, need_pos, need_neg)
  # print(new_train_data)
  print(new_train_data[compas_y].value_counts())
new_train_x = pd.DataFrame(new_train_data, columns = columns_all)
new_train_label = pd.DataFrame(new_train_data, columns = [compas_y])
new_train_label = new_train_label[compas_y]
new_train_label = new_train_label.astype('int')

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


/////////////
15
The sets of need pos and neg are
[]
[[1.0, 0.0, 2.0, 3.0], [1.0, 2.0, 0.0, 4.0], [2.0, 0.0, 1.0, 3.0], [2.0, 0.0, 2.0, 3.0], [2.0, 1.0, 0.0, 4.0], [2.0, 2.0, 0.0, 3.0]]
started pref sampling
updated 0 positive records


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

updated 122 negative records
The new dataset contains 3154 rows.
0    1608
1    1546
Name: bar1, dtype: int64
/////////////
14
The sets of need pos and neg are
[[1.0, 0.0, 2.0]]
[[0.0, 1.0, 2.0], [0.0, 1.0, 3.0], [0.0, 1.0, 4.0], [0.0, 2.0, 2.0], [0.0, 2.0, 3.0], [0.0, 2.0, 4.0], [0.0, 3.0, 3.0], [1.0, 2.0, 3.0], [2.0, 0.0, 4.0]]
started pref sampling


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

updated 8 positive records


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

updated 168 negative records
The new dataset contains 3154 rows.
0    1688
1    1466
Name: bar1, dtype: int64
/////////////
13
The sets of need pos and neg are
[]
[[1.0, 0.0, 2.0], [1.0, 2.0, 2.0], [1.0, 2.0, 4.0], [2.0, 1.0, 2.0], [2.0, 2.0, 2.0], [2.0, 2.0, 3.0]]
started pref sampling
updated 0 positive records


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

updated 86 negative records
The new dataset contains 3154 rows.
0    1731
1    1423
Name: bar1, dtype: int64
/////////////
12
The sets of need pos and neg are
[]
[[1.0, 1.0, 2.0], [1.0, 1.0, 4.0], [1.0, 2.0, 3.0], [2.0, 1.0, 3.0]]
started pref sampling


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


updated 0 positive records


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

updated 74 negative records
The new dataset contains 3154 rows.
0    1768
1    1386
Name: bar1, dtype: int64
/////////////
11
The sets of need pos and neg are
[]
[[1.0, 0.0, 1.0], [1.0, 0.0, 3.0], [1.0, 1.0, 2.0], [2.0, 0.0, 3.0], [2.0, 1.0, 2.0]]
started pref sampling
updated 0 positive records


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

updated 88 negative records
The new dataset contains 3154 rows.
0    1812
1    1342
Name: bar1, dtype: int64
/////////////
10
The sets of need pos and neg are
[]
[[1.0, 3.0], [2.0, 1.0]]
started pref sampling


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

updated 0 positive records


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

updated 32 negative records
The new dataset contains 3154 rows.
0    1828
1    1326
Name: bar1, dtype: int64
/////////////
9
The sets of need pos and neg are
[]
[[2.0, 2.0]]
started pref sampling
updated 0 positive records


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

updated 6 negative records
The new dataset contains 3154 rows.
0    1831
1    1323
Name: bar1, dtype: int64
/////////////
8
The sets of need pos and neg are
[]
[[0, 4], [1, 1], [3, 0]]
started pref sampling
updated 0 positive records


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

updated 62 negative records
The new dataset contains 3154 rows.
0    1862
1    1292
Name: bar1, dtype: int64
/////////////
7
The sets of need pos and neg are
[]
[]
started pref sampling
updated 0 positive records
updated 0 negative records
The new dataset contains 3154 rows.
0    1862
1    1292
Name: bar1, dtype: int64
/////////////
6
The sets of need pos and neg are
[]
[[1.0, 4.0], [2.0, 4.0]]
started pref sampling


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

updated 0 positive records
updated 14 negative records
The new dataset contains 3154 rows.


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

0    1869
1    1285
Name: bar1, dtype: int64
/////////////
5
The sets of need pos and neg are
[]
[[2.0, 2.0]]
started pref sampling
updated 0 positive records


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

updated 18 negative records
The new dataset contains 3154 rows.
0    1878
1    1276
Name: bar1, dtype: int64
/////////////
4
The sets of need pos and neg are
[]
[]
started pref sampling
updated 0 positive records
updated 0 negative records
The new dataset contains 3154 rows.
0    1878
1    1276
Name: bar1, dtype: int64
/////////////
3
The sets of need pos and neg are
[]
[]
started pref sampling
updated 0 positive records
updated 0 negative records
The new dataset contains 3154 rows.
0    1878
1    1276
Name: bar1, dtype: int64
/////////////
2
The sets of need pos and neg are
[]
[]
started pref sampling
updated 0 positive records
updated 0 negative records
The new dataset contains 3154 rows.
0    1878
1    1276
Name: bar1, dtype: int64
/////////////
1
The sets of need pos and neg are
[]
[]
started pref sampling
updated 0 positive records
updated 0 negative records
The new dataset contains 3154 rows.
0    1878
1    1276
Name: bar1, dtype: int64


### Results Lattice

In [29]:
# test_set['predicted'] = test_predict
print()
print("dt")
griddt.fit(new_train_x, new_train_label)
print("best", griddt.best_score_)

test_predict = griddt.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label,test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Preferential Sampling-Lattice","DT")

print()
print("rf")
gridrf.fit(new_train_x, new_train_label)
print("best", gridrf.best_score_)
test_predict = gridrf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label,test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Preferential Sampling-Lattice","RF")

print()
print("logistic")
gridlg.fit(new_train_x, new_train_label)
print("best", gridlg.best_score_)
test_predict = gridlg.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label,test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Preferential Sampling-Lattice","LG")

print()
print("svm")
clf.fit(new_train_x, new_train_label)
# print("best", svc.best_score_)
test_predict = clf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
s,s2,s3 = div_results("Adult","Preferential Sampling-Lattice","SVM")


dt


  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/relea

best 0.7650562221719116
fpr and fnr
0.15946348733233978
0.4147058823529412
accuracy
0.7120651369356032


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.08630204433496184
0.49271300890496733
0.15372169969225308
accuracy is  0.7120651369356032
['Adult', 'Preferential Sampling-Lattice', 'DT', 0.08630204433496184, 0.49271300890496733, 0.15372169969225308, 0.7120651369356032]

rf


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7698146051870298
fpr and fnr
0.16244411326378538
0.3588235294117647
accuracy
0.7387120651369357


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.06626489274913255
0.25843773104682993
0.10107982627666741
accuracy is  0.7387120651369357
['Adult', 'Preferential Sampling-Lattice', 'RF', 0.06626489274913255, 0.25843773104682993, 0.10107982627666741, 0.7387120651369357]

logistic


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7546036777098585
fpr and fnr
0.18479880774962743
0.3323529411764706
accuracy
0.7409326424870466


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.06908981675733784
0.23581539918249408
0.059549057884583805
accuracy is  0.7409326424870466
['Adult', 'Preferential Sampling-Lattice', 'LG', 0.06908981675733784, 0.23581539918249408, 0.059549057884583805, 0.7409326424870466]

svm


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


fpr and fnr
0.019374068554396422
0.9235294117647059
accuracy
0.5255366395262768
0.01201847502705398
0.08240379460965347
0.4056627187909715
accuracy is  0.5255366395262768
['Adult', 'Preferential Sampling-Lattice', 'SVM', 0.01201847502705398, 0.08240379460965347, 0.4056627187909715, 0.5255366395262768]


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

## Run Algorithm Leaf

In [30]:

#get all of the candidate groups possible with the combos and names

new_train_data = copy.deepcopy(train_set)

#iterate over all the names to get the temp2 df for each name
for a in [all_names_lst[0]]:
  print("/////////////")
  print(a)
  temp2, names = get_temp(new_train_data, all_names[a], compas_y)
  temp, temp_g = get_temp_g(new_train_data, names, compas_y)

  temp_g = temp_g[temp_g['cnt'] > filter_count]
  lst_of_counts = compute_lst_of_counts(temp, names, compas_y)
  start = time.time()
  need_pos, need_neg = compute_problematic_opt(temp2, temp_g, names, compas_y, lst_of_counts)
  end = time.time()
  excute_time = end - start
  print("The sets of need pos and neg are")
  print(need_pos)
  print(need_neg)
  new_train_data['skewed'] = 0
  new_train_data["diff"] = 0
  print("started pref sampling")
  new_train_data = pref_sampling_opt(new_train_data, names, compas_y, need_pos, need_neg)

  print(new_train_data[compas_y].value_counts())
new_train_x = pd.DataFrame(new_train_data, columns = columns_all)
new_train_label = pd.DataFrame(new_train_data, columns = [compas_y])
new_train_label = new_train_label[compas_y]
new_train_label = new_train_label.astype('int')

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


/////////////
15
The sets of need pos and neg are
[]
[[1.0, 0.0, 2.0, 3.0], [1.0, 2.0, 0.0, 4.0], [2.0, 0.0, 1.0, 3.0], [2.0, 0.0, 2.0, 3.0], [2.0, 1.0, 0.0, 4.0], [2.0, 2.0, 0.0, 3.0]]
started pref sampling


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


updated 0 positive records


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

updated 122 negative records
The new dataset contains 3154 rows.
0    1608
1    1546
Name: bar1, dtype: int64


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

### Results Leaf

In [31]:
# test_set['predicted'] = test_predict
print()
print("dt")
griddt.fit(new_train_x, new_train_label)
print("best", griddt.best_score_)

test_predict = griddt.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Preferential Sampling-Leaf","DT")

print()
print("rf")
gridrf.fit(new_train_x, new_train_label)
print("best", gridrf.best_score_)
test_predict = gridrf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Preferential Sampling-Leaf","RF")

print()
print("logistic")
gridlg.fit(new_train_x, new_train_label)
print("best", gridlg.best_score_)
test_predict = gridlg.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Preferential Sampling-Leaf","LG")

print()
print("svm")
clf.fit(new_train_x, new_train_label)
# print("best", svc.best_score_)
test_predict = clf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
s,s2,s3 = div_results("Adult","Preferential Sampling-Leaf","SVM")

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/relea


dt


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7723502628732423
fpr and fnr
0.21162444113263784
0.275
accuracy
0.7564766839378239


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.11518271265964142
0.4979943568094654
0.04318812010293655
accuracy is  0.7564766839378239
['Adult', 'Preferential Sampling-Leaf', 'DT', 0.11518271265964142, 0.4979943568094654, 0.04318812010293655, 0.7564766839378239]

rf


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7764752345734913
fpr and fnr
0.22056631892697467
0.21911764705882353
accuracy
0.7801628423390081


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.12398707665121793
0.31174202051656597
0.03909925536968267
accuracy is  0.7801628423390081
['Adult', 'Preferential Sampling-Leaf', 'RF', 0.12398707665121793, 0.31174202051656597, 0.03909925536968267, 0.7801628423390081]

logistic


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7733046562523583
fpr and fnr
0.2414307004470939
0.20588235294117646
accuracy
0.7764618800888231


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.11733958615883232
0.4181937689699128
0.005174224647038881
accuracy is  0.7764618800888231
['Adult', 'Preferential Sampling-Leaf', 'LG', 0.11733958615883232, 0.4181937689699128, 0.005174224647038881, 0.7764618800888231]

svm


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


fpr and fnr
0.019374068554396422
0.9220588235294118
accuracy
0.5262768319763138
0.01201847502705398
0.08433522908535661
0.4071052996354924
accuracy is  0.5262768319763138
['Adult', 'Preferential Sampling-Leaf', 'SVM', 0.01201847502705398, 0.08433522908535661, 0.4071052996354924, 0.5262768319763138]


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

## Run Algorithm Top

In [32]:

#get all of the candidate groups possible with the combos and names

new_train_data = copy.deepcopy(train_set)
all_names_lst_top = find_top(all_names)
#iterate over all the names to get the temp2 df for each name
for a in all_names_lst_top:
  print("/////////////")
  print(a)
  temp2, names = get_temp(new_train_data, all_names[a], compas_y)
  temp, temp_g = get_temp_g(new_train_data, names, compas_y)
  temp_g = temp_g[temp_g['cnt'] > filter_count]
  lst_of_counts = compute_lst_of_counts(temp, names, compas_y)
  start = time.time()
  need_pos, need_neg = compute_problematic_opt(temp2, temp_g, names, compas_y, lst_of_counts)
  end = time.time()
  excute_time = end - start

  print("The sets of need pos and neg are")
  print(need_pos)
  print(need_neg)
  new_train_data['skewed'] = 0
  new_train_data["diff"] = 0
  print("started pref sampling")
  new_train_data = pref_sampling_opt(new_train_data, names, compas_y, need_pos, need_neg)

  print(new_train_data[compas_y].value_counts())
new_train_x = pd.DataFrame(new_train_data, columns = columns_all)
new_train_label = pd.DataFrame(new_train_data, columns = [compas_y])
new_train_label = new_train_label[compas_y]
new_train_label = new_train_label.astype('int')


  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


/////////////
1
The sets of need pos and neg are
[]
[]
started pref sampling
updated 0 positive records
updated 0 negative records
The new dataset contains 3154 rows.
1    1607
0    1547
Name: bar1, dtype: int64
/////////////
2
The sets of need pos and neg are
[]
[]
started pref sampling
updated 0 positive records
updated 0 negative records
The new dataset contains 3154 rows.
1    1607
0    1547
Name: bar1, dtype: int64
/////////////
3
The sets of need pos and neg are
[]
[]
started pref sampling
updated 0 positive records
updated 0 negative records
The new dataset contains 3154 rows.
1    1607
0    1547
Name: bar1, dtype: int64

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])



/////////////
4
The sets of need pos and neg are
[]
[]
started pref sampling
updated 0 positive records
updated 0 negative records
The new dataset contains 3154 rows.
1    1607
0    1547
Name: bar1, dtype: int64


### Results Top

In [33]:
# test_set['predicted'] = test_predict
print()
print("dt")
griddt.fit(new_train_x, new_train_label)
print("best", griddt.best_score_)

test_predict = griddt.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Preferential Sampling-Top","DT")

print()
print("rf")
gridrf.fit(new_train_x, new_train_label)
print("best", gridrf.best_score_)
test_predict = gridrf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Preferential Sampling-Top","RF")

print()
print("logistic")
gridlg.fit(new_train_x, new_train_label)
print("best", gridlg.best_score_)
test_predict = gridlg.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Preferential Sampling-Top","LG")

print()
print("svm")
clf.fit(new_train_x, new_train_label)
test_predict = clf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
s,s2,s3 = div_results("Adult","Preferential Sampling-Top","SVM")


dt


  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/relea

best 0.7739375644605438
fpr and fnr
0.21758569299552907
0.2676470588235294
accuracy
0.7572168763878608


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.11788290396195722
0.5754751835980634
0.050142422670160745
accuracy is  0.7572168763878608
['Adult', 'Preferential Sampling-Top', 'DT', 0.11788290396195722, 0.5754751835980634, 0.050142422670160745, 0.7572168763878608]

rf


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7853530551153372
fpr and fnr
0.23248882265275708
0.18676470588235294
accuracy
0.7905255366395263


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.1604661128216605
0.374160894824387
0.0453045993290602
accuracy is  0.7905255366395263
['Adult', 'Preferential Sampling-Top', 'RF', 0.1604661128216605, 0.374160894824387, 0.0453045993290602, 0.7905255366395263]

logistic


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7824969184715618
fpr and fnr
0.2503725782414307
0.18676470588235294
accuracy
0.7816432272390822


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.10978304087999356
0.44524589088943134
0.0077870875591236136
accuracy is  0.7816432272390822
['Adult', 'Preferential Sampling-Top', 'LG', 0.10978304087999356, 0.44524589088943134, 0.0077870875591236136, 0.7816432272390822]

svm


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


fpr and fnr
0.9418777943368107
0.016176470588235296
accuracy
0.5240562546262029
0.03768139557635621
0.02296350573751334
0.05736902401434138
accuracy is  0.5240562546262029
['Adult', 'Preferential Sampling-Top', 'SVM', 0.03768139557635621, 0.02296350573751334, 0.05736902401434138, 0.5240562546262029]


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

# Duplication

In [34]:
def round_int(x):
    if x in [float("-inf"),float("inf")]: return 0
    return int(round(x))


def make_duplicate(d, group_lst, diff, label_y, names, need_positive_or_negative):
    selected = copy.deepcopy(d)
    print("names ", names, group_lst)
    for i in range(len(group_lst)):
        att_name = names[i]
        selected = selected[(selected[att_name] == group_lst[i])]
    selected = selected[(selected[label_y] == need_positive_or_negative)]

    if len(selected) == 0:
        return pd.DataFrame()
    while(len(selected) < diff):
        # duplicate the dataframe
        select_copy = selected.copy(deep=True)
        selected = pd.concat([selected, select_copy])

        # the number needed is more than the not needed numbers.

    generated = selected.sample(n = diff, replace = False, axis = 0)

    return generated


def naive_duplicate(d, temp2, names, need_pos, need_neg, label_y):
    # add more records for all groups
    # The smote algorithm to boost the coverage
    for r in need_pos:
    # add more positive records
        # determine how many points to add
        print("pos_vals", r)
        diff = compute_diff_add(r, temp2, names, label_y, 1)
        if diff == -1:
          continue
        diff = round_int(diff)
        # add more records
        print("Adding " + str(diff) +" positive records")
        samples_to_add = make_duplicate(d, r, diff, label_y, names, need_positive_or_negative = 1)
        d = pd.concat([d, samples_to_add], ignore_index=True)
    for k in need_neg:
        print("neg_vals", k)
        diff = compute_diff_add(k, temp2, names, label_y, need_positive_or_negative = 0)
        if diff == -1:
          continue
        diff = round_int(diff)
        print("Adding " + str(diff) +" negative records")
        samples_to_add = make_duplicate(d, k, diff, label_y, names, need_positive_or_negative = 0)
        d = pd.concat([d, samples_to_add], ignore_index=True)
    return d

  and should_run_async(code)


## Run Algorithm Lattice

In [35]:
#get all of the candidate groups possible with the combos and names

new_train_data = copy.deepcopy(train_set)

#iterate over all the names to get the temp2 df for each name
for a in all_names_lst:
  print("?????/////")
  print(a)
  temp2, names = get_temp(new_train_data, all_names[a], compas_y)
  temp, temp_g = get_temp_g(new_train_data, names, compas_y)
  temp_g = temp_g[temp_g['cnt'] > filter_count]
  lst_of_counts = compute_lst_of_counts(temp, names, compas_y)
  need_pos, need_neg = compute_problematic_opt(temp2, temp_g, names, compas_y, lst_of_counts)
  print("The sets of need pos and neg are")
  print(need_pos)
  print(need_neg)
  new_train_data['skewed'] = 0
  new_train_data["diff"] = 0
  print("started duplication")
  new_train_data = naive_duplicate(new_train_data, temp2, names, need_pos, need_neg, compas_y)
  print("label y ", new_train_data[compas_y].value_counts())
new_train_x = pd.DataFrame(new_train_data, columns = columns_all)
new_train_label = pd.DataFrame(new_train_data, columns = [compas_y])
new_train_label = new_train_label[compas_y]
new_train_label = new_train_label.astype('int')

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


?????/////
15
The sets of need pos and neg are
[]
[[1.0, 0.0, 2.0, 3.0], [1.0, 2.0, 0.0, 4.0], [2.0, 0.0, 1.0, 3.0], [2.0, 0.0, 2.0, 3.0], [2.0, 1.0, 0.0, 4.0], [2.0, 2.0, 0.0, 3.0]]
started duplication
neg_vals [1.0, 0.0, 2.0, 3.0]
1.5314285714285714 70 16 29.7089552238806
Adding 30 negative records
names  ['sex', 'age', 'race1', 'fam_inc'] [1.0, 0.0, 2.0, 3.0]
neg_vals [1.0, 2.0, 0.0, 4.0]
0.7933579335793358 18 13 9.68837209302326
Adding 10 negative records
names  ['sex', 'age', 'race1', 'fam_inc'] [1.0, 2.0, 0.0, 4.0]
neg_vals [2.0, 0.0, 1.0, 3.0]
1.0822510822510822 21 12 7.40400000000000
Adding 7 negative records
names  ['sex', 'age', 'race1', 'fam_inc'] [2.0, 0.0, 1.0, 3.0]
neg_vals [2.0, 0.0, 2.0, 3.0]
1.2550607287449393 36 5 23.6838709677419
Adding 24 negative records
names  ['sex', 'age', 'race1', 'fam_inc'] [2.0, 0.0, 2.0, 3.0]
neg_vals [2.0, 1.0, 0.0, 4.0]
0.6695095948827292 64 52 43.5923566878981
Adding 44 negative records
names  ['sex', 'age', 'race1', 'fam_inc'] [2.0, 1.0,

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The sets of need pos and neg are
[]
[[0.0, 1.0, 2.0], [0.0, 1.0, 3.0], [0.0, 1.0, 4.0], [0.0, 2.0, 2.0], [0.0, 2.0, 4.0], [0.0, 3.0, 3.0], [1.0, 2.0, 3.0]]
started duplication
neg_vals [0.0, 1.0, 2.0]
1.6283185840707965 28 8 9.19565217391301
Adding 9 negative records
names  ['age', 'race1', 'fam_inc'] [0.0, 1.0, 2.0]
neg_vals [0.0, 1.0, 3.0]
1.0503282275711159 38 23 13.1791666666665
Adding 13 negative records
names  ['age', 'race1', 'fam_inc'] [0.0, 1.0, 3.0]
neg_vals [0.0, 1.0, 4.0]
0.7548387096774194 27 14 21.7692307692308
Adding 22 negative records
names  ['age', 'race1', 'fam_inc'] [0.0, 1.0, 4.0]
neg_vals [0.0, 2.0, 2.0]
1.7025316455696202 40 11 12.4944237918216
Adding 12 negative records
names  ['age', 'race1', 'fam_inc'] [0.0, 2.0, 2.0]
neg_vals [0.0, 2.0, 4.0]
0.8140556368960469 39 11 36.9082733812950
Adding 37 negative records
names  ['age', 'race1', 'fam_inc'] [0.0, 2.0, 4.0]
neg_vals [0.0, 3.0, 3.0]
0.9345991561181435 39 13 28.7291196388262
Adding 29 negative records
names  

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The sets of need pos and neg are
[]
[[1.0, 1.0, 3.0], [1.0, 1.0, 4.0], [1.0, 2.0, 2.0], [1.0, 2.0, 4.0], [2.0, 1.0, 2.0], [2.0, 2.0, 2.0], [2.0, 2.0, 3.0], [2.0, 3.0, 3.0]]
started duplication
neg_vals [1.0, 1.0, 3.0]
1.0635359116022098 26 6 18.4467532467532
Adding 18 negative records
names  ['sex', 'race1', 'fam_inc'] [1.0, 1.0, 3.0]
neg_vals [1.0, 1.0, 4.0]
0.8614958448753463 22 11 14.5369774919614
Adding 15 negative records
names  ['sex', 'race1', 'fam_inc'] [1.0, 1.0, 4.0]
neg_vals [1.0, 2.0, 2.0]
1.5868263473053892 38 16 7.94716981132074
Adding 8 negative records
names  ['sex', 'race1', 'fam_inc'] [1.0, 2.0, 2.0]
neg_vals [1.0, 2.0, 4.0]
0.9735576923076923 42 28 15.1407407407407
Adding 15 negative records
names  ['sex', 'race1', 'fam_inc'] [1.0, 2.0, 4.0]
neg_vals [2.0, 1.0, 2.0]
1.0066666666666666 28 13 14.8145695364238
Adding 15 negative records
names  ['sex', 'race1', 'fam_inc'] [2.0, 1.0, 2.0]
neg_vals [2.0, 2.0, 2.0]
1.355263157894737 32 11 12.6116504854369
Adding 13 negative

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The sets of need pos and neg are
[]
[[1.0, 0.0, 2.0], [1.0, 1.0, 2.0], [1.0, 1.0, 4.0], [1.0, 2.0, 3.0], [2.0, 1.0, 3.0]]
started duplication
neg_vals [1.0, 0.0, 2.0]
0.8310291858678955 64 53 24.0129390018484
Adding 24 negative records
names  ['sex', 'age', 'fam_inc'] [1.0, 0.0, 2.0]
neg_vals [1.0, 1.0, 2.0]
1.2008733624454149 31 12 13.8145454545455
Adding 14 negative records
names  ['sex', 'age', 'fam_inc'] [1.0, 1.0, 2.0]
neg_vals [1.0, 1.0, 4.0]
0.8062015503875969 62 42 34.9038461538462
Adding 35 negative records
names  ['sex', 'age', 'fam_inc'] [1.0, 1.0, 4.0]
neg_vals [1.0, 2.0, 3.0]
1.0026455026455026 25 12 12.9340369393140
Adding 13 negative records
names  ['sex', 'age', 'fam_inc'] [1.0, 2.0, 3.0]
neg_vals [2.0, 1.0, 3.0]
0.7923211169284468 77 63 34.1828193832599
Adding 34 negative records
names  ['sex', 'age', 'fam_inc'] [2.0, 1.0, 3.0]
label y  0    2068
1    1607
Name: bar1, dtype: int64
?????/////
11


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The sets of need pos and neg are
[]
[[1.0, 0.0, 3.0], [1.0, 1.0, 2.0], [2.0, 1.0, 1.0], [2.0, 1.0, 2.0]]
started duplication
neg_vals [1.0, 0.0, 3.0]
0.7925696594427245 42 38 14.9921875000000
Adding 15 negative records
names  ['sex', 'age', 'race1'] [1.0, 0.0, 3.0]
neg_vals [1.0, 1.0, 2.0]
1.0982456140350878 58 39 13.8115015974440
Adding 14 negative records
names  ['sex', 'age', 'race1'] [1.0, 1.0, 2.0]
neg_vals [2.0, 1.0, 1.0]
0.8274760383386581 24 14 15.0038610038610
Adding 15 negative records
names  ['sex', 'age', 'race1'] [2.0, 1.0, 1.0]
neg_vals [2.0, 1.0, 2.0]
0.843915343915344 35 7 34.4733542319749
Adding 34 negative records
names  ['sex', 'age', 'race1'] [2.0, 1.0, 2.0]
label y  0    2146
1    1607
Name: bar1, dtype: int64
?????/////
10


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The sets of need pos and neg are
[]
[[2.0, 1.0]]
started duplication
neg_vals [2.0, 1.0]
0.9619883040935673 30 8 23.1854103343465
Adding 23 negative records
names  ['race1', 'fam_inc'] [2.0, 1.0]
label y  0    2169
1    1607
Name: bar1, dtype: int64
?????/////
9
The sets of need pos and neg are
[]
[[1.0, 1.0], [2.0, 2.0], [2.0, 4.0]]
started duplication
neg_vals [1.0, 1.0]
0.7835820895522388 21 13 13.8000000000000
Adding 14 negative records
names  ['age', 'fam_inc'] [1.0, 1.0]
neg_vals [2.0, 2.0]
0.8410404624277457 25 17 12.7250859106529
Adding 13 negative records
names  ['age', 'fam_inc'] [2.0, 2.0]
neg_vals [2.0, 4.0]
0.6608247422680412 36 31 23.4773790951638
Adding 23 negative records
names  ['age', 'fam_inc'] [2.0, 4.0]
label y  0    2219
1    1607
Name: bar1, dtype: int64
?????/////
8
The sets of need pos and neg are
[]
[[0, 4], [1, 1], [1, 3], [2, 2], [3, 0]]
started duplication
neg_vals [0, 4]
0.6888595912986157 26 18 19.7435406698564
Adding 20 negative records
names  ['age', 'r

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The sets of need pos and neg are
[]
[]
started duplication
label y  0    2313
1    1607
Name: bar1, dtype: int64
?????/////
6
The sets of need pos and neg are
[]
[[1.0, 4.0]]
started duplication
neg_vals [1.0, 4.0]
0.7674190382728164 25 18 14.5767263427110
Adding 15 negative records
names  ['sex', 'race1'] [1.0, 4.0]
label y  0    2328
1    1607
Name: bar1, dtype: int64
?????/////
5
The sets of need pos and neg are
[]
[]
started duplication
label y  0    2328
1    1607
Name: bar1, dtype: int64
?????/////
4
The sets of need pos and neg are
[]
[]
started duplication
label y  0    2328
1    1607
Name: bar1, dtype: int64
?????/////
3
The sets of need pos and neg are
[]
[]
started duplication
label y  0    2328
1    1607
Name: bar1, dtype: int64
?????/////
2
The sets of need pos and neg are
[]
[]
started duplication
label y  0    2328
1    1607
Name: bar1, dtype: int64
?????/////
1
The sets of need pos and neg are
[]
[]
started duplication
label y  0    2328
1    1607
Name: bar1, dtype: int

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


### Results Lattice


In [36]:
print()
print("dt")
griddt.fit(new_train_x, new_train_label)
print("best", griddt.best_score_)

test_predict = griddt.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
d,d1,d2 = div_results("Adult","Duplication-Lattice","DT")

print()
print("rf")
gridrf.fit(new_train_x, new_train_label)
print("best", gridrf.best_score_)
test_predict = gridrf.predict(test_x)
print("fpr and fnr")

print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Duplication-Lattice","RF")

print()
print("logistic")
gridlg.fit(new_train_x, new_train_label)
print("best", gridlg.best_score_)
test_predict = gridlg.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Duplication-Lattice","L")

print()
print("svm")
clf.fit(new_train_x, new_train_label)
test_predict = clf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
test_set['predicted'] = test_predict
s,s2,s3 = div_results("Adult","Duplication-Lattice","SVM")

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])



dt


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7903430749682337
fpr and fnr
0.29508196721311475
0.31470588235294117
accuracy
0.695040710584752


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.10681848677132257
0.2673404121980464
0.03625518504537306
accuracy is  0.695040710584752
['Adult', 'Duplication-Lattice', 'DT', 0.10681848677132257, 0.2673404121980464, 0.03625518504537306, 0.695040710584752]

rf


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.8340533672172808
fpr and fnr
0.22205663189269748
0.25882352941176473


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.08174037557977756
0.21835835815097734
0.020336938233104163
accuracy is  0.7594374537379719
['Adult', 'Duplication-Lattice', 'RF', 0.08174037557977756, 0.21835835815097734, 0.020336938233104163, 0.7594374537379719]

logistic


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7306226175349428
fpr and fnr
0.18628912071535023
0.3426470588235294


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.05756428629141457
0.15680507238632643
0.04601411022676419
accuracy is  0.7350111028867505
['Adult', 'Duplication-Lattice', 'L', 0.05756428629141457, 0.15680507238632643, 0.04601411022676419, 0.7350111028867505]

svm


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


fpr and fnr
0.9418777943368107
0.016176470588235296
0.03768139557635621
0.02296350573751334
0.05736902401434138
accuracy is  0.5240562546262029
['Adult', 'Duplication-Lattice', 'SVM', 0.03768139557635621, 0.02296350573751334, 0.05736902401434138, 0.5240562546262029]


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

In [37]:
def fairness_score_computation(d, metrics):
    sum_of_score = 0
    for idx, row in d.iterrows():
      sum_of_score += row['support'] * row[metrics]
    return sum_of_score

print(fairness_score_computation(d, 'd_fpr'))

0.10681848677132257


  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


## Run Algorithm Leaf

In [38]:
#get all of the candidate groups possible with the combos and names

new_train_data = copy.deepcopy(train_set)

#iterate over all the names to get the temp2 df for each name
for a in [all_names_lst[0]]:
  print("?????/////")
  print(a)
  temp2, names = get_temp(new_train_data, all_names[a], compas_y)
  temp, temp_g = get_temp_g(new_train_data, names, compas_y)
  temp_g = temp_g[temp_g['cnt'] > filter_count]
  lst_of_counts = compute_lst_of_counts(temp, names, compas_y)
  need_pos, need_neg = compute_problematic_opt(temp2, temp_g, names, compas_y, lst_of_counts)

  print("The sets of need pos and neg are")
  print(need_pos)
  print(need_neg)
  new_train_data['skewed'] = 0
  new_train_data["diff"] = 0
  print("started duplication")
  new_train_data = naive_duplicate(new_train_data, temp2, names, need_pos, need_neg, compas_y)
  print("label y", new_train_data[compas_y].value_counts())
new_train_x = pd.DataFrame(new_train_data, columns = columns_all)
new_train_label = pd.DataFrame(new_train_data, columns = [compas_y])
new_train_label = new_train_label[compas_y]
new_train_label = new_train_label.astype('int')

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


?????/////
15
The sets of need pos and neg are
[]
[[1.0, 0.0, 2.0, 3.0], [1.0, 2.0, 0.0, 4.0], [2.0, 0.0, 1.0, 3.0], [2.0, 0.0, 2.0, 3.0], [2.0, 1.0, 0.0, 4.0], [2.0, 2.0, 0.0, 3.0]]
started duplication
neg_vals [1.0, 0.0, 2.0, 3.0]
1.5314285714285714 70 16 29.7089552238806
Adding 30 negative records
names  ['sex', 'age', 'race1', 'fam_inc'] [1.0, 0.0, 2.0, 3.0]
neg_vals [1.0, 2.0, 0.0, 4.0]
0.7933579335793358 18 13 9.68837209302326
Adding 10 negative records
names  ['sex', 'age', 'race1', 'fam_inc'] [1.0, 2.0, 0.0, 4.0]
neg_vals [2.0, 0.0, 1.0, 3.0]
1.0822510822510822 21 12 7.40400000000000
Adding 7 negative records
names  ['sex', 'age', 'race1', 'fam_inc'] [2.0, 0.0, 1.0, 3.0]
neg_vals [2.0, 0.0, 2.0, 3.0]
1.2550607287449393 36 5 23.6838709677419
Adding 24 negative records
names  ['sex', 'age', 'race1', 'fam_inc'] [2.0, 0.0, 2.0, 3.0]
neg_vals [2.0, 1.0, 0.0, 4.0]
0.6695095948827292 64 52 43.5923566878981
Adding 44 negative records
names  ['sex', 'age', 'race1', 'fam_inc'] [2.0, 1.0,

### Results Leaf

In [39]:
print()
print("dt")
griddt.fit(new_train_x, new_train_label)
print("best", griddt.best_score_)

test_predict = griddt.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
d,d1,d2 = div_results("Adult","Duplication-Leaf","DT")

print()
print("rf")
gridrf.fit(new_train_x, new_train_label)
print("best", gridrf.best_score_)
test_predict = gridrf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Duplication-Leaf","RF")

print()
print("logistic")
gridlg.fit(new_train_x, new_train_label)
print("best", gridlg.best_score_)
test_predict = gridlg.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Duplication-Leaf","L")

print()
print("svm")
clf.fit(new_train_x, new_train_label)
test_predict = clf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
test_set['predicted'] = test_predict
s,s2,s3 = div_results("Adult","Duplication-Leaf","SVM")


dt


  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/relea

best 0.7666578765966698
fpr and fnr
0.21758569299552907
0.23823529411764705
accuracy
0.772020725388601


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.10753887609110939
0.3686186961088254
0.0220677065156112
accuracy is  0.772020725388601
['Adult', 'Duplication-Leaf', 'DT', 0.10753887609110939, 0.3686186961088254, 0.0220677065156112, 0.772020725388601]

rf


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7855329326911955
fpr and fnr
0.23845007451564829
0.2338235294117647


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.11336407489071816
0.24606242039548326
0.02111767416301006
accuracy is  0.7638786084381939
['Adult', 'Duplication-Leaf', 'RF', 0.11336407489071816, 0.24606242039548326, 0.02111767416301006, 0.7638786084381939]

logistic


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7718319893778943
fpr and fnr
0.23695976154992549
0.21470588235294116


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.11324460488901579
0.44545113534284503
0.004838371225963603
accuracy is  0.7742413027387121
['Adult', 'Duplication-Leaf', 'L', 0.11324460488901579, 0.44545113534284503, 0.004838371225963603, 0.7742413027387121]

svm


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


fpr and fnr
0.9418777943368107
0.016176470588235296
0.03768139557635621
0.02296350573751334
0.05736902401434138
accuracy is  0.5240562546262029
['Adult', 'Duplication-Leaf', 'SVM', 0.03768139557635621, 0.02296350573751334, 0.05736902401434138, 0.5240562546262029]


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

## Run Algorithm Top

In [40]:
#get all of the candidate groups possible with the combos and names

new_train_data = copy.deepcopy(train_set)

#iterate over all the names to get the temp2 df for each name
all_names_lst_top = find_top(all_names)
for a in all_names_lst_top:
  print("?????/////")
  print(a)
  temp2, names = get_temp(new_train_data, all_names[a], compas_y)
  temp, temp_g = get_temp_g(new_train_data, names, compas_y)
  temp_g = temp_g[temp_g['cnt'] > filter_count]
  lst_of_counts = compute_lst_of_counts(temp, names, compas_y)
  need_pos, need_neg = compute_problematic_opt(temp2, temp_g, names, compas_y, lst_of_counts)

  print("The sets of need pos and neg are")
  print(need_pos)
  print(need_neg)
  new_train_data['skewed'] = 0
  new_train_data["diff"] = 0
  print("started duplication")
  new_train_data = naive_duplicate(new_train_data, temp2, names, need_pos, need_neg, compas_y)

  print("Label y", new_train_data[compas_y].value_counts())
new_train_x = pd.DataFrame(new_train_data, columns = columns_all)
new_train_label = pd.DataFrame(new_train_data, columns = [compas_y])
new_train_label = new_train_label[compas_y]
new_train_label = new_train_label.astype('int')

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


?????/////
1
The sets of need pos and neg are
[]
[]
started duplication
Label y 1    1607
0    1547
Name: bar1, dtype: int64
?????/////
2
The sets of need pos and neg are
[]
[]
started duplication
Label y 1    1607
0    1547
Name: bar1, dtype: int64
?????/////
3
The sets of need pos and neg are
[]
[]
started duplication
Label y 1    1607
0    1547
Name: bar1, dtype: int64
?????/////
4
The sets of need pos and neg are
[]
[]
started duplication
Label y 1    1607
0    1547
Name: bar1, dtype: int64


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


### Results Top

In [41]:
test_set['predicted'] = test_predict
print()
print("dt")
griddt.fit(new_train_x, new_train_label)
print("best", griddt.best_score_)

test_predict = griddt.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
d,d1,d2 = div_results("Adult","Duplication-Top","DT")


print()
print("rf")
gridrf.fit(new_train_x, new_train_label)
print("best", gridrf.best_score_)
test_predict = gridrf.predict(test_x)
print("fpr and fnr")
# print(fpr_onegroup(new_labels.tolist(), predict))
# print(fnr_onegroup(new_labels.tolist(), predict))
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Duplication-Top","RF")

print()
print("logistic")
gridlg.fit(new_train_x, new_train_label)
print("best", gridlg.best_score_)
test_predict = gridlg.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Duplication-Top","L")

print()
print("svm")
clf.fit(new_train_x, new_train_label)
test_predict = clf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
test_set['predicted'] = test_predict
s,s2,s3 = div_results("Adult","Duplication-Top","SVM")

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])



dt


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7739375644605438
fpr and fnr
0.21758569299552907
0.2676470588235294
accuracy
0.7572168763878608


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.11788290396195722
0.5754751835980634
0.050142422670160745
accuracy is  0.7572168763878608
['Adult', 'Duplication-Top', 'DT', 0.11788290396195722, 0.5754751835980634, 0.050142422670160745, 0.7572168763878608]

rf


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7853530551153372
fpr and fnr
0.23248882265275708
0.18676470588235294


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.1604661128216605
0.374160894824387
0.0453045993290602
accuracy is  0.7905255366395263
['Adult', 'Duplication-Top', 'RF', 0.1604661128216605, 0.374160894824387, 0.0453045993290602, 0.7905255366395263]

logistic


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7824969184715618
fpr and fnr
0.2503725782414307
0.18676470588235294


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.10978304087999356
0.44524589088943134
0.0077870875591236136
accuracy is  0.7816432272390822
['Adult', 'Duplication-Top', 'L', 0.10978304087999356, 0.44524589088943134, 0.0077870875591236136, 0.7816432272390822]

svm


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


fpr and fnr
0.9418777943368107
0.016176470588235296
0.03768139557635621
0.02296350573751334
0.05736902401434138
accuracy is  0.5240562546262029
['Adult', 'Duplication-Top', 'SVM', 0.03768139557635621, 0.02296350573751334, 0.05736902401434138, 0.5240562546262029]


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

#Down-sampling

In [42]:
def round_int(x):
    if x in [float("-inf"),float("inf")]: return 0
    return int(round(x))


def make_remove(d, group_lst, diff, names, label_y, need_positive_or_negative):
    temp = copy.deepcopy(d)
    for i in range(len(group_lst)):
        att_name = names[i]
        temp = temp[(temp[att_name] == group_lst[i])]
    temp = temp[(temp[label_y] == need_positive_or_negative)]
    # randomly generated diff samples
    # the number needed is more than the not needed numbers.
    if(diff>len(temp)):
        diff = len(temp)
    generated = temp.sample(n = diff, replace = False, axis = 0)
    return generated.index


def naive_downsampling(d, temp2, names, need_pos, need_neg, label_y):
    # add more records for all groups
    # The smote algorithm to boost the coverage
    for r in need_pos:
        print("removing more negative")
    # add more positive records
        # determine how many points to add
        print(r)
        diff = compute_diff_remove(r, temp2, names, label_y, need_positive_or_negative = 1)
        if diff == -1:
          continue
        diff = round_int(diff)
        # add more records
        print("Removed " + str(diff) +" negative records")
        samples_to_remove = make_remove(d, r, diff, names, label_y, need_positive_or_negative = 0)
        d.drop(index  = samples_to_remove, inplace = True)
    for k in need_neg:
        print(k)
        diff = compute_diff_remove(k, temp2, names, label_y, need_positive_or_negative = 0)
        if diff == -1:
          continue
        diff = round_int(diff)
        print("Removed " + str(diff) +" positive records")
        samples_to_remove = make_remove(d, k, diff, names, label_y, need_positive_or_negative = 1)
        d.drop(index  = samples_to_remove, inplace = True)
    return d

  and should_run_async(code)


## Run Algorithm Lattice

In [43]:
#get all of the candidate groups possible with the combos and names

new_train_data = copy.deepcopy(train_set)

#iterate over all the names to get the temp2 df for each name
for a in all_names_lst:
  print("?????/////")
  print(a)
  temp2, names = get_temp(new_train_data, all_names[a], compas_y)
  temp, temp_g = get_temp_g(new_train_data, names, compas_y)
  temp_g = temp_g[temp_g['cnt'] > filter_count]
  lst_of_counts = compute_lst_of_counts(temp, names, compas_y)
  start = time.time()
  need_pos, need_neg = compute_problematic_opt(temp2, temp_g, names, compas_y, lst_of_counts)
  end = time.time()
  excute_time = end - start
  print("The time to compute unfair group is {}".format(str(excute_time)))
  print("The sets of need pos and neg are")
  print(need_pos)
  print(need_neg)
  new_train_data['skewed'] = 0
  new_train_data["diff"] = 0
  new_train_data = naive_downsampling(new_train_data, temp2, names, need_pos, need_neg, compas_y)
  print(new_train_data[compas_y].value_counts())
new_train_x = pd.DataFrame(new_train_data, columns = columns_all)
new_train_label = pd.DataFrame(new_train_data, columns = [compas_y])
new_train_label = new_train_label[compas_y]
new_train_label = new_train_label.astype('int')

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


?????/////
15
The time to compute unfair group is 0.1430964469909668
The sets of need pos and neg are
[]
[[1.0, 0.0, 2.0, 3.0], [1.0, 2.0, 0.0, 4.0], [2.0, 0.0, 1.0, 3.0], [2.0, 0.0, 2.0, 3.0], [2.0, 1.0, 0.0, 4.0], [2.0, 2.0, 0.0, 3.0]]
[1.0, 0.0, 2.0, 3.0]
1.5314285714285714 70 16 45.4971428571429
Removed 45 positive records
[1.0, 2.0, 0.0, 4.0]
0.7933579335793358 18 13 7.68634686346864
Removed 8 positive records
[2.0, 0.0, 1.0, 3.0]
1.0822510822510822 21 12 8.01298701298702
Removed 8 positive records
[2.0, 0.0, 2.0, 3.0]
1.2550607287449393 36 5 29.7246963562753
Removed 30 positive records
[2.0, 1.0, 0.0, 4.0]
0.6695095948827292 64 52 29.1855010660981
Removed 29 positive records
[2.0, 2.0, 0.0, 3.0]
0.7692307692307693 23 12 13.7692307692308
Removed 14 positive records
0    1547
1    1473
Name: bar1, dtype: int64
?????/////
14
The time to compute unfair group is 0.0948188304901123
The sets of need pos and neg are
[[1.0, 0.0, 2.0]]
[[0.0, 1.0, 2.0], [0.0, 1.0, 3.0], [0.0, 1.0, 4.0], [0

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


1.6603773584905661 28 8 14.7169811320755
Removed 15 positive records
[0.0, 1.0, 3.0]
1.0049627791563276 30 16 13.9205955334988
Removed 14 positive records
[0.0, 1.0, 4.0]
0.7504078303425775 27 14 16.4942903752039
Removed 16 positive records
[0.0, 2.0, 2.0]
1.8653846153846154 40 11 19.4807692307692
Removed 19 positive records
[0.0, 2.0, 3.0]
1.1571072319201996 31 21 6.70074812967582
Removed 7 positive records
[0.0, 2.0, 4.0]
0.7647058823529411 39 11 30.5882352941176
Removed 31 positive records
[0.0, 3.0, 3.0]
0.8716707021791767 39 13 27.6682808716707
Removed 28 positive records
[1.0, 2.0, 3.0]
1.4785714285714286 41 3 36.5642857142856
Removed 37 positive records
[2.0, 0.0, 4.0]
0.6597014925373135 21 21 7.14626865671643
Removed 7 positive records
0    1537
1    1299
Name: bar1, dtype: int64
?????/////
13


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The time to compute unfair group is 0.09312558174133301
The sets of need pos and neg are
[]
[[1.0, 0.0, 2.0], [1.0, 2.0, 2.0], [2.0, 2.0, 2.0]]
[1.0, 0.0, 2.0]
0.8177083333333334 40 32 13.8333333333333
Removed 14 positive records
[1.0, 2.0, 2.0]
2.0384615384615383 27 8 10.6923076923077
Removed 11 positive records
[2.0, 2.0, 2.0]
1.3617021276595744 24 7 14.4680851063830
Removed 14 positive records
0    1537
1    1260
Name: bar1, dtype: int64
?????/////
12


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The time to compute unfair group is 0.19759225845336914
The sets of need pos and neg are
[]
[[1.0, 0.0, 2.0], [1.0, 1.0, 2.0], [1.0, 1.0, 4.0], [1.0, 2.0, 3.0]]
[1.0, 0.0, 2.0]
0.8007968127490039 36 32 10.3745019920319
Removed 10 positive records
[1.0, 1.0, 2.0]
1.3048780487804879 24 8 13.5609756097561
Removed 14 positive records
[1.0, 1.0, 4.0]
0.8556149732620321 62 41 26.9197860962567
Removed 27 positive records
[1.0, 2.0, 3.0]
0.9767441860465116 25 12 13.2790697674418
Removed 13 positive records
0    1537
1    1196
Name: bar1, dtype: int64
?????/////
11


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The time to compute unfair group is 0.1851823329925537
The sets of need pos and neg are
[]
[[1.0, 0.0, 1.0], [1.0, 0.0, 2.0], [1.0, 0.0, 3.0], [1.0, 1.0, 2.0], [2.0, 0.0, 2.0], [2.0, 0.0, 3.0], [2.0, 1.0, 1.0]]
[1.0, 0.0, 1.0]
0.7653061224489796 25 15 13.5204081632653
Removed 14 positive records
[1.0, 0.0, 2.0]
0.8343949044585988 35 30 9.96815286624204
Removed 10 positive records
[1.0, 0.0, 3.0]
0.7828810020876826 25 21 8.55949895615867
Removed 9 positive records
[1.0, 1.0, 2.0]
1.1884057971014492 25 7 16.6811594202898
Removed 17 positive records
[2.0, 0.0, 2.0]
0.6526458616010855 22 17 10.9050203527815
Removed 11 positive records
[2.0, 0.0, 3.0]
0.6312154696132597 25 24 9.85082872928177
Removed 10 positive records
[2.0, 1.0, 1.0]
0.9770114942528736 24 7 17.1609195402299
Removed 17 positive records
0    1537
1    1108
Name: bar1, dtype: int64
?????/////
10


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The time to compute unfair group is 0.05745697021484375
The sets of need pos and neg are
[]
[[2.0, 4.0]]
[2.0, 4.0]
0.6459709379128138 20 14 10.9564068692206
Removed 11 positive records
0    1537
1    1097
Name: bar1, dtype: int64
?????/////
9
The time to compute unfair group is 0.07400798797607422
The sets of need pos and neg are
[]
[[0.0, 1.0]]
[0.0, 1.0]
0.6422558922558923 20 20 7.15488215488216
Removed 7 positive records
0    1537
1    1090
Name: bar1, dtype: int64
?????/////
8
The time to compute unfair group is 0.03268837928771973
The sets of need pos and neg are
[]
[[0, 4], [3, 0]]
[0, 4]
0.6293469041560644 24 18 12.6717557251908
Removed 13 positive records
[3, 0]
0.6426966292134831 21 11 13.9303370786517
Removed 14 positive records
0    1537
1    1063
Name: bar1, dtype: int64
?????/////
7


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The time to compute unfair group is 0.04627370834350586
The sets of need pos and neg are
[]
[[1.0, 1.0]]
[1.0, 1.0]
0.731629392971246 24 18 10.8306709265176
Removed 11 positive records
0    1537
1    1052
Name: bar1, dtype: int64
?????/////
6
The time to compute unfair group is 0.03650212287902832
The sets of need pos and neg are
[]
[[1.0, 3.0], [2.0, 1.0], [2.0, 2.0]]
[1.0, 3.0]
0.719435736677116 23 22 7.17241379310345
Removed 7 positive records
[2.0, 1.0]
0.6480446927374302 36 35 13.3184357541899
Removed 13 positive records
[2.0, 2.0]
0.652267818574514 28 23 12.9978401727862
Removed 13 positive records
0    1537
1    1019
Name: bar1, dtype: int64
?????/////
5


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The time to compute unfair group is 0.020398855209350586
The sets of need pos and neg are
[]
[[2.0, 2.0]]
[2.0, 2.0]
0.6281628162816282 32 34 10.6424642464246
Removed 11 positive records
0    1537
1    1008
Name: bar1, dtype: int64
?????/////
4
The time to compute unfair group is 0.013121843338012695
The sets of need pos and neg are
[]
[]
0    1537
1    1008
Name: bar1, dtype: int64
?????/////
3
The time to compute unfair group is 0.012837648391723633
The sets of need pos and neg are
[]
[]
0    1537
1    1008
Name: bar1, dtype: int64
?????/////
2
The time to compute unfair group is 0.00939321517944336
The sets of need pos and neg are
[]
[]
0    1537
1    1008
Name: bar1, dtype: int64
?????/////
1
The time to compute unfair group is 0.010644912719726562
The sets of need pos and neg are
[]
[]
0    1537
1    1008
Name: bar1, dtype: int64


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


### Results Lattice

In [44]:
print()
print("dt")
griddt.fit(new_train_x, new_train_label)
print("best", griddt.best_score_)

test_predict = griddt.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Down Sampling-Lattice","DT")

print()
print("rf")
gridrf.fit(new_train_x, new_train_label)
print("best", gridrf.best_score_)
test_predict = gridrf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Down Sampling-Lattice","RF")

print()
print("logistic")
gridlg.fit(new_train_x, new_train_label)
print("best", gridlg.best_score_)
test_predict = gridlg.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Down Sampling-Lattice","LG")

print()
print("svm")
clf.fit(new_train_x, new_train_label)
test_predict = clf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
s,s2,s3 = div_results("Adult","Down Sampling-Lattice","SVM")


dt


  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/relea

best 0.7559921414538311
fpr and fnr
0.21907600596125187
0.29558823529411765
accuracy
0.7424130273871207


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.07460860835308102
0.15559280250424823
0.055978492231814475
accuracy is  0.7424130273871207
['Adult', 'Down Sampling-Lattice', 'DT', 0.07460860835308102, 0.15559280250424823, 0.055978492231814475, 0.7424130273871207]

rf


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7677799607072692
fpr and fnr
0.19523099850968703
0.29558823529411765
accuracy
0.7542561065877128


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.09578370841504646
0.06864244986580195
0.07456000736357263
accuracy is  0.7542561065877128
['Adult', 'Down Sampling-Lattice', 'RF', 0.09578370841504646, 0.06864244986580195, 0.07456000736357263, 0.7542561065877128]

logistic


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7579567779960706
fpr and fnr
0.19970193740685543
0.3029411764705882
accuracy
0.7483345669874167


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.0860665528595352
0.2677339346468935
0.014662494706062523
accuracy is  0.7483345669874167
['Adult', 'Down Sampling-Lattice', 'LG', 0.0860665528595352, 0.2677339346468935, 0.014662494706062523, 0.7483345669874167]

svm


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


fpr and fnr
0.013412816691505217
0.9573529411764706
accuracy
0.5114729829755736
0.006851961615269457
0.018449536582035353
0.4596474580059948
accuracy is  0.5114729829755736
['Adult', 'Down Sampling-Lattice', 'SVM', 0.006851961615269457, 0.018449536582035353, 0.4596474580059948, 0.5114729829755736]


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

## Run Algorithm Leaf

In [45]:
#get all of the candidate groups possible with the combos and names

new_train_data = copy.deepcopy(train_set)

#iterate over all the names to get the temp2 df for each name
for a in [all_names_lst[0]]:
  print("?????/////")
  print(a)
  temp2, names = get_temp(new_train_data, all_names[a], compas_y)
  temp, temp_g = get_temp_g(new_train_data, names, compas_y)
  temp_g = temp_g[temp_g['cnt'] > filter_count]
  lst_of_counts = compute_lst_of_counts(temp, names, compas_y)
  start = time.time()
  need_pos, need_neg = compute_problematic_opt(temp2, temp_g, names, compas_y, lst_of_counts)
  end = time.time()
  excute_time = end - start
  print("The time to compute unfair group is {}".format(str(excute_time)))
  print("The sets of need pos and neg are")
  print(need_pos)
  print(need_neg)
  new_train_data['skewed'] = 0
  new_train_data["diff"] = 0

  new_train_data = naive_downsampling(new_train_data, temp2, names, need_pos, need_neg, compas_y)
  print(new_train_data[compas_y].value_counts())
new_train_x = pd.DataFrame(new_train_data, columns = columns_all)
new_train_label = pd.DataFrame(new_train_data, columns = [compas_y])
new_train_label = new_train_label[compas_y]
new_train_label = new_train_label.astype('int')


  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


?????/////
15
The time to compute unfair group is 0.14596009254455566
The sets of need pos and neg are
[]
[[1.0, 0.0, 2.0, 3.0], [1.0, 2.0, 0.0, 4.0], [2.0, 0.0, 1.0, 3.0], [2.0, 0.0, 2.0, 3.0], [2.0, 1.0, 0.0, 4.0], [2.0, 2.0, 0.0, 3.0]]
[1.0, 0.0, 2.0, 3.0]
1.5314285714285714 70 16 45.4971428571429
Removed 45 positive records
[1.0, 2.0, 0.0, 4.0]
0.7933579335793358 18 13 7.68634686346864
Removed 8 positive records
[2.0, 0.0, 1.0, 3.0]
1.0822510822510822 21 12 8.01298701298702
Removed 8 positive records
[2.0, 0.0, 2.0, 3.0]
1.2550607287449393 36 5 29.7246963562753
Removed 30 positive records
[2.0, 1.0, 0.0, 4.0]
0.6695095948827292 64 52 29.1855010660981
Removed 29 positive records
[2.0, 2.0, 0.0, 3.0]
0.7692307692307693 23 12 13.7692307692308
Removed 14 positive records
0    1547
1    1473
Name: bar1, dtype: int64


### Results Leaf

In [46]:
print()
print("dt")
griddt.fit(new_train_x, new_train_label)
print("best", griddt.best_score_)

test_predict = griddt.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Down Sampling-Leaf","DT")

print()
print("rf")
gridrf.fit(new_train_x, new_train_label)
print("best", gridrf.best_score_)
test_predict = gridrf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Down Sampling-Leaf","RF")

print()
print("logistic")
gridlg.fit(new_train_x, new_train_label)
print("best", gridlg.best_score_)
test_predict = gridlg.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Down Sampling-Leaf","LG")

print()
print("svm")
clf.fit(new_train_x, new_train_label)
test_predict = clf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
s,s2,s3 = div_results("Adult","Down Sampling-Leaf","SVM")


dt


  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/relea

best 0.7764900662251656
fpr and fnr
0.19076005961251863
0.3029411764705882
accuracy
0.7527757216876387


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.11644717252064606
0.4039573552604242
0.039006114942957096
accuracy is  0.7527757216876387
['Adult', 'Down Sampling-Leaf', 'DT', 0.11644717252064606, 0.4039573552604242, 0.039006114942957096, 0.7527757216876387]

rf


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7824503311258278
fpr and fnr
0.21907600596125187
0.20588235294117646
accuracy
0.7875647668393783


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.13220787041452256
0.3413994867570331
0.03530679634736117
accuracy is  0.7875647668393783
['Adult', 'Down Sampling-Leaf', 'RF', 0.13220787041452256, 0.3413994867570331, 0.03530679634736117, 0.7875647668393783]

logistic


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7798013245033112
fpr and fnr
0.2414307004470939
0.19705882352941176
accuracy
0.7809030347890451


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.1172732311886628
0.3790572995303081
0.00851467865730954
accuracy is  0.7809030347890451
['Adult', 'Down Sampling-Leaf', 'LG', 0.1172732311886628, 0.3790572995303081, 0.00851467865730954, 0.7809030347890451]

svm


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


fpr and fnr
0.01788375558867362
0.9352941176470588
accuracy
0.5203552923760177
0.009692190760535778
0.04169734725562466
0.4339642592788412
accuracy is  0.5203552923760177
['Adult', 'Down Sampling-Leaf', 'SVM', 0.009692190760535778, 0.04169734725562466, 0.4339642592788412, 0.5203552923760177]


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

## Run Algorithm Top

In [47]:
#get all of the candidate groups possible with the combos and names

new_train_data = copy.deepcopy(train_set)

#iterate over all the names to get the temp2 df for each name
all_names_lst_top = find_top(all_names)
for a in all_names_lst_top:
  print("?????/////")
  print(a)
  temp2, names = get_temp(new_train_data, all_names[a], compas_y)
  temp, temp_g = get_temp_g(new_train_data, names, compas_y)
  temp_g = temp_g[temp_g['cnt'] > filter_count]
  lst_of_counts = compute_lst_of_counts(temp, names, compas_y)
  start = time.time()
  need_pos, need_neg = compute_problematic_opt(temp2, temp_g, names, compas_y, lst_of_counts)
  end = time.time()
  excute_time = end - start
  print("The time to compute unfair group is {}".format(str(excute_time)))
  print("The sets of need pos and neg are")
  print(need_pos)
  print(need_neg)
  new_train_data['skewed'] = 0
  new_train_data["diff"] = 0
  new_train_data = naive_downsampling(new_train_data, temp2, names, need_pos, need_neg, compas_y)
  print(new_train_data[compas_y].value_counts())
new_train_x = pd.DataFrame(new_train_data, columns = columns_all)
new_train_label = pd.DataFrame(new_train_data, columns = [compas_y])
new_train_label = new_train_label[compas_y]
new_train_label = new_train_label.astype('int')


  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


?????/////
1
The time to compute unfair group is 0.013412237167358398
The sets of need pos and neg are
[]
[]
1    1607
0    1547
Name: bar1, dtype: int64
?????/////
2
The time to compute unfair group is 0.014632701873779297
The sets of need pos and neg are
[]
[]
1    1607
0    1547
Name: bar1, dtype: int64
?????/////
3
The time to compute unfair group is 0.017813920974731445
The sets of need pos and neg are
[]
[]
1    1607
0    1547
Name: bar1, dtype: int64
?????/////
4
The time to compute unfair group is 0.022414207458496094
The sets of need pos and neg are
[]
[]
1    1607
0    1547
Name: bar1, dtype: int64


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


### Results Top

In [48]:
print()
print("dt")
griddt.fit(new_train_x, new_train_label)
print("best", griddt.best_score_)

test_predict = griddt.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Down Sampling-Top","DT")

print()
print("rf")
gridrf.fit(new_train_x, new_train_label)
print("best", gridrf.best_score_)
test_predict = gridrf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Down Sampling-Top","RF")

print()
print("logistic")
gridlg.fit(new_train_x, new_train_label)
print("best", gridlg.best_score_)
test_predict = gridlg.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Down Sampling-Top","LG")

print()
print("svm")
clf.fit(new_train_x, new_train_label)
test_predict = clf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
s,s2,s3 = div_results("Adult","Down Sampling-Top","SVM")

  and should_run_async(code)





See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

dt


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7739375644605438
fpr and fnr
0.21758569299552907
0.2676470588235294
accuracy
0.7572168763878608


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.11788290396195722
0.5754751835980634
0.050142422670160745
accuracy is  0.7572168763878608
['Adult', 'Down Sampling-Top', 'DT', 0.11788290396195722, 0.5754751835980634, 0.050142422670160745, 0.7572168763878608]

rf


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7853530551153372
fpr and fnr
0.23248882265275708
0.18676470588235294
accuracy
0.7905255366395263


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.1604661128216605
0.374160894824387
0.0453045993290602
accuracy is  0.7905255366395263
['Adult', 'Down Sampling-Top', 'RF', 0.1604661128216605, 0.374160894824387, 0.0453045993290602, 0.7905255366395263]

logistic


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7824969184715618
fpr and fnr
0.2503725782414307
0.18676470588235294
accuracy
0.7816432272390822


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.10978304087999356
0.44524589088943134
0.0077870875591236136
accuracy is  0.7816432272390822
['Adult', 'Down Sampling-Top', 'LG', 0.10978304087999356, 0.44524589088943134, 0.0077870875591236136, 0.7816432272390822]

svm


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


fpr and fnr
0.9418777943368107
0.016176470588235296
accuracy
0.5240562546262029
0.03768139557635621
0.02296350573751334
0.05736902401434138
accuracy is  0.5240562546262029
['Adult', 'Down Sampling-Top', 'SVM', 0.03768139557635621, 0.02296350573751334, 0.05736902401434138, 0.5240562546262029]


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

# Massaging

In [49]:
from sklearn.naive_bayes import MultinomialNB
def round_int(x):
    if x in [float("-inf"),float("inf")]: return 0
    return int(round(x))

def get_depromotion(d, diff, group_lst, names, label_y, flag_depro):
    input_test = pd.DataFrame(d, columns = columns_compas)

    clf = MultinomialNB()
    temp_train_label = pd.DataFrame(d, columns = [label_y])
    temp_train_label = temp_train_label[label_y]
    temp_train_label = temp_train_label.astype('int')
    clf = clf.fit(input_test, temp_train_label)
    prob  = clf.predict_proba(input_test)[:,0]
    select = copy.deepcopy(d)
    select['prob'] = prob # the higher the probablity is, the more likely for it to be 0
    # filter out those belongs to this group
    for i in range(len(group_lst)):
        att_name = names[i]
        select = select[(select[att_name] == group_lst[i])]
    select = select[(select[label_y] == flag_depro)]
    # rank them according to the probability
    # filp the records and remove the records from d
    if (flag_depro == 0):
        select.sort_values(by="prob", ascending=True, inplace=True)
        select[label_y] = 1
    else:
        select.sort_values(by="prob", ascending=False, inplace=True)
        select[label_y] = 0
    head = select.head(diff)
    index_list = []
    index_list = list(head.index)
    d.drop(index_list,inplace = True)
    head.drop(columns = ['prob'],inplace = True)
    return head



def naive_massaging(d, temp2, names, need_pos, need_neg,label_y):
    # add more records for all groups
    # The smote algorithm to boost the coverage
    for r in need_pos:
        print("adding more positive")
    # add more positive records
        # determine how many points to add
        print(r)
        diff = compute_diff_add_and_remove(r, temp2, 1, label_y, names)
        diff =  round_int(diff)
        # add more records
        #0 for promotion
        samples_to_add = get_depromotion(d, diff, r, names, label_y, flag_depro = 0)
        print("Changed " + str(len(samples_to_add)) +" records")
        d = pd.concat([d, samples_to_add])
        print(len(d))
    for k in need_neg:
        print(k)
        print("adding more negative")
        diff = compute_diff_add_and_remove(k, temp2, 0, label_y, names)
        diff =  round_int(diff)
        #1 for demotion
        samples_to_add = get_depromotion(d, diff, k, names, label_y, flag_depro = 1)
        print("Changed " + str(len(samples_to_add)) +" records")
        d = pd.concat([d, samples_to_add])
        print(len(d))
    return d

  and should_run_async(code)


## Run Algorithm Lattice

In [50]:
#get all of the candidate groups possible with the combos and names

new_train_data = copy.deepcopy(train_set)

#iterate over all the names to get the temp2 df for each name
for a in all_names_lst:
  print("?????/////")
  print(a)
  temp2, names = get_temp(new_train_data, all_names[a], compas_y)
  temp, temp_g = get_temp_g(new_train_data, names, compas_y)
  temp_g = temp_g[temp_g['cnt'] > filter_count]

  lst_of_counts = compute_lst_of_counts(temp, names, compas_y)
  start = time.time()
  need_pos, need_neg = compute_problematic_opt(temp2, temp_g, names, compas_y, lst_of_counts)
  end = time.time()
  excute_time = end - start
  print("The time to compute unfair group is {}".format(str(excute_time)))
  print("The sets of need pos and neg are")
  print(need_pos)
  print(need_neg)
  new_train_data['skewed'] = 0
  new_train_data["diff"] = 0
  new_train_data = naive_massaging(new_train_data, temp2, names, need_pos, need_neg, compas_y)

  print(new_train_data[compas_y].value_counts())
new_train_x = pd.DataFrame(new_train_data, columns = columns_all)
new_train_label = pd.DataFrame(new_train_data, columns = [compas_y])
new_train_label = new_train_label[compas_y]
new_train_label = new_train_label.astype('int')

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


?????/////
15
The time to compute unfair group is 0.08091044425964355
The sets of need pos and neg are
[]
[[1.0, 0.0, 2.0, 3.0], [1.0, 2.0, 0.0, 4.0], [2.0, 0.0, 1.0, 3.0], [2.0, 0.0, 2.0, 3.0], [2.0, 1.0, 0.0, 4.0], [2.0, 2.0, 0.0, 3.0]]
[1.0, 0.0, 2.0, 3.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 18 records
3154
[1.0, 2.0, 0.0, 4.0]
adding more negative
Changed 4 records
3154
[2.0, 0.0, 1.0, 3.0]
adding more negative
Changed 4 records
3154
[2.0, 0.0, 2.0, 3.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 13 records
3154
[2.0, 1.0, 0.0, 4.0]
adding more negative
Changed 17 records
3154
[2.0, 2.0, 0.0, 3.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 8 records
3154
0    1611
1    1543
Name: bar1, dtype: int64
?????/////
14
The time to compute unfair group is 0.12360930442810059
The sets of need pos and neg are
[[1.0, 0.0, 2.0]]
[[0.0, 1.0, 2.0], [0.0, 1.0, 3.0], [0.0, 1.0, 4.0], [0.0, 2.0, 2.0], [0.0, 2.0, 4.0], [0.0, 3.0, 3.0], [1.0, 2.0, 3.0], [2.0, 0.0, 4.0]]
adding more positive
[1.0, 0.0, 2.0]


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 5 records
3154
[0.0, 1.0, 2.0]
adding more negative
Changed 6 records
3154
[0.0, 1.0, 3.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 7 records
3154
[0.0, 1.0, 4.0]
adding more negative
Changed 9 records
3154
[0.0, 2.0, 2.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 7 records
3154
[0.0, 2.0, 4.0]
adding more negative
Changed 17 records
3154
[0.0, 3.0, 3.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 14 records
3154
[1.0, 2.0, 3.0]
adding more negative
Changed 15 records
3154
[2.0, 0.0, 4.0]
adding more negative
Changed 5 records
3154
0    1686
1    1468
Name: bar1, dtype: int64
?????/////
13


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The time to compute unfair group is 0.08560466766357422
The sets of need pos and neg are
[[2.0, 3.0, 3.0]]
[[1.0, 0.0, 2.0], [1.0, 1.0, 3.0], [1.0, 2.0, 2.0], [1.0, 2.0, 3.0], [1.0, 2.0, 4.0], [1.0, 3.0, 3.0], [2.0, 1.0, 2.0], [2.0, 2.0, 2.0], [2.0, 3.0, 4.0]]
adding more positive
[2.0, 3.0, 3.0]
Changed 4 records
3154
[1.0, 0.0, 2.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 10 records
3154
[1.0, 1.0, 3.0]
adding more negative
Changed 9 records
3154
[1.0, 2.0, 2.0]
adding more negative
Changed 6 records
3154
[1.0, 2.0, 3.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 13 records
3154
[1.0, 2.0, 4.0]
adding more negative
Changed 15 records
3154
[1.0, 3.0, 3.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 6 records
3154
[2.0, 1.0, 2.0]
adding more negative
Changed 4 records
3154
[2.0, 2.0, 2.0]
adding more negative
Changed 5 records
3154
[2.0, 3.0, 4.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 4 records
3154
0    1754
1    1400
Name: bar1, dtype: int64
?????/////
12
The time to compute unfair group is 0.12528729438781738
The sets of need pos and neg are
[]
[[1.0, 1.0, 2.0], [1.0, 1.0, 3.0], [1.0, 1.0, 4.0], [1.0, 2.0, 3.0]]
[1.0, 1.0, 2.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 11 records
3154
[1.0, 1.0, 3.0]
adding more negative
Changed 13 records
3154
[1.0, 1.0, 4.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 12 records
3154
[1.0, 2.0, 3.0]
adding more negative
Changed 6 records
3154
0    1796
1    1358
Name: bar1, dtype: int64
?????/////
11


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The time to compute unfair group is 0.1843857765197754
The sets of need pos and neg are
[]
[[1.0, 0.0, 1.0], [1.0, 0.0, 3.0], [1.0, 1.0, 2.0], [2.0, 1.0, 1.0]]
[1.0, 0.0, 1.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 15 records
3154
[1.0, 0.0, 3.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 9 records
3154
[1.0, 1.0, 2.0]
adding more negative
Changed 28 records
3154
[2.0, 1.0, 1.0]
adding more negative
Changed 10 records
3154
0    1858
1    1296
Name: bar1, dtype: int64
?????/////
10


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The time to compute unfair group is 0.08079385757446289
The sets of need pos and neg are
[[2.0, 4.0]]
[[1.0, 2.0], [2.0, 1.0], [2.0, 2.0]]
adding more positive
[2.0, 4.0]
Changed 11 records
3154
[1.0, 2.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 9 records
3154
[2.0, 1.0]
adding more negative
Changed 13 records
3154
[2.0, 2.0]
adding more negative
Changed 14 records
3154
0    1883
1    1271
Name: bar1, dtype: int64
?????/////
9


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The time to compute unfair group is 0.047528982162475586
The sets of need pos and neg are
[]
[[1.0, 2.0], [2.0, 2.0]]
[1.0, 2.0]
adding more negative
Changed 14 records
3154
[2.0, 2.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 7 records
3154
0    1904
1    1250
Name: bar1, dtype: int64
?????/////
8
The time to compute unfair group is 0.0529322624206543
The sets of need pos and neg are
[]
[[0, 4], [1, 1], [1, 2], [3, 0]]
[0, 4]
adding more negative
Changed 10 records
3154
[1, 1]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 10 records
3154
[1, 2]
adding more negative
Changed 17 records
3154
[3, 0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 10 records
3154
0    1951
1    1203
Name: bar1, dtype: int64
?????/////
7
The time to compute unfair group is 0.0482783317565918
The sets of need pos and neg are
[]
[[1.0, 1.0]]
[1.0, 1.0]
adding more negative
Changed 11 records
3154
0    1962
1    1192
Name: bar1, dtype: int64
?????/////
6


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The time to compute unfair group is 0.05885457992553711
The sets of need pos and neg are
[]
[[1.0, 4.0]]
[1.0, 4.0]
adding more negative
Changed 11 records
3154
0    1973
1    1181
Name: bar1, dtype: int64
?????/////
5


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The time to compute unfair group is 0.0376741886138916
The sets of need pos and neg are
[]
[[1.0, 2.0]]
[1.0, 2.0]
adding more negative
Changed 17 records
3154
0    1990
1    1164
Name: bar1, dtype: int64
?????/////
4
The time to compute unfair group is 0.020951032638549805
The sets of need pos and neg are
[]
[]
0    1990
1    1164
Name: bar1, dtype: int64
?????/////
3
The time to compute unfair group is 0.015684843063354492
The sets of need pos and neg are
[]
[]
0    1990
1    1164
Name: bar1, dtype: int64
?????/////
2
The time to compute unfair group is 0.024616718292236328
The sets of need pos and neg are
[]
[]
0    1990
1    1164
Name: bar1, dtype: int64
?????/////
1
The time to compute unfair group is 0.006467342376708984
The sets of need pos and neg are
[]
[]
0    1990
1    1164
Name: bar1, dtype: int64


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


### Results Lattice

In [51]:
print()
print("dt")
griddt.fit(new_train_x, new_train_label)
print("best", griddt.best_score_)

test_predict = griddt.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Massage-Lattice","DT")

print()
print("rf")
gridrf.fit(new_train_x, new_train_label)
print("best", gridrf.best_score_)
test_predict = gridrf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label,test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Massage-Lattice","RF")

print()
print("logistic")
gridlg.fit(new_train_x, new_train_label)
print("best", gridlg.best_score_)
test_predict = gridlg.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Massage-Lattice","LG")

print()
print("svm")
clf.fit(new_train_x, new_train_label)
test_predict = clf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
s,s2,s3 = div_results("Adult","Massage-Lattice","SVM")


dt


  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/relea

best 0.6835267778532438
fpr and fnr
0.18777943368107303
0.5455882352941176
accuracy
0.6321243523316062


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.07227534654279902
0.09991369100628093
0.245391055560456
accuracy is  0.6321243523316062
['Adult', 'Massage-Lattice', 'DT', 0.07227534654279902, 0.09991369100628093, 0.245391055560456, 0.6321243523316062]

rf


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.677145372676276
fpr and fnr
0.14008941877794337
0.575
accuracy
0.6410066617320503


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.04742360356004342
0.10259416119927696
0.2921289216913643
accuracy is  0.6410066617320503
['Adult', 'Massage-Lattice', 'RF', 0.04742360356004342, 0.10259416119927696, 0.2921289216913643, 0.6410066617320503]

logistic


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.6628822981913315
fpr and fnr
0.12220566318926974
0.4985294117647059
accuracy
0.688378978534419


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.04128900238249335
0.08568923362185123
0.2654217261550917
accuracy is  0.688378978534419
['Adult', 'Massage-Lattice', 'LG', 0.04128900238249335, 0.08568923362185123, 0.2654217261550917, 0.688378978534419]

svm


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


fpr and fnr
0.01639344262295082
0.95
accuracy
0.5136935603256847
0.007626684875474478
0.02502674659073397
0.43351828100028433
accuracy is  0.5136935603256847
['Adult', 'Massage-Lattice', 'SVM', 0.007626684875474478, 0.02502674659073397, 0.43351828100028433, 0.5136935603256847]


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

In [52]:
r3

  and should_run_async(code)


Unnamed: 0,support,itemsets,d_accuracy,t_value_tp_tn
28,0.149519,"(sex=2.0, fam_inc=4.0, race1=0, age=0)",0.118552,3.803156
36,0.120651,"(race1=0, sex=1.0, fam_inc=3.0)",0.090762,2.523673
21,0.19097,"(sex=2.0, fam_inc=4.0, race1=0)",0.082939,2.801528
25,0.177646,"(sex=2.0, fam_inc=4.0, age=0)",0.082454,2.697535
14,0.293856,"(sex=2.0, race1=0, age=0)",0.082402,3.314386
4,0.512953,"(race1=0, age=0)",0.07064,3.416522
15,0.256847,"(race1=0, fam_inc=3.0)",0.066664,2.49451
1,0.723908,(race1=0),0.055997,2.969888
18,0.225019,"(sex=2.0, fam_inc=4.0)",0.055042,1.920941
31,0.136195,"(sex=2.0, race1=0, fam_inc=3.0)",0.045317,1.238733


  and should_run_async(code)


## Run Algorithm Leaf

In [53]:
#get all of the candidate groups possible with the combos and names

new_train_data = copy.deepcopy(train_set)

#iterate over all the names to get the temp2 df for each name
for a in [all_names_lst[0]]:
  print("?????/////")
  print(a)
  temp2, names = get_temp(new_train_data, all_names[a], compas_y)
  temp, temp_g = get_temp_g(new_train_data, names, compas_y)
  temp_g = temp_g[temp_g['cnt'] > filter_count]

  lst_of_counts = compute_lst_of_counts(temp, names, compas_y)
  start = time.time()
  need_pos, need_neg = compute_problematic_opt(temp2, temp_g, names, compas_y, lst_of_counts)
  end = time.time()
  excute_time = end - start
  print("The time to compute unfair group is {}".format(str(excute_time)))
  print("The sets of need pos and neg are")
  print(need_pos)
  print(need_neg)
  new_train_data['skewed'] = 0
  new_train_data["diff"] = 0
  new_train_data = naive_massaging(new_train_data, temp2, names, need_pos, need_neg, compas_y)
  print(new_train_data[compas_y].value_counts())
new_train_x = pd.DataFrame(new_train_data, columns = columns_all)
new_train_label = pd.DataFrame(new_train_data, columns = [compas_y])
new_train_label = new_train_label[compas_y]
new_train_label = new_train_label.astype('int')

  and should_run_async(code)


?????/////
15


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


The time to compute unfair group is 0.12587976455688477
The sets of need pos and neg are
[]
[[1.0, 0.0, 2.0, 3.0], [1.0, 2.0, 0.0, 4.0], [2.0, 0.0, 1.0, 3.0], [2.0, 0.0, 2.0, 3.0], [2.0, 1.0, 0.0, 4.0], [2.0, 2.0, 0.0, 3.0]]
[1.0, 0.0, 2.0, 3.0]
adding more negative
Changed 18 records
3154
[1.0, 2.0, 0.0, 4.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 4 records
3154
[2.0, 0.0, 1.0, 3.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 4 records
3154
[2.0, 0.0, 2.0, 3.0]
adding more negative


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Changed 13 records
3154
[2.0, 1.0, 0.0, 4.0]
adding more negative
Changed 17 records
3154
[2.0, 2.0, 0.0, 3.0]
adding more negative
Changed 8 records
3154
0    1611
1    1543
Name: bar1, dtype: int64


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


### Results Leaf

In [54]:
print()
print("dt")
griddt.fit(new_train_x, new_train_label)
print("best", griddt.best_score_)

test_predict = griddt.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Massage-Leaf","DT")

print()
print("rf")
gridrf.fit(new_train_x, new_train_label)
print("best", gridrf.best_score_)
test_predict = gridrf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Massage-Leaf","RF")

print()
print("logistic")
gridlg.fit(new_train_x, new_train_label)
print("best", gridlg.best_score_)
test_predict = gridlg.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Massage-Leaf","LG")

print()
print("svm")
clf.fit(new_train_x, new_train_label)
test_predict = clf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
s,s2,s3 = div_results("Adult","Massage-Leaf","SVM")

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])



dt


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7574306341659749
fpr and fnr
0.23397913561847988
0.24705882352941178
accuracy
0.7594374537379719


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.1184731503309366
0.44081546771279323
0.07916278809840671
accuracy is  0.7594374537379719
['Adult', 'Massage-Leaf', 'DT', 0.1184731503309366, 0.44081546771279323, 0.07916278809840671, 0.7594374537379719]

rf


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7713802731869294
fpr and fnr
0.22801788375558868
0.2
accuracy
0.7860843819393042


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.15002694816697806
0.27384671566771135
0.03176198128315739
accuracy is  0.7860843819393042
['Adult', 'Massage-Leaf', 'RF', 0.15002694816697806, 0.27384671566771135, 0.03176198128315739, 0.7860843819393042]

logistic


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7650411289713983
fpr and fnr
0.22801788375558868
0.2073529411764706
accuracy
0.7823834196891192


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.12153058909193175
0.4655093984186144
0.009361708655649423
accuracy is  0.7823834196891192
['Adult', 'Massage-Leaf', 'LG', 0.12153058909193175, 0.4655093984186144, 0.009361708655649423, 0.7823834196891192]

svm


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


fpr and fnr
0.01788375558867362
0.9308823529411765
accuracy
0.5225758697261288
0.009692190760535778
0.05443648296031961
0.4180410815028043
accuracy is  0.5225758697261288
['Adult', 'Massage-Leaf', 'SVM', 0.009692190760535778, 0.05443648296031961, 0.4180410815028043, 0.5225758697261288]


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

## Run Algorithm Top

In [55]:
#get all of the candidate groups possible with the combos and names

new_train_data = copy.deepcopy(train_set)

#iterate over all the names to get the temp2 df for each name
all_names_lst_top = find_top(all_names)
for a in all_names_lst_top:
  print("?????/////")
  print(a)
  temp2, names = get_temp(new_train_data, all_names[a], compas_y)
  temp, temp_g = get_temp_g(new_train_data, names, compas_y)
  temp_g = temp_g[temp_g['cnt'] > filter_count]
  lst_of_counts = compute_lst_of_counts(temp, names, compas_y)
  start = time.time()
  need_pos, need_neg = compute_problematic_opt(temp2, temp_g, names, compas_y, lst_of_counts)
  end = time.time()
  excute_time = end - start
  print("The time to compute unfair group is {}".format(str(excute_time)))
  print("The sets of need pos and neg are")
  print(need_pos)
  print(need_neg)
  new_train_data['skewed'] = 0
  new_train_data["diff"] = 0
  new_train_data = naive_massaging(new_train_data, temp2, names, need_pos, need_neg, compas_y)
  print(new_train_data[compas_y].value_counts())
new_train_x = pd.DataFrame(new_train_data, columns = columns_all)
new_train_label = pd.DataFrame(new_train_data, columns = [compas_y])
new_train_label = new_train_label[compas_y]
new_train_label = new_train_label.astype('int')

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


?????/////
1
The time to compute unfair group is 0.0029287338256835938
The sets of need pos and neg are
[]
[]
1    1607
0    1547
Name: bar1, dtype: int64
?????/////
2
The time to compute unfair group is 0.004440784454345703
The sets of need pos and neg are
[]
[]
1    1607
0    1547
Name: bar1, dtype: int64
?????/////
3
The time to compute unfair group is 0.012560606002807617
The sets of need pos and neg are
[]
[]
1    1607
0    1547
Name: bar1, dtype: int64
?????/////
4
The time to compute unfair group is 0.006961822509765625
The sets of need pos and neg are
[]
[]
1    1607
0    1547
Name: bar1, dtype: int64


### Results Top

In [56]:
print()
print("dt")
griddt.fit(new_train_x, new_train_label)
print("best", griddt.best_score_)

test_predict = griddt.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Massage-Top","DT")

print()
print("rf")
gridrf.fit(new_train_x, new_train_label)
print("best", gridrf.best_score_)
test_predict = gridrf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Massage-Top","RF")

print()
print("logistic")
gridlg.fit(new_train_x, new_train_label)
print("best", gridlg.best_score_)
test_predict = gridlg.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
r,r2,r3 = div_results("Adult","Massage-Top","LG")

print()
print("svm")
clf.fit(new_train_x, new_train_label)
test_predict = clf.predict(test_x)
print("fpr and fnr")
print(fpr_onegroup(list(test_label), test_predict))
print(fnr_onegroup(list(test_label), test_predict))
accuracy = accuracy_score(test_label, test_predict)
print("accuracy")
print(accuracy)
test_set['predicted'] = test_predict
s,s2,s3 = div_results("Adult","Massage-Top","SVM")

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])



dt


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7739375644605438
fpr and fnr
0.21758569299552907
0.2676470588235294
accuracy
0.7572168763878608


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.11788290396195722
0.5754751835980634
0.050142422670160745
accuracy is  0.7572168763878608
['Adult', 'Massage-Top', 'DT', 0.11788290396195722, 0.5754751835980634, 0.050142422670160745, 0.7572168763878608]

rf


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7853530551153372
fpr and fnr
0.23248882265275708
0.18676470588235294
accuracy
0.7905255366395263


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/rele

0.1604661128216605
0.374160894824387
0.0453045993290602
accuracy is  0.7905255366395263
['Adult', 'Massage-Top', 'RF', 0.1604661128216605, 0.374160894824387, 0.0453045993290602, 0.7905255366395263]

logistic


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

best 0.7824969184715618
fpr and fnr
0.2503725782414307
0.18676470588235294
accuracy
0.7816432272390822


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

0.10978304087999356
0.44524589088943134
0.0077870875591236136
accuracy is  0.7816432272390822
['Adult', 'Massage-Top', 'LG', 0.10978304087999356, 0.44524589088943134, 0.0077870875591236136, 0.7816432272390822]

svm


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


fpr and fnr
0.9418777943368107
0.016176470588235296
accuracy
0.5240562546262029
0.03768139557635621
0.02296350573751334
0.05736902401434138
accuracy is  0.5240562546262029
['Adult', 'Massage-Top', 'SVM', 0.03768139557635621, 0.02296350573751334, 0.05736902401434138, 0.5240562546262029]


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])
See 

  and should_run_async(code)
