In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.init as init
from sklearn.metrics import roc_auc_score
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [3]:
class VecDataset_All(Dataset):
    """Dataset of precomputed embedding vectors paired with 14 finding labels.

    Every row of the info CSV provides a relative file path (column ``path``)
    and one column per entry of ``PRED_LABEL``. The embedding of a row is read
    from a ``.npy`` file whose location is ``data_path_mimic`` followed by the
    row's relative path with its file extension replaced by ``.npy``.
    """

    def __init__(self, info_df_path_mimic, data_path_mimic):
        """Read the info CSV and remember where the embedding files live.

        Args:
            info_df_path_mimic: Path of the CSV file. It must contain a
                ``path`` column and every column listed in ``PRED_LABEL``.
            data_path_mimic: Prefix that is string-concatenated with each
                row's relative path, so it has to end with a path separator.
        """
        self.PRED_LABEL = [
            'No Finding',
            'Enlarged Cardiomediastinum',
            'Cardiomegaly',
            'Lung Lesion',
            'Lung Opacity',
            'Edema',
            'Consolidation',
            'Pneumonia',
            'Atelectasis',
            'Pneumothorax',
            'Pleural Effusion',
            'Pleural Other',
            'Fracture',
            'Support Devices']

        self.info_df_mimic = pd.read_csv(info_df_path_mimic)
        # The attribute name keeps its historical spelling ("lables") so that
        # existing code reading it continues to work.
        self.lables_mimic = self.info_df_mimic[self.PRED_LABEL]
        self.samples_path_mimic = self.info_df_mimic['path']
        self.data_path_mimic = data_path_mimic
        self.len_mimic = len(self.info_df_mimic)

    def __len__(self):
        """Return the number of rows in the info CSV."""
        return self.len_mimic

    def __getitem__(self, idx):
        """Load the embedding and label vector of row ``idx``.

        Returns:
            dict with ``'data'`` (tensor created from the ``.npy`` array) and
            ``'labels'`` (float32 tensor with one entry per ``PRED_LABEL``).
        """
        # splitext removes only the extension of the final path component, so
        # a dot inside a directory name no longer truncates the whole path
        # (which is what splitting on the first "." did).
        stem, _ = os.path.splitext(self.samples_path_mimic.iloc[idx])
        full_item_path = self.data_path_mimic + stem + ".npy"

        # Convert the stored array to a tensor.
        item = torch.from_numpy(np.load(full_item_path))

        # Read this row's labels from the label columns.
        label = torch.tensor(list(self.lables_mimic.iloc[idx]), dtype=torch.float32)

        return {'data': item, 'labels': label}
    

# Both splits read their embedding files from the same directory prefix.
embedding_root = "/local/home/mikailk/fairness_on_embeddings/embedding_data/generalized-image-embeddings-for-the-mimic-chest-x-ray-dataset-1.0/"

val_dataset = VecDataset_All(
    info_df_path_mimic="/local/home/mikailk/fairness_on_embeddings/dataframes/mimic_validation_df.csv",
    data_path_mimic=embedding_root,
)
test_dataset = VecDataset_All(
    info_df_path_mimic="/local/home/mikailk/fairness_on_embeddings/dataframes/mimic_test_df.csv",
    data_path_mimic=embedding_root,
)

val_dataloader = DataLoader(dataset=val_dataset, batch_size=48)
# No batch_size is passed here, so the DataLoader default applies.
test_dataloader = DataLoader(dataset=test_dataset)

In [4]:
class VecModel(nn.Module):
    """Fully connected classifier operating on embedding vectors.

    For every hidden width the network stacks Linear -> ReLU -> BatchNorm1d ->
    Dropout; a final Linear layer maps to ``num_classes`` outputs. No
    activation is applied to the final layer's output.
    """

    def __init__(self, embeddings_size, hidden_layer_sizes, dropout_rate, num_classes):
        """
        Args:
            embeddings_size: Number of input features.
            hidden_layer_sizes: Iterable with the width of each hidden layer.
            dropout_rate: Probability passed to every ``nn.Dropout``.
            num_classes: Number of outputs of the final linear layer.
        """
        super().__init__()

        # widths[i] feeds the i-th hidden layer; widths[-1] feeds the output layer.
        widths = [embeddings_size, *hidden_layer_sizes]

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            dense = nn.Linear(fan_in, fan_out)
            # Re-initialise only the weight matrix; the bias keeps nn.Linear's default.
            init.kaiming_uniform_(dense.weight, nonlinearity='relu')
            stack += [dense, nn.ReLU(), nn.BatchNorm1d(fan_out), nn.Dropout(dropout_rate)]

        stack.append(nn.Linear(widths[-1], num_classes))
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Return the output of the layer stack for input batch ``x``."""
        return self.model(x)

In [5]:
model_path = '/local/home/mikailk/fairness_on_embeddings/results/vector_train_all_test_all_40/checkpoints/best_val_loss-epoch=18-val_loss=0.2491.ckpt'
# Seed tag used to build the per-seed output directory names further down.
# NOTE(review): presumably this has to match the seed in model_path — confirm when switching checkpoints.
model_seed = '40'

# Load onto the CPU so the checkpoint can be read regardless of which device
# it was saved from; the model is moved to its target device afterwards.
# NOTE(review): torch.load unpickles the file, so only load trusted checkpoints.
checkpoint = torch.load(model_path, map_location="cpu")
model = VecModel(embeddings_size=1376, hidden_layer_sizes=[768, 256], dropout_rate=0.3, num_classes=14)

# Drop the first 'model.' occurrence from every checkpoint key so the names
# line up with the keys VecModel itself exposes.
new_state_dict = {
    key.replace('model.', '', 1): tensor
    for key, tensor in checkpoint['state_dict'].items()
}

model.load_state_dict(new_state_dict)

  checkpoint = torch.load(model_path)


<All keys matched successfully>

In [262]:
# model.to("cuda")
# model.eval()

# predictions = []
# true_labels = []

# for batch in val_dataloader:
#     inputs = batch['data'].to("cuda")
#     labels = batch['labels'].to("cuda")
#     outputs = model(inputs)
#     predictions.extend(torch.sigmoid(outputs).cpu().detach().numpy())
#     true_labels.extend(labels.cpu().detach().numpy())

# auc_score = roc_auc_score(true_labels, predictions)

# print(f"AUC Score: {auc_score}")

In [6]:
# Move the network to the GPU and switch it to evaluation mode in one chain
# (both calls return the module itself).
model.to("cuda").eval()

def get_predictions(model, data_loader, threshold=0.36988158226):
    """Run ``model`` over ``data_loader`` and binarise its sigmoid outputs.

    Inputs are moved to whatever device the model's parameters live on, so the
    function works for CPU and GPU models alike. The model's train/eval mode
    is not touched here; the caller is expected to have set it.

    Args:
        model: Module that maps a batch of inputs to logits.
        data_loader: Iterable of dict batches holding ``'data'`` and
            ``'labels'`` tensors.
        threshold: A class is predicted positive when its sigmoid probability
            is strictly greater than this value. The origin of the default is
            not visible in this notebook.

    Returns:
        Tuple ``(all_predictions, all_labels)``: two lists with one NumPy
        array per batch (int predictions, labels as provided by the loader).
    """
    # Follow the model's own device instead of assuming a GPU is present.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    all_predictions = []
    all_labels = []
    with torch.no_grad():
        for batch in data_loader:
            logits = model(batch['data'].to(device))
            probabilities = torch.sigmoid(logits)
            predictions = (probabilities > threshold).int()

            all_predictions.append(predictions.cpu().numpy())
            # Labels are only collected, never fed to the model, so they do
            # not need to visit the model's device first.
            all_labels.append(batch['labels'].cpu().numpy())

    return all_predictions, all_labels


# get_predictions returns one array per batch; stack them along the sample axis.
prediction_batches, label_batches = get_predictions(model, test_dataloader)

test_predictions = np.concatenate(prediction_batches, axis=0)
test_labels = np.concatenate(label_batches, axis=0)

In [7]:
PRED_LABEL = test_dataset.PRED_LABEL

# Column names of the form 'PRED <label>', one per predicted finding.
prefixed_columns = ['PRED {}'.format(label) for label in PRED_LABEL]

df_predictions = pd.DataFrame(test_predictions, columns=prefixed_columns)
# NOTE(review): the ground-truth frame gets the same 'PRED ' prefix as the
# predictions — looks like a copy/paste slip; confirm before using df_labels.
df_labels = pd.DataFrame(test_labels, columns=prefixed_columns)

df_test_set = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/dataframes/mimic_test_df.csv")

In [8]:
# Put the thresholded predictions next to the test-set metadata (aligned on the row index).
combined_df = pd.concat([df_test_set, df_predictions], axis=1)

white_df = combined_df[combined_df['race'] == 'WHITE']
black_df = combined_df[combined_df['race'] == 'BLACK/AFRICAN AMERICAN']

# Keep only the first 3685 WHITE rows.
# NOTE(review): presumably 3685 is chosen relative to the size of the BLACK
# subgroup — confirm and consider deriving it from the data instead.
white_df = white_df[:3685]

# pd.concat is the public replacement for the private DataFrame._append.
combined_df = pd.concat([white_df, black_df])

# One narrow frame per protected attribute, each with the true and predicted
# 'No Finding' columns.
df_gender = combined_df[['gender', 'No Finding', 'PRED No Finding']]
df_age = combined_df[['age_decile', 'No Finding', 'PRED No Finding']]
df_race = combined_df[['race', 'No Finding', 'PRED No Finding']]
df_insurance = combined_df[['insurance', 'No Finding', 'PRED No Finding']]

In [9]:
def _fpr_fnr_no_finding(df_group):
    """Return ``(fpr, fnr)`` of 'PRED No Finding' against 'No Finding'.

    A rate whose denominator is zero (no negative rows for the FPR, no
    positive rows for the FNR) is reported as 0 instead of raising
    ZeroDivisionError; this is the same convention the intersectional tables
    in this notebook use.
    """
    predicted = df_group['PRED No Finding']
    actual = df_group['No Finding']

    fp = int(((predicted == 1) & (actual == 0)).sum())
    tn = int(((predicted == 0) & (actual == 0)).sum())
    fn = int(((predicted == 0) & (actual == 1)).sum())
    tp = int(((predicted == 1) & (actual == 1)).sum())

    fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
    fnr = fn / (fn + tp) if (fn + tp) > 0 else 0
    return fpr, fnr


def _write_group_rates(df_attr, column, groups, file_name):
    """Compute FPR/FNR for every value in ``groups`` and write them as one CSV row.

    Args:
        df_attr: Frame holding ``column``, 'No Finding' and 'PRED No Finding'.
        column: Name of the attribute column to split on.
        groups: Attribute values to evaluate, in output order.
        file_name: Name of the CSV file inside the current seed's directory.

    Returns:
        ``(rates, titles)``: the flat list ``[fpr, fnr, fpr, fnr, ...]`` and
        the matching ``'FPR_<group>'`` / ``'FNR_<group>'`` column titles.
    """
    rates = []
    titles = []
    for group in groups:
        fpr, fnr = _fpr_fnr_no_finding(df_attr[df_attr[column] == group])
        rates += [fpr, fnr]
        titles += ['FPR_' + group, 'FNR_' + group]

    output_path = '/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_' + model_seed + '/' + file_name
    pd.DataFrame([rates], columns=titles).to_csv(output_path)
    return rates, titles


gender_groups = ['M', 'F']
gender_data, gender_csv_title = _write_group_rates(
    df_gender, 'gender', gender_groups, 'FPR_FNR_NF_gender.csv')

age_groups = ['80+', '60-80', '40-60', '20-40', '0-20']
age_data, age_csv_title = _write_group_rates(
    df_age, 'age_decile', age_groups, 'FPR_FNR_NF_age.csv')

race_groups = ['WHITE', 'BLACK/AFRICAN AMERICAN']
race_data, race_csv_title = _write_group_rates(
    df_race, 'race', race_groups, 'FPR_FNR_NF_race.csv')

insurance_groups = ['Medicare', 'Other', 'Medicaid']
insurance_data, insurance_csv_title = _write_group_rates(
    df_insurance, 'insurance', insurance_groups, 'FPR_FNR_NF_insurance.csv')

In [10]:
def _intersectional_rate_tables(data, row_column, row_groups, col_column, col_groups, row_title, col_titles):
    """Build FPR/FNR tables for every combination of two attributes.

    Args:
        data: Frame holding ``row_column``, ``col_column``, 'No Finding' and
            'PRED No Finding'.
        row_column: Attribute whose groups become the table rows.
        row_groups: Values of ``row_column`` to evaluate, in row order.
        col_column: Attribute whose groups become the table columns.
        col_groups: Values of ``col_column`` to evaluate, in column order.
        row_title: Title of the first column (holds the row group name).
        col_titles: Short names used in the column titles, parallel to
            ``col_groups``.

    Returns:
        ``(full_table, fpr_table)``: ``full_table`` has ``FPR_<title>`` and
        ``FNR_<title>`` columns; ``fpr_table`` keeps only the FPR columns,
        renamed to the bare title. A rate with a zero denominator is 0.
    """
    records = []
    for row_group in row_groups:
        record = [row_group]
        for col_group in col_groups:
            cell = data[(data[row_column] == row_group) & (data[col_column] == col_group)]
            predicted = cell['PRED No Finding']
            actual = cell['No Finding']

            fp = int(((predicted == 1) & (actual == 0)).sum())
            tn = int(((predicted == 0) & (actual == 0)).sum())
            fn = int(((predicted == 0) & (actual == 1)).sum())
            tp = int(((predicted == 1) & (actual == 1)).sum())

            fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
            fnr = fn / (fn + tp) if (fn + tp) > 0 else 0
            record += [fpr, fnr]
        records.append(record)

    titles = [row_title]
    for col_title in col_titles:
        titles += ['FPR_' + col_title, 'FNR_' + col_title]
    full_table = pd.DataFrame(records, columns=titles)

    fpr_columns = ['FPR_' + col_title for col_title in col_titles]
    fpr_table = full_table.loc[:, [row_title] + fpr_columns].rename(
        columns=dict(zip(fpr_columns, col_titles)))
    return full_table, fpr_table


seed_output_dir = '/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_' + model_seed + '/'

# One entry per output file:
# (file name, row attribute, row groups, column attribute, column groups,
#  title of the label column, short column titles)
intersection_specs = [
    # Insurance Age
    ('FP_InsAge.csv', 'age_decile', age_groups, 'insurance', insurance_groups, 'age', insurance_groups),
    # Insurance Gender
    ('FP_InsGen.csv', 'insurance', insurance_groups, 'gender', gender_groups, 'Insurance', gender_groups),
    # Insurance Race
    ('FP_InsRace.csv', 'race', race_groups, 'insurance', insurance_groups, 'race', insurance_groups),
    # Race Age
    ('FP_RaceAge.csv', 'age_decile', age_groups, 'race', race_groups, 'age', ['White', 'Black']),
    # Race Gender
    ('FP_RaceGen.csv', 'race', race_groups, 'gender', gender_groups, 'race', gender_groups),
    # Age Gender
    ('FP_AgeGen.csv', 'age_decile', age_groups, 'gender', gender_groups, 'age', gender_groups),
]

for file_name, row_column, row_groups, col_column, col_groups, row_title, col_titles in intersection_specs:
    # combined_df already carries every attribute column, so the per-pair
    # joins of the single-attribute frames are not needed.
    full_table, fpr_table = _intersectional_rate_tables(
        combined_df, row_column, row_groups, col_column, col_groups, row_title, col_titles)

    # Writing the combined FPR+FNR table is currently disabled:
    # full_table.to_csv(seed_output_dir + file_name.replace('FP_', 'FP_FN_', 1))

    fpr_table.to_csv(seed_output_dir + file_name)

In [35]:
# Age Gender
FP5_agesex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_77/FP_AgeGen.csv")
FP4_agesex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_56/FP_AgeGen.csv")
FP3_agesex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_40/FP_AgeGen.csv")
FP2_agesex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_19/FP_AgeGen.csv")
FP1_agesex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_0/FP_AgeGen.csv")
FP_agesex = FP1_agesex._append([FP2_agesex, FP3_agesex, FP4_agesex, FP5_agesex])
FP_AgeSex =FP_agesex.groupby("age")   
FP_AgSx_df = FP_AgeSex.describe()

factors = ['F', 'M']
age =['0-20', '20-40', '40-60', '60-80','80-']
AgeSex_df = pd.DataFrame(age, columns=["Age"])

def FiveRun(factors, want_df, df, n_runs=None):
    """Add the mean and a 95% confidence half-width per factor to ``want_df``.

    Args:
        factors: Column names to summarise; each must be a top-level column of
            ``df`` offering 'mean', 'std' and 'count' (as produced by
            ``groupby(...).describe()``).
        want_df: Frame that receives the new columns. It is modified in place
            and values are attached by position, so its rows must be in the
            same order as the rows of ``df``.
        df: Output of ``groupby(...).describe()``.
        n_runs: Number of runs behind each statistic. When omitted, the
            per-group 'count' reported by ``describe()`` is used, so the
            interval stays correct when a group has more or fewer than five
            runs.

    Returns:
        ``want_df`` with ``<factor>`` (mean) and ``CI_<factor>``
        (1.96 * std / sqrt(n)) columns, both rounded to 3 decimals.
    """
    for factor in factors:
        stats = df[factor]
        runs = stats['count'] if n_runs is None else n_runs

        mean = stats['mean'].round(3)
        half_width = (1.96 * stats['std'] / np.sqrt(runs)).round(3)

        # Re-wrap the raw values so they align with want_df's positional index
        # rather than with the groupby keys.
        want_df[factor] = pd.Series(mean.values.tolist())
        want_df['CI_' + factor] = pd.Series(half_width.values.tolist())

    return want_df

# Summarise the Age x Gender statistics and store them for all seeds.
want = FiveRun(factors=factors, want_df=AgeSex_df, df=FP_AgSx_df)
want.to_csv('/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/all_seed/Inter_AgeSex.csv')

In [269]:

# FP_NF_3_agesex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_40/FP_FN_AgeGen.csv")
# FP_NF_2_agesex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_19/FP_FN_AgeGen.csv")
# FP_NF_1_agesex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_0/FP_FN_AgeGen.csv")
# FP_NF_agesex = FP_NF_1_agesex._append([FP_NF_2_agesex, FP_NF_3_agesex])
# FP_NF_AgeSex =FP_NF_agesex.groupby("age")   
# FP_NF_AgSx_df = FP_NF_AgeSex.describe()

# factors = ['FPR_M','FNR_M','FPR_F','FNR_F']
# age =['0-20', '20-40', '40-60', '60-80','80-']
# AgeSex_df = pd.DataFrame(age, columns=["Age"])


# def FiveRun_FP_NF(factors, want_df, df):
    
#     for factor in factors:    
#         dfM0 = round(df[factor]['mean'],3)
#         dfM2 = round(1.96 * df[factor]["std"] / np.sqrt(5),3)
#         want_df[factor] = pd.DataFrame(dfM0.values.tolist(),columns =[factor])
#         want_df['CI_'+factor] = pd.DataFrame(dfM2.values.tolist(),columns =['CI_'+factor])
    
#     return want_df

# want = FiveRun_FP_NF(factors, AgeSex_df, FP_NF_AgSx_df)
# want.to_csv('/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/all_seed/Inter_AgeSex_FNR_FPR.csv')

In [36]:
# Insurance Gender
FP5_InsSex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_77/FP_InsGen.csv")
FP4_InsSex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_56/FP_InsGen.csv")
FP3_InsSex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_40/FP_InsGen.csv")
FP2_InsSex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_19/FP_InsGen.csv")
FP1_InsSex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_0/FP_InsGen.csv")
FP_InsSex = FP1_InsSex._append([FP2_InsSex, FP3_InsSex, FP4_InsSex, FP5_InsSex])
FP_InSx =FP_InsSex.groupby("Insurance")
FP_InSx_df = FP_InSx.describe()

factors = ['F', 'M']
Insurance = ['Medicaid','Medicare','Other']
SexIns_df = pd.DataFrame(Insurance, columns=["Insurance"])

want = FiveRun(factors,SexIns_df,FP_InSx_df)
want.to_csv('/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/all_seed/Inter_SexIns.csv')

In [271]:
# FP3_InsSex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_40/FP_FN_InsGen.csv")
# FP2_InsSex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_19/FP_FN_InsGen.csv")
# FP1_InsSex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_0/FP_FN_InsGen.csv")
# FP_InsSex = FP1_InsSex._append([FP2_InsSex, FP3_InsSex])
# FP_InSx =FP_InsSex.groupby("Insurance")
# FP_InSx_df = FP_InSx.describe()

# factors = ['FPR_M','FNR_M','FPR_F','FNR_F']
# Insurance = ['Medicaid','Medicare','Other']
# SexIns_df = pd.DataFrame(Insurance, columns=["Insurance"])

# want = FiveRun_FP_NF(factors, SexIns_df, FP_InSx_df)
# want.to_csv('/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/all_seed/Inter_SexIns_FNR_FPR.csv')

In [37]:
# Insurance Race
FP5_InsRace = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_77/FP_InsRace.csv")
FP4_InsRace = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_56/FP_InsRace.csv")
FP3_InsRace  = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_40/FP_InsRace.csv")
FP2_InsRace = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_19/FP_InsRace.csv")
FP1_InsRace = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_0/FP_InsRace.csv")
FP_InsRace = FP1_InsRace._append([FP2_InsRace, FP3_InsRace, FP4_InsRace, FP5_InsRace])
FP_InsRace =FP_InsRace.groupby("race")
FP_InRa_df =FP_InsRace.describe()

factors = ['Medicaid', 'Other','Medicare']
race = ['BLACK/AFRICAN AMERICAN', 'WHITE']
RaceIns_df = pd.DataFrame(race, columns=["race"])

want = FiveRun(factors,RaceIns_df,FP_InRa_df)
want.to_csv('/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/all_seed/Inter_RaceIns.csv')

In [273]:
# FP3_InsRace = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_40/FP_FN_InsRace.csv")
# FP2_InsRace = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_19/FP_FN_InsRace.csv")
# FP1_InsRace = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_0/FP_FN_InsRace.csv")
# FP_InsRace = FP1_InsRace._append([FP2_InsRace, FP3_InsRace])
# FP_InsRace = FP_InsRace.groupby("race")
# FP_InRa_df = FP_InsRace.describe()

# factors = ['FPR_Medicare','FNR_Medicare','FPR_Other','FNR_Other', 'FPR_Medicaid', 'FNR_Medicaid']
# race = ['AMERICAN INDIAN/ALASKA NATIVE','ASIAN','BLACK/AFRICAN AMERICAN','HISPANIC/LATINO','OTHER','WHITE']
# RaceIns_df = pd.DataFrame(race, columns=["race"])

# want = FiveRun_FP_NF(factors, RaceIns_df, FP_InRa_df)
# want.to_csv('/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/all_seed/Inter_RaceIns_FNR_FPR.csv')

In [38]:
# Race Sex
FP5_RaceSex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_77/FP_RaceGen.csv")
FP4_RaceSex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_56/FP_RaceGen.csv")
FP3_RaceSex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_40/FP_RaceGen.csv")
FP2_RaceSex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_19/FP_RaceGen.csv")
FP1_RaceSex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_0/FP_RaceGen.csv")
FP_IRaceSex = FP1_RaceSex._append([FP2_RaceSex, FP3_RaceSex, FP4_RaceSex, FP5_RaceSex])
FP_RaceSex = FP_IRaceSex.groupby("race")
FP_RaceSex_df =FP_RaceSex.describe()

factors = ['F', 'M']
race =['BLACK/AFRICAN AMERICAN', 'WHITE']
RaceSex_df = pd.DataFrame(race, columns=["race"])

want = FiveRun(factors,RaceSex_df,FP_RaceSex_df)
want.to_csv('/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/all_seed/Inter_RaceSex.csv')

In [275]:
# FP3_RaceSex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_40/FP_FN_RaceGen.csv")
# FP2_RaceSex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_19/FP_FN_RaceGen.csv")
# FP1_RaceSex = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_0/FP_FN_RaceGen.csv")
# FP_IRaceSex = FP1_RaceSex._append([FP2_RaceSex, FP3_RaceSex])
# FP_RaceSex =FP_IRaceSex.groupby("race")
# FP_RaceSex_df =FP_RaceSex.describe()

# factors = ['FPR_M','FNR_M','FPR_F','FNR_F']
# race = ['AMERICAN INDIAN/ALASKA NATIVE','ASIAN','BLACK/AFRICAN AMERICAN','HISPANIC/LATINO','OTHER','WHITE']
# RaceSex_df = pd.DataFrame(race, columns=["race"])

# want = FiveRun_FP_NF(factors, RaceSex_df, FP_RaceSex_df)
# want.to_csv('/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/all_seed/Inter_RaceSex_FNR_FPR.csv')

In [39]:
# Insurance Age
FP5_InsAge = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_77/FP_InsAge.csv")
FP4_InsAge = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_56/FP_InsAge.csv")
FP3_InsAge = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_40/FP_InsAge.csv")
FP2_InsAge = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_19/FP_InsAge.csv")
FP1_InsAge = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_0/FP_InsAge.csv")
FP_InsAge = FP1_InsAge._append([FP2_InsAge, FP3_InsAge, FP4_InsAge, FP5_InsAge])
FP_InsAge = FP_InsAge.groupby("age")
FP_InsAge_df =FP_InsAge.describe()

factors = ['Medicaid', 'Other','Medicare']
age =['0-20', '20-40', '40-60', '60-80','80-']
AgeIns_df = pd.DataFrame(age, columns=["age"])

want = FiveRun(factors,AgeIns_df,FP_InsAge_df)
want.to_csv('/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/all_seed/Inter_AgeIns.csv')

In [277]:
# FP3_InsAge = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_40/FP_FN_InsAge.csv")
# FP2_InsAge = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_19/FP_FN_InsAge.csv")
# FP1_InsAge = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_0/FP_FN_InsAge.csv")
# FP_InsAge = FP1_InsAge._append([FP2_InsAge, FP3_InsAge])
# FP_InsAge =FP_InsAge.groupby("age")
# FP_InsAge_df =FP_InsAge.describe()

# factors = ['FPR_Medicare','FNR_Medicare','FPR_Other','FNR_Other', 'FPR_Medicaid', 'FNR_Medicaid']
# age = ['0-20','20-40','40-60','60-80','80-']
# AgeIns_df = pd.DataFrame(age, columns=["age"])

# want = FiveRun_FP_NF(factors, AgeIns_df, FP_InsAge_df)
# want.to_csv('/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/all_seed/Inter_AgeIns_FNR_FPR.csv')

In [40]:
# Race Age
FP5_RaceAge = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_77/FP_RaceAge.csv")
FP4_RaceAge = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_56/FP_RaceAge.csv")
FP3_RaceAge = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_40/FP_RaceAge.csv")
FP2_RaceAge = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_19/FP_RaceAge.csv")
FP1_RaceAge = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_0/FP_RaceAge.csv")
FP_RaceAge = FP1_RaceAge._append([FP2_RaceAge, FP3_RaceAge, FP4_RaceAge, FP5_RaceAge])
FP_RaceAge = FP_RaceAge.groupby("age")
FP_RaceAge_df =FP_RaceAge.describe()

factors = ['White', 'Black']
age =['0-20', '20-40', '40-60', '60-80','80-']
RaceAge_df = pd.DataFrame(age, columns=["age"])

want = FiveRun(factors,RaceAge_df,FP_RaceAge_df)
want.to_csv('/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/all_seed/Inter_RaceAge.csv')

In [280]:
# FP4_RaceAge = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_56/FP_FN_RaceAge.csv")
# FP3_RaceAge = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_40/FP_FN_RaceAge.csv")
# FP2_RaceAge = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_19/FP_FN_RaceAge.csv")
# FP1_RaceAge = pd.read_csv("/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/seed_0/FP_FN_RaceAge.csv")
# FP_RaceAge = FP1_RaceAge._append([FP2_RaceAge, FP3_RaceAge, FP4_RaceAge])
# FP_RaceAge = FP_RaceAge.groupby("age")
# FP_RaceAge_df = FP_RaceAge.describe()

# factors = ['FPR_White', 'FNR_White', 'FPR_Black', 'FNR_Black', 'FPR_Hisp', 'FNR_Hisp', 'FPR_Other', 'FNR_Other', 'FPR_Asian', 'FNR_Asian', 'FPR_American', 'FNR_American']
# age =['0-20', '20-40', '40-60', '60-80','80-']
# AgeRace_df = pd.DataFrame(age, columns=["age"])

# want = FiveRun_FP_NF(factors, AgeRace_df, FP_RaceAge_df)
# want.to_csv('/local/home/mikailk/fairness_on_embeddings/imbalanced_vec_graph_data/all_seed/Inter_AgeRace_FNR_FPR.csv')