In [5]:
import pandas as pd
import numpy as np
from sklearn import metrics
import matplotlib.pyplot as plt
from google.colab import drive
# Mount Google Drive under /content/drive so the absolute CSV paths used in the
# cells below resolve. force_remount=True asks Colab for a fresh mount even if
# the drive is already mounted in this session.
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


In [6]:
  # Load ground-truth labels and model predictions for CheXpert run 3.
  test_df = pd.read_csv('/content/drive/MyDrive/UM2ii/CXR Granular Bias/CheXpert/True_3.csv')
  preds_df = pd.read_csv('/content/drive/MyDrive/UM2ii/CXR Granular Bias/CheXpert/preds_3.csv')

  # Admissions table reduced to one row per distinct (subject_id, Race) pair.
  admission_df = (
      pd.read_csv('/content/drive/MyDrive/UM2ii/CXR Granular Bias/admissions.csv.gz', compression='gzip')
      .loc[:, ['subject_id', 'race']]
      .rename(columns={"race": "Race"})
      .drop_duplicates()
  )

  # Some individuals report their race inconsistently (at the granular or
  # coarse level). After de-duplication such a subject still has more than one
  # row, so drop every subject_id that appears more than once.
  admission_df = admission_df.loc[~admission_df['subject_id'].duplicated(keep=False)]

  # Join labels and predictions on whatever columns the two files share.
  # NOTE(review): presumably only `path` is shared -- confirm against the CSV headers.
  df = pd.merge(test_df, preds_df)

  # The subject id is the third '/'-separated component of `path` with its
  # first character stripped.
  # NOTE(review): assumes paths look like <dir>/<dir>/p<subject_id>/... -- confirm.
  df['subject_id'] = df.path.str.split('/').map(lambda parts: parts[2][1:]).astype(int)

  # Race categories removed from the analysis.
  excluded_races = [
      'OTHER',
      'UNKNOWN',
      'UNABLE TO OBTAIN',
      'MULTIPLE RACE/ETHNICITY',
      'PATIENT DECLINED TO ANSWER',
      'AMERICAN INDIAN/ALASKA NATIVE',
      'NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER',
  ]

  # Left join keeps every image row; rows without a usable admission record get
  # Race = NaN. isin() is False for NaN, so those rows survive this filter just
  # as they did with the chained `!=` comparisons.
  df = pd.merge(df, admission_df, how="left", on='subject_id')
  df = df[~df.Race.isin(excluded_races)]

# Tally rows per documented race once and reuse it; value_counts() skips NaN,
# so the total covers only rows with a recorded race.
# NOTE(review): these are row counts of `df` (presumably one row per image
# path), not unique subjects, although the messages say "individuals" -- confirm.
race_counts = df.Race.value_counts()
documented_total = race_counts.sum()

print("In our test set, we have ", documented_total, " individuals with a documented race/ethnicity.", sep="")
print("The proportion of each individuals in each documented race/ethnicity is as follows:")
print(race_counts / documented_total)
print()
print("The number of each individuals in each documented race/ethnicity is as follows:")
print(race_counts)

In our test set, we have 270136 individuals with a documented race/ethnicity.
The proportion of each individuals in each documented race/ethnicity is as follows:
WHITE                                 0.693954
BLACK/AFRICAN AMERICAN                0.163355
HISPANIC/LATINO - PUERTO RICAN        0.018517
WHITE - OTHER EUROPEAN                0.018372
ASIAN - CHINESE                       0.014763
WHITE - RUSSIAN                       0.012990
BLACK/CAPE VERDEAN                    0.012660
HISPANIC/LATINO - DOMINICAN           0.011324
ASIAN                                 0.009925
BLACK/CARIBBEAN ISLAND                0.006819
BLACK/AFRICAN                         0.005023
HISPANIC OR LATINO                    0.004612
ASIAN - SOUTH EAST ASIAN              0.004128
PORTUGUESE                            0.003247
ASIAN - ASIAN INDIAN                  0.002891
HISPANIC/LATINO - GUATEMALAN          0.002713
WHITE - BRAZILIAN                     0.002362
WHITE - EASTERN EUROPEAN              0

In [7]:
print("The number of 'No Finding' Labels in each documented race/ethnicity is as follows:")

# Keep only rows with a documented race from here on.
df = df[df.Race.notna()]

# Count rows labelled 'No Finding' == 1 within each race. groupby yields the
# race labels in sorted order, matching the alphabetical listing.
no_finding_counts = df['No Finding'].eq(1).astype(int).groupby(df.Race).sum()
for race, n_no_finding in no_finding_counts.items():
  print(race, n_no_finding)

The number of 'No Finding' Labels in each documented race/ethnicity is as follows:
ASIAN 1017
ASIAN - ASIAN INDIAN 323
ASIAN - CHINESE 1375
ASIAN - KOREAN 84
ASIAN - SOUTH EAST ASIAN 385
BLACK/AFRICAN 680
BLACK/AFRICAN AMERICAN 20375
BLACK/CAPE VERDEAN 1537
BLACK/CARIBBEAN ISLAND 670
HISPANIC OR LATINO 729
HISPANIC/LATINO - CENTRAL AMERICAN 131
HISPANIC/LATINO - COLUMBIAN 179
HISPANIC/LATINO - CUBAN 149
HISPANIC/LATINO - DOMINICAN 1344
HISPANIC/LATINO - GUATEMALAN 364
HISPANIC/LATINO - HONDURAN 134
HISPANIC/LATINO - MEXICAN 188
HISPANIC/LATINO - PUERTO RICAN 2407
HISPANIC/LATINO - SALVADORAN 223
PORTUGUESE 217
SOUTH AMERICAN 141
WHITE 64886
WHITE - BRAZILIAN 241
WHITE - EASTERN EUROPEAN 239
WHITE - OTHER EUROPEAN 1696
WHITE - RUSSIAN 1415


In [8]:
from scipy import stats
def wauc(true, preds):
  """Return the macro-averaged ROC AUC for one ground-truth/prediction CSV pair.

  Args:
    true: Path to the CSV of ground-truth labels; must contain a `path` column.
    preds: Path to the CSV of predicted scores; must contain a `path` column.

  Returns:
    The value of `metrics.roc_auc_score` computed with `average='macro'` over
    every column other than `path`.

  NOTE(review): rows and label columns of the two files are compared by
  position, not matched on `path` or column name -- confirm both files share
  the same row and column order.
  """
  labels = pd.read_csv(true).drop(columns='path')
  scores = pd.read_csv(preds).drop(columns='path')
  return metrics.roc_auc_score(y_true=labels, y_score=scores, average='macro', multi_class='ovr')


# Score each CheXpert run against its ground truth. The run ids are the numeric
# suffixes of the True_<id>.csv / preds_<id>.csv files; building the paths from
# one directory and one id tuple replaces five copy-pasted calls.
chexpert_dir = '/content/drive/MyDrive/UM2ii/CXR Granular Bias/CheXpert'
auc_3, auc_6, auc_14, auc_96, auc_99 = (
    wauc(chexpert_dir + '/True_' + str(run_id) + '.csv',
         chexpert_dir + '/preds_' + str(run_id) + '.csv')
    for run_id in (3, 6, 14, 96, 99)
)

# Collect the per-run AUCs once instead of rebuilding the list for every statistic.
run_aucs = [auc_3, auc_6, auc_14, auc_96, auc_99]
mean_auc = np.mean(run_aucs)
print('Average AUC of CheXpert-trained models applied onto MIMIC dataset: ' + str(mean_auc))

# 95% Student-t interval for the mean AUC across runs: n - 1 degrees of
# freedom, centred on the sample mean, scaled by the standard error of the mean.
confidence_interval = stats.t.interval(0.95, len(run_aucs) - 1, loc=mean_auc, scale=stats.sem(run_aucs))

# Print the confidence interval
print("95% Confidence Interval:", confidence_interval)

Average AUC of CheXpert-trained models applied onto MIMIC dataset: 0.7432132342403976
95% Confidence Interval: (0.7378593711757045, 0.7485670973050907)
