In [1]:
import pandas as pd
import numpy as np
import pickle
from scipy import stats

In [2]:
def noise(s, p = 0.25):
    return s if np.random.random() > p else 1 - s

In [3]:
# create combined dataframe of expert + model predictions
human_label_path = 'nih_full_raw_data/all_findings_expert_labels/test_individual_readers.csv'
n_experts = 5
annotator_id_cols = [4343880271, 4343882583, 4343882785, 4343883593, 4343883996]
human_labels = pd.read_csv(human_label_path)
human_labels.reset_index(drop=True, inplace=True)
annotations = human_labels.pivot(
    index='Image ID', columns='Reader ID', values='Abnormal'
).reset_index()

m = pd.read_csv("cxr_model_predictions.csv", index_col=0)
m['img_name'] = m['img_name'].apply(lambda x: x[13:])
m['score'] = m['score'].apply(lambda x: float(x[1:-1]))

df = annotations.merge(m, left_on='Image ID', right_on='img_name')
df = df.drop('Image ID', axis=1)
df['model_pred_noisy'] = df['score'].apply(noise)
df['Y_H'] = df.apply(lambda x: [x[c] for c in annotator_id_cols], axis=1)
df['consensus'] = df['Y_H'].apply(lambda x: stats.mode(x)[0][0])
df.columns = [
    'expert1', 'expert2', 'expert3', 'expert4', 'expert5', 'img_name',
                'model_pred', 'model_pred_noisy', 'Y_H', 'consensus',
]
df.to_csv('nih_processed.csv', index=None)
df.head()

Unnamed: 0,expert1,expert2,expert3,expert4,expert5,img_name,model_pred,model_pred_noisy,Y_H,consensus
0,1,1,1,1,1,00000211_006.png,0.999332,0.999332,"[1, 1, 1, 1, 1]",1
1,1,1,1,1,1,00000211_022.png,0.98691,0.01309,"[1, 1, 1, 1, 1]",1
2,1,1,1,1,1,00000211_028.png,0.993304,0.993304,"[1, 1, 1, 1, 1]",1
3,1,1,1,1,1,00000211_029.png,0.999482,0.000518,"[1, 1, 1, 1, 1]",1
4,1,1,1,1,1,00000218_002.png,0.999544,0.000456,"[1, 1, 1, 1, 1]",1


In [4]:
# save data dict for our model
Y_H = np.array(list(df['Y_H'])) + 1
Y_M = [[[1 - s, s]] for s in df['model_pred']]

data_dict = {
    'Y_M' : Y_M,
    'Y_H' : Y_H.tolist(),
    'n_models' : 1,
    'n_humans' : n_experts,
    'K' : 2
}

with open('nih.pickle', 'wb') as handle:
    pickle.dump(data_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [5]:
# save data dict for our model with noisy predictions
Y_M_noisy = [[[1 - s, s]] for s in df['model_pred_noisy']]

data_dict_noisy = {
    'Y_M' : Y_M_noisy,
    'Y_H' : Y_H.tolist(),
    'n_models' : 1,
    'n_humans' : n_experts,
    'K' : 2
}

with open('nih_noisy_model.pickle', 'wb') as handle:
    pickle.dump(data_dict_noisy, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [6]:
# get accuracies
n = 250
print('accuracy on test set (n={}):'.format(n))
df['model_pred_int'] = df['model_pred'].apply(lambda x: 0 if x<0.5 else 1)
df['noisy_pred_int'] = df['model_pred_noisy'].apply(lambda x: 0 if x<0.5 else 1)

test_accuracy = sum(df[:n]['model_pred_int']==df[:n]['consensus'])/n
print("\tclassifier: {}".format(test_accuracy))

test_accuracy_noisy = sum(df[:n]['noisy_pred_int']==df[:n]['consensus'])/n
print("\tnoisy classifier: {}".format(test_accuracy_noisy))

for e in range(n_experts):
    expert_acc = sum(df[:n]['expert'+str(e+1)]==df[:n]['consensus'])/n
    print ("\texpert {}: {}".format(e+1, expert_acc))

accuracy on test set (n=250):
	classifier: 0.816
	noisy classifier: 0.672
	expert 1: 0.904
	expert 2: 0.88
	expert 3: 0.848
	expert 4: 0.932
	expert 5: 0.84


In [7]:
# create data dict for INFEXP model
y_h = np.array(df[['expert'+str(i+1) for i in range(n_experts)]])
y_h = y_h.reshape((n_experts,len(df)))
d_new = np.array(df['consensus'])

y_m_new = np.array([[1 - s, s] for s in df['model_pred']])
model_confs = np.array([y_m_new])
model_preds = np.array([[np.argmax(i) for i in j] for j in model_confs])

df['model_correct'] = df['model_pred_int']==df['consensus']
model_perf = np.array([[df['model_correct'].mean()]])
class_wise_perf = np.array(
    df.groupby(
        'consensus'
    ).aggregate(
        {'model_correct':'mean'}
    )['model_correct']
)

n_models = 1
model_preds_dict_new = {
    'model_confs' : model_confs,
    'model_preds' : model_preds,
    'true_targets' : d_new,
    'expert_preds' : y_h,
    'chosen_models' : np.array([0]),
    'model_perf' : model_perf,
    'model_perf_per_class' : class_wise_perf
}

with open('nih_infexp.pickle', 'wb') as handle:
    pickle.dump(model_preds_dict_new, handle, protocol=pickle.HIGHEST_PROTOCOL)