In [1]:
import pandas as pd
import pickle
import numpy as np
from scipy import stats

In [2]:
# funcs for combining expert opinions
def get_minority_opinion(x):
    """Return the first value in `x` that differs from the mode of `x`.

    Parameters
    ----------
    x : 1-D sequence of labels (list, NumPy array or pandas Series).

    Returns
    -------
    The first element of `x`, in iteration order, that is not the mode
    reported by `scipy.stats.mode`. If no element differs, an element of
    `x` picked by `np.random.choice` is returned instead.
    """
    # For 1-D input, older SciPy releases return the mode as a length-1 array
    # while SciPy >= 1.11 returns a scalar. `np.atleast_1d` normalises both so
    # the membership test below works on every version.
    modes = np.atleast_1d(stats.mode(np.asarray(x))[0])
    for val in x:
        if val not in modes:
            return val
    # Reaching this point means every element of `x` is contained in `modes`.
    return np.random.choice(list(x))

def add_minority(df, start, end):
    """Add a 'minority<start>-<end>' column to `df` in place.

    For every row, the values stored under the integer column labels
    start..end (inclusive) are passed to `get_minority_opinion`, and the
    returned value is written to the new column. Nothing is returned.
    """
    annotator_cols = list(range(start, end + 1))

    def row_minority(row):
        # `row` is the full DataFrame row; only the requested labels are used.
        return get_minority_opinion(row[annotator_cols])

    df['minority{}-{}'.format(start, end)] = df.apply(row_minority, axis=1)

def add_consensus(df, start, end):
    """Add a 'consensus<start>-<end>' column to `df` in place.

    For every row, the new column holds the mode (as reported by
    `scipy.stats.mode`) of the values stored under the integer column
    labels start..end (inclusive). Nothing is returned.
    """
    col_name = 'consensus{}-{}'.format(start, end)
    include_vals = list(range(start, end + 1))

    def row_mode(x):
        # For 1-D input the mode is a length-1 array on older SciPy and a
        # scalar on SciPy >= 1.11; `np.atleast_1d` makes `[0]` valid for both.
        return np.atleast_1d(stats.mode(np.asarray(x[include_vals]))[0])[0]

    df[col_name] = df.apply(row_mode, axis=1)
    
def create_experts(row, exp1_c, exp2_c, exp3_c):
    """Fill 'expert1', 'expert2' and 'expert3' on `row` and return `row`.

    For expert k, when row['consensus'] is contained in `expk_c` the label is
    copied from that expert's consensus column; otherwise it is copied from
    that expert's minority column. `row` is modified in place.
    """
    # (target column, classes selecting the consensus column,
    #  consensus source column, minority source column)
    specs = (
        ('expert1', exp1_c, 'consensus0-10', 'minority0-15'),
        ('expert2', exp2_c, 'consensus16-26', 'minority16-32'),
        ('expert3', exp3_c, 'consensus33-43', 'minority33-48'),
    )
    for target_col, agree_classes, consensus_col, minority_col in specs:
        # Only the selected source column is read.
        source_col = consensus_col if row['consensus'] in agree_classes else minority_col
        row[target_col] = row[source_col]
    return row

In [3]:
# funcs for merging classes into super-classes
def combine_classes(row, class1, class2, class3):
    """Replace the labels in the expert and consensus columns by super-class ids.

    A label contained in `class1` becomes 0, one contained in `class2`
    becomes 1, and every other label becomes 2. `class3` is accepted but not
    consulted. `row` is modified in place and returned.
    """
    def to_super_class(label):
        if label in class1:
            return 0
        return 1 if label in class2 else 2

    for col in ('expert1', 'expert2', 'expert3', 'consensus'):
        row[col] = to_super_class(row[col])
    return row

def combine_model_classes(row, c1, c2, c3):
    """Sum the entries of `row[0]` over three index groups.

    `row[0]` is indexed with each of `c1`, `c2` and `c3`; the selected
    entries are summed and the sum is capped at 1.0. The three results are
    returned as a nested list `[[p1, p2, p3]]`.
    """
    first_model = row[0]
    merged = [min(1.0, sum(first_model[members])) for members in (c1, c2, c3)]
    return [merged]

In [4]:
# create dataframe of expert and model predictions
# new classes (e.g., class c1 is original classes 0, 1, and 2)
c1 = [0,1,2]
c2 = [3,4,5]
c3 = [6,7,8,9]
n_experts = 3

# NOTE: pd.read_pickle can execute arbitrary code while unpickling; only load
# these files if they come from a trusted source.
expert_preds = pd.read_pickle(r'cifar_raw_annotations.pkl')
df = pd.DataFrame(expert_preds)
# Row-wise mode over all annotation columns. For 1-D input `stats.mode`
# returns a length-1 array on older SciPy and a scalar on SciPy >= 1.11;
# `np.atleast_1d` makes the `[0]` lookup valid on both.
df['consensus'] = df.apply(
    lambda x: np.atleast_1d(stats.mode(np.asarray(x))[0])[0], axis=1
)

d = pd.read_pickle(r'cifar_raw_model_data.pkl')
chosen_model = 55 # used 28, 46, 55
n_models = 1
chosen_model_preds = d['model_confs'][chosen_model]
# (items, models, original classes) -- assumes 10000 items with 10 classes each
y_m = chosen_model_preds.reshape((10000,n_models,10))
# merge the 10 original class probabilities into the 3 super-classes
y_m_new = np.array([combine_model_classes(x, c1, c2, c3) for x in y_m])
for i in range(3):
    df['model_p'+str(i)] = y_m_new[:,0,i]
# predicted super-class = index of the largest merged probability per row
df['model_pred_int'] = np.argmax([df['model_p'+str(i)] for i in range(3)], axis=0)

  df['consensus'] = df.apply(lambda x: stats.mode(x)[0][0], axis=1)


In [5]:
# create experts

# One entry per expert: (consensus column range, minority column range).
#   expert 1 -> annotations 0-15
#   expert 2 -> annotations 16-32
#   expert 3 -> annotations 33-48
expert_annotation_ranges = [
    ((0, 10), (0, 15)),
    ((16, 26), (16, 32)),
    ((33, 43), (33, 48)),
]
for consensus_range, minority_range in expert_annotation_ranges:
    add_consensus(df, *consensus_range)
    add_minority(df, *minority_range)

# Per expert, the classes for which the consensus column is used:
# expert 1 -> c1 + c3, expert 2 -> c1 + c2, expert 3 -> c2 + c3.
df = df.apply(create_experts, axis=1, args=(c1 + c3, c1 + c2, c2 + c3))
# Collapse the original labels into the three super-classes.
df = df.apply(combine_classes, axis=1, args=(c1, c2, c3))

  df[col_name] = df.apply(lambda x: stats.mode(x[include_vals])[0][0], axis=1)
  modes = stats.mode(x)[0]


In [6]:
# Keep only the label columns and the merged model outputs, store the labels
# as integers, and write the result to disk.
label_cols = ['expert1', 'expert2', 'expert3', 'consensus']
model_cols = ['model_p0', 'model_p1', 'model_p2', 'model_pred_int']
df = df[label_cols + model_cols].astype({name: int for name in label_cols})
df.to_csv('cifar_processed.csv')

In [7]:
# Display the first rows of the processed frame as the cell output.
df.head()

Unnamed: 0,expert1,expert2,expert3,consensus,model_p0,model_p1,model_p2,model_pred_int
0,0,1,1,1,8.721072e-09,1.0,2.001451e-09,1.0
1,2,2,2,2,9.041309e-05,1.120718e-08,0.9999095,2.0
2,2,2,2,2,0.1709112,9.374868e-07,0.8290878,2.0
3,0,0,2,0,0.347034,0.5637769,0.08918913,1.0
4,2,0,2,2,9.286677e-08,2.337285e-07,0.9999996,2.0


In [8]:
# get accuracies
# Only the first n rows are used for the reported numbers.
n = 750
print('accuracy on test set (n={}):'.format(n))

# classifier: a row counts as correct when the predicted super-class equals
# the consensus label
df['model_correct'] = df['model_pred_int']==df['consensus']
test_accuracy = df[:n]['model_correct'].mean()
# mean correctness within each consensus class
class_wise_accs = list(df[:n].groupby('consensus')['model_correct'].mean())
print("\tclassifier (overall): {}".format(test_accuracy))
print("\t\t " + str(class_wise_accs))

# experts: same two numbers for each simulated expert
for e in range(n_experts):
    expert_col = 'expert'+str(e+1)
    e_corr_col = 'expert{}_correct'.format(e+1)
    df[e_corr_col] = df[expert_col]==df['consensus']
    # re-slice after adding the column so the slice contains it
    expert_acc = sum(df[:n][e_corr_col])/n
    class_wise_accs = list(df[:n].groupby('consensus')[e_corr_col].mean())
    print ("\texpert {}: {}".format(e+1, expert_acc))
    print("\t\t " + str(class_wise_accs))

accuracy on test set (n=750):
	classifier (overall): 0.9226666666666666
		 [0.821256038647343, 0.9620853080568721, 0.9608433734939759]
	expert 1: 0.9386666666666666
		 [1.0, 0.7867298578199052, 0.9969879518072289]
	expert 2: 0.9093333333333333
		 [1.0, 1.0, 0.7951807228915663]
	expert 3: 0.9
		 [0.642512077294686, 1.0, 0.9969879518072289]


In [9]:
# save data dict for our model
# Y_H: expert labels shifted by +1, shape (n_items, n_experts)
y_h = np.array(df[['expert1','expert2','expert3']]) + 1
# Y_M: merged model probabilities, shape (n_items, 1, 3).
# Selecting the three columns yields an (n_items, 3) array whose row i is
# [model_p0[i], model_p1[i], model_p2[i]]; the reshape only inserts the model
# axis. Stacking the columns as (3, 1, n_items) and reshaping to
# (n_items, 1, 3) would reinterpret the buffer without transposing it, mixing
# probabilities that belong to different items.
y_m = np.array(df[['model_p'+str(i) for i in range(3)]])
y_m = y_m.reshape((len(df), 1, 3))

out_dict = {
    'Y_H' : y_h.tolist(),
    'Y_M' : y_m.tolist(),
    'n_models': n_models,
    'n_humans': n_experts,
    'K': 3
}

with open('data.pickle', 'wb') as handle:
    pickle.dump(out_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [10]:
# create data dict for INFEXP model
# Everything that does not depend on the start point is computed once.
n_models = 1
n_tests = 250

# Expert labels as (n_experts, n_items). The frame selection is
# (n_items, n_experts), so it must be transposed; a plain reshape to
# (n_experts, n_items) would interleave the experts' labels.
y_h = np.array(df[['expert'+str(i+1) for i in range(n_experts)]]).T
d_new = np.array(df['consensus'])

# Model confidences as (n_models, n_items, 3) so that axis 1 -- the axis sliced
# per start point below -- indexes items.
# NOTE(review): assumes the INFEXP consumer expects (models, items, classes),
# matching the reshape of the raw model data earlier in this notebook -- confirm.
y_m_new = np.array(df[['model_p'+str(i) for i in range(3)]])
model_confs = np.array([y_m_new])
# predicted class per (model, item): arg-max over the class axis
model_preds = np.argmax(model_confs, axis=-1)

# Overall and per-consensus-class accuracy, computed over all rows of df.
df['model_correct'] = df['model_pred_int']==df['consensus']
model_perf = np.array([[df['model_correct'].mean()]])
class_wise_perf = np.array(
    df.groupby(
        'consensus'
    ).aggregate(
        {'model_correct':'mean'}
    )['model_correct']
)

# One output file per block of n_tests consecutive items.
for start_point in [0, 250, 500]:
    model_preds_dict_new = {
        'model_confs' : model_confs[:,start_point:start_point+n_tests],
        'model_preds' : model_preds[:,start_point:start_point+n_tests],
        'true_targets' : d_new[start_point:start_point+n_tests],
        'expert_preds' : y_h[:,start_point:start_point+n_tests],
        'chosen_models' : np.array([0]),
        'model_perf' : model_perf,
        'model_perf_per_class' : class_wise_perf
    }

    with open('cifar_infexp{}.pickle'.format(start_point), 'wb') as handle:
        pickle.dump(model_preds_dict_new, handle, protocol=pickle.HIGHEST_PROTOCOL)