In [1]:
import pandas as pd
import pickle
import numpy as np
from scipy import stats
import calibration as cal

In [7]:
# funcs for combining expert opinions
def get_minority_opinion(x):
    """Return one value from ``x`` that disagrees with the most common value.

    The mode of ``x`` is computed with ``scipy.stats.mode``. The first element
    of ``x`` (in iteration order) that is not a mode is returned. When every
    element matches the mode, a single element is drawn with
    ``np.random.choice`` instead; that draw uses NumPy's global random state,
    which is not seeded in this function.
    """
    majority = stats.mode(x, keepdims=True)[0]
    dissenting = [opinion for opinion in x if opinion not in majority]
    if dissenting:
        return dissenting[0]
    # Unanimous input: fall back to a random pick among the given values.
    return np.random.choice(list(x))

def add_minority(df, start, end):
    """Add a ``'minority{start}-{end}'`` column to ``df`` in place.

    For every row, the values stored under the integer column labels
    ``start`` through ``end`` (inclusive) are handed to
    ``get_minority_opinion`` and the result is stored in the new column.
    Nothing is returned; ``df`` itself is modified.
    """
    annotation_cols = list(range(start, end + 1))

    def _row_minority(row):
        return get_minority_opinion(row[annotation_cols])

    df['minority{}-{}'.format(start, end)] = df.apply(_row_minority, axis=1)

def add_consensus(df, start, end):
    """Add a ``'consensus{start}-{end}'`` column to ``df`` in place.

    For every row, the mode (via ``scipy.stats.mode``) of the values stored
    under the integer column labels ``start`` through ``end`` (inclusive) is
    written to the new column. Nothing is returned; ``df`` itself is modified.
    """
    annotation_cols = list(range(start, end + 1))

    def _row_mode(row):
        mode_result = stats.mode(row[annotation_cols], keepdims=True)
        # mode_result[0] is the array of modes; take its single entry.
        return mode_result[0][0]

    df['consensus{}-{}'.format(start, end)] = df.apply(_row_mode, axis=1)
    
def create_experts(row, exp1_c, exp2_c, exp3_c):
    """Fill ``expert1``/``expert2``/``expert3`` on ``row`` and return it.

    For expert *k*, if ``row['consensus']`` is contained in ``exp{k}_c`` the
    expert's value is copied from that expert's consensus column, otherwise
    from that expert's minority column:

    * expert1: ``'consensus0-10'``  / ``'minority0-15'``
    * expert2: ``'consensus16-26'`` / ``'minority16-32'``
    * expert3: ``'consensus33-43'`` / ``'minority33-48'``

    ``row`` is modified in place and also returned.
    """
    assignments = (
        ('expert1', exp1_c, 'consensus0-10', 'minority0-15'),
        ('expert2', exp2_c, 'consensus16-26', 'minority16-32'),
        ('expert3', exp3_c, 'consensus33-43', 'minority33-48'),
    )
    for expert_col, consensus_classes, agree_col, dissent_col in assignments:
        source_col = agree_col if row['consensus'] in consensus_classes else dissent_col
        row[expert_col] = row[source_col]
    return row

In [8]:
# funcs for merging classes into super-classes
def combine_classes(row, class1, class2, class3):
    """Replace the labels in the expert and consensus columns by super-class ids.

    For each of ``'expert1'``, ``'expert2'``, ``'expert3'`` and
    ``'consensus'``: a value found in ``class1`` becomes 0, a value found in
    ``class2`` becomes 1, and anything else becomes 2. ``class3`` is accepted
    but never consulted, so values outside all three lists also map to 2.

    ``row`` is modified in place and also returned.
    """
    def _super_class(label):
        if label in class1:
            return 0
        if label in class2:
            return 1
        return 2

    for column in ('expert1', 'expert2', 'expert3', 'consensus'):
        row[column] = _super_class(row[column])
    return row

def combine_model_classes(row, c1, c2, c3):
    """Sum per-class values of ``row[0]`` into three merged-class values.

    ``row[0]`` is indexed with each of the index lists ``c1``, ``c2`` and
    ``c3`` (so it must support list indexing, e.g. a NumPy array); the
    selected entries are summed and each sum is capped at 1.0.

    Returns a nested list ``[[p1, p2, p3]]``.
    """
    confidences = row[0]
    merged = [min(1.0, sum(confidences[members])) for members in (c1, c2, c3)]
    return [merged]

In [9]:
# create dataframe of expert and model predictions
# new classes (e.g., class c1 is original classes 0, 1, and 2)
c1 = [0,1,2]
c2 = [3,4,5]
c3 = [6,7,8,9]
n_experts = 3

# NOTE(security): unpickling can execute arbitrary code; only load these
# .pkl files when they come from a trusted source.
expert_preds = pd.read_pickle(r'cifar_raw_annotations.pkl')
df = pd.DataFrame(expert_preds)
# Row-wise consensus: the most frequent value across all columns of the row.
df['consensus'] = df.apply(lambda x: stats.mode(x, keepdims=True)[0][0], axis=1)

d = pd.read_pickle(r'cifar_raw_model_data.pkl')
chosen_model = 55 # used 28, 46, 55
n_models = 1
chosen_model_preds = d['model_confs'][chosen_model]
# One block of model confidences per row of df. The row count is taken from
# df instead of a hard-coded 10000; the columns assigned below must have
# len(df) entries anyway. 10 is the number of original classes (indices 0-9,
# as covered by c1, c2 and c3).
y_m = chosen_model_preds.reshape((len(df), n_models, 10))
y_m_new = np.array([combine_model_classes(x, c1, c2, c3) for x in y_m])
# One 'model_p<k>' column per merged class returned by combine_model_classes.
for i in range(y_m_new.shape[-1]):
    df['model_p'+str(i)] = y_m_new[:,0,i]

In [10]:
# create experts
#
# Each expert is built from its own run of annotation columns:
#   expert 1 -> annotations 0-15, expert 2 -> 16-32, expert 3 -> 33-48.
# For each run, the consensus column uses (first, consensus_last) and the
# minority column uses (first, minority_last).
expert_column_ranges = (
    (0, 10, 15),    # expert 1
    (16, 26, 32),   # expert 2
    (33, 43, 48),   # expert 3
)
for first_col, consensus_last, minority_last in expert_column_ranges:
    add_consensus(df, first_col, consensus_last)
    add_minority(df, first_col, minority_last)

# Fill expert1..3 per row, then collapse all label columns to super-classes.
df = df.apply(create_experts, axis=1, args=(c1 + c3, c1 + c2, c2 + c3))
df = df.apply(combine_classes, axis=1, args=(c1, c2, c3))

In [6]:
# Keep only the label and model-probability columns, then store the four
# label columns as integers.
label_columns = ['expert1', 'expert2', 'expert3', 'consensus']
prob_columns = ['model_p0', 'model_p1', 'model_p2']

df = df[label_columns + prob_columns].copy()
df = df.astype({name: int for name in label_columns})

def has_true_consensus(row):
    """Return False only when all three expert labels differ pairwise.

    Reads ``row['expert1']``, ``row['expert2']`` and ``row['expert3']``;
    returns True as soon as any two of them are not unequal.
    """
    all_disagree = (
        row['expert1'] != row['expert2']
        and row['expert2'] != row['expert3']
        and row['expert1'] != row['expert3']
    )
    return not all_disagree

# Row-wise mask: the result of has_true_consensus for every row of df.
cons = df.apply(has_true_consensus, axis=1)
# Keep only the rows selected by the mask (df is rebound to the filtered frame).
df = df[cons]
# Persist the filtered frame; to_csv writes the index as well by default.
df.to_csv('data_clean.csv')
# Last expression of the cell, so the first rows are displayed as its output.
df.head()

10000
