In [1]:
import pandas as pd
import numpy as np
import pickle
from scipy import stats
import calibration as cal

In [2]:
def get_minority_opinion(row, cols):
    votes = [v for v in row[cols].tolist() if pd.notnull(v)]
    modes = stats.mode(votes, keepdims=True)[0]
    for val in votes:
        if val not in modes:
            return val
    return np.random.choice(list(votes))

def add_minority(df, use_cols, start, end, noise_levels=[110,125]):
    col_name = '_minority{}-{}'.format(start, end)
    include_vals = [i for i in use_cols if int(i[0])<end and int(i[0])>=start]
    include_vals = [i for i in include_vals if int(i[1]) in noise_levels]
    df[col_name] = df.apply(
        lambda x: get_minority_opinion(x, include_vals), axis=1
    )

def add_consensus(df, use_cols, start, end,noise_levels=[110,125]):
    def find_consensus(row, cols):
        votes = [v for v in row[cols].tolist() if pd.notnull(v)]
        return stats.mode(votes, keepdims=True)[0][0]
    col_name = '_consensus{}-{}'.format(start, end)
    include_vals = [i for i in use_cols if int(i[0])<end and int(i[0])>start]
    include_vals = [i for i in include_vals if int(i[1]) in noise_levels]
    df[col_name] = df.apply(lambda x: find_consensus(x, include_vals), axis=1)
    
def create_experts(row, exp1_c, exp2_c, exp3_c, s1, e1, s2, e2, s3, e3):
    if row['_consensus'] in exp1_c:
        row['expert1'] = row['_consensus{}-{}'.format(s1, e1)]
    else:
        row['expert1'] = row['_minority{}-{}'.format(s1, e1)]
    if row['_consensus'] in exp2_c:
        row['expert2'] = row['_consensus{}-{}'.format(s2, e2)]
    else:
        row['expert2'] = row['_minority{}-{}'.format(s2, e2)] 
    if row['_consensus'] in exp3_c:
        row['expert3'] = row['_consensus{}-{}'.format(s3, e3)]
    else:
        row['expert3'] = row['_minority{}-{}'.format(s3, e3)]
    return row

In [3]:
def get_consensus(y):
    most = max(list(map(y.count, y)))
    modes = list(set(filter(lambda x: y.count(x) == most, y)))
    if len(modes) > 1:
        return None
    return modes[0]

def get_consensus_df(row):
    y = row.tolist()
    most = max(list(map(y.count, y)))
    modes = list(set(filter(lambda x: y.count(x) == most, y)))
    if len(modes) > 1:
        return None
    return modes[0]

def process_expert_votes(row):
    votes = [v for v in row.tolist() if pd.notnull(v)]
    row['_consensus'] = get_consensus(votes)
    return row

In [4]:
class1 = ['clock', 'knife', 'oven', 'chair', 'bottle', 'keyboard']
class2 = ['cat', 'elephant', 'dog', 'bird', 'bear']
class3 = ['airplane', 'boat', 'car', 'truck', 'bicycle']

def convert_to_tri_class(x, c1, c2, c3):
    if x in c1:
        c = 0
    elif x in c2:
        c = 1
    else:
        assert x in c3
        c = 2
    return c

def convert_prob_to_tri_class(row, c1, c2, c3):
    c1_sum = 0
    for v in c1:
        c1_sum += row[v]
    c2_sum = 0
    for v in c2:
        c2_sum += row[v]
    c3_sum = 0
    for v in c3:
        c3_sum += row[v]
    row['model_p0'] = min(1.0, c1_sum)
    row['model_p1'] = min(1.0, c2_sum)
    row['model_p2'] = min(1.0, c3_sum)
    return row

In [5]:
# create dataframe of expert and model predictions
n_experts = 3
df_model = pd.read_csv("model_preds_raw.csv")
df_human = pd.read_csv("annotations_raw.csv")

df_model = df_model[df_model['noise_level']==120]

df_human = df_human[[
 'participant_id', 'image_id', 'image_name', 'noise_level', 'image_category',
 'participant_classification', 'confidence', 'correct', 'total_accuracy'
]]
for c in ['participant_classification', 'image_category']:
    df_human[c+"_new"] = df_human[c].apply(
        convert_to_tri_class, args=(class1, class2, class3,)
    )
    
df_model = df_model.apply(
    convert_prob_to_tri_class, args=(class1, class2, class3,), axis=1
)

model_name='alexnet'
dn_df = df_model[df_model['model_name']==model_name].copy()
dn_df = dn_df[['image_name', 'noise_level','model_p0','model_p1', 'model_p2']]

In [6]:
df_human = df_human.drop_duplicates(subset=['participant_id','noise_level','image_name'])
df_human_p = df_human.pivot(
    columns=['participant_id','noise_level'],
    index='image_name',
    values='participant_classification_new'
)

In [7]:
use_cols = df_human_p.columns
df_human_f = df_human_p.apply(process_expert_votes, axis=1)

start1 = 0; end1 = 67
start2 = 67; end2 = 133
start3 = 133; end3 = 200

add_consensus(df_human_f,use_cols,start1,end1,noise_levels=[95,110])
add_minority(df_human_f,use_cols,start1,end1,noise_levels=[95,110])
add_consensus(df_human_f,use_cols,start2,end2,noise_levels=[110,125])
add_minority(df_human_f,use_cols,start2,end2,noise_levels=[110,125])
add_consensus(df_human_f,use_cols,start3,end3,noise_levels=[95,110])
add_minority(df_human_f,use_cols,start3,end3,noise_levels=[95,110])

In [8]:
df_human_f.columns = [
    str(i[0])+'-'+str(i[1]) if i[1]!='' else str(i[0]) 
        for i in df_human_f.columns
]
df_human_f = df_human_f[pd.notnull(df_human_f["_consensus"])]

c1=[0]; c2=[1]; c3=[2]
df_human_f = df_human_f.apply(lambda x: create_experts(
    x, c1 + c3, c1 + c2, c2 + c3,
    start1, end1, start2, end2, start3, end3
), axis=1)

In [9]:
df_human_f['consensus'] = df_human_f[['expert1','expert2','expert3']].apply(
    get_consensus_df, axis=1
)
df_human_f.reset_index(inplace=True)
df_human_f = df_human_f[pd.notnull(df_human_f["consensus"])]
df_human_f['consensus'] = df_human_f['consensus'].astype(int)
cols = ['expert1','expert2','expert3']
for col in cols:
    df_human_f[col] = df_human_f[col].astype(int)
df_human_final = df_human_f[['image_name','expert1','expert2','expert3','consensus']]
df = df_human_final.merge(dn_df, on=['image_name'], how='inner')

In [10]:
# shuffle rows
np.random.seed(1)
order = np.array([i for i in range(len(df))])
np.random.shuffle(order)
df = df.loc[order]
df.reset_index(inplace=True, drop=True)


df.to_csv('data_clean.csv')
df.head()

Unnamed: 0,image_name,expert1,expert2,expert3,consensus,noise_level,model_p0,model_p1,model_p2
0,n04099969_6944,0,0,0,0,120,0.79481,0.165056,0.040133
1,n02504013_1455,1,1,2,1,120,0.012092,0.984549,0.003358
2,n04111531_10555,0,2,0,0,120,0.916009,0.003535,0.080456
3,n02504013_6118,1,1,1,1,120,0.007704,0.990873,0.001423
4,n02132136_7584,1,1,1,1,120,0.016555,0.978589,0.004857


In [19]:
# create distribution-shift versions
df_model = pd.read_csv("model_preds_raw.csv")
df_model = df_model.apply(
    convert_prob_to_tri_class, args=(class1, class2, class3,), axis=1
)
model_name='alexnet'
dn_df = df_model[df_model['model_name']==model_name].copy()
dn_df = dn_df[['image_name', 'noise_level','model_p0','model_p1', 'model_p2']]
dn_df_before_ds = dn_df[dn_df['noise_level']==80].reset_index(drop=True)
dn_df_after_ds = dn_df[dn_df['noise_level']==125].reset_index(drop=True)

start1 = 0; end1 = 67
start2 = 67; end2 = 133
start3 = 133; end3 = 200

df_human_base = df_human_p.apply(process_expert_votes, axis=1)
df_human_f_before = df_human_base.copy()
df_human_f_after = df_human_base.copy()

for dat, noise_level in zip([df_human_f_before, df_human_f_after],[80, 125]):
    add_consensus(dat,use_cols,start1,end1,noise_levels=[noise_level])
    add_minority(dat,use_cols,start1,end1,noise_levels=[noise_level])
    add_consensus(dat,use_cols,start2,end2,noise_levels=[noise_level])
    add_minority(dat,use_cols,start2,end2,noise_levels=[noise_level])
    add_consensus(dat,use_cols,start3,end3,noise_levels=[noise_level])
    add_minority(dat,use_cols,start3,end3,noise_levels=[noise_level])
    
    dat.columns = [
        str(i[0])+'-'+str(i[1]) if i[1]!='' else str(i[0]) 
            for i in dat.columns
    ]

    dat = dat[pd.notnull(dat["_consensus"])]

    c1=[0]; c2=[1]; c3=[2]
    dat = dat.apply(lambda x: create_experts(
        x, c1 + c3, c1 + c2, c2 + c3,
        start1, end1, start2, end2, start3, end3
    ), axis=1)

    dat['consensus'] = dat[['expert1','expert2','expert3']].apply(
        get_consensus_df, axis=1
    )
    dat.reset_index(inplace=True)
    dat = dat[pd.notnull(dat["consensus"])]
    dat['consensus'] = dat['consensus'].astype(int)
    cols = ['expert1','expert2','expert3']
    for col in cols:
        print(col)
        dat = dat[pd.notnull(dat[col])]
        dat[col] = dat[col].astype(int)
    dat = dat[['image_name','expert1','expert2','expert3','consensus']]

df_before_ds = df_human_f_before.merge(dn_df_before_ds, on=['image_name'], how='inner')
df_after_ds = df_human_f_after.merge(dn_df_after_ds, on=['image_name'], how='inner')

df_before_ds.to_csv('data_before_ds_clean.csv')
df_after_ds.to_csv('data_after_ds_clean.csv')

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


expert1
expert2
expert3


ValueError: 'a' cannot be empty unless no samples are taken

In [None]:
# create data dict for INFEXP model
for start_point in [0, 250, 500]:
    y_h = np.array(df[['expert'+str(i+1) for i in range(n_experts)]])
    y_h = y_h.transpose()
    d_new = np.array(df['consensus'])

    y_m_new = np.array(df[['model_p'+str(i) for i in range(3)]])
    model_confs = np.array([y_m_new])
    model_preds = np.array([[np.argmax(i) for i in j] for j in model_confs])

    df['model_correct'] = df['model_pred_int']==df['consensus']
    model_perf = np.array([[df['model_correct'].mean()]])
    class_wise_perf = np.array(
        df.groupby(
            'consensus'
        ).aggregate(
            {'model_correct':'mean'}
        )['model_correct']
    )

    n_models = 1
    n_tests = 250
    infexp_dict = {
        'model_confs' : model_confs[:,start_point:start_point+n_tests],
        'model_preds' : model_preds[:,start_point:start_point+n_tests],
        'targets' : d_new[start_point:start_point+n_tests],
        'true_targets' : d_new[start_point:start_point+n_tests],
        'expert_preds' : y_h[:,start_point:start_point+n_tests],
        'chosen_models' : np.array([0]),
        'model_perf' : model_perf,
        'model_perf_per_class' : class_wise_perf
    }

    with open('imagenet_infexp{}_exp.pickle'.format(start_point), 'wb') as handle:
        pickle.dump(infexp_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)