In [60]:
from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions
import numpy as np
import pandas as pd
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
import tensorflow as tf
from keras.models import Sequential, Model
from keras.layers import Activation, Dropout, Flatten, Dense, merge, Reshape, Permute, Multiply, Dot,dot, Concatenate, Add
from keras.layers import Input
from keras import backend as K
from keras.engine.topology import Layer
import keras as keras
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import os
# packages for learning from crowds
from crowd_layer.crowd_layers import CrowdsClassification, MaskedMultiCrossEntropy, CrowdsClassificationSModel, \
    CrowdsClassificationCModelSingleWeight, CrowdsClassificationCModel, MaskedMultiCrossEntropyCosSim, \
    MaskedMultiCrossEntropyBaseChannel, MaskedMultiCrossEntropyBaseChannelConst, CrowdsClassificationSModelChannelMatrix, \
    MaskedMultiCrossEntropyCurriculumChannelMatrix
from crowd_layer.crowd_aggregators import CrowdsCategoricalAggregator

from numpy.random import seed
seed(1)

from tensorflow import set_random_seed
set_random_seed(2)

In [10]:
# prevent tensorflow from allocating the entire GPU memory at once
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.Session(config=config)

In [11]:
# Prepare data
def load_data(filename):
    f = open(filename, 'rb')
    data = np.load(f)
    f.close()
    return data

def one_hot(target, n_classes):
    targets = np.array([target]).reshape(-1)
    one_hot_targets = np.eye(n_classes)[targets]
    return one_hot_targets

def get_data(DATA_PATH, N_CLASSES):
    
    print("\nLoading train data...")
    # images processed by VGG16
    data_train_vgg16 = load_data(DATA_PATH+"data_train.npy")
    print(data_train_vgg16.shape)

    # ground truth labels
    labels_train = load_data(DATA_PATH+"labels_train.npy")
    print(labels_train.shape)

    # labels obtained from majority voting
    labels_train_mv = load_data(DATA_PATH+"labels_train_mv.npy")
    print(labels_train_mv.shape)

#     # labels obtained by using the approach by Dawid and Skene
#     labels_train_ds = load_data(DATA_PATH+"labels_train_DS.npy")
#     print(labels_train_ds.shape)

    # data from Amazon Mechanical Turk
    print("\nLoading AMT data...")
    answers = load_data(DATA_PATH+"answers.npy")
    print(answers.shape)
    N_ANNOT = answers.shape[1]
    print("N_CLASSES:", N_CLASSES)
    print("N_ANNOT:", N_ANNOT)

    # load test data
    print("\nLoading test data...")

    # images processed by VGG16
    data_test_vgg16 = load_data(DATA_PATH+"data_test.npy")
    print(data_test_vgg16.shape)

    # test labels
    labels_test = load_data(DATA_PATH+"labels_test.npy")
    print(labels_test.shape)

    print("\nLoading validation data...")
    # images processed by VGG16
    data_valid_vgg16 = load_data(DATA_PATH+"data_valid.npy")
    print(data_valid_vgg16.shape)

    # validation labels
    labels_valid = load_data(DATA_PATH+"labels_valid.npy")
    print(labels_valid.shape)

    labels_train_bin = one_hot(labels_train, N_CLASSES)
    labels_train_mv_bin = one_hot(labels_train_mv, N_CLASSES)
#     labels_train_ds_bin = one_hot(labels_train_ds, N_CLASSES)
#     print(labels_train_ds_bin.shape)
    labels_test_bin = one_hot(labels_test, N_CLASSES)
    labels_valid_bin = one_hot(labels_valid, N_CLASSES)


    answers_bin_missings = []
    for i in range(len(answers)):
        row = []
        for r in range(N_ANNOT):
            if answers[i,r] == -1:
                row.append(-1 * np.ones(N_CLASSES))
            else:
                row.append(one_hot(answers[i,r], N_CLASSES)[0,:])
        answers_bin_missings.append(row)
    answers_bin_missings = np.array(answers_bin_missings).swapaxes(1,2)

    answers_test_bin_missings = np.zeros((len(labels_test), N_CLASSES))
    answers_test_bin_missings[np.arange(len(labels_test)), labels_test] = 1
    answers_test_bin_missings = np.repeat(answers_test_bin_missings.reshape([len(labels_test),N_CLASSES,1]), N_ANNOT, axis=2)

    answers_valid_bin_missings = np.zeros((len(labels_valid), N_CLASSES))
    answers_valid_bin_missings[np.arange(len(labels_valid)), labels_valid] = 1
    answers_valid_bin_missings = np.repeat(answers_valid_bin_missings.reshape([len(labels_valid),N_CLASSES,1]), N_ANNOT, axis=2)
    
    x = {'train': data_train_vgg16, 'test': data_test_vgg16, 'val': data_valid_vgg16}
    y_gt = {'train': labels_train_bin, 'test': labels_test_bin, 'val': labels_valid_bin}
    y_annot = {'train': answers_bin_missings, 'test': answers_test_bin_missings, 'val': answers_valid_bin_missings, 'mv':labels_train_mv_bin}
    return x, y_gt, y_annot

In [12]:
def get_data_with_sample(DATA_PATH, N_CLASSES):
    
    print("\nLoading train data...")
    # images processed by VGG16
    data_train_vgg16 = load_data(DATA_PATH+"data_train_vgg16.npy")
    print(data_train_vgg16.shape)

    # ground truth labels
    labels_train = load_data(DATA_PATH+"labels_train.npy")
    print(labels_train.shape)

    # labels obtained from majority voting
    labels_train_mv = load_data(DATA_PATH+"labels_train_mv.npy")
    print(labels_train_mv.shape)

#     # labels obtained by using the approach by Dawid and Skene
#     labels_train_ds = load_data(DATA_PATH+"labels_train_DS.npy")
#     print(labels_train_ds.shape)



    # data from Amazon Mechanical Turk
    print("\nLoading AMT data...")
    answers = load_data(DATA_PATH+"answers.npy")
    print(answers.shape)
    N_ANNOT = answers.shape[1]
    print("N_CLASSES:", N_CLASSES)
    print("N_ANNOT:", N_ANNOT)

    # load test data
    print("\nLoading test data...")

    # images processed by VGG16
    data_test_vgg16 = load_data(DATA_PATH+"data_test_vgg16.npy")
    print(data_test_vgg16.shape)

    # test labels
    labels_test = load_data(DATA_PATH+"labels_test.npy")
    print(labels_test.shape)

    print("\nLoading validation data...")
    # images processed by VGG16
    data_valid_vgg16 = load_data(DATA_PATH+"data_valid_vgg16.npy")
    print(data_valid_vgg16.shape)

    # validation labels
    labels_valid = load_data(DATA_PATH+"labels_valid.npy")
    print(labels_valid.shape)

    labels_train_bin = one_hot(labels_train, N_CLASSES)
    labels_train_mv_bin = one_hot(labels_train_mv, N_CLASSES)
#     labels_train_ds_bin = one_hot(labels_train_ds, N_CLASSES)
#     print(labels_train_ds_bin.shape)
    labels_test_bin = one_hot(labels_test, N_CLASSES)
    labels_valid_bin = one_hot(labels_valid, N_CLASSES)


    answers_bin_missings = []
    for i in range(len(answers)):
        row = []
        for r in range(N_ANNOT):
            if answers[i,r] == -1:
                row.append(-1 * np.ones(N_CLASSES))
            else:
                row.append(one_hot(answers[i,r], N_CLASSES)[0,:])
        answers_bin_missings.append(row)
    answers_bin_missings = np.array(answers_bin_missings).swapaxes(1,2)

    answers_test_bin_missings = np.zeros((len(labels_test), N_CLASSES))
    answers_test_bin_missings[np.arange(len(labels_test)), labels_test] = 1
    answers_test_bin_missings = np.repeat(answers_test_bin_missings.reshape([len(labels_test),N_CLASSES,1]), N_ANNOT, axis=2)

    answers_valid_bin_missings = np.zeros((len(labels_valid), N_CLASSES))
    answers_valid_bin_missings[np.arange(len(labels_valid)), labels_valid] = 1
    answers_valid_bin_missings = np.repeat(answers_valid_bin_missings.reshape([len(labels_valid),N_CLASSES,1]), N_ANNOT, axis=2)
    
    x = {'train': data_train_vgg16, 'test': data_test_vgg16, 'val': data_valid_vgg16}
    y_gt = {'train': labels_train_bin, 'test': labels_test_bin, 'val': labels_valid_bin}
    y_annot = {'train': answers_bin_missings, 'test': answers_test_bin_missings, 'val': answers_valid_bin_missings, 'mv':labels_train_mv_bin}

    data_train_vgg16_sample = []
    labels_train_sample = []
    answers_mix = []
    for pct in range(1,6):
        data_train_vgg16_sample_dict = x.copy()
        data_train_vgg16_sample_dict['train'] = load_data(DATA_PATH+'data_train_vgg16_%dpct.npy'%pct)
        data_train_vgg16_sample.append(data_train_vgg16_sample_dict)
        labels_train_sample.append(load_data(DATA_PATH+'labels_train_%dpct.npy'%pct))
        answers_mix.append(load_data(DATA_PATH+'data_train_mix_%dpct.npy'%pct))
    
    labels_train_sample_bin_missings_list = []
    answers_mix_bin_missings_list = []
    for j in range(5):
        labels_train_sample_bin = y_gt.copy()
        labels_train_sample_bin['train'] = one_hot(labels_train_sample[j], N_CLASSES)
        labels_train_sample_bin_missings_list.append(labels_train_sample_bin)
        answers_mix_bin_missings = y_annot.copy()
        answers_mix_bin_list = []
        for i in range(len(answers)):
            row = []
            for r in range(N_ANNOT):
                if answers_mix[j][i,r] == -1:
                    row.append(-1 * np.ones(N_CLASSES))
                else:
                    row.append(one_hot(answers_mix[j][i,r], N_CLASSES)[0,:])
            answers_mix_bin_list.append(row)
        answers_mix_bin_missings['train'] = np.array(answers_mix_bin_list).swapaxes(1,2)
        
        answers_mix_bin_missings_list.append(answers_mix_bin_missings)
        
    x_sample = dict(zip(range(1,6), data_train_vgg16_sample))
    y_sample = dict(zip(range(1,6), labels_train_sample_bin_missings_list))
    y_annot_mix = dict(zip(range(1,6), answers_mix_bin_missings_list))
    
    return x, y_gt, y_annot, x_sample, y_sample, y_annot_mix

In [13]:
# Build model
def eval(model,x_test, y_test):
    print('Test dataset results: ')
    mets = dict(zip(model.metrics_names,model.evaluate(x_test,y_test, verbose=False)))
    print(mets)
    return mets


def get_trace(model):

    channel_matrix = model.get_weights()[-1]
    channel_matrix_trace = tf.trace(K.permute_dimensions(channel_matrix, [2, 0, 1]))
    channel_matrix_trace_arr = K.eval(channel_matrix_trace)
    return channel_matrix_trace_arr


def print_single_loss(model):
    import matplotlib.pyplot as plt

    # list all data in history
    print(model.history.keys())
    # summarize history for accuracy
    plt.plot(model.history['baseline_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.show()
    # summarize history for loss
    plt.plot(model.history['baseline_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.show()


def print_history(df, title):
    import matplotlib.pyplot as plt
    # Make a data frame
    df['x'] = range(df.shape[0])

    # style
    plt.style.use('seaborn-darkgrid')

    # create a color palette
    palette = plt.get_cmap('Set1')

    # multiple line plot
    num = 0
    for column in df.drop('x', axis=1):
        num += 1
        plt.plot(df['x'], df[column], marker='', color=palette(num), linewidth=1, alpha=0.9, label=column)

    # Add legend
    plt.legend(loc=2, ncol=2)

    # Add titles
    plt.title(title, loc='left', fontsize=12, fontweight=0, color='orange')
    plt.xlabel("Time")
    plt.ylabel("Score")
    plt.savefig(title+'.png')

In [14]:
def build_base_model(train_data_shape, N_CLASSES):
    base_model = Sequential()
    base_model.add(Flatten(input_shape=train_data_shape[1:]))
    base_model.add(Dense(128, activation='relu'))
    base_model.add(Dropout(0.5))
#     base_model.add(Dense(64, activation='relu'))
#     base_model.add(Dropout(0.4))

    base_model.add(Dense(N_CLASSES))
    base_model.add(Activation('softmax'))
    base_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return base_model

In [15]:
def majority_vote(x, y_gt, y_annot, N_CLASSES, model_path):
    train_data_shape = x['train'].shape
    N_ANNOT = y_annot['train'].shape[2]
    baseline_model = build_base_model(train_data_shape, N_CLASSES)
    checkpoint = ModelCheckpoint(model_path, verbose=1, monitor='val_acc',
                                 save_best_only=True, mode='auto')  
    callbacks = [EarlyStopping(monitor='val_acc', patience=5)]

    history = baseline_model.fit(x['train'], y_annot['mv'], validation_data=(x['val'], y_gt['val']), 
                        epochs=N_EPOCHS, batch_size=BATCH_SIZE, verbose=0)
    mets = eval(baseline_model, x['test'], y_test=y_gt['test'])
    return history, mets

In [16]:
def baseline_gt(x, y_gt, N_CLASSES, model_path):
    train_data_shape = x['train'].shape
    baseline_model = build_base_model(train_data_shape, N_CLASSES)
    checkpoint = ModelCheckpoint(model_path, verbose=1, monitor='val_acc',
                                 save_best_only=True, mode='auto')  
#     callbacks = [EarlyStopping(monitor='val_acc', patience=5)]

    history = baseline_model.fit(x['train'], y_gt['train'], validation_data=(x['val'], y_gt['val']), 
                        epochs=N_EPOCHS, batch_size=BATCH_SIZE, verbose=0)
    mets = eval(baseline_model, x['test'], y_test=y_gt['test'])
    return history, mets

In [29]:
def build_base_crowd_model(train_data_shape, N_CLASSES, N_ANNOT, softmax, trace):
    base_model = Sequential()
    base_model.add(Flatten(input_shape=train_data_shape[1:]))
    base_model.add(Dense(128, activation='relu'))
    base_model.add(Dropout(0.5))
#     base_model.add(Dense(64, activation='relu'))
#     base_model.add(Dropout(0.4))

    train_inputs = Input(shape=(train_data_shape[1:]))
    last_hidden = base_model(train_inputs)
    baseline_output = Dense(N_CLASSES, activation='softmax', name='baseline')(last_hidden)

    if softmax:
        channel_layer = CrowdsClassificationSModelChannelMatrix(N_CLASSES, N_ANNOT, name='CrowdLayer')
        channeled_output = channel_layer([last_hidden, baseline_output])
    else:
        channel_layer = CrowdsClassification(N_CLASSES, N_ANNOT)
        channeled_output = channel_layer(baseline_output)

    model = Model(inputs=train_inputs, outputs=[channeled_output, baseline_output])

    if trace:
        loss = MaskedMultiCrossEntropyCurriculumChannelMatrix(model, 1, 1).loss
    else:
        loss = MaskedMultiCrossEntropy().loss

    # compile model with masked loss and train
    model.compile(optimizer='adam',
                         loss=[loss, 'categorical_crossentropy'],
                         loss_weights=[1, 0],
                         metrics=['accuracy']
                        )
    return model

In [38]:
def crowd_model(x, y_gt, y_annot, N_CLASSES, softmax, trace, model_path):
    train_data_shape = x['train'].shape
    N_ANNOT = y_annot['train'].shape[2]

    model = build_base_crowd_model(train_data_shape, N_CLASSES, N_ANNOT, softmax, trace)    

    checkpoint = ModelCheckpoint(model_path, verbose=1, 
                                 monitor='val_baseline_acc',save_best_only=True, mode='auto')  
#     callbacks = [EarlyStopping(monitor='val_baseline_acc', patience=5)]

    history = model.fit(x['train'], [y_annot['train'], y_gt['train']], 
                        validation_data=(x['val'],[y_annot['val'], y_gt['val']]), 
                        epochs=N_EPOCHS, batch_size=BATCH_SIZE, verbose=0)

    trace_arr = get_trace(model)
    weights = K.squeeze(weights)
    
    weights =  K.permute_dimensions(model.layers[-1].get_weights(), [2, 0, 1])

#     weights = K.permute_dimensions(model.get_weights()[-1], [2, 0, 1])
    mets = eval(model, x['test'], y_test=[y_annot['test'], y_gt['test']])
    return history, trace_arr, mets, weights

In [39]:
def crowd_model_pretrain_with_clean_data(x, y_gt, y_annot, x_sample, y_gt_sample, N_CLASSES, softmax, trace, model_path):
    train_data_shape = x['train'].shape
    N_ANNOT = y_annot['train'].shape[2]

    base_model = Sequential()
    base_model.add(Flatten(input_shape=train_data_shape[1:]))
    base_model.add(Dense(128, activation='relu'))
    base_model.add(Dropout(0.5))
#     base_model.add(Dense(64, activation='relu'))
#     base_model.add(Dropout(0.4))

    train_inputs = Input(shape=(train_data_shape[1:]))
    last_hidden = base_model(train_inputs)
    baseline_output = Dense(N_CLASSES, activation='softmax', name='baseline')(last_hidden)

    if softmax:
        channel_layer = CrowdsClassificationSModelChannelMatrix(N_CLASSES, N_ANNOT)
        channeled_output = channel_layer([last_hidden, baseline_output])
    else:
        channel_layer = CrowdsClassification(N_CLASSES, N_ANNOT)
        channeled_output = channel_layer(baseline_output)

    baseline_model = Model(inputs=train_inputs, outputs=baseline_output)

    # compile model with masked loss and train
    baseline_model.compile(optimizer='adam',
                         loss='categorical_crossentropy',
                         metrics=['accuracy']
                        )
    
    history = baseline_model.fit(x_sample['train'], y_gt_sample['train'], epochs=N_EPOCHS, shuffle=True,
                              batch_size=BATCH_SIZE, verbose=0)
    
    model = Model(inputs=train_inputs, outputs=[channeled_output, baseline_output])
    
    if trace:
        loss = MaskedMultiCrossEntropyCurriculumChannelMatrix(model, 1, 1).loss
    else:
        loss = MaskedMultiCrossEntropy().loss
    # compile model with masked loss and train
    model.compile(optimizer='adam',
                         loss=[loss, 'categorical_crossentropy'],
                         loss_weights=[1, 0],
                         metrics=['accuracy']
                        )
    model.set_weights(baseline_model.get_weights()) 
    
    checkpoint = ModelCheckpoint(model_path, verbose=1, 
                                 monitor='val_baseline_acc',save_best_only=True, mode='auto')  
#     callbacks = [EarlyStopping(monitor='val_baseline_acc', patience=5)]

    history = model.fit(x['train'], [y_annot['train'], y_gt['train']], 
                        validation_data=(x['val'],[y_annot['val'], y_gt['val']]), 
                        epochs=N_EPOCHS, batch_size=BATCH_SIZE, verbose=0)

    trace_arr = get_trace(model)
    mets = eval(model, x['test'], y_test=[y_annot['test'], y_gt['test']])
    return history, trace_arr, mets

In [40]:
N_CLASSES = 2
BATCH_SIZE = 64
N_EPOCHS = 35
N_RUNS = 1
DATA_PATH = "/home/yajingyang/Downloads/PetImages/annot_5_acc_0.65/"
x, y_gt, y_annot, x_sample, y_sample, y_annot_mix = get_data_with_sample(DATA_PATH, N_CLASSES)
N_ANNOT = y_annot['train'].shape[2]

model_dir = "/home/yajingyang/PycharmProjects/CrowdLayer/dogs_and_cats/0.2/"
if not os.path.isdir(model_dir):
    os.mkdir(model_dir)
    
acc_mean = {}
acc_std = {}


Loading train data...
(12499, 4, 4, 512)
(12499,)
(12499,)

Loading AMT data...
(12499, 5)
N_CLASSES: 2
N_ANNOT: 5

Loading test data...
(6250, 4, 4, 512)
(6250,)

Loading validation data...
(6249, 4, 4, 512)
(6249,)


In [89]:
softmax=True
trace = False

print('\nCrowd noise adaptation model with softmax: %s, trace: %s' % (softmax, trace))
test_acc_list = []
acc_dict = {}
loss_dict = {}
trace_dict = {}
for i in range(3):
    history, trace_arr, mets, weights = crowd_model(x, y_gt, y_annot, N_CLASSES, softmax, trace, model_dir)
    acc_dict[i] = history.history['baseline_acc']
    loss_dict[i] = history.history['baseline_loss']
    trace_dict[i] = trace_arr
    print('weight: ', K.eval(weights))
    test_acc_list.append(mets['baseline_acc'])

# crowd_model_acc_list.append(acc_dict)
# crowd_model_loss_list.append(loss_dict)
# crowd_model_trace_list.append(trace_dict)
# test_acc = np.array(test_acc_list)
# print('acc: ', test_acc.mean(), test_acc.std())


Crowd noise adaptation model with softmax: True, trace: False
Train on 12499 samples, validate on 6249 samples
Epoch 1/2
Epoch 2/2
4
(8192, 128) (128,) (128, 2) (2,) (2, 2, 5)
Test dataset results: 
{'loss': 0.6095808312988281, 'crowds_classification_s_model_channel_matrix_14_loss': 0.6095808312988281, 'baseline_loss': 1.8743485459899902, 'crowds_classification_s_model_channel_matrix_14_acc': 0.08848000000953675, 'baseline_acc': 0.8800000000190735}
weight:  [[[0.48603305 0.17427188]
  [0.18650627 0.47510856]]

 [[0.4944124  0.17922316]
  [0.18784894 0.5043759 ]]

 [[0.4738534  0.17433158]
  [0.18749894 0.4849059 ]]

 [[0.4690315  0.16201812]
  [0.16568421 0.48169658]]

 [[0.49455354 0.166303  ]
  [0.16715573 0.46826968]]]
Train on 12499 samples, validate on 6249 samples
Epoch 1/2
Epoch 2/2
4
(8192, 128) (128,) (128, 2) (2,) (2, 2, 5)
Test dataset results: 
{'loss': 0.6168645944404602, 'crowds_classification_s_model_channel_matrix_15_loss': 0.6168645944404602, 'baseline_loss': 1.719024

In [None]:
def init_identities(shape, dtype=None):
    out = np.zeros(shape)
    for r in range(shape[2]):
        for i in range(shape[0]):
            out[i,i,r] = 1.0
    return out


class CrowdsClassification(Layer):

    def __init__(self, output_dim, num_annotators, conn_type="MW", **kwargs):
        self.output_dim = output_dim
        self.num_annotators = num_annotators
        self.conn_type = conn_type
        super(CrowdsClassification, self).__init__(**kwargs)

    def build(self, input_shape):
        if self.conn_type == "MW":
            # matrix of weights per annotator
            self.kernel = self.add_weight("CrowdLayer", (self.output_dim, self.output_dim, self.num_annotators),
                                            initializer=init_identities,
                                            trainable=True)
        elif self.conn_type == "VW":
            # vector of weights (one scale per class) per annotator
            self.kernel = self.add_weight("CrowdLayer", (self.output_dim, self.num_annotators),
                                            initializer=keras.initializers.Ones(),
                                            trainable=True)
        elif self.conn_type == "VB":
            # two vectors of weights (one scale and one bias per class) per annotator
            self.kernel = []
            self.kernel.append(self.add_weight("CrowdLayer", (self.output_dim, self.num_annotators),
                                            initializer=keras.initializers.Zeros(),
                                            trainable=True))
        elif self.conn_type == "VW+B":
            # two vectors of weights (one scale and one bias per class) per annotator
            self.kernel = []
            self.kernel.append(self.add_weight("CrowdLayer", (self.output_dim, self.num_annotators),
                                            initializer=keras.initializers.Ones(),
                                            trainable=True))
            self.kernel.append(self.add_weight("CrowdLayer", (self.output_dim, self.num_annotators),
                                            initializer=keras.initializers.Zeros(),
                                            trainable=True))
        elif self.conn_type == "SW":
            # single weight value per annotator
            self.kernel = self.add_weight("CrowdLayer", (self.num_annotators,1),
                                            initializer=keras.initializers.Ones(),
                                            trainable=True)
        else:
            raise Exception("Unknown connection type for CrowdsClassification layer!")

        super(CrowdsClassification, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, x):
        if self.conn_type == "MW":
            res = K.dot(x, self.kernel)
        elif self.conn_type == "VW" or self.conn_type == "VB" or self.conn_type == "VW+B" or self.conn_type == "SW":
            out = []
            for r in range(self.num_annotators):
                if self.conn_type == "VW":
                    out.append(x * self.kernel[:,r])
                elif self.conn_type == "VB":
                    out.append(x + self.kernel[0][:,r])
                elif self.conn_type == "VW+B":
                    out.append(x * self.kernel[0][:,r] + self.kernel[1][:,r])
                elif self.conn_type == "SW":
                    out.append(x * self.kernel[r,0])
            res = tf.stack(out)
            if len(res.shape) == 3:
                res = tf.transpose(res, [1, 2, 0])
            elif len(res.shape) == 4:
                res = tf.transpose(res, [1, 2, 3, 0])
            else:
                raise Exception("Wrong number of dimensions for output")
        else:
            raise Exception("Unknown connection type for CrowdsClassification layer!")

        return res

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim, self.num_annotators)

In [90]:
def init_identities(shape, dtype=None):
    out = np.zeros(shape)
    for r in range(shape[2]):
        for i in range(shape[0]):
            out[i,i,r] = 1.0
    return out


def bias_weights(N_CLASSES, APRIOR_NOISE=0.4):
    log_bias_weights = (
        np.array([np.array([(1. - APRIOR_NOISE)
                            if i == j else
                            APRIOR_NOISE / (N_CLASSES - 1.)
                            for j in range(N_CLASSES)]) for i in
                  range(N_CLASSES)])
        + 0.01 * np.random.random((N_CLASSES, N_CLASSES)))
    return log_bias_weights


def init_bias(shape, dtype=None):
    out = np.zeros(shape)
    N_CLASSES = shape[0]
    for r in range(shape[2]):
        out[:,:,r] = log_bias_weights(N_CLASSES)
    return out


class CrowdsClassificationSModelChannelMatrix(Layer):

    def __init__(self, output_dim, num_annotators, conn_type="MW", softmax=False, **kwargs):
        self.output_dim = output_dim
        self.num_annotators = num_annotators
        self.conn_type = conn_type
        self.softmax = softmax
        super(CrowdsClassificationSModelChannelMatrix, self).__init__(**kwargs)

    def build(self, input_shape):
        if self.conn_type == "MW":
            # matrix of weights per annotator
            self.kernel = []
            self.kernel.append(self.add_weight("CrowdLayer", (self.output_dim, self.output_dim, self.num_annotators),
                                initializer=init_log_bias,
                                trainable=True))
        else:
            raise Exception("Unknown connection type for CrowdsClassification layer!")

        super(CrowdsClassificationSModelChannelMatrix, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs):
        if self.conn_type == "MW":
            channel_output_l = []
            channel_matrix_l = []
            for r in range(self.num_annotators):
                channel_matrix_w = self.kernel[0][:,:,r]
                channel_matrix_w_l = []
                for c in range(self.output_dim):
                    if self.softmax:
                        channel_matrix_w_c = K.softmax(channel_matrix_w[:,c])
                    else:
                        channel_matrix_w_c = channel_matrix_w[:,c]
                    channel_matrix_w_l.append(channel_matrix_w_c)
                channel_matrix_w = tf.stack(channel_matrix_w_l)
                channel_matrix_l.append(channel_matrix_w)
                channel_output_w = K.dot(inputs[1], channel_matrix_w)
                channel_output_w = K.dropout(channel_output_w, 0.4)
                channel_output_l.append(channel_output_w)
            channel_matrix = tf.stack(channel_matrix_l)
            channel_output = tf.stack(channel_output_l)
            channel_output = K.permute_dimensions(channel_output, (1,2,0))
            self.channel_matrix = channel_matrix

#             res = K.batch_dot(inputs[1], channel_matrix)
        else:
            raise Exception("Unknown connection type for CrowdsClassification layer!")

        return channel_output

    def get_channel_matrix(self):
        return self.channel_matrix

    def compute_output_shape(self, input_shape):
        return (input_shape[1][0], self.output_dim, self.num_annotators)



In [92]:
class MaskedMultiCrossEntropyCurriculumChannelMatrix(object):

    def __init__(self, model, a, b):
        self.t = tf.transpose(model.get_weights()[-1], perm=[2, 0, 1])
        self.a = a
        self.b = b

    def loss(self, y_true, y_pred):

        vec = tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=y_true, dim=1)
        # vec_base_channel = tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=self.y_pred_broad, dim=1)
        trace = tf.trace(self.t)
        vec = vec - trace * self.b
        mask = tf.equal(y_true[:,0,:], -1)
        zer = tf.zeros_like(vec)
        loss = tf.where(mask, x=zer, y=vec)
        return loss
    
    
def build_base_crowd_model(train_data_shape, N_CLASSES, N_ANNOT, softmax, trace):
    base_model = Sequential()
    base_model.add(Flatten(input_shape=train_data_shape[1:]))
    base_model.add(Dense(128, activation='relu'))
    base_model.add(Dropout(0.5))
#     base_model.add(Dense(64, activation='relu'))
#     base_model.add(Dropout(0.4))

    train_inputs = Input(shape=(train_data_shape[1:]))
    last_hidden = base_model(train_inputs)
    baseline_output = Dense(N_CLASSES, activation='softmax', name='baseline')(last_hidden)

    channel_layer = CrowdsClassificationSModelChannelMatrix(N_CLASSES, N_ANNOT, softmax=True)
    channeled_output = channel_layer([last_hidden, baseline_output])
#     else:
#         channel_layer = CrowdsClassification(N_CLASSES, N_ANNOT)
#         channeled_output = channel_layer(baseline_output)

    model = Model(inputs=train_inputs, outputs=[channeled_output, baseline_output])

    if trace:
        loss = MaskedMultiCrossEntropyCurriculumChannelMatrix(model, 1, 1).loss
    else:
        loss = MaskedMultiCrossEntropy().loss

    # compile model with masked loss and train
    model.compile(optimizer='adam',
                         loss=[loss, 'categorical_crossentropy'],
                         loss_weights=[1, 0],
                         metrics=['accuracy']
                        )
    return model


def crowd_model(x, y_gt, y_annot, N_CLASSES, softmax, trace, model_path):
    train_data_shape = x['train'].shape
    N_ANNOT = y_annot['train'].shape[2]

    model = build_base_crowd_model(train_data_shape, N_CLASSES, N_ANNOT, softmax, trace)    

    checkpoint = ModelCheckpoint(model_path, verbose=1, 
                                 monitor='val_baseline_acc',save_best_only=True, mode='auto')  
#     callbacks = [EarlyStopping(monitor='val_baseline_acc', patience=5)]

    history = model.fit(x['train'], [y_annot['train'], y_gt['train']], 
                        validation_data=(x['val'],[y_annot['val'], y_gt['val']]), 
                        epochs=2, batch_size=BATCH_SIZE, verbose=1)

    trace_arr = get_trace(model)   
    print(len(model.layers))
    print(model.get_weights()[0].shape, model.get_weights()[1].shape, 
          model.get_weights()[2].shape, model.get_weights()[3].shape, model.get_weights()[4].shape)
    weights =  K.permute_dimensions(tf.squeeze(model.layers[-1].get_weights()[0]), [2, 0, 1])

#     weights = K.permute_dimensions(model.get_weights()[-1], [2, 0, 1])
    mets = eval(model, x['test'], y_test=[y_annot['test'], y_gt['test']])
    return history, trace_arr, mets, weights


softmax=True
trace = False

print('\nCrowd noise adaptation model with softmax: %s, trace: %s' % (softmax, trace))
test_acc_list = []
acc_dict = {}
loss_dict = {}
trace_dict = {}
for i in range(3):
    history, trace_arr, mets, weights = crowd_model(x, y_gt, y_annot, N_CLASSES, softmax, trace, model_dir)
    acc_dict[i] = history.history['baseline_acc']
    loss_dict[i] = history.history['baseline_loss']
    trace_dict[i] = trace_arr
    print('weight: ', K.eval(weights))
    test_acc_list.append(mets['baseline_acc'])


Crowd noise adaptation model with softmax: True, trace: False
Train on 12499 samples, validate on 6249 samples
Epoch 1/2
Epoch 2/2
4
(8192, 128) (128,) (128, 2) (2,) (2, 2, 5)
Test dataset results: 
{'loss': 0.6495717818832397, 'crowds_classification_s_model_channel_matrix_20_loss': 0.6495717818832397, 'baseline_loss': 2.0526993731021883, 'crowds_classification_s_model_channel_matrix_20_acc': 0.15424000001907348, 'baseline_acc': 0.870719999961853}
weight:  [[[0.7512073  0.25437877]
  [0.25861698 0.75383484]]

 [[0.7586201  0.24349381]
  [0.25106055 0.7640827 ]]

 [[0.74080896 0.26134732]
  [0.2665666  0.7565239 ]]

 [[0.7584698  0.25372905]
  [0.25788847 0.7484834 ]]

 [[0.7725573  0.25658098]
  [0.23345797 0.750489  ]]]
Train on 12499 samples, validate on 6249 samples
Epoch 1/2
Epoch 2/2
4
(8192, 128) (128,) (128, 2) (2,) (2, 2, 5)
Test dataset results: 
{'loss': 0.6480037514877319, 'crowds_classification_s_model_channel_matrix_21_loss': 0.6480037514877319, 'baseline_loss': 1.6445188

In [None]:
trace_dict[1]

### Baseline with clean data

In [None]:
print('\nBaseline model with clean data')
test_acc_list = []

for i in range(N_RUNS):
    clean_base_acc_df = pd.DataFrame()
    clean_base_loss_df = pd.DataFrame()
    filepath="weights.best.hdf5"
    model_path = model_dir + filepath
    clean_history, mets = baseline_gt(x, y_gt, N_CLASSES, model_path)
    clean_base_acc_df.loc[:, i] = clean_history.history['acc']
    clean_base_loss_df.loc[:, i] = clean_history.history['loss']
    test_acc = mets['acc']
    test_acc_list.append(test_acc)
test_acc = np.array(test_acc_list)

acc_mean['clean_base'] = test_acc.mean()
acc_std['clean_base'] = test_acc.std()
print(acc_mean['clean_base'], acc_std['clean_base'])

### Majority Voting

In [None]:
print('\nBaseline model with majority vote')
test_acc_list = []
for i in range(N_RUNS):
    mv_acc_df = pd.DataFrame()
    mv_loss_df = pd.DataFrame()
    mv_history, mets = majority_vote(x, y_gt, y_annot, N_CLASSES, model_dir)
    mv_acc_df = mv_history.history['acc']
    mv_loss_df = mv_history.history['loss']
    test_acc = mets['acc']
    test_acc_list.append(test_acc)
test_acc = np.array(test_acc_list)

acc_mean['mv'] = test_acc.mean()
acc_std['mv'] = test_acc.std()
print(acc_mean['mv'], acc_std['mv'])

### Crowd Model

In [None]:
crowd_model_acc_list = []
crowd_model_loss_list = []
crowd_model_trace_list = []
test_acc_lists = []
model_name = ['base_crowd', 'trace_crowd', 'softmax_crowd', 'softmax_trace_crowd']
m = 0

for softmax in [False, True]:
    for trace in [False, True]:
        print('\nCrowd noise adaptation model with softmax: %s, trace: %s' % (softmax, trace))
        test_acc_list = []
        acc_dict = {}
        loss_dict = {}
        trace_dict = {}
        for i in range(N_RUNS):
            history, trace_arr, mets = crowd_model(x, y_gt, y_annot, N_CLASSES, softmax, trace, model_dir)
            acc_dict[i] = history.history['baseline_acc']
            loss_dict[i] = history.history['baseline_loss']
            trace_dict[i] = trace_arr
            test_acc_list.append(mets['baseline_acc'])
            
        crowd_model_acc_list.append(acc_dict)
        crowd_model_loss_list.append(loss_dict)
        crowd_model_trace_list.append(trace_dict)
        test_acc = np.array(test_acc_list)
        acc_mean[model_name[m]] = test_acc.mean()
        acc_std[model_name[m]] = test_acc.std()
        m+=1
        print(model_name[m], acc_mean[model_name[m]], acc_std[model_name[m]])

for j in range(4):
    print(model_name[j], acc_mean[model_name[j]], acc_std[model_name[j]])
            
            

### Clean data sample

In [None]:
sample_base_acc_dict = {0.01:[], 0.02:[], 0.03:[], 0.04:[], 0.05:[]}
sample_base_loss_dict = {0.01:[], 0.02:[], 0.03:[], 0.04:[], 0.05:[]}

mix_acc_dict = {0.01:[], 0.02:[], 0.03:[], 0.04:[], 0.05:[]}
mix_loss_dict = {0.01:[], 0.02:[], 0.03:[], 0.04:[], 0.05:[]}
mix_trace_dict = {0.01:[], 0.02:[], 0.03:[], 0.04:[], 0.05:[]}

pretrain_acc_dict = {0.01:[], 0.02:[], 0.03:[], 0.04:[], 0.05:[]}
pretrain_loss_dict = {0.01:[], 0.02:[], 0.03:[], 0.04:[], 0.05:[]}
pretrain_trace_dict = {0.01:[], 0.02:[], 0.03:[], 0.04:[], 0.05:[]}

for clean_percent in [0.01, 0.02, 0.03, 0.04, 0.05]:

    print('\nBaseline model with %.2f clean data' % (clean_percent))
    test_acc_list = []
    for i in range(N_RUNS):
        clean_history, mets = baseline_gt(x_sample[int(clean_percent*100)], y_sample[int(clean_percent*100)], N_CLASSES, model_dir)
        sample_base_acc_dict[clean_percent] = clean_history.history['acc']
        sample_base_loss_dict[clean_percent] = clean_history.history['loss']
        test_acc_list.append(mets['acc'])
    test_acc = np.array(test_acc_list)
    model_desc = 'sample_base_%s'%clean_percent
    acc_mean[model_desc] = test_acc.mean()
    acc_std[model_desc] = test_acc.std()
#     print(clean_percent, acc_mean[model_desc], acc_std[model_desc])
            
    print('\nCrowd noise adaptation model with %.2f clean data' % (clean_percent))
    test_acc_list = []
    for i in range(N_RUNS):
        history, trace_arr, mets = crowd_model(x, y_gt, y_annot_mix[int(clean_percent*100)], N_CLASSES, False, True, model_dir)
        mix_acc_dict[clean_percent] = history.history['baseline_acc']
        mix_loss_dict[clean_percent] = history.history['baseline_loss']
        mix_trace_dict[clean_percent] = trace_arr
        test_acc_list.append(mets['baseline_acc'])
    test_acc = np.array(test_acc_list)
    model_desc = 'mix_%s'%clean_percent
    acc_mean[model_desc] = test_acc.mean()
    acc_std[model_desc] = test_acc.std()
#     print(clean_percent, acc_mean[model_desc], acc_std[model_desc])
    
    print('\nCrowd noise adaptation model pretrain with %.2f clean data' % (clean_percent))
    test_acc_list = []
    for i in range(N_RUNS):
        history, trace_arr, mets = crowd_model_pretrain_with_clean_data(x, y_gt, y_annot, x_sample[int(clean_percent*100)], y_sample[int(clean_percent*100)], N_CLASSES, False, True, model_dir)
        pretrain_trace_dict[clean_percent] = history.history['baseline_acc']
        pretrain_trace_dict[clean_percent] = history.history['baseline_loss'] 
        pretrain_trace_dict[clean_percent] = trace_arr
        test_acc_list.append(mets['baseline_acc'])
    test_acc = np.array(test_acc_list)
    model_desc = 'pretrain_%s'%clean_percent
    acc_mean[model_desc] = test_acc.mean()
    acc_std[model_desc] = test_acc.std()
#     print(clean_percent, acc_mean[model_desc], acc_std[model_desc])


Baseline model with 0.01 clean data
Test dataset results: 
{'loss': 0.8575795260867937, 'acc': 0.9398399999618531}

Crowd noise adaptation model with 0.01 clean data
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

Test dataset results: 
{'loss': -1.306828787727356, 'crowds_classification_1_loss': -1.306828787727356, 'baseline_loss': 8.010048790893554, 'crowds_classification_1_acc': 0.0, 'baseline_acc': 0.5030400000047683}

Crowd noise adaptation model pretrain with 0.01 clean data
Test dataset results: 
{'loss': -1.5115323276138306, 'crowds_classification_2_loss': -1.5115323276138306, 'baseline_loss': 0.8598640452689119, 'crowds_classification_2_acc': 0.0, 'baseline_acc': 0.94656}

Baseline model with 0.02 clean data
Test dataset results: 
{'loss': 0.9977716064417235, 'acc': 0.936}

Crowd noise adaptation model with 0.02 clean data
Test datase

In [13]:
N_CLASSES = 8
BATCH_SIZE = 64
N_EPOCHS = 35
N_RUNS = 30
DATA_PATH = "/home/yajingyang/Downloads/LabelMe/prepared/with_sample/"
x, y_gt, y_annot, x_sample, y_sample, y_annot_mix = get_data_with_sample(DATA_PATH, N_CLASSES)
N_ANNOT = y_annot['train'].shape[2]

model_dir = "/home/yajingyang/PycharmProjects/CrowdLayer/dogs_and_cats/0.2/"
# if not os.path.isdir(model_dir):
#     os.mkdir(model_dir)
    
acc_mean = {}
acc_std = {}


Loading train data...
(10000, 4, 4, 512)
(10000,)
(10000,)

Loading AMT data...
(10000, 59)
N_CLASSES: 8
N_ANNOT: 59

Loading test data...
(1188, 4, 4, 512)
(1188,)

Loading validation data...
(500, 4, 4, 512)
(500,)


In [None]:
crowd_model_acc_list = []
crowd_model_loss_list = []
crowd_model_trace_list = []
test_acc_lists = []
model_name = ['base_crowd', 'trace_crowd', 'softmax_crowd', 'softmax_trace_crowd']
m = 0

for softmax in [False, True]:
    for trace in [False, True]:
        print('\nCrowd noise adaptation model with softmax: %s, trace: %s' % (softmax, trace))
        test_acc_list = []
        acc_dict = {}
        loss_dict = {}
        trace_dict = {}
        for i in range(N_RUNS):
            history, trace_arr, mets = crowd_model(x, y_gt, y_annot, N_CLASSES, softmax, trace, model_dir)
            acc_dict[i] = history.history['baseline_acc']
            loss_dict[i] = history.history['baseline_loss']
            trace_dict[i] = trace_arr
            test_acc_list.append(mets['baseline_acc'])
            
        crowd_model_acc_list.append(acc_dict)
        crowd_model_loss_list.append(loss_dict)
        crowd_model_trace_list.append(trace_dict)
        test_acc = np.array(test_acc_list)
        acc_mean[model_name[m]] = test_acc.mean()
        acc_std[model_name[m]] = test_acc.std()
        m+=1
for j in range(4):
    print(model_name[j], acc_mean[model_name[j]], acc_std[model_name[j]])


Crowd noise adaptation model with softmax: False, trace: False


In [14]:
sample_base_acc_dict = {0.01:[], 0.02:[], 0.03:[], 0.04:[], 0.05:[]}
sample_base_loss_dict = {0.01:[], 0.02:[], 0.03:[], 0.04:[], 0.05:[]}

mix_acc_dict = {0.01:[], 0.02:[], 0.03:[], 0.04:[], 0.05:[]}
mix_loss_dict = {0.01:[], 0.02:[], 0.03:[], 0.04:[], 0.05:[]}
mix_trace_dict = {0.01:[], 0.02:[], 0.03:[], 0.04:[], 0.05:[]}

pretrain_acc_dict = {0.01:[], 0.02:[], 0.03:[], 0.04:[], 0.05:[]}
pretrain_loss_dict = {0.01:[], 0.02:[], 0.03:[], 0.04:[], 0.05:[]}
pretrain_trace_dict = {0.01:[], 0.02:[], 0.03:[], 0.04:[], 0.05:[]}

for clean_percent in [0.01, 0.02, 0.03, 0.04, 0.05]:

    print('\nBaseline model with %.2f clean data' % (clean_percent))
    test_acc_list = []
    for i in range(N_RUNS):
        clean_history, mets = baseline_gt(x_sample[int(clean_percent*100)], y_sample[int(clean_percent*100)], N_CLASSES, model_dir)
        sample_base_acc_dict[clean_percent] = clean_history.history['acc']
        sample_base_loss_dict[clean_percent] = clean_history.history['loss']
        test_acc_list.append(mets['acc'])
    test_acc = np.array(test_acc_list)
    model_desc = 'sample_base_%s'%clean_percent
    acc_mean[model_desc] = test_acc.mean()
    acc_std[model_desc] = test_acc.std()
    print(clean_percent, acc_mean[model_desc], acc_std[model_desc])
            
    print('\nCrowd noise adaptation model with %.2f clean data' % (clean_percent))
    test_acc_list = []
    for i in range(N_RUNS):
        history, trace_arr, mets = crowd_model(x, y_gt, y_annot_mix[int(clean_percent*100)], N_CLASSES, False, True, model_dir)
        mix_acc_dict[clean_percent] = history.history['baseline_acc']
        mix_loss_dict[clean_percent] = history.history['baseline_loss']
        mix_trace_dict[clean_percent] = trace_arr
        test_acc_list.append(mets['baseline_acc'])
    test_acc = np.array(test_acc_list)
    model_desc = 'mix_%s'%clean_percent
    acc_mean[model_desc] = test_acc.mean()
    acc_std[model_desc] = test_acc.std()
    print(clean_percent, acc_mean[model_desc], acc_std[model_desc])
    
    print('\nCrowd noise adaptation model pretrain with %.2f clean data' % (clean_percent))
    test_acc_list = []
    for i in range(N_RUNS):
        history, trace_arr, mets = crowd_model_pretrain_with_clean_data(x, y_gt, y_annot, x_sample[int(clean_percent*100)], y_sample[int(clean_percent*100)], N_CLASSES, False, True, model_dir)
        pretrain_trace_dict[clean_percent] = history.history['baseline_acc']
        pretrain_trace_dict[clean_percent] = history.history['baseline_loss'] 
        pretrain_trace_dict[clean_percent] = trace_arr
        test_acc_list.append(mets['baseline_acc'])
    test_acc = np.array(test_acc_list)
    model_desc = 'pretrain_%s'%clean_percent
    acc_mean[model_desc] = test_acc.mean()
    acc_std[model_desc] = test_acc.std()
    print(clean_percent, acc_mean[model_desc], acc_std[model_desc])


Baseline model with 0.01 clean data
Test dataset results: 
{'loss': 2.1243054886859674, 'acc': 0.45791245791245794}
Test dataset results: 
{'loss': 1.801091168464635, 'acc': 0.45875420875420875}
Test dataset results: 
{'loss': 1.6508558948433358, 'acc': 0.44191919191919193}
Test dataset results: 
{'loss': 2.1716030221996885, 'acc': 0.398989898989899}
Test dataset results: 
{'loss': 1.8531769582318136, 'acc': 0.40404040404040403}
Test dataset results: 
{'loss': 2.0309800692278928, 'acc': 0.414983164983165}
Test dataset results: 
{'loss': 2.620486430447511, 'acc': 0.41919191919191917}
Test dataset results: 
{'loss': 1.9096883033662533, 'acc': 0.4074074074074074}
Test dataset results: 
{'loss': 2.0237568669046215, 'acc': 0.4377104377104377}
Test dataset results: 
{'loss': 1.9927224486765236, 'acc': 0.3813131313131313}
Test dataset results: 
{'loss': 2.096517644747339, 'acc': 0.4377104377104377}
Test dataset results: 
{'loss': 1.7104399380860504, 'acc': 0.4234006734006734}
Test dataset re

Test dataset results: 
{'loss': -7.244180178401446, 'crowds_classification_24_loss': -7.244180178401446, 'baseline_loss': 0.7573958964170167, 'crowds_classification_24_acc': 0.030829124579124578, 'baseline_acc': 0.8569023569023569}
Test dataset results: 
{'loss': -6.970684393487796, 'crowds_classification_25_loss': -6.970684393487796, 'baseline_loss': 2.7157080116296055, 'crowds_classification_25_acc': 0.024305555555555556, 'baseline_acc': 0.7289562289562289}
Test dataset results: 
{'loss': -7.14579469667942, 'crowds_classification_26_loss': -7.14579469667942, 'baseline_loss': 0.7556540212697453, 'crowds_classification_26_acc': 0.02798821548821549, 'baseline_acc': 0.8535353535353535}
Test dataset results: 
{'loss': -7.163216504183683, 'crowds_classification_27_loss': -7.163216504183683, 'baseline_loss': 0.9052759992203327, 'crowds_classification_27_acc': 0.02925084175084175, 'baseline_acc': 0.8400673400673401}
Test dataset results: 
{'loss': -6.972976373100923, 'crowds_classification_2

Test dataset results: 
{'loss': -6.898029690238362, 'crowds_classification_59_loss': -6.898029690238362, 'baseline_loss': 0.9571079032288657, 'crowds_classification_59_acc': 0.022832491582491583, 'baseline_acc': 0.8274410774410774}
Test dataset results: 
{'loss': -6.971898387176822, 'crowds_classification_60_loss': -6.971898387176822, 'baseline_loss': 0.9209376478266685, 'crowds_classification_60_acc': 0.03188131313131313, 'baseline_acc': 0.8341750841750841}
0.01 0.8315656565656563 0.010523232237048078

Baseline model with 0.02 clean data
Test dataset results: 
{'loss': 1.5477059678196505, 'acc': 0.5244107744107744}
Test dataset results: 
{'loss': 1.6929704857996417, 'acc': 0.48063973063973064}
Test dataset results: 
{'loss': 1.1683529414713183, 'acc': 0.57996632996633}
Test dataset results: 
{'loss': 1.2473490928559994, 'acc': 0.563973063973064}
Test dataset results: 
{'loss': 1.3468182982017698, 'acc': 0.5311447811447811}
Test dataset results: 
{'loss': 1.2170211550362584, 'acc': 0.5

Test dataset results: 
{'loss': -7.101573797990176, 'crowds_classification_84_loss': -7.101573797990176, 'baseline_loss': 0.7793949295951662, 'crowds_classification_84_acc': 0.010416666666666666, 'baseline_acc': 0.8265993265993266}
Test dataset results: 
{'loss': -7.030849496924917, 'crowds_classification_85_loss': -7.030849496924917, 'baseline_loss': 0.9219747696460698, 'crowds_classification_85_acc': 0.008312289562289563, 'baseline_acc': 0.8282828282828283}
Test dataset results: 
{'loss': -7.137381458924676, 'crowds_classification_86_loss': -7.137381458924676, 'baseline_loss': 1.1815676966098825, 'crowds_classification_86_acc': 0.0030513468013468013, 'baseline_acc': 0.8038720538720538}
Test dataset results: 
{'loss': -7.139444648216068, 'crowds_classification_87_loss': -7.139444648216068, 'baseline_loss': 0.9991874734962026, 'crowds_classification_87_acc': 0.011784511784511785, 'baseline_acc': 0.8316498316498316}
Test dataset results: 
{'loss': -6.987733213187067, 'crowds_classificat

Test dataset results: 
{'loss': -6.80191461646597, 'crowds_classification_119_loss': -6.80191461646597, 'baseline_loss': 0.9307503841491259, 'crowds_classification_119_acc': 0.029987373737373736, 'baseline_acc': 0.8173400673400674}
Test dataset results: 
{'loss': -7.001889206343629, 'crowds_classification_120_loss': -7.001889206343629, 'baseline_loss': 0.8437947028815144, 'crowds_classification_120_acc': 0.02704124579124579, 'baseline_acc': 0.8367003367003367}
0.02 0.8314534231200895 0.011328109517545986

Baseline model with 0.03 clean data
Test dataset results: 
{'loss': 1.110034982363383, 'acc': 0.6742424242424242}
Test dataset results: 
{'loss': 1.048101994927082, 'acc': 0.6607744107744108}
Test dataset results: 
{'loss': 1.145135258986091, 'acc': 0.6582491582491582}
Test dataset results: 
{'loss': 1.1453323017065773, 'acc': 0.6590909090909091}
Test dataset results: 
{'loss': 1.0248626464545125, 'acc': 0.7045454545454546}
Test dataset results: 
{'loss': 1.1728449560175038, 'acc': 0.

Test dataset results: 
{'loss': -7.165602134935783, 'crowds_classification_144_loss': -7.165602134935783, 'baseline_loss': 0.9119568954312841, 'crowds_classification_144_acc': 0.026199494949494948, 'baseline_acc': 0.8333333333333334}
Test dataset results: 
{'loss': -7.2075915095782035, 'crowds_classification_145_loss': -7.2075915095782035, 'baseline_loss': 0.767613927121917, 'crowds_classification_145_acc': 0.0035774410774410776, 'baseline_acc': 0.8274410774410774}
Test dataset results: 
{'loss': -7.189385428573146, 'crowds_classification_146_loss': -7.189385428573146, 'baseline_loss': 0.9383493028675215, 'crowds_classification_146_acc': 0.01125841750841751, 'baseline_acc': 0.8324915824915825}
Test dataset results: 
{'loss': -7.126834813191834, 'crowds_classification_147_loss': -7.126834813191834, 'baseline_loss': 0.7454321022687938, 'crowds_classification_147_acc': 0.03019781144781145, 'baseline_acc': 0.8459595959595959}
Test dataset results: 
{'loss': -7.266965896593601, 'crowds_clas

Test dataset results: 
{'loss': -6.956112280437842, 'crowds_classification_179_loss': -6.956112280437842, 'baseline_loss': 0.8866790503589593, 'crowds_classification_179_acc': 0.002840909090909091, 'baseline_acc': 0.8425925925925926}
Test dataset results: 
{'loss': -7.001610532754198, 'crowds_classification_180_loss': -7.001610532754198, 'baseline_loss': 1.0440166567260971, 'crowds_classification_180_acc': 0.03324915824915825, 'baseline_acc': 0.8333333333333334}
0.03 0.8294332210998874 0.009374558461421996

Baseline model with 0.04 clean data
Test dataset results: 
{'loss': 1.072236053470008, 'acc': 0.6843434343434344}
Test dataset results: 
{'loss': 0.9583428391703853, 'acc': 0.6717171717171717}
Test dataset results: 
{'loss': 1.0137150135104505, 'acc': 0.6843434343434344}
Test dataset results: 
{'loss': 0.9936923807116871, 'acc': 0.6835016835016835}
Test dataset results: 
{'loss': 1.0350774185424703, 'acc': 0.6868686868686869}
Test dataset results: 
{'loss': 1.0829286881368168, 'acc'

Test dataset results: 
{'loss': -7.241660802051275, 'crowds_classification_204_loss': -7.241660802051275, 'baseline_loss': 0.8776078217659735, 'crowds_classification_204_acc': 0.011153198653198653, 'baseline_acc': 0.8333333333333334}
Test dataset results: 
{'loss': -7.099870204925537, 'crowds_classification_205_loss': -7.099870204925537, 'baseline_loss': 0.8232337474220931, 'crowds_classification_205_acc': 0.007891414141414142, 'baseline_acc': 0.8341750841750841}
Test dataset results: 
{'loss': -7.246424851594148, 'crowds_classification_206_loss': -7.246424851594148, 'baseline_loss': 0.9811092741059936, 'crowds_classification_206_acc': 0.029356060606060608, 'baseline_acc': 0.819023569023569}
Test dataset results: 
{'loss': -7.1951862922822585, 'crowds_classification_207_loss': -7.1951862922822585, 'baseline_loss': 0.9854209235420933, 'crowds_classification_207_acc': 0.030934343434343436, 'baseline_acc': 0.819023569023569}
Test dataset results: 
{'loss': -7.173074221370196, 'crowds_clas

Test dataset results: 
{'loss': -6.8719407605001015, 'crowds_classification_239_loss': -6.8719407605001015, 'baseline_loss': 1.023356152855186, 'crowds_classification_239_acc': 0.0021043771043771043, 'baseline_acc': 0.8308080808080808}
Test dataset results: 
{'loss': -6.983989890577014, 'crowds_classification_240_loss': -6.983989890577014, 'baseline_loss': 0.985672834545675, 'crowds_classification_240_acc': 0.029671717171717172, 'baseline_acc': 0.8282828282828283}
0.04 0.8314814814814813 0.011611687682493883

Baseline model with 0.05 clean data
Test dataset results: 
{'loss': 0.8376661866702616, 'acc': 0.7407407407407407}
Test dataset results: 
{'loss': 0.8799682637858471, 'acc': 0.7239057239057239}
Test dataset results: 
{'loss': 0.8339177880825017, 'acc': 0.7558922558922558}
Test dataset results: 
{'loss': 0.764257732866589, 'acc': 0.7558922558922558}
Test dataset results: 
{'loss': 0.871265344748192, 'acc': 0.7314814814814815}
Test dataset results: 
{'loss': 0.8931780842819599, 'acc

Test dataset results: 
{'loss': -7.3450520030577175, 'crowds_classification_264_loss': -7.3450520030577175, 'baseline_loss': 0.5978403608957525, 'crowds_classification_264_acc': 0.030618686868686868, 'baseline_acc': 0.8602693602693603}
Test dataset results: 
{'loss': -7.199581159084333, 'crowds_classification_265_loss': -7.199581159084333, 'baseline_loss': 0.8075601693355676, 'crowds_classification_265_acc': 0.028409090909090908, 'baseline_acc': 0.8173400673400674}
Test dataset results: 
{'loss': -7.378962532839791, 'crowds_classification_266_loss': -7.378962532839791, 'baseline_loss': 0.7008813756082194, 'crowds_classification_266_acc': 0.030303030303030304, 'baseline_acc': 0.8417508417508418}
Test dataset results: 
{'loss': -7.277306148901531, 'crowds_classification_267_loss': -7.277306148901531, 'baseline_loss': 0.6840198833231974, 'crowds_classification_267_acc': 0.03177609427609428, 'baseline_acc': 0.8526936026936027}
Test dataset results: 
{'loss': -7.04713297693015, 'crowds_clas

Test dataset results: 
{'loss': -6.964468196586326, 'crowds_classification_299_loss': -6.964468196586326, 'baseline_loss': 0.8536269677934623, 'crowds_classification_299_acc': 0.028935185185185185, 'baseline_acc': 0.8451178451178452}
Test dataset results: 
{'loss': -6.829018372879285, 'crowds_classification_300_loss': -6.829018372879285, 'baseline_loss': 0.8766870470560761, 'crowds_classification_300_acc': 0.022095959595959596, 'baseline_acc': 0.8434343434343434}
0.05 0.8316217732884397 0.01008576134200633
