In [1]:
from __future__ import print_function
import numpy as np
import random
# Seed Python's and NumPy's global RNGs with one shared value so that the
# NumPy / `random` based steps of the notebook are repeatable.
seed = 42
random.seed(seed)
np.random.seed(seed)

# Notebook-wide verbosity flag.
verbose = True

# Data

In [2]:
# Number of target categories: MNIST has the ten digits 0-9.
nb_classes = 10

# Input image dimensions. A CNN would treat each sample as a "color" image
# with a single channel; the MLP used here instead flattens the pixels into
# one vector of img_color * img_rows * img_cols values.
img_color = 1
img_rows = img_cols = 28
img_size = img_color * img_rows * img_cols

In [3]:
from keras.datasets import mnist

# Keras ships a loader that downloads MNIST to `~/.keras/datasets/` and
# returns the data already shuffled and split into train and test sets.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

# Per-digit sample counts of both splits.
print('MNIST training data set label distribution', np.bincount(y_train))
print('test distribution', np.bincount(y_test))

MNIST training data set label distribution [5923 6742 5958 6131 5842 5421 5918 6265 5851 5949]
test distribution [ 980 1135 1032 1010  982  892  958 1028  974 1009]


In [4]:
# Flatten every image to a vector of `img_size` pixels, cast to float32 and
# rescale the intensities from [0, 255] to [0, 1].
# NOTE: this overwrites X_train / X_test, so re-running the cell without
# re-loading the data would rescale them a second time.
X_train = X_train.reshape(X_train.shape[0], img_size).astype('float32') / 255.
X_test = X_test.reshape(X_test.shape[0], img_size).astype('float32') / 255.

print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

X_train shape: (60000, 784)
60000 train samples
10000 test samples


## Noisy labels

In [5]:
# Fraction of the training labels that get replaced by their permuted label.
NOISE_LEVEL = 0.46

# Noise permutation (from Reed): a sample whose true class is i is given the
# noisy label perm[i].
perm = np.asarray([7, 9, 0, 4, 2, 1, 3, 5, 6, 8])

In [6]:
# Noisy label every training sample would get if it were corrupted:
# look up each true label in the permutation table.
noise = np.take(perm, y_train)

In [7]:
from sklearn.model_selection import StratifiedShuffleSplit

# Choose which training samples get corrupted. The split is stratified on the
# true labels so that every category receives an equal share of noise; the
# "test" side of the split (NOISE_LEVEL of the data) is the set to corrupt.
noise_splitter = StratifiedShuffleSplit(n_splits=1,
                                        test_size=NOISE_LEVEL,
                                        random_state=seed)
_, noise_idx = next(noise_splitter.split(X_train, y_train))

# Start from the clean labels and overwrite the selected positions with the
# permuted (noise) labels.
y_train_noise = y_train.copy()
y_train_noise[noise_idx] = noise[noise_idx]

Actual noise level: the fraction of training labels that now differ from the clean labels.

In [8]:
# One minus the share of labels left untouched = share of corrupted labels.
1. - (y_train_noise == y_train).mean()

0.45999999999999996

Split the (noisy) training data into a training set and a validation set.

In [9]:
# Hold out 10% of the training set as a validation set, which we will use for
# early stopping. The split is stratified on the *noisy* labels.
val_splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.1,
                                      random_state=seed)
train_idx, val_idx = next(val_splitter.split(X_train, y_train_noise))

X_train_train, y_train_train = X_train[train_idx], y_train_noise[train_idx]
X_train_val, y_train_val = X_train[val_idx], y_train_noise[val_idx]

In [10]:
# `keras.utils.to_categorical` is the public entry point; the private
# `keras.utils.np_utils` module is not present in every Keras release.
from keras.utils import to_categorical

# One-hot encode the integer labels of all three splits. The class count
# comes from `nb_classes` instead of a repeated literal so it cannot drift
# from the rest of the notebook.
# NOTE: the label arrays are overwritten, so this cell must not be re-run
# without first re-running the cells that create the integer labels.
y_test = to_categorical(y_test, num_classes=nb_classes)
y_train_train = to_categorical(y_train_train, num_classes=nb_classes)
y_train_val = to_categorical(y_train_val, num_classes=nb_classes)

In [11]:
# Prepare the training dataset.
import tensorflow as tf

# Seeding NumPy and `random` does not seed TensorFlow. Without this call the
# weight initialisation, the dropout masks and the tf.data shuffle order
# change from run to run.
tf.random.set_seed(seed)

batch_size = 256
train_dataset = tf.data.Dataset.from_tensor_slices((X_train_train, y_train_train))
# A shuffle buffer smaller than the dataset only mixes samples within a
# sliding window; use the full training-set size for a uniform shuffle and
# pass the seed so the batch order is repeatable.
train_dataset = train_dataset.shuffle(
    buffer_size=X_train_train.shape[0], seed=seed
).batch(batch_size)

# Prepare the validation dataset (fixed order, no shuffling).
val_dataset = tf.data.Dataset.from_tensor_slices((X_train_val, y_train_val))
val_dataset = val_dataset.batch(batch_size)

# Prepare the test dataset (fixed order, no shuffling).
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
test_dataset = test_dataset.batch(batch_size)

# Model setting

In [12]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
from keras.layers import Dense, Dropout, Activation

In [13]:
# Two-hidden-layer MLP: 500 and 300 ReLU units, each followed by 50% dropout.
# All layers come from `tensorflow.keras.layers`; mixing in layer classes from
# the standalone `keras` package can fail depending on the installed versions.
# The ReLU is applied once, through the Dense layer's `activation` argument
# (a second ReLU on an already non-negative output is a no-op).
inputs = keras.Input(shape=(img_size,), name="digits")
x1 = layers.Dense(500, activation="relu")(inputs)
x1 = layers.Dropout(0.5)(x1)
x2 = layers.Dense(300, activation="relu")(x1)
x2 = layers.Dropout(0.5)(x2)
# The output layer applies softmax, so the model returns class probabilities,
# not raw logits.
outputs = layers.Dense(nb_classes, activation='softmax', name="predictions")(x2)
model = keras.Model(inputs=inputs, outputs=outputs)

# Stage 1: train the network on the noisy labels with cross-entropy

In [14]:
# Adam optimizer used for the first training stage.
optimizer = keras.optimizers.Adam(learning_rate=1e-3)

# Categorical cross-entropy on one-hot targets. With its default arguments it
# expects probabilities as predictions, which is what the model's softmax
# output layer produces.
loss_cce = keras.losses.CategoricalCrossentropy()

# One accuracy tracker per data split.
train_acc_metric = keras.metrics.CategoricalAccuracy()
val_acc_metric = keras.metrics.CategoricalAccuracy()
test_acc_metric = keras.metrics.CategoricalAccuracy()

In [None]:

epochs = 40
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))

    # ---- Training pass: one optimizer step per mini-batch ----
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            # Despite its name, `logits` holds the model's softmax output.
            logits = model(x_batch_train, training=True)
            loss_value = loss_cce(y_batch_train, logits)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        # Accumulate the running training accuracy.
        train_acc_metric.update_state(y_batch_train, logits)

        # Report the current batch loss every 200 batches.
        if not step % 200:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )

    # ---- End of epoch: report the training accuracy, then clear the tracker ----
    train_acc = train_acc_metric.result()
    print("Training acc over epoch: %.4f" % (float(train_acc),))
    train_acc_metric.reset_states()

    # ---- Validation pass in inference mode ----
    for x_batch_val, y_batch_val in val_dataset:
        val_logits = model(x_batch_val, training=False)
        val_acc_metric.update_state(y_batch_val, val_logits)
    val_acc = val_acc_metric.result()
    val_acc_metric.reset_states()
    print("Validation acc: %.4f" % (float(val_acc),))
    



Start of epoch 0
Training loss (for one batch) at step 0: 2.3761
Training loss (for one batch) at step 200: 0.9789
Training acc over epoch: 0.4287
Validation acc: 0.4778

Start of epoch 1
Training loss (for one batch) at step 0: 0.9669
Training loss (for one batch) at step 200: 0.8886
Training acc over epoch: 0.4820
Validation acc: 0.4955

Start of epoch 2
Training loss (for one batch) at step 0: 1.0043
Training loss (for one batch) at step 200: 0.8839
Training acc over epoch: 0.4924
Validation acc: 0.5117

Start of epoch 3
Training loss (for one batch) at step 0: 0.8125
Training loss (for one batch) at step 200: 0.7880
Training acc over epoch: 0.4999
Validation acc: 0.5062

Start of epoch 4
Training loss (for one batch) at step 0: 0.8244
Training loss (for one batch) at step 200: 0.7848
Training acc over epoch: 0.5041
Validation acc: 0.5042

Start of epoch 5
Training loss (for one batch) at step 0: 0.8167
Training loss (for one batch) at step 200: 0.8006
Training acc over epoch: 0.51

In [None]:
# Accumulate accuracy over all test batches with the model in inference mode.
for x_batch_test, y_batch_test in test_dataset:
    test_logits = model(x_batch_test, training=False)
    # Update the test metric.
    test_acc_metric.update_state(y_batch_test, test_logits)

# Read the aggregate, then clear the tracker so the next evaluation starts
# from zero.
test_acc = test_acc_metric.result()
test_acc_metric.reset_states()
print("test acc: %.4f" % (float(test_acc),))
   

# Stage 2: jointly update the network and the per-sample label distributions

In [None]:
# Fresh Adam optimizer (new moment estimates) for the second stage.
optimizer = keras.optimizers.Adam(learning_rate=1e-3)

# Per-sample label scores that stage 2 updates by gradient descent; they start
# as a copy of the noisy one-hot training labels.
y_train_train_np = y_train_train.copy()

In [None]:
# cce = tf.keras.losses.CategoricalCrossentropy()
# for x_batch_val, y_batch_val in val_dataset:
#     loss1 = -tf.reduce_mean(tf.multiply(tf.nn.softmax(y_batch_val), tf.nn.log_softmax(y_batch_val))).numpy()
#     loss2 = cce(y_batch_val,y_batch_val).numpy()
#     print(y_batch_val.shape,loss1,loss2)
#     break

In [None]:
# Stage 2: alternate, per mini-batch, between
#   (a) an Adam step on the network weights using entropy + classification loss
#   (b) a plain gradient-descent step on the per-sample label scores
#       (`y_train_train_np`) using classification + compatibility loss.
# Batches are taken in fixed order by slicing so each batch of samples lines up
# with its slice of label scores.
epochs = 5
label_lr = 1000   # step size of the gradient-descent update on the label scores
log_every = 200   # print diagnostics every `log_every` steps
eps = 1e-7        # lower clip bound that keeps log() of the probabilities finite

n_train = X_train_train.shape[0]
# Ceil division: avoids an empty trailing batch (and a NaN loss) when n_train
# is an exact multiple of batch_size.
n_steps = -(-n_train // batch_size)

for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))

    for step in range(n_steps):
        start = step * batch_size
        end = min(start + batch_size, n_train)

        x_batch_train = X_train_train[start:end]
        y_batch_train = y_train_train[start:end]        # noisy one-hot labels
        # Basic slicing returns a view, so the in-place update of
        # `y_train_train_np` below is visible through `y_batch_train_np`.
        y_batch_train_np = y_train_train_np[start:end]
        y_batch_train_variable = tf.Variable(y_batch_train_np)

        # Two different losses are differentiated, hence the persistent tape.
        with tf.GradientTape(persistent=True) as tape:
            # The model ends in a softmax layer, so its output already is a
            # probability distribution; applying softmax again would flatten it.
            probs = model(x_batch_train, training=True)
            log_probs = tf.math.log(tf.clip_by_value(probs, eps, 1.0))

            # Label distribution derived from the learnable scores. log_softmax
            # is used instead of log(softmax(.)) so large scores cannot
            # underflow to log(0).
            label_dist = tf.nn.softmax(y_batch_train_variable)
            log_label_dist = tf.nn.log_softmax(y_batch_train_variable)

            loss_entropy = -tf.reduce_mean(probs * log_probs)
            # KL(probs || label_dist), averaged over samples and classes.
            loss_classification = tf.reduce_mean(probs * (log_probs - log_label_dist))
            # Keras losses are called as loss(y_true, y_pred): the noisy labels
            # are the targets and the learnable label distribution is the
            # prediction. (With the arguments swapped the log is taken of the
            # one-hot labels.)
            loss_compatibility = loss_cce(y_batch_train, label_dist)
            loss_forModel = loss_entropy + loss_classification
            loss_forVar = loss_classification + loss_compatibility

        grads = tape.gradient(loss_forModel, model.trainable_weights)
        grads_variable = tape.gradient(loss_forVar, y_batch_train_variable)
        del tape  # release the resources held by the persistent tape

        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        y_train_train_np[start:end] -= label_lr * grads_variable.numpy()

        train_acc_metric.update_state(y_batch_train, probs)

        # Log every `log_every` batches (printing on every step floods the output).
        if step % log_every == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_forModel))
            )
            print('\nepoch: {}, step: {}'.format(epoch, step))
            print('\npredict:{}, \nlabel: {}'.format(probs[0].numpy(), y_batch_train[0]))
            print('\nentropy_loss:{:.4f}, compatibility_loss: {:.4f}, classification_loss: {:.4f}'.format(loss_entropy.numpy(), loss_compatibility.numpy(), loss_classification.numpy()))
            print('\nvariable:{} \ngradient: {}'.format(y_batch_train_np[0], grads_variable[0].numpy()))

    # Display metrics at the end of each epoch.
    train_acc = train_acc_metric.result()
    print("Training acc over epoch: %.4f" % (float(train_acc),))

    # Reset training metrics at the end of each epoch
    train_acc_metric.reset_states()

    # Run a validation loop at the end of each epoch.
    for x_batch_val, y_batch_val in val_dataset:
        val_logits = model(x_batch_val, training=False)
        # Update val metrics
        val_acc_metric.update_state(y_batch_val, val_logits)
    val_acc = val_acc_metric.result()
    val_acc_metric.reset_states()
    print("Validation acc: %.4f" % (float(val_acc),))
    



In [None]:
# Accumulate accuracy over all test batches with the model in inference mode.
for x_batch_test, y_batch_test in test_dataset:
    test_logits = model(x_batch_test, training=False)
    # Update the test metric.
    test_acc_metric.update_state(y_batch_test, test_logits)

# Read the aggregate, then clear the tracker so the next evaluation starts
# from zero.
test_acc = test_acc_metric.result()
test_acc_metric.reset_states()
print("test acc: %.4f" % (float(test_acc),))
   

# Stage 3: fine-tune the network with the classification loss only, at a lower learning rate

In [None]:
# Fresh Adam optimizer with a 10x smaller learning rate for the fine-tuning stage.
optimizer = keras.optimizers.Adam(learning_rate=1e-4)

# Stage 3 trains against the label scores that Stage 2 updated, so
# `y_train_train_np` is deliberately NOT re-initialised from the noisy labels
# here; doing so would throw away everything Stage 2 learned about the labels.
# The check fails fast if Stage 2 has not been run in this kernel.
assert y_train_train_np.shape == y_train_train.shape, "run Stage 2 before Stage 3"

In [None]:
# Stage 3: the label scores in `y_train_train_np` are held fixed and only the
# network weights are updated, using the classification (KL) loss alone.
epochs = 5
log_every = 200   # print the batch loss every `log_every` steps
eps = 1e-7        # lower clip bound that keeps log() of the probabilities finite

n_train = X_train_train.shape[0]
# Ceil division: avoids an empty trailing batch (and a NaN loss) when n_train
# is an exact multiple of batch_size.
n_steps = -(-n_train // batch_size)

for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))

    for step in range(n_steps):
        start = step * batch_size
        end = min(start + batch_size, n_train)

        x_batch_train = X_train_train[start:end]
        y_batch_train = y_train_train[start:end]        # noisy one-hot labels
        y_batch_train_np = y_train_train_np[start:end]  # fixed label scores
        # The targets are constant in this stage, so they are computed outside
        # the tape. log_softmax avoids log(0) for large scores.
        log_label_dist = tf.nn.log_softmax(y_batch_train_np)

        # Only one gradient is taken, so a non-persistent tape is enough.
        with tf.GradientTape() as tape:
            # The model ends in a softmax layer, so its output already is a
            # probability distribution; applying softmax again would flatten it.
            probs = model(x_batch_train, training=True)
            log_probs = tf.math.log(tf.clip_by_value(probs, eps, 1.0))
            # KL(probs || label distribution), averaged over samples and classes.
            loss_classification = tf.reduce_mean(probs * (log_probs - log_label_dist))

        grads = tape.gradient(loss_classification, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        train_acc_metric.update_state(y_batch_train, probs)

        # Log the loss that is actually optimised in this stage. (The variable
        # `loss_entropy` is never computed here; it would be a stale value left
        # over from Stage 2.)
        if step % log_every == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_classification))
            )

    # Display metrics at the end of each epoch.
    train_acc = train_acc_metric.result()
    print("Training acc over epoch: %.4f" % (float(train_acc),))

    # Reset training metrics at the end of each epoch
    train_acc_metric.reset_states()

    # Run a validation loop at the end of each epoch.
    for x_batch_val, y_batch_val in val_dataset:
        val_logits = model(x_batch_val, training=False)
        # Update val metrics
        val_acc_metric.update_state(y_batch_val, val_logits)
    val_acc = val_acc_metric.result()
    val_acc_metric.reset_states()
    print("Validation acc: %.4f" % (float(val_acc),))
    



In [None]:
# Accumulate accuracy over all test batches with the model in inference mode.
for x_batch_test, y_batch_test in test_dataset:
    test_logits = model(x_batch_test, training=False)
    # Update the test metric.
    test_acc_metric.update_state(y_batch_test, test_logits)

# Read the aggregate, then clear the tracker so the next evaluation starts
# from zero.
test_acc = test_acc_metric.result()
test_acc_metric.reset_states()
print("test acc: %.4f" % (float(test_acc),))
   