In [228]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
import tensorflow as tf

In [5]:
import numpy as np

In [4]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input, Multiply
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Conv3D, MaxPool3D
from tensorflow.keras import backend as K

In [6]:
import random

In [7]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
    
# convert to RGB
x_train = np.stack((x_train,)*3, axis=-1)
x_test = np.stack((x_test,)*3, axis=-1)

# normalize
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# binary label
y_train = (y_train < 5).astype(int)
y_test = (y_test < 5).astype(int)

In [8]:
def get_env(x, y, e, labelflip_proba=.25):
    x = x.copy()
    y = y.copy()
    
    y = np.logical_xor(
        y,
        (np.random.random(size=len(y)) < labelflip_proba).astype(int)
    ).astype(int)
    
    color = np.logical_xor(
        y,
        (np.random.random(size=len(y)) < e).astype(int)
    )
    
    x[color, :, :, 2] = 0
    x[color, :, :, 1] = 0
    return x, y

In [9]:
x, y = get_env(x_train, y_train, .5)

In [12]:
class MNISTDataGenerator(tf.keras.utils.Sequence):
    
    def __init__(self, e=.1, batch_size=128, shuffle=True):
        
        self.e=.1
        self.batch_size = batch_size
        self.shuffle = shuffle
        
        self.__load_initial_data()
        self.__create_envs()
        self.__create_validation_envs()
        self.on_epoch_end()
        
        print(len(self), 'batches/epoch')
        
    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.e1[1]) / self.batch_size))
    
    def __getitem__(self, index):
        'Generate one batch of data'
        
        batch_indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
        
        e1_x_batch = self.e1[0][batch_indices,:,:,:]
        e1_y_batch = self.e1[1][batch_indices]
        
        return [e1_x_batch, e1_y_batch]
    
    def __load_initial_data(self):
        (x_train, y_train), (x_test, y_test) = mnist.load_data()

        # convert to RGB
        x_train = np.stack((x_train,)*3, axis=-1)
        x_test = np.stack((x_test,)*3, axis=-1)

        # normalize
        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train /= 255
        x_test /= 255

        # binary label
        y_train = (y_train < 5).astype(int)
        y_test = (y_test < 5).astype(int)
        
        self.original_data = {
            'x_train':x_train,
            'x_test':x_test,
            'y_train':y_train,
            'y_test':y_test
        }
        
    def __create_envs(self):
        self.e1 = self.__create_env(self.original_data['x_train'][:10000], 
                                    self.original_data['y_train'][:10000], .1)
        
    def __create_validation_envs(self):
        self.e11 = self.__create_env(self.original_data['x_train'][30000:40000], 
                                     self.original_data['y_train'][30000:40000], .1)
        
    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indices = np.arange(len(self.e1[1]))
        if self.shuffle:
            np.random.shuffle(self.indices)
    
    def __create_env(self, x, y, e, labelflip_proba=.25):
        x = x.copy()
        y = y.copy()

        y = np.logical_xor(
            y,
            (np.random.random(size=len(y)) < labelflip_proba).astype(int)
        ).astype(int)

        color = np.logical_xor(
            y,
            (np.random.random(size=len(y)) < e).astype(int)
        )

        x[color, :, :, 2] = 0
        x[color, :, :, 1] = 0
        return x, y
        

In [273]:
def get_model(compile=False):
    
    input_images = Input(shape=(28, 28, 3))
    
    cnn = Conv2D(32, kernel_size=(3, 3),
                 activation='relu')(input_images)
    cnn = Conv2D(64, (3, 3), activation='relu')(cnn)
    cnn = MaxPooling2D(pool_size=(2, 2))(cnn)
    cnn = Dropout(0.25)(cnn)
    cnn = Flatten()(cnn)
    
    env1 = Dense(32, activation='relu')(cnn)
    env1 = Dropout(0.5)(env1)
    env1 = Dense(1, name='env1')(env1)
        
    model = Model(
        inputs=[input_images],
        outputs=[env1]
    )
    
    if compile:
        model.compile(
            loss=[
                tf.keras.losses.binary_crossentropy,
            ],
            optimizer=tf.keras.optimizers.Adadelta(),
            metrics=['accuracy']
        )
    return model

In [229]:
g = MNISTDataGenerator()
d = tf.data.Dataset.from_tensor_slices(g.e1).shuffle(256).batch(128)

78 batches/epoch


In [285]:
d.enumerate()

<ZipDataset shapes: ((), ((None, 28, 28, 3), (None,))), types: (tf.int64, (tf.float32, tf.int64))>

In [292]:
def train(model, dataset, valid_dataset, epochs, 
          lambda_=1.0, 
          dummy=tf.convert_to_tensor([1.]),
          loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=True),
          accuracy_object = tf.keras.metrics.Accuracy(),
          optimizer = tf.keras.optimizers.Adam()):
  for epoch in range(epochs):
    for (batch, (images, labels)) in enumerate(dataset):
    
      # compute penalty
      with tf.GradientTape() as tape:
        tape.watch(dummy)
        logits = model(images, training=False)
        loss_value = loss_object(labels, logits * dummy)
      accuracy_object.update_state(labels, 
                                   tf.math.greater(
                                       tf.keras.activations.sigmoid(logits),
                                       .5)
                                   )
      grads = tape.gradient(loss_value, dummy)
      penalty = tf.math.reduce_mean(loss_value * tf.math.square(grads)).numpy()
    
      # train
      with tf.GradientTape() as tape:
        logits = model(images, training=True)
        loss_value = loss_object(labels, logits)
      grads = tape.gradient(loss_value, model.trainable_variables)
      grads += penalty * lambda_
      optimizer.apply_gradients(zip(grads, model.trainable_variables))
    
      if not batch % 30:
        tr_acc = accuracy_object.result().numpy()
        accuracy_object.reset_states()
        # validation
        for (v_batch, (v_images, v_labels)) in enumerate(valid_dataset):
          logits = model(v_images, training=False)
          accuracy_object.update_state(v_labels, 
                                       tf.math.greater(
                                         tf.keras.activations.sigmoid(logits),
                                         .5)
                                       )
        v_acc = accuracy_object.result().numpy()
        accuracy_object.reset_states()
        print ('Epoch %3d TrainLoss %.5f Penalty %.5f TrainAcc %.3f TestAcc %.3f' % (
            epoch, loss_value.numpy().mean(), penalty, tr_acc, v_acc 
        ))
        

In [293]:
train(
    get_model(), 
    tf.data.Dataset.from_tensor_slices(g.e1).shuffle(256).batch(128), 
    tf.data.Dataset.from_tensor_slices(g.e11).shuffle(256).batch(128), 
    epochs = 10, 
    lambda_=0
)

Epoch   0 TrainLoss 0.69328 Penalty 0.00001 TrainAcc 0.477 TestAcc 0.500
Epoch   0 TrainLoss 0.31873 Penalty 0.00228 TrainAcc 0.870 TestAcc 0.896
Epoch   0 TrainLoss 0.31029 Penalty 0.00001 TrainAcc 0.900 TestAcc 0.896
Epoch   1 TrainLoss 0.27273 Penalty 0.00013 TrainAcc 0.894 TestAcc 0.896
Epoch   1 TrainLoss 0.35763 Penalty 0.00037 TrainAcc 0.895 TestAcc 0.896
Epoch   1 TrainLoss 0.42485 Penalty 0.00392 TrainAcc 0.901 TestAcc 0.896
Epoch   2 TrainLoss 0.18218 Penalty 0.00111 TrainAcc 0.898 TestAcc 0.896
Epoch   2 TrainLoss 0.34221 Penalty 0.00001 TrainAcc 0.894 TestAcc 0.896
Epoch   2 TrainLoss 0.27341 Penalty 0.00017 TrainAcc 0.901 TestAcc 0.896
Epoch   3 TrainLoss 0.29819 Penalty 0.00019 TrainAcc 0.894 TestAcc 0.896
Epoch   3 TrainLoss 0.38884 Penalty 0.00023 TrainAcc 0.896 TestAcc 0.895
Epoch   3 TrainLoss 0.30577 Penalty 0.00002 TrainAcc 0.899 TestAcc 0.896
Epoch   4 TrainLoss 0.32601 Penalty 0.00246 TrainAcc 0.895 TestAcc 0.896
Epoch   4 TrainLoss 0.35987 Penalty 0.00010 TrainAc

KeyboardInterrupt: 

In [None]:
train(
    get_model(), 
    tf.data.Dataset.from_tensor_slices(g.e1).shuffle(256).batch(128), 
    tf.data.Dataset.from_tensor_slices(g.e11).shuffle(256).batch(128), 
    epochs = 10, 
    lambda_=1
)

Epoch   0 TrainLoss 0.68259 Penalty 0.00001 TrainAcc 0.506 TestAcc 0.751
Epoch   0 TrainLoss 0.57051 Penalty 0.00122 TrainAcc 0.853 TestAcc 0.887
Epoch   0 TrainLoss 0.56480 Penalty 0.00213 TrainAcc 0.888 TestAcc 0.885
Epoch   1 TrainLoss 0.45311 Penalty 0.01056 TrainAcc 0.878 TestAcc 0.870
Epoch   1 TrainLoss 0.47709 Penalty 0.00943 TrainAcc 0.877 TestAcc 0.865
Epoch   1 TrainLoss 0.52203 Penalty 0.00207 TrainAcc 0.868 TestAcc 0.860
Epoch   2 TrainLoss 0.40654 Penalty 0.00887 TrainAcc 0.869 TestAcc 0.861
Epoch   2 TrainLoss 0.41467 Penalty 0.00582 TrainAcc 0.858 TestAcc 0.855
Epoch   2 TrainLoss 0.39508 Penalty 0.00664 TrainAcc 0.867 TestAcc 0.865
Epoch   3 TrainLoss 0.38418 Penalty 0.00670 TrainAcc 0.867 TestAcc 0.862
Epoch   3 TrainLoss 0.51284 Penalty 0.00073 TrainAcc 0.868 TestAcc 0.863
