In [1]:
from keras import models, layers, backend, initializers
import time

Using TensorFlow backend.


In [0]:
"""
The transforms in this section are temporarily borrowed from rpmcruz's
autoaugment repository; we'll want to revisit them when we focus on the whole set of transforms
https://github.com/rpmcruz/autoaugment

"""

import PIL
import PIL, PIL.ImageOps, PIL.ImageEnhance, PIL.ImageDraw

import matplotlib.pyplot as plt
import numpy as np

def ShearX(img, v):  # [-0.3, 0.3]
    """Shear ``img`` horizontally with an affine transform; ``v`` is the x-from-y coefficient."""
    # Affine coefficients (a, b, c, d, e, f): only b is non-trivial here.
    coefficients = (1, v, 0, 0, 1, 0)
    return img.transform(img.size, PIL.Image.AFFINE, coefficients)

def ShearY(img, v):  # [-0.3, 0.3]
    """Shear ``img`` vertically with an affine transform; ``v`` is the y-from-x coefficient."""
    # Affine coefficients (a, b, c, d, e, f): only d is non-trivial here.
    coefficients = (1, 0, 0, v, 1, 0)
    return img.transform(img.size, PIL.Image.AFFINE, coefficients)

def TranslateX(img, v):  # [-150, 150] => percentage: [-0.45, 0.45]
    """Translate ``img`` along x; ``v`` is the offset as a fraction of the image width."""
    width = img.size[0]
    offset = v*width  # fraction -> pixels
    coefficients = (1, 0, offset, 0, 1, 0)
    return img.transform(img.size, PIL.Image.AFFINE, coefficients)

def TranslateY(img, v):  # [-150, 150] => percentage: [-0.45, 0.45]
    """Translate ``img`` along y; ``v`` is the offset as a fraction of the image height."""
    height = img.size[1]
    offset = v*height  # fraction -> pixels
    coefficients = (1, 0, 0, 0, 1, offset)
    return img.transform(img.size, PIL.Image.AFFINE, coefficients)

def Rotate(img, v):  # [-30, 30]
    """Return ``img`` rotated by ``v``, delegating to the image's own ``rotate`` method."""
    rotated = img.rotate(v)
    return rotated

def AutoContrast(img, _):
    """Apply PIL's autocontrast to ``img``; the magnitude argument is ignored."""
    autocontrast = PIL.ImageOps.autocontrast
    return autocontrast(img)

def Invert(img, _):
    """Apply PIL's invert to ``img``; the magnitude argument is ignored."""
    invert = PIL.ImageOps.invert
    return invert(img)

def Equalize(img, _):
    """Apply PIL's histogram equalize to ``img``; the magnitude argument is ignored."""
    equalize = PIL.ImageOps.equalize
    return equalize(img)

def Flip(img, _):  # not from the paper
    """Mirror ``img`` with PIL's ``ImageOps.mirror``; the magnitude argument is ignored."""
    mirror = PIL.ImageOps.mirror
    return mirror(img)

def Solarize(img, v):  # [0, 256]
    """Solarize ``img`` through PIL, passing ``v`` as the threshold argument."""
    threshold = v
    return PIL.ImageOps.solarize(img, threshold)

def Posterize(img, v):  # [4, 8]
    """Posterize ``img`` through PIL; ``v`` is truncated to an int before being passed as the bit count."""
    bits = int(v)
    return PIL.ImageOps.posterize(img, bits)

def Contrast(img, v):  # [0.1,1.9]
    """Adjust the contrast of ``img`` by enhancement factor ``v``."""
    enhancer = PIL.ImageEnhance.Contrast(img)
    return enhancer.enhance(v)

def Color(img, v):  # [0.1,1.9]
    """Adjust the colour balance of ``img`` by enhancement factor ``v``."""
    enhancer = PIL.ImageEnhance.Color(img)
    return enhancer.enhance(v)

def Brightness(img, v):  # [0.1,1.9]
    """Adjust the brightness of ``img`` by enhancement factor ``v``."""
    enhancer = PIL.ImageEnhance.Brightness(img)
    return enhancer.enhance(v)

def Sharpness(img, v):  # [0.1,1.9]
    """Adjust the sharpness of ``img`` by enhancement factor ``v``."""
    enhancer = PIL.ImageEnhance.Sharpness(img)
    return enhancer.enhance(v)

def Cutout(img, v):  # [0, 60] => percentage: [0, 0.2]
    """
    Paint a grey square over a random region of a copy of ``img``.

    Args:
      img: PIL image. It is copied before drawing, so the input is not
        modified.
      v: side length of the square, as a fraction of the image width.

    Returns:
      The copied image with a (127, 127, 127) square drawn on it.
    """
    w, h = img.size
    # The patch is square: its side is derived from the width only.
    v = v*w
    # np.random.uniform's first positional argument is `low` (with `high`
    # defaulting to 1.0), so a single argument would sample between 1 and
    # w-v and never place the patch against the top/left edge. Both bounds
    # are given explicitly so the corner is drawn from [0, size - v).
    x0 = np.random.uniform(0, w-v)
    y0 = np.random.uniform(0, h-v)
    xy = (x0, y0, x0+v, y0+v)
    color = (127, 127, 127)
    img = img.copy()
    PIL.ImageDraw.Draw(img).rectangle(xy, color)
    return img

  
# JG - Removed this one for simplicity, as it was the only operation
# that required passing multiple images
# We might want to add it back later

# def SamplePairing(imgs):  # [0, 0.4]
#     def f(img1, v):
#         i = np.random.choice(len(imgs))
#         img2 = PIL.Image.fromarray(imgs[i])
#         return PIL.Image.blend(img1, img2, v)
#     return f

def getTransformations():
    """
    List the image transforms available to the search.

    Returns:
      A list of (function, minMagnitude, maxMagnitude) tuples.
    """
    geometricOps = [
        (ShearX, -0.3, 0.3),
        (ShearY, -0.3, 0.3),
        (TranslateX, -0.45, 0.45),
        (TranslateY, -0.45, 0.45),
        (Rotate, -30, 30),
    ]
    # These three share the same placeholder (0, 1) magnitude range.
    fixedRangeOps = [(op, 0, 1) for op in (AutoContrast, Invert, Equalize)]
    pixelOps = [
        (Solarize, 0, 256),
        (Posterize, 4, 8),
        (Contrast, 0.1, 1.9),
        (Color, 0.1, 1.9),
        (Brightness, 0.1, 1.9),
        (Sharpness, 0.1, 1.9),
    ]
    occlusionOps = [(Cutout, 0, 0.2)]
    # Left out for now: (SamplePairing(imgs), 0, 0.4)
    return geometricOps + fixedRangeOps + pixelOps + occlusionOps
# Module-level transform table; passed as `transforms=` when the Controller is built.
transformations = getTransformations()

In [0]:

class Operation():
  """A single augmentation step: one transform, applied with a fixed
  probability and magnitude to each image in a batch."""

  def __init__(self,
               transformFunction,
               probability,
               magnitude):
    """
    Args: 
      transformFunction: a callable that takes a single PIL image and a
        magnitude value, and returns the transformed PIL image
        
      probability: float between 0 and 1. When this operation is called,
        there is a [probability] chance of applying transformFunction to
        each image, so a probability of 0 indicates the transformFunction
        is never called
        
      magnitude: float to pass to the transformFunction when this Operation is 
        called

    Raises:
      AssertionError: if probability is outside [0, 1]
    """
    
    assert (probability >= 0.0) and (probability <= 1.0)
    
    self.transformFunction = transformFunction
    self.probability = probability
    self.magnitude = magnitude
    # Use the callable's own name metadata instead of parsing its repr:
    # splitting str(fn) on spaces yields 'function' for builtins, 'object'
    # for callable instances, and can raise IndexError for short reprs.
    # For plain module-level functions the result is the same as before.
    self.transformName = (getattr(transformFunction, '__qualname__', None)
                          or getattr(transformFunction, '__name__', None)
                          or type(transformFunction).__name__)
    
  def __call__(self, X):
    """
    Takes a list of numpy arrays, and one at a time:
      converts to PIL image
      With self.probability chance, apply self.transformFunction
      with magnitude self.magnitude.
    Converts back to numpy array
    Returns the results stacked into a single np.array
    """
    transformed = []
    for x in X:
      pilX = PIL.Image.fromarray(x)
      # The coin is flipped per image, not once per batch.
      if np.random.rand() < self.probability:
        pilX = self.transformFunction(pilX, self.magnitude)
      transformed.append(np.array(pilX))
    return np.array(transformed)
  
  def __str__(self):
    """Human-readable summary: transform name, probability and magnitude."""
    return '%s (P=%.3f, M=%.3f)' % (self.transformName,
                                    self.probability,
                                    self.magnitude)
  
class Subpolicy():
  """An ordered chain of operations applied one after another."""

  def __init__(self, operations):
    """Store the sequence of callables that make up this subpolicy."""
    self.operations = operations
  
  def __call__(self, X):
    """Feed X through every operation in order and return the final result."""
    result = X
    for step in self.operations:
      result = step(result)
    return result
  
  def __str__(self):
    """One line per operation, in application order."""
    return '\n'.join(str(step) for step in self.operations)
    
    

In [0]:
from keras import models, layers, initializers
import tensorflow as tf

    
class Controller():
  
    def __init__(self, lstmUnits=100, minibatchSize=8, lr=1e-2,
                nSubpolicies=5, nOpsPerSubpolicy=2,
                transforms=[], nProbabilities=6, nMagnitudes=5):

      """
      Controller object which generates transformation policies,
      sets of subpolicies each of which contain transform operations
      
      Consists of a neural net which takes a constant "dummy" input state
      Feeds through a LSTM, then a set of parallel dense layers with
      softmax activation.  Each dense layer corresponds with a decision
      in selecting the policy: A transform type, probability, and magnitude
      for each Operation
      
      Args:
      
      lstmUnits: The output dimension of the LSTM encoder
      
      minibatchSize: How many times to sample policies and train a child model
                     before fitting based on those sample policy/accuracy
                     pairs
      lr: Learning rate for the controller net
      
      nSubpolicies:  The number of different subpolicies to generate
      
      nOpsPerSubpolicy: The number of operations in each subpolicy
      
      transforms: a list of tuples identifying the possible image
                  transformations, each representing
                  (function, minMagnitude, maxMagnitude).
                  The list is only read, never modified.

      nProbabilities: How many evenly-spaced discrete probability values will 
                      be considered: 11 means that each operation will have a 
                      0%, 10%, 20% ... 80%, 90%, or 100% chance of being
                      applied.
                      
      nMagnitudes: How many evenly-spaced discrete magnitude values will be
                   considered for each operation. This will mean different
                   things to different operations
                   
      """
      # Honour the caller's value (this used to be hard-coded to 100).
      self.lstmUnits = lstmUnits
      self.minibatchSize = minibatchSize
      self.lr = lr
      self.nSubpolicies = nSubpolicies
      self.nOpsPerSubpolicy = nOpsPerSubpolicy
      self.transforms = transforms
      self.nTransforms = len(transforms)
      self.nProbabilities = nProbabilities
      self.nMagnitudes = nMagnitudes
      # Last softmax values fetched from the net; reset to None after fit().
      self.softmaxCache = None
      self.buildModel()
      
    def buildModel(self):

      """
      Builds the neural net to generate policy probabilities.
      
      Each forward pass produces 3 softmax for each operation:
      Operation Type, Probability, Magnitude.
      Each subpolicy has self.nOpsPerSubpolicy such Operations, and each
      policy has self.nSubpolicies such subpolicies.

      To set the loss we want to minimize, we need the following placeholders,
      which are given in feedDict each training step:
        selectionMask_ph: Binary mask for each softmax layer indicating which 
          operation type, probability, and magnitude was selected for each
          Operation in each Subpolicy. 
          All concatenated together to match the shape of the concatSoftmaxes

        score_ph: Single score for each sample

      Each training step, we apply the selectionMask_ph to the concatSoftmaxes,
      so backprop will only update the weights leading to the subpolicy choices
      made in this particular step.  These are multiplied by the 
      score we pass, and we create an optimizer to maximize the mean of this
      tensor. (well, minimize for negative score)

      If the model did well, we pass a positive accuracy score along with the
      selectionMask.  The optimizer will update our weights which would lead
      to the softmax selections we saw.
      """
      self.input_ph = tf.placeholder(shape=(None, 1, 1),
                                     dtype=tf.float32,
                                     name='inputLayer')

      self.lstmLayerOutput = tf.keras.layers.CuDNNLSTM(
                            units=self.lstmUnits,
                            return_sequences=True,
                            name='controller')(self.input_ph)

      # One softmax head per decision, ordered (type, probability, magnitude)
      # for every operation of every subpolicy. getSoftmaxes() and getPolicy()
      # rely on this ordering. The attribute must be `softmaxOutputs`: that is
      # the name appended to below and fetched in getSoftmaxes().
      self.softmaxOutputs = []

      for j in range(self.nSubpolicies):
        for i in range(self.nOpsPerSubpolicy):
          name = 'pol%d-op%d-' % (j+1, i+1)

          for units, n in [[self.nTransforms, 't'],
                           [self.nProbabilities, 'p'],
                           [self.nMagnitudes, 'm']]:
            self.softmaxOutputs.append(layers.Dense(units,
                                      activation='softmax',
                                      name=name + n)(self.lstmLayerOutput))

      concatSoftmaxes = tf.concat(self.softmaxOutputs, axis=2,
                                  name='concatSoftmaxes')
      self.score_ph = tf.placeholder(shape=(), dtype=tf.float32)

      maskSize = (self.nTransforms + self.nProbabilities + self.nMagnitudes)
      maskSize *= self.nSubpolicies
      maskSize *= self.nOpsPerSubpolicy

      self.selectionMask_ph = tf.placeholder(
                           shape=(maskSize,),  # explicit 1-tuple, not a bare int
                           dtype=tf.float32,
                           name='selectionMask')

      maskedSoftmaxes = self.selectionMask_ph * concatSoftmaxes
      # NOTE(review): this weights raw probabilities by the score; the usual
      # policy-gradient form uses log-probabilities. Left as designed - confirm.
      loss = -tf.reduce_mean(self.score_ph * maskedSoftmaxes)
      self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(loss)      
      
    def fit(self, selectionMasks, scores):
      """
      Fits the model based on a batch of data tuples, (mask, score)
      indicating a policy selection and the accuracy of a child model
      trained with that data augmentation policy.

      Only the last self.minibatchSize (mask, score) pairs are used, one
      optimizer step per pair. Each score is centred by the mean of ALL the
      scores passed in. Clears the softmax cache, since the weights change.

      Returns:
        self
      """
      
      meanScore = np.mean(scores)
      
      session = backend.get_session()
      assert len(selectionMasks) >= self.minibatchSize
      assert len(scores) >= self.minibatchSize
      
      for mask, score in zip(selectionMasks[-self.minibatchSize:],
                             scores[-self.minibatchSize:]):
        fd = {}
        fd[self.selectionMask_ph] = mask
        fd[self.score_ph] = score - meanScore
        fd[self.input_ph] = np.zeros([1,1,1])
        # Must run inside the loop: the placeholders hold a single sample, so
        # running once after the loop would train on the last pair only.
        session.run(self.optimizer, feed_dict=fd)
      self.softmaxCache = None
      
      return self
    
    def getSoftmaxes(self):
      """
      Runs the net on the constant zero input and returns the current softmax
      outputs as a list of squeezed np.arrays (one per decision), caching them
      in self.softmaxCache.
      """
      session = backend.get_session()
      softmaxes = session.run(self.softmaxOutputs,
                              feed_dict={self.input_ph:np.zeros([1,1,1])})
      softmaxes = [np.squeeze(s) for s in softmaxes]
      self.softmaxCache = softmaxes
      
      return softmaxes
   
    def getPolicy(self, softmaxes=None):
      """
      Generates a policy and the corresponding selectionMask      
      based on the passed softmaxes or the softmaxes from the previous
      time the model was used to create softmaxes
      
      Args:
        softmaxes: a list of one-dimensional np.arrays, each 3 corresponding to
          [probabilities of selecting each transformFunction type],
          [probabilities of selecting each probability],
          [probabilities of selecting each magnitude],
          ...
      
      Returns:
        policy: is a list of Subpolicies, each with Operations, selected based
          on the probabilities passed in softmaxes
        selectionMask is a flat list of 0./1. values, indicating
          which of the options was selected

      
      """
      if softmaxes is None:
        if self.softmaxCache is None:
          softmaxes = self.getSoftmaxes()
        else:
          softmaxes = self.softmaxCache
        
      # Rows of these identity matrices are the one-hot encodings of a choice.
      typeIdentity = np.eye(self.nTransforms)
      probIdentity = np.eye(self.nProbabilities)
      magIdentity = np.eye(self.nMagnitudes)
      # The probability grid is the same for every operation.
      probabilityGrid = np.linspace(0, 1, self.nProbabilities)

      subpolicies = []
      sRow = 0 #I find this easier to interpret
      
      policySelectionMask = []
      for i in range(self.nSubpolicies):
        operations = []
        for j in range(self.nOpsPerSubpolicy):
          typeSelection = np.random.choice(self.nTransforms, p=softmaxes[sRow])
          probSelection = np.random.choice(self.nProbabilities, p=softmaxes[sRow+1])
          magSelection = np.random.choice(self.nMagnitudes, p=softmaxes[sRow+2])
          
          policySelectionMask.extend(typeIdentity[typeSelection])
          policySelectionMask.extend(probIdentity[probSelection])          
          policySelectionMask.extend(magIdentity[magSelection])
          
          transform = self.transforms[typeSelection]
          
          probability = probabilityGrid[probSelection]
          # The magnitude grid depends on the chosen transform's (min, max).
          magnitude = np.linspace(transform[1],
                                  transform[2],
                                  self.nMagnitudes)[magSelection]
          operations.append(Operation(transform[0], probability, magnitude))
          sRow += 3
        subpolicies.append(Subpolicy(operations))
      return subpolicies, policySelectionMask

    def interpretMask(self, mask):
      """
      Takes a selectionMask and returns the policy it indicates.
      This is useful if we want to take the selections which scored well
      and make the corresponding policies/subpolicies

      The mask is cut back into per-decision one-hot slices, which are then
      passed to getPolicy as if they were softmax outputs.
      """
      maskPosition = 0
      fauxSoftmaxes = []

      for i in range(self.nSubpolicies):
        for j in range(self.nOpsPerSubpolicy):
          fauxSoftmaxes.append(mask[maskPosition: maskPosition+self.nTransforms])
          maskPosition += self.nTransforms

          fauxSoftmaxes.append(mask[maskPosition: maskPosition+self.nProbabilities])
          maskPosition += self.nProbabilities

          fauxSoftmaxes.append(mask[maskPosition: maskPosition+self.nMagnitudes])
          maskPosition += self.nMagnitudes
      return self.getPolicy(fauxSoftmaxes)[0]


In [0]:
def augmentGenerator(policy, X, y, batch):
  """
  Endlessly yields augmented training batches.

  Every pass reshuffles the data and walks it in chunks of `batch` samples
  (a trailing partial chunk is dropped). For each chunk one subpolicy is
  picked uniformly at random from `policy` and applied to the whole chunk,
  and the result is scaled to float32 by dividing by 255.

  Args:
    policy: indexable sequence of callables, each mapping a batch of images
      to an np.array of transformed images
    X: np.array of images, indexable by an integer index array
    y: np.array of labels aligned with X
    batch: number of samples per yielded batch

  Yields:
    (transformedX, batchy) tuples

  Raises:
    ValueError: on first iteration, if X holds fewer than `batch` samples.
      Without this check the generator would loop forever and never yield.
  """
  nBatches = len(X) // batch
  if nBatches < 1:
    raise ValueError('batch size %d is larger than the %d available samples'
                     % (batch, len(X)))
  while True:
    ix = np.arange(len(X))
    np.random.shuffle(ix)
    for i in range(nBatches):
      batchIndexes = ix[i*batch:(i+1)*batch]
      batchX = X[batchIndexes]
      batchy = y[batchIndexes]
      # Pick by index so numpy never tries to coerce the policy objects into
      # an array (which misbehaves if they are themselves sequence-like).
      subpolicy = policy[np.random.randint(len(policy))]
      transformedX = subpolicy(batchX)
      transformedX = transformedX.astype(np.float32) / 255
      yield transformedX, batchy

In [6]:
from keras import datasets
from keras.utils import to_categorical

# Load CIFAR-10 (downloaded on first use).
(XtrainFull, ytrainFull), (Xtest, ytest) = datasets.cifar10.load_data()

# Keep a random 500-image subset of the training data for the child models.
ix = np.arange(len(XtrainFull))
np.random.shuffle(ix)
subsetIndexes = ix[:500]
Xtrain = XtrainFull[subsetIndexes]
ytrain = to_categorical(ytrainFull[subsetIndexes])

# One-hot encode the test labels.
ytest = to_categorical(ytest)


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [0]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.losses import categorical_crossentropy
from keras.layers import Conv2D, MaxPooling2D

class Child():
  """Small CNN classifier that is trained with a candidate augmentation policy."""

  def __init__(self, epochs=256, batchSize=64, inputShape=[32,32,3],
              num_classes=10):
    """
    Builds and compiles the network.

    Args:
      epochs: number of epochs used by fit()
      batchSize: stored on the instance for callers building batch generators
      inputShape: shape of a single input image
      num_classes: width of the final softmax layer
    """
    # architecture from: https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py

    self.epochs = epochs
    self.batchSize = batchSize
    self.inputShape = inputShape

    # Options shared by all three convolution layers.
    convOptions = dict(activation='relu', padding='same')
    network = Sequential([
        Conv2D(32, kernel_size=(3, 3), input_shape=inputShape, **convOptions),
        Conv2D(64, (3, 3), **convOptions),
        Conv2D(64, (3, 3), **convOptions),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])
    network.compile(optimizer='adam',
                    loss=categorical_crossentropy,
                    metrics=['accuracy'])

    self.model = network

  def fit(self, generator, nbatches):
    """Trains on `generator` for self.epochs epochs of `nbatches` steps; returns self."""
    fitOptions = dict(steps_per_epoch=nbatches,
                      epochs=self.epochs,
                      use_multiprocessing=True,
                      verbose=0)
    self.model.fit_generator(generator, **fitOptions)
    return self

  def evaluate(self, X, y):
    """Scales X by 1/255 and returns the second value reported by model.evaluate
    (the accuracy metric configured at compile time)."""
    scaledX = X/255.
    metrics = self.model.evaluate(scaledX, y, verbose=0)
    return metrics[1]
    

In [13]:
# Build a throwaway Child just to print its layer-by-layer architecture summary.
Child().model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_7 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 32, 32, 64)        18496     
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 32, 32, 64)        36928     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 16, 16, 64)        0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 16, 16, 64)        0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 16384)             0         
_________________________________________________________________
dense_5 (Dense)              (None, 128)               2097280   
__________

In [0]:
# --- Build the controller and report how long construction took ---
buildStart = time.time()
print('Preparing Controller')
controller = Controller(transforms=transformations,
                        minibatchSize=16,
                        lr=3e-2,
                        nSubpolicies=5)
buildSeconds = time.time() - buildStart
print("Controller ready, took %ds" % buildSeconds)

# History across all iterations; controller.fit() reads the tail of these lists.
accuracies = []
selectionMasks = []
savedSoftmaxes = []

for i in range(32):
  print("Iteration %d" % i)

  # All policies sampled in this iteration come from the same softmax snapshot.
  softmaxes = controller.getSoftmaxes()
  savedSoftmaxes.append(softmaxes)

  for policySample in range(controller.minibatchSize):
    policy, selectionMask = controller.getPolicy(softmaxes)
    selectionMasks.append(selectionMask)

    # A fresh child is trained from scratch for every sampled policy.
    child = Child()
    stepsPerEpoch = len(Xtrain) // child.batchSize

    trainStart = time.time()
    aug = augmentGenerator(policy, Xtrain, ytrain, child.batchSize)
    child.fit(aug, stepsPerEpoch)
    trainSeconds = time.time() - trainStart

    # NOTE(review): the reward is measured on the test split - confirm intended.
    accuracy = child.evaluate(Xtest, ytest)
    print('-> Child accuracy: %.3f (elaspsed time: %ds)' % (accuracy, trainSeconds))
    accuracies.append(accuracy)

  # Update the controller from the policies sampled in this iteration.
  controller.fit(selectionMasks, accuracies)


Preparing Controller
Controller ready, took 4s
Iteration 0
-> Child accuracy: 0.329 (elaspsed time: 64s)
-> Child accuracy: 0.353 (elaspsed time: 50s)
-> Child accuracy: 0.384 (elaspsed time: 50s)
-> Child accuracy: 0.380 (elaspsed time: 52s)
-> Child accuracy: 0.363 (elaspsed time: 55s)
-> Child accuracy: 0.341 (elaspsed time: 52s)
-> Child accuracy: 0.362 (elaspsed time: 52s)
-> Child accuracy: 0.371 (elaspsed time: 52s)
-> Child accuracy: 0.309 (elaspsed time: 52s)
-> Child accuracy: 0.381 (elaspsed time: 53s)
-> Child accuracy: 0.361 (elaspsed time: 58s)
-> Child accuracy: 0.330 (elaspsed time: 63s)
-> Child accuracy: 0.352 (elaspsed time: 65s)
-> Child accuracy: 0.390 (elaspsed time: 65s)
-> Child accuracy: 0.381 (elaspsed time: 52s)
-> Child accuracy: 0.378 (elaspsed time: 59s)
Iteration 1
-> Child accuracy: 0.345 (elaspsed time: 53s)
-> Child accuracy: 0.371 (elaspsed time: 54s)
-> Child accuracy: 0.406 (elaspsed time: 55s)
-> Child accuracy: 0.375 (elaspsed time: 67s)
-> Child 