In [1]:
import os
import ipdb
%matplotlib inline
os.environ["CUDA_VISIBLE_DEVICES"]="1"

# Gated Pooling 

In [79]:
import tensorflow as tf
from keras import backend as K

# Add new layer in Keras
# https://keunwoochoi.wordpress.com/2016/11/18/for-beginners-writing-a-custom-keras-layer/
# define gated max-average pooling lyaer
class GatedPooling2D(MaxPooling2D):
    def call(self, inputs, fil=32, size =2, learn_option='l/c', mask=None, data_format = 'tf'):
        """Gated pooling operation, responsive
        Combine max pooling and average pooling in a mixing proportion,
        which is obtained from the inner product between the gating mask and the region being
        pooled and then fed through a sigmoid:
           fgate(x) =  sigmoid(w*x)* fmax(x) + (1-sigmoid(w*x))* favg(x)
           arguments:
             inputs: input of shape [batch size, height, width, channels]
             filter: filter size of the input layer, used to initialize gating mask
             size: an integer, width and height of the pooling filter
             learn_option: learning options of gated pooling, include:
                            'l/c': learn a mask per layer/channel
                            'l/r/c': learn a mask per layer/pooling region/channel combined
           return:
             outputs: tensor with the shape of [batch_size, height//size, width//size, channels]
        """
        if learn_option == 'l':
            gating_mask = all_channel_connected2d(inputs)
        if learn_option == 'l/c':
            w_gated = tf.Variable(tf.truncated_normal([size,size,fil,fil]))
            gating_mask = K.conv2d(inputs, w_gated, strides=(size,size), padding=self.padding)
        if learn_option == 'l/r/c':
            gating_mask = locally_connected2d(inputs)

        alpha = tf.sigmoid(gating_mask)

        x1 = K.pool2d(inputs, pool_size = (size, size), strides=(2,2), padding=self.padding, data_format=self.data_format, pool_mode = "max")
        x2 = K.pool2d(inputs,  pool_size = (size, size), strides=(2,2), padding=self.padding, data_format=self.data_format, pool_mode= "avg")
        outputs = tf.add(tf.multiply(x1, alpha), tf.multiply(x2, (1-alpha)))
        return outputs


    #locally connected layer (unshared-weights conv, layer),
    # designed for gated pooling, learn a param "per layer/region/channel"
    def locally_connected2d(x, size = 2):
        """
        The `LocallyConnected2D` layer works similarly
        to the `Convolution2D` layer, except that weights are unshared,
        that is, a different set of filters is applied at each
        different patch of the input.
        NOTE: No bias or activation function applied. No overlapping between sub-region.
        arguments:
            x: 4D tensor with shape: [samples, rows, cols, channels]
            size: width and height of the filter, default 2x2 filter.
                  this is also the length of stride to ensure no overlapping
        returns:
            4D tensor with shape: [samples, new_rows, new_cols, nb_filter]
            `rows` and `cols` values might have changed due to padding.
        """

        xs = []
        _, input_row, input_col, nb_filter = x.get_shape().as_list()
        output_row = input_row //2
        output_col = input_col //2
        nb_row = size
        nb_col = size
        stride_row = size
        stride_col = size
        feature_dim = nb_row * nb_col * nb_filter

        w_shape = (output_row * output_col,
                   nb_row * nb_col * nb_filter,
                   nb_filter)
        mask = tf.Variable(tf.truncated_normal(w_shape, stddev=2./(w_shape[0]*w_shape[1]*2)**0.5))
        for i in range(output_row):
            for j in range(output_col):
                slice_row = slice(i * stride_row,
                                  i * stride_row + nb_row)
                slice_col = slice(j * stride_col,
                                  j * stride_col + nb_col)
                xs.append(tf.reshape(x[:, slice_row, slice_col, :], (1, -1, feature_dim)))
        x_aggregate = tf.concat(0, xs)
        output = tf.matmul(x_aggregate, mask)
        output = tf.reshape(output, (output_row, output_col, -1, nb_filter))
        output = tf.transpose(output, perm=[2, 0, 1, 3])

        return output



    #design for gated pooling, learn a param "per layer" option
    def all_channel_connected2d(x, size=2):
        """
        The all channel connected layer is a modified version of
        Convolutional layer,
        which shares the same weights not only between each patch,
        but also between all channels of the layer input. That is,
        the whole layer only has one filter
        NOTE: 'VALID', no bias, no activation function.
        arguments:
            x: 4D tensor with shape: [batch_size, rows, cols, channels]
            size: width and height of the filter, default 2x2 filter.
                  this is also the length of stride to ensure no overlapping
        returns:
            4D tensor with shape: [batch_size, new_rows, new_cols, nb_filter]
        """

        nb_batch, input_row, input_col, nb_filter = x.get_shape().as_list()
        output_size = input_row //2
        mask = tf.Variable(tf.truncated_normal([size,size,1,1], stddev=2./(size*size*2)**0.5))

        xs = []
        for c in tf.split(x, nb_filter, 3):
            xs.append(tf.nn.conv2d(c, mask, strides=[1,1,1,1], padding='VALID'))
        output = tf.reshape(x, [nb_batch, output_size, output_size, nb_filter])

        return output

# Mixed Pooling 

In [84]:
import tensorflow as tf
from keras.engine.topology import Layer
from keras import backend as K
from keras.layers import MaxPooling2D

# define mixed max-average pooling layer
class MixedPooling2D(MaxPooling2D):
    def call(self, inputs, alpha = -1, size =2, mask=None, data_format = 'tf'):
        
    # def mixed_pooling (inputs, alpha, size=2):
        """Mixed pooling operation, nonresponsive
           Combine max pooling and average pooling in fixed proportion specified by alpha a:
            f mixed (x) = a * f max(x) + (1-a) * f avg(x)
            arguments:
              inputs: tensor of shape [batch size, height, width, channels]
              size: an integer, width and height of the pooling filter
              alpha: the scalar mixing proportion of range [0,1]
            return:
              outputs: tensor of shape [batch_size, height//size, width//size, channels]
        """

        if alpha == -1:
            alpha = tf.Variable(0.0)
        x1 = K.pool2d(inputs, pool_size = (size, size), strides=(2,2), padding=self.padding, data_format=self.data_format, pool_mode = "max")
        x2 = K.pool2d(inputs,  pool_size = (size, size), strides=(2,2), padding=self.padding, data_format=self.data_format, pool_mode= "avg")
        outputs = tf.add(tf.multiply(x1, alpha), tf.multiply(x2, (1-alpha)))

        return outputs

# Setup Data

In [49]:
MNIST_data = '../STN-exp/MNIST-data/'
model_dir = '../STN-exp/models/'

In [90]:
## setup_mnist.py -- mnist data and model loading code
##
## Copyright (C) 2016, Nicholas Carlini <nicholas@carlini.com>.
##
## This program is licenced under the BSD 2-Clause licence,
## contained in the LICENCE file in this directory.

import tensorflow as tf
import numpy as np
import os
import pickle
import gzip
import urllib

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras.models import load_model

def extract_data(filename, num_images):
    with gzip.open(filename) as bytestream:
        bytestream.read(16)
        buf = bytestream.read(num_images*28*28)
        data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
        data = (data / 255) - 0.5
        data = data.reshape(num_images, 28, 28, 1)
        return data

def extract_labels(filename, num_images):
    with gzip.open(filename) as bytestream:
        bytestream.read(8)
        buf = bytestream.read(1 * num_images)
        labels = np.frombuffer(buf, dtype=np.uint8)
    return (np.arange(10) == labels[:, None]).astype(np.float32)

class MNIST:
    def __init__(self):
        if not os.path.exists(MNIST_data):
            os.mkdir(MNIST_data)
            files = ["train-images-idx3-ubyte.gz",
                     "t10k-images-idx3-ubyte.gz",
                     "train-labels-idx1-ubyte.gz",
                     "t10k-labels-idx1-ubyte.gz"]
            for name in files:

                urllib.urlretrieve('http://yann.lecun.com/exdb/mnist/' + name, "MNIST_data/"+name)

        train_data = extract_data(MNIST_data + "//train-images-idx3-ubyte.gz", 60000)
        train_labels = extract_labels(MNIST_data + "/train-labels-idx1-ubyte.gz", 60000)
        self.test_data = extract_data(MNIST_data + "/t10k-images-idx3-ubyte.gz", 10000)
        self.test_labels = extract_labels(MNIST_data + "/t10k-labels-idx1-ubyte.gz", 10000)
        
        VALIDATION_SIZE = 5000
        
        self.validation_data = train_data[:VALIDATION_SIZE, :, :, :]
        self.validation_labels = train_labels[:VALIDATION_SIZE]
        self.train_data = train_data[VALIDATION_SIZE:, :, :, :]
        self.train_labels = train_labels[VALIDATION_SIZE:]
        self.num_classes = train_labels[1:]

        
MNIST_adv_data = '../STN-exp/mnist_adv_data'
class MNIST_Adv:
    def __init__(self):
        if not os.path.exists(MNIST_data):
            os.mkdir(MNIST_data)
        data = MNIST()
        train_data = np.load(MNIST_adv_data+"/adv_inputs.npy")
        train_labels = np.load(MNIST_adv_data+"/true_labels.npy")
#         train_targets = np.load(MNIST_adv_data+"/adv_targets.npy")
        train_data = np.concatenate((train_data, data.train_data))
        train_labels = np.concatenate((train_labels, data.train_labels))
        VALIDATION_SIZE = 90
        
        self.validation_data = train_data[:VALIDATION_SIZE, :, :, :]
        self.validation_labels = train_labels[:VALIDATION_SIZE]
        self.train_data = train_data[VALIDATION_SIZE:, :, :, :]
        self.train_labels = train_labels[VALIDATION_SIZE:]
        self.num_classes = train_labels[1:]

# Train normal baseline models

In [66]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Input
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
from keras.optimizers import SGD
from keras.models import Model

import tensorflow as tf
# from setup_mnist import MNIST
# from setup_cifar import CIFAR
import os

def train_normal(data, file_name, params, num_epochs=50, batch_size=128, train_temp=1, init=None):

    model = Sequential()
    model.add(Conv2D(params[0], (3, 3),
                            input_shape=data.train_data.shape[1:]))
    model.add(Activation('relu'))
    model.add(Conv2D(params[1], (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size = (2,2) ))
    
    model.add(Conv2D(params[2], (3, 3)))
    model.add(Activation('relu'))
    model.add(Conv2D(params[3], (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size = (2,2)  ))

    model.add(Flatten())
    model.add(Dense(params[4]))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(params[5]))
    model.add(Activation('relu'))
    model.add(Dense(10))
    # no softmax
    
    print(model.summary)
   
    if init != None:
        model.load_weights(init)

    def fn(correct, predicted):
        return tf.nn.softmax_cross_entropy_with_logits(labels=correct,
                                                       logits=predicted/train_temp)

    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    
    model.compile(loss=fn,
                  optimizer=sgd,
                  metrics=['accuracy'])
    
    model.fit(data.train_data, data.train_labels,
              batch_size=batch_size,
              validation_data=(data.validation_data, data.validation_labels),
              epochs=num_epochs,
              shuffle=True)
    

    if file_name != None:
        model.save(file_name)

    return model

# Normal baseline

In [69]:
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

train_normal(MNIST(), model_dir+"/mnist_baseline", [32, 32, 64, 64, 200, 200], num_epochs=50)


<bound method Sequential.summary of <keras.models.Sequential object at 0x7f35a21caa50>>
Train on 55000 samples, validate on 5000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.models.Sequential at 0x7f35a21caa50>

# normal baseline on adv examples

In [None]:
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

train_normal(MNIST_Adv(), model_dir+"/mnist_baseline_adv", [32, 32, 64, 64, 200, 200], num_epochs=50, init = None)

# Train Mixed Pooilng models

In [85]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Input
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
from keras.optimizers import SGD
from keras.models import Model

import tensorflow as tf
# from setup_mnist import MNIST
# from setup_cifar import CIFAR
import os

def train(data, file_name, params, num_epochs=50, batch_size=128, train_temp=1, init=None):

    model = Sequential()
    model.add(Conv2D(params[0], (3, 3),
                            input_shape=data.train_data.shape[1:]))
    model.add(Activation('relu'))
    model.add(Conv2D(params[1], (3, 3)))
    model.add(Activation('relu'))
    model.add(MixedPooling2D( ))
    
    model.add(Conv2D(params[2], (3, 3)))
    model.add(Activation('relu'))
    model.add(Conv2D(params[3], (3, 3)))
    model.add(Activation('relu'))
    model.add(MixedPooling2D( ))

    model.add(Flatten())
    model.add(Dense(params[4]))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(params[5]))
    model.add(Activation('relu'))
    model.add(Dense(10))
    # no softmax
    
    print(model.summary)
   
    if init != None:
        model.load_weights(init)

    def fn(correct, predicted):
        return tf.nn.softmax_cross_entropy_with_logits(labels=correct,
                                                       logits=predicted/train_temp)

    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    
    model.compile(loss=fn,
                  optimizer=sgd,
                  metrics=['accuracy'])
    
    model.fit(data.train_data, data.train_labels,
              batch_size=batch_size,
              validation_data=(data.validation_data, data.validation_labels),
              epochs=num_epochs,
              shuffle=True)
    

    if file_name != None:
        model.save(file_name)

    return model

# Mixed Baseline

In [86]:
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

train(MNIST(), model_dir+"/mnist_mixedpool", [32, 32, 64, 64, 200, 200], num_epochs=50)


<bound method Sequential.summary of <keras.models.Sequential object at 0x7f359b0a8e50>>
Train on 55000 samples, validate on 5000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.models.Sequential at 0x7f359b0a8e50>

# Mixedpool on Spatially transformed examples

In [None]:
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

train(MNIST_Adv(), model_dir+"/mnist_mixedpool_adv", [32, 32, 64, 64, 200, 200], num_epochs=50, init = None)

<bound method Sequential.summary of <keras.models.Sequential object at 0x7f34a8ab5150>>
Train on 55090 samples, validate on 90 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50

# Train GatedPool

In [81]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Input
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
from keras.optimizers import SGD
from keras.models import Model

import tensorflow as tf
# from setup_mnist import MNIST
# from setup_cifar import CIFAR
import os

def train_gated(data, file_name, params, num_epochs=50, batch_size=128, train_temp=1, init=None):

    model = Sequential()
    model.add(Conv2D(params[0], (3, 3),
                            input_shape=data.train_data.shape[1:]))
    model.add(Activation('relu'))
    model.add(Conv2D(params[1], (3, 3)))
    model.add(Activation('relu'))
    model.add(GatedPooling2D(fil=params[1] ))
    
    model.add(Conv2D(params[2], (3, 3)))
    model.add(Activation('relu'))
    model.add(Conv2D(params[3], (3, 3)))
    model.add(Activation('relu'))
    model.add(GatedPooling2D( fil=params[3]))

    model.add(Flatten())
    model.add(Dense(params[4]))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(params[5]))
    model.add(Activation('relu'))
    model.add(Dense(10))
    # no softmax
    
    print(model.summary)
   
    if init != None:
        model.load_weights(init)

    def fn(correct, predicted):
        return tf.nn.softmax_cross_entropy_with_logits(labels=correct,
                                                       logits=predicted/train_temp)

    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    
    model.compile(loss=fn,
                  optimizer=sgd,
                  metrics=['accuracy'])
    
    model.fit(data.train_data, data.train_labels,
              batch_size=batch_size,
              validation_data=(data.validation_data, data.validation_labels),
              epochs=num_epochs,
              shuffle=True)
    

    if file_name != None:
        model.save(file_name)

    return model

In [82]:
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

train_gated(MNIST(), model_dir+"/mnist_gatedpool", [32, 32, 64, 64, 200, 200], num_epochs=50)

TypeError: ('Keyword argument not understood:', 'fil')