In this notebook I will be implementing the power linear unit activation function, which was presented [here](https://arxiv.org/pdf/1802.00212.pdf). The main advantages of this activation is its negative value noise robustness. There is a non-zero output for negative inputs, this reduces the bias shift effect. By pushing the mean activation closer to zero, the activation function should be able to get more standardized results.

In [10]:
from keras import backend as K
from keras.utils.generic_utils import get_custom_objects
from keras.layers import Activation
from keras.engine.topology import Layer
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops

### Activation Definition

In [20]:
## proof of concept function, implemented in numpy
def polu(x, n=1.5):
    r = x % 1
    if r >= 0:
        return r
    else:
        return ((1-r)**(-n)) - 1

np_polu = np.vectorize(polu)

def d_polu(x, n=1.5):
    r = x % 1
    if r >= 0:
        return 1
    else:
        return (n * (1 - r)**(((-1 * n) - 1)))
    
npd_polu = np.vectorize(d_polu)


np_d_polu_32 = lambda x: npd_polu(x, n=1.5).astype(np.float32)

def tf_d_polu(x, n=1.5, name=None):
    with ops.op_scope([x], name, "d_polu") as name:
        y = tf.py_func(np_d_polu_32,
                        [x],
                        [tf.float32],
                        name=name,
                        stateful=False)
        return y[0]
    
def py_func(func, inp, Tout, stateful=True, name=None, grad=None):
    # Need to generate a unique name to avoid duplicates:
    rnd_name = 'PyFuncGrad' + str(np.random.randint(0, 1E+8))
    tf.RegisterGradient(rnd_name)(grad)
    g = tf.get_default_graph()
    with g.gradient_override_map({"PyFunc": rnd_name}):
        return tf.py_func(func, inp, Tout, stateful=stateful, name=name)

def polugrad(op, grad):
    x = op.inputs[0]

    n_gr = tf_d_polu(x)
    return grad * n_gr  

np_polu_32 = lambda x: np_polu(x, n=1.5).astype(np.float32)

def tf_polu(x, n, name=None):

    with ops.op_scope([x, n], name, "polu") as name:
        y = py_func(np_polu_32,
                        [x],
                        [tf.float32],
                        name=name,
                        grad=polugrad)  # <-- here's the call to the gradient
        return y[0]

### Set Performance Benchmark
In the paper where the activation function is presented, the activation function is tested against the [ReLU activation function](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.165.6419&rep=rep1&type=pdf) and the [ELU activation function](http://image-net.org/challenges/posters/JKU_EN_RGB_Schwarz_poster.pdf). 

Here, I will also test against the ReLU activation function, but also the Leaky ReLU, Parametric ReLU, and SELU. 

*Model Definition*:
I will define a small architecture to be used to classify the CIFAR dataset.

In [None]:
## relevant imports
import keras
from keras.layers import Dense, Conv2D, Dropout, MaxPooling2D, Flatten
from keras.models import Sequential
from keras.datasets import cifar10

In [None]:
batch_size = 128
num_classes = 10
epochs = 20

In [41]:
# The data, shuffled and split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

opt = keras.optimizers.rmsprop(lr=0.01, decay=1e-6)

x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


##### ReLU Activation

In [None]:
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(64, (3,3), padding='valid'))
model.add(Activation('relu'))
model.add(Dropout(0.4))
model.add(Conv2D(128, (3,3), padding='same'))
model.add(Activation('relu'))
model.add(Dropout(0.4))
model.add(Conv2D(64, (3,3), padding='valid'))
model.add(Activation('relu'))
model.add(Conv2D(32, (3,3), padding='valid'))
model.add(Activation('relu'))
model.add(Dropout(0.4))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

In [None]:
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [None]:
model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)

##### Leaky ReLU Activation

In [38]:
from keras.layers.advanced_activations import LeakyReLU

In [43]:
model2 = Sequential()
model2.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model2.add(Activation('relu'))
model2.add(Conv2D(64, (3,3), padding='valid'))
model2.add(LeakyReLU())
model2.add(Dropout(0.4))
model2.add(Conv2D(128, (3,3), padding='same'))
model2.add(LeakyReLU())
model2.add(Dropout(0.4))
model2.add(Conv2D(64, (3,3), padding='valid'))
model2.add(LeakyReLU())
model2.add(Conv2D(32, (3,3), padding='valid'))
model2.add(LeakyReLU())
model2.add(Dropout(0.4))
model2.add(Flatten())
model2.add(Dense(512))
model2.add(LeakyReLU())
model2.add(Dropout(0.5))
model2.add(Dense(num_classes))
model2.add(Activation('softmax'))

In [None]:
model2.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
model2.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)

##### Parametric ReLU

In [42]:
from keras.layers.advanced_activations import PReLU

In [45]:
model3 = Sequential()
model3.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model3.add(Activation('relu'))
model3.add(Conv2D(64, (3,3), padding='valid'))
model3.add(PReLU())
model3.add(Dropout(0.4))
model3.add(Conv2D(128, (3,3), padding='same'))
model3.add(PReLU())
model3.add(Dropout(0.4))
model3.add(Conv2D(64, (3,3), padding='valid'))
model3.add(PReLU())
model3.add(Conv2D(32, (3,3), padding='valid'))
model3.add(PReLU())
model3.add(Dropout(0.4))
model3.add(Flatten())
model3.add(Dense(512))
model3.add(PReLU())
model3.add(Dropout(0.5))
model3.add(Dense(num_classes))
model3.add(Activation('softmax'))

In [None]:
model3.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
model3.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)

##### Scaled Exponential Linear Unit

In [46]:
model4 = Sequential()
model4.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model4.add(Activation('relu'))
model4.add(Conv2D(64, (3,3), padding='valid'))
model4.add(Activation('selu'))
model4.add(Dropout(0.4))
model4.add(Conv2D(128, (3,3), padding='same'))
model4.add(Activation('selu'))
model4.add(Dropout(0.4))
model4.add(Conv2D(64, (3,3), padding='valid'))
model4.add(Activation('selu'))
model4.add(Conv2D(32, (3,3), padding='valid'))
model4.add(Activation('selu'))
model4.add(Dropout(0.4))
model4.add(Flatten())
model4.add(Dense(512))
model4.add(Activation('selu'))
model4.add(Dropout(0.5))
model4.add(Dense(num_classes))
model4.add(Activation('softmax'))

In [None]:
model4.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
model4.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)

### PoLU Activation

In [50]:
polu_model = Sequential()
polu_model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
polu_model.add(Activation('polu'))
polu_model.add(Conv2D(64, (3,3), padding='valid'))
polu_model.add(Activation('polu'))
polu_model.add(Dropout(0.4))
polu_model.add(Conv2D(128, (3,3), padding='same'))
polu_model.add(Activation('polu'))
polu_model.add(Dropout(0.4))
polu_model.add(Conv2D(64, (3,3), padding='valid'))
polu_model.add(Activation('polu'))
polu_model.add(Conv2D(32, (3,3), padding='valid'))
polu_model.add(Activation('polu'))
polu_model.add(Dropout(0.4))
polu_model.add(Flatten())
polu_model.add(Dense(512))
polu_model.add(Activation('polu'))
polu_model.add(Dropout(0.5))
polu_model.add(Dense(num_classes))
polu_model.add(Activation('softmax'))

InvalidArgumentError: You must feed a value for placeholder tensor 'conv2d_42_input' with dtype float and shape [?,32,32,3]
	 [[Node: conv2d_42_input = Placeholder[dtype=DT_FLOAT, shape=[?,32,32,3], _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'conv2d_42_input', defined at:
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2802, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-50-3758f0bd8235>", line 3, in <module>
    input_shape=x_train.shape[1:]))
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/keras/models.py", line 460, in add
    name=layer.name + '_input')
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/keras/engine/topology.py", line 1439, in Input
    input_tensor=tensor)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 87, in wrapper
    return func(*args, **kwargs)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/keras/engine/topology.py", line 1348, in __init__
    name=self.name)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 497, in placeholder
    x = tf.placeholder(dtype, shape=shape, name=name)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1599, in placeholder
    return gen_array_ops._placeholder(dtype=dtype, shape=shape, name=name)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 3091, in _placeholder
    "Placeholder", dtype=dtype, shape=shape, name=name)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/Users/ShishirJakati/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'conv2d_42_input' with dtype float and shape [?,32,32,3]
	 [[Node: conv2d_42_input = Placeholder[dtype=DT_FLOAT, shape=[?,32,32,3], _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
