In this notebook I will be implementing the power linear unit (PoLU) activation function, which was presented [here](https://arxiv.org/pdf/1802.00212.pdf). The main advantage of this activation is its robustness to noise for negative inputs. It produces a non-zero output for negative inputs, which reduces the bias shift effect. By pushing the mean activation closer to zero, the activation function should be able to produce more standardized results.

In [None]:
from keras import backend as K
from keras.utils.generic_utils import get_custom_objects
from keras.layers import Activation

### Activation Definition

In [39]:
def polu(x, n=1.5):
    """Power Linear Unit (PoLU) activation.

    Computes, element-wise::

        f(x) = x                      if x >= 0
        f(x) = (1 - x) ** (-n) - 1    if x <  0

    The function is written with backend tensor ops only. A Python
    ``if x >= 0`` cannot be evaluated on a symbolic tensor, so the
    branching form fails as soon as the activation is used in a layer.

    Args:
        x: Input tensor.
        n: Power applied to the negative branch (default 1.5).

    Returns:
        A tensor with the same shape as ``x``.
    """
    # Clamp to the non-positive part so the base of the power, (1 - x),
    # is always >= 1. Without the clamp, positive inputs larger than 1
    # would give a negative base raised to a fractional power, i.e. NaN
    # values (and NaN gradients) in the branch that is not even selected.
    negative_part = K.minimum(x, 0.)

    # For x >= 0: relu(x) == x and (1 - 0) ** (-n) - 1 == 0.
    # For x <  0: relu(x) == 0 and the power term gives the PoLU tail.
    return K.relu(x) + K.pow(1. - negative_part, -n) - 1.


# Register under the string name 'polu' so it can be used as Activation('polu').
get_custom_objects().update({'polu': Activation(polu)})

### Set Performance Benchmark
In the paper where the activation function is presented, the activation function is tested against the [ReLU activation function](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.165.6419&rep=rep1&type=pdf) and the [ELU activation function](http://image-net.org/challenges/posters/JKU_EN_RGB_Schwarz_poster.pdf). 

Here, I will test against the ReLU activation function as well, and additionally against the Leaky ReLU, Parametric ReLU, and SELU activation functions. 

*Model Definition*:
I will define a small convolutional architecture that is used to classify the CIFAR-10 dataset.

In [None]:
## relevant imports
import keras
from keras.layers import Dense, Conv2D, Dropout, MaxPooling2D, Flatten
from keras.models import Sequential
from keras.datasets import cifar10

In [None]:
# Training hyper-parameters shared by every benchmark model:
#   batch_size  - samples per gradient update
#   num_classes - number of target classes (width of the softmax layer)
#   epochs      - full passes over the training set
batch_size, num_classes, epochs = 128, 10, 20

In [41]:
# The data, shuffled and split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert the integer class vectors to one-hot (binary) class matrices,
# as required by the categorical cross-entropy loss used below.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Cast the images to float32 and rescale them by 1/255.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Use the canonical optimizer class name; the lowercase `rmsprop` alias is
# undocumented and is not available in later Keras releases.
opt = keras.optimizers.RMSprop(lr=0.01, decay=1e-6)

x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


##### ReLU Activation

In [None]:
# ReLU baseline: five convolutional layers followed by a dense classifier.
# Every benchmark model uses this same layout; only the activation differs.
model = Sequential([
    # Convolutional feature extractor
    Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:]),
    Activation('relu'),
    Conv2D(64, (3, 3), padding='valid'),
    Activation('relu'),
    Dropout(0.4),
    Conv2D(128, (3, 3), padding='same'),
    Activation('relu'),
    Dropout(0.4),
    Conv2D(64, (3, 3), padding='valid'),
    Activation('relu'),
    Conv2D(32, (3, 3), padding='valid'),
    Activation('relu'),
    Dropout(0.4),
    # Classifier head
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])

In [None]:
# Compile with a *fresh* optimizer rebuilt from `opt`'s hyper-parameters.
# Optimizer instances carry internal training state, so handing the very
# same instance to several models would let one benchmark run influence
# the next; cloning keeps the comparison between activations independent.
model.compile(loss='categorical_crossentropy',
              optimizer=type(opt).from_config(opt.get_config()),
              metrics=['accuracy'])

In [None]:
# Train the ReLU baseline; the test split is used as validation data so
# per-epoch validation metrics are reported alongside the training ones.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
    shuffle=True,
)

##### Leaky ReLU Activation

In [38]:
from keras.layers.advanced_activations import LeakyReLU

In [43]:
# Leaky ReLU variant: same architecture as the ReLU baseline, with every
# hidden activation replaced by LeakyReLU.
model2 = Sequential()
model2.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
# The first hidden activation must also be LeakyReLU (it was left as ReLU),
# otherwise the benchmark measures a ReLU/LeakyReLU hybrid.
model2.add(LeakyReLU())
model2.add(Conv2D(64, (3,3), padding='valid'))
model2.add(LeakyReLU())
model2.add(Dropout(0.4))
model2.add(Conv2D(128, (3,3), padding='same'))
model2.add(LeakyReLU())
model2.add(Dropout(0.4))
model2.add(Conv2D(64, (3,3), padding='valid'))
model2.add(LeakyReLU())
model2.add(Conv2D(32, (3,3), padding='valid'))
model2.add(LeakyReLU())
model2.add(Dropout(0.4))
model2.add(Flatten())
model2.add(Dense(512))
model2.add(LeakyReLU())
model2.add(Dropout(0.5))
model2.add(Dense(num_classes))
model2.add(Activation('softmax'))

In [None]:
# Compile with a *fresh* optimizer rebuilt from `opt`'s hyper-parameters.
# Optimizer instances carry internal training state, so reusing the same
# instance that may already have trained another model would make this
# run depend on the earlier one.
model2.compile(loss='categorical_crossentropy',
              optimizer=type(opt).from_config(opt.get_config()),
              metrics=['accuracy'])
# Train the Leaky ReLU model with the same settings as the other models.
model2.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)

##### Parametric ReLU

In [42]:
from keras.layers.advanced_activations import PReLU

In [45]:
# Parametric ReLU variant: same architecture as the ReLU baseline, with
# every hidden activation replaced by PReLU.
model3 = Sequential()
model3.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
# The first hidden activation must also be PReLU (it was left as ReLU),
# otherwise the benchmark measures a ReLU/PReLU hybrid.
model3.add(PReLU())
model3.add(Conv2D(64, (3,3), padding='valid'))
model3.add(PReLU())
model3.add(Dropout(0.4))
model3.add(Conv2D(128, (3,3), padding='same'))
model3.add(PReLU())
model3.add(Dropout(0.4))
model3.add(Conv2D(64, (3,3), padding='valid'))
model3.add(PReLU())
model3.add(Conv2D(32, (3,3), padding='valid'))
model3.add(PReLU())
model3.add(Dropout(0.4))
model3.add(Flatten())
model3.add(Dense(512))
model3.add(PReLU())
model3.add(Dropout(0.5))
model3.add(Dense(num_classes))
model3.add(Activation('softmax'))

In [None]:
# Compile with a *fresh* optimizer rebuilt from `opt`'s hyper-parameters.
# Optimizer instances carry internal training state, so reusing the same
# instance that may already have trained another model would make this
# run depend on the earlier one.
model3.compile(loss='categorical_crossentropy',
              optimizer=type(opt).from_config(opt.get_config()),
              metrics=['accuracy'])
# Train the Parametric ReLU model with the same settings as the other models.
model3.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)

##### Scaled Exponential Linear Unit

In [46]:
# SELU variant: same architecture as the ReLU baseline, with every hidden
# activation replaced by SELU.
# NOTE(review): the Keras docs recommend pairing SELU with `lecun_normal`
# initialisation and `AlphaDropout`; the default initialiser and plain
# Dropout are kept here so that only the activation differs between models.
model4 = Sequential()
model4.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
# The first hidden activation must also be SELU (it was left as ReLU),
# otherwise the benchmark measures a ReLU/SELU hybrid.
model4.add(Activation('selu'))
model4.add(Conv2D(64, (3,3), padding='valid'))
model4.add(Activation('selu'))
model4.add(Dropout(0.4))
model4.add(Conv2D(128, (3,3), padding='same'))
model4.add(Activation('selu'))
model4.add(Dropout(0.4))
model4.add(Conv2D(64, (3,3), padding='valid'))
model4.add(Activation('selu'))
model4.add(Conv2D(32, (3,3), padding='valid'))
model4.add(Activation('selu'))
model4.add(Dropout(0.4))
model4.add(Flatten())
model4.add(Dense(512))
model4.add(Activation('selu'))
model4.add(Dropout(0.5))
model4.add(Dense(num_classes))
model4.add(Activation('softmax'))

In [None]:
# Compile with a *fresh* optimizer rebuilt from `opt`'s hyper-parameters.
# Optimizer instances carry internal training state, so reusing the same
# instance that may already have trained another model would make this
# run depend on the earlier one.
model4.compile(loss='categorical_crossentropy',
              optimizer=type(opt).from_config(opt.get_config()),
              metrics=['accuracy'])
# Train the SELU model with the same settings as the other models.
model4.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)