In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
import keras.backend as K

Using TensorFlow backend.


In [2]:
# Load the MNIST train/test splits, then divide the image arrays by 255.
(X_train, Y_train), (X_test, Y_test) = keras.datasets.mnist.load_data()
X_train = X_train / 255
X_test = X_test / 255

### A compressed-matrix neural layer: dense weights generated from a truncated set of DCT coefficients

In [30]:
class CompressedDense(keras.layers.Layer):
    """Dense-style layer whose weight matrix is generated from `freqs` coefficients.

    Only `freqs` kernel coefficients and a `units`-long bias are trained. On every
    call the coefficients are zero-padded to `inputs * units` values, passed
    through `tf.spectral.idct(..., norm='ortho')`, reshaped to `(inputs, units)`
    and used as the matrix of an affine map `inputs @ weights + bias`.

    Arguments:
        units: number of output features.
        freqs: number of trainable coefficients. Must not exceed
            `inputs * units`; this is checked in `build`.
        **kwargs: forwarded to `keras.layers.Layer`.
    """

    def __init__(self, units, freqs=128, **kwargs):
        super(CompressedDense, self).__init__(**kwargs)
        self.units = units
        self.freqs = freqs

    def build(self, input_shape):
        """Create the coefficient vector and the bias.

        Raises:
            ValueError: if the feature dimension (`input_shape[1]`) is unknown,
                or if `freqs` is larger than the number of weights to generate.
        """
        self.inputs = input_shape[1]
        if self.inputs is None:
            raise ValueError(
                'CompressedDense needs a defined feature dimension, '
                'got input_shape={}'.format(input_shape))
        total_weights = self.inputs * self.units
        if self.freqs > total_weights:
            # Without this check the padding below would be negative and the
            # failure would only surface later, inside call().
            raise ValueError(
                'freqs ({}) must not exceed inputs * units ({})'.format(
                    self.freqs, total_weights))
        # Number of zeros appended after the trained coefficients.
        self.weight_padding = total_weights - self.freqs
        # Shaped (1, freqs, 1) so that K.temporal_padding can pad the middle axis.
        self.kernel = self.add_weight(
            shape=(1, self.freqs, 1),
            initializer='glorot_uniform', name='kernel')
        self.bias = self.add_weight(
            shape=(self.units,),
            initializer='zeros', name='bias')
        super(CompressedDense, self).build(input_shape)

    def call(self, inputs):
        """Expand the coefficients into a weight matrix and apply it to `inputs`."""
        # (1, freqs, 1) -> (1, inputs * units, 1): zeros appended on the right.
        padded = K.temporal_padding(self.kernel, (0, self.weight_padding))
        # Move the coefficient axis last; tf.spectral.idct transforms the last axis.
        padded = K.permute_dimensions(padded, (0, 2, 1))
        weights = tf.spectral.idct(padded, norm='ortho')
        weights = K.reshape(weights, (self.inputs, self.units))
        return inputs @ weights + self.bias

    def compute_output_shape(self, input_shape):
        """Return `(batch, units)`."""
        return (input_shape[0], self.units)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super(CompressedDense, self).get_config()
        config.update({'units': self.units, 'freqs': self.freqs})
        return config

### Baseline: an ordinary `Dense(10)` layer followed by batch normalization and softmax

In [47]:
# Baseline classifier: flatten the 28x28 input, then Dense(10) -> BatchNorm -> softmax.
X_input = keras.layers.Input(shape=(28, 28))
X = keras.layers.Reshape(target_shape=(28 * 28,))(X_input)
X = keras.layers.Dense(units=10)(X)
X = keras.layers.BatchNormalization()(X)
X = keras.layers.Activation(activation='softmax')(X)

M_baseline = keras.Model(inputs=X_input, outputs=X)
M_baseline.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)
M_baseline.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_20 (InputLayer)        (None, 28, 28)            0         
_________________________________________________________________
reshape_20 (Reshape)         (None, 784)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 10)                7850      
_________________________________________________________________
batch_normalization_29 (Batc (None, 10)                40        
_________________________________________________________________
activation_16 (Activation)   (None, 10)                0         
Total params: 7,890
Trainable params: 7,870
Non-trainable params: 20
_________________________________________________________________


In [48]:
%%time
# Train the baseline; both callbacks watch the training loss ('loss').
M_baseline.fit(
    X_train, Y_train,
    validation_data=(X_test, Y_test),
    batch_size=512,
    epochs=200,
    callbacks=[
        keras.callbacks.ReduceLROnPlateau(monitor='loss', patience=3, verbose=1),
        keras.callbacks.EarlyStopping(monitor='loss', patience=10, verbose=1),
    ],
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200


Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200

Epoch 00097: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200

Epoch 00110: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 111/200
Epoch 112/200
Epoch 113/200

Epoch 00113: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 114/200
Epoch 115/200
Epoch 116/

Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200

Epoch 00120: ReduceLROnPlateau reducing learning rate to 1.0000001111620805e-07.
Epoch 121/200
Epoch 122/200
Epoch 123/200

Epoch 00123: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-08.
Epoch 124/200
Epoch 125/200
Epoch 126/200

Epoch 00126: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-09.
Epoch 127/200
Epoch 00127: early stopping
CPU times: user 2min 12s, sys: 9.09 s, total: 2min 21s
Wall time: 1min 55s


<keras.callbacks.History at 0x7f0c1c1342e8>

### Compressed test: the same network with `CompressedDense` (4096 coefficients) in place of `Dense`

In [53]:
# Same architecture as the baseline, with CompressedDense replacing the Dense layer.
X_input = keras.layers.Input(shape=(28, 28))
X = keras.layers.Reshape(target_shape=(28 * 28,))(X_input)
X = CompressedDense(10, freqs=4096)(X)
X = keras.layers.BatchNormalization()(X)
X = keras.layers.Activation(activation='softmax')(X)

M = keras.Model(inputs=X_input, outputs=X)
# NOTE(review): compiled with 'rmsprop' while M_baseline uses 'adam' —
# confirm the optimizer difference is intended for this comparison.
M.compile(
    optimizer='rmsprop',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)
M.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_23 (InputLayer)        (None, 28, 28)            0         
_________________________________________________________________
reshape_23 (Reshape)         (None, 784)               0         
_________________________________________________________________
compressed_dense_22 (Compres (None, 10)                4106      
_________________________________________________________________
batch_normalization_32 (Batc (None, 10)                40        
_________________________________________________________________
activation_19 (Activation)   (None, 10)                0         
Total params: 4,146
Trainable params: 4,126
Non-trainable params: 20
_________________________________________________________________


In [54]:
%%time
# Train the compressed model with the same fit settings as the baseline run.
M.fit(
    X_train, Y_train,
    validation_data=(X_test, Y_test),
    batch_size=512,
    epochs=200,
    callbacks=[
        keras.callbacks.ReduceLROnPlateau(monitor='loss', patience=3, verbose=1),
        keras.callbacks.EarlyStopping(monitor='loss', patience=10, verbose=1),
    ],
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200


Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200

Epoch 00091: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200

Epoch 00096: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200

Epoch 00102: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200

Epoch 00107: ReduceLROnPlateau reducing learning rate to 1.0000001111620805e-07.
Epoch 108/200
Epoch 109/200
Epoch 110/20


Epoch 00114: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-09.
Epoch 115/200
Epoch 116/200
Epoch 117/200

Epoch 00117: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-10.
Epoch 118/200
Epoch 119/200
Epoch 120/200

Epoch 00120: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-11.
Epoch 121/200
Epoch 00121: early stopping
CPU times: user 1min 53s, sys: 28 s, total: 2min 21s
Wall time: 1min 43s


<keras.callbacks.History at 0x7f0c216448d0>