[View in Colaboratory](https://colab.research.google.com/github/sebastianjaszczur/sparser-layer/blob/master/Sparser.ipynb)

In [0]:
from keras.datasets import boston_housing

# load_data() hands back two pairs: the training split and the test split.
train_split, test_split = boston_housing.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/keras-datasets/boston_housing.npz


In [0]:
# Sanity check: display the dimensions of the training inputs.
x_train.shape

(404, 13)

In [0]:
from keras import Sequential

In [0]:
from keras.layers import Dense, Input, Layer, Activation
from keras.models import Model
from keras.optimizers import Adam
from keras.activations import relu
from keras.initializers import RandomNormal 
from keras import backend as K

In [0]:
# Module-level counter behind uid(); it is advanced *before* a value is
# returned, so the first id handed out is UID + 1.
UID = 1


def uid():
    """Advance the global ``UID`` counter by one and return its new value."""
    global UID
    UID = UID + 1
    return UID

In [0]:
# Sparser layer: keeps the feature width unchanged while using far fewer
# weights than a full weight matrix of the same width.
class Sparser(Layer):
    """Width-preserving layer built from recursive half-and-half mixing.

    The feature axis of the input is split into two halves ``x1`` and ``x2``.
    Two mixed halves are formed element-wise::

        y1 = w00 * x1 + w01 * x2
        y2 = w10 * x1 + w11 * x2

    where every ``w`` is a trainable vector of length ``width // 2``. Each
    mixed half is then processed in the same way; pieces that are at most two
    features wide are passed through unchanged. The processed pieces are
    concatenated back together, so the output has the same width as the input.

    A split of a piece of width ``n`` owns four vectors of length ``n // 2``.

    Arguments:
        size: Number of input (and output) features. It is checked against
            the real input shape in ``build`` and must be a power of two.
        **kwargs: Forwarded to ``Layer.__init__``.
    """

    def __init__(self, size, **kwargs):
        self.size = int(size)
        super(Sparser, self).__init__(**kwargs)

    def build(self, input_shape):
        """Validate the input width and create every mixing weight once.

        Raises:
            AssertionError: if the input's feature dimension differs from
                ``size``.
            ValueError: if ``size`` is not a power of two.
        """
        assert input_shape[1] == self.size, (
            'Sparser(size={}) received input of shape {}'.format(
                self.size, input_shape))
        # Every piece wider than two features is halved again, and the two
        # halves are combined element-wise. That only yields an output of the
        # original width when all halvings are even, i.e. for powers of two.
        if self.size < 1 or self.size & (self.size - 1):
            raise ValueError(
                'Sparser size must be a power of two, got {}'.format(self.size))

        # Weights live here rather than in call() so that applying the same
        # layer instance again reuses them instead of allocating new ones.
        self._mix_weights = []
        self._add_mix_weights(self.size)
        super(Sparser, self).build(input_shape)  # Be sure to call this at the end

    def _add_mix_weights(self, width):
        """Create the 2x2 weight vectors for one split, then for both halves.

        Weights are appended in depth-first order (this split, the whole first
        half, the whole second half), which is the order call() consumes them.
        """
        if width <= 2:
            return
        half = width // 2
        self._mix_weights.append(
            [[self.add_weight(name='w' + str(uid()), shape=(half,),
                              initializer=RandomNormal(mean=0.0, stddev=0.5, seed=None),
                              trainable=True)
              for _ in range(2)] for _ in range(2)])
        self._add_mix_weights(half)
        self._add_mix_weights(half)

    def call(self, x):
        """Apply the recursive mixing to ``x`` (features on axis 1)."""
        # Walk the weights in the same depth-first order build() created them.
        remaining = iter(self._mix_weights)

        def basic_sparse(piece, width):
            if width <= 2:
                return piece
            half = width // 2
            w = next(remaining)
            x1 = piece[:, :half]
            x2 = piece[:, half:]
            y1 = w[0][0] * x1 + w[0][1] * x2
            y2 = w[1][0] * x1 + w[1][1] * x2
            # The first half must be fully processed before the second so the
            # weight order matches _add_mix_weights.
            f1 = basic_sparse(y1, half)
            f2 = basic_sparse(y2, half)
            return K.concatenate([f1, f2])

        return basic_sparse(x, self.size)

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        """Return the layer config including ``size`` so it can be re-created."""
        config = super(Sparser, self).get_config()
        config['size'] = self.size
        return config

In [0]:
def get_model(layer=Dense, lsize=256, lr=0.003):
    """Build and compile the comparison network around three ``layer`` instances.

    Architecture: 13 inputs -> Dense(lsize) -> 3 x [layer(lsize) -> ReLU]
    -> Dense(1). Both Dense layers have ``trainable`` set to False, so only
    the stacked ``layer`` instances are left trainable.

    Arguments:
        layer: Callable invoked as ``layer(lsize)`` to create each hidden layer.
        lsize: Width given to the first Dense layer and to every ``layer``.
        lr: Learning rate handed to the Adam optimizer.

    Returns:
        The Model, compiled with Adam and the 'mse' loss.
    """
    features = Input((13,), dtype="float32")

    # Input projection; `trainable` is switched off before the layer is applied.
    projection = Dense(lsize)
    projection.trainable = False
    hidden = projection(features)

    # Three identical "layer -> ReLU" stages.
    for _ in range(3):
        hidden = layer(lsize)(hidden)
        hidden = Activation('relu')(hidden)

    # Single-output head, also with `trainable` switched off.
    head = Dense(1)
    head.trainable = False
    prediction = head(hidden)

    network = Model(inputs=features, outputs=prediction)
    network.compile(optimizer=Adam(lr=lr), loss='mse')
    return network

In [0]:
# Baseline run: the hidden layers are Dense layers of width 128.
model = get_model(layer=Dense, lsize=128)
model.fit(
    x_train,
    y_train,
    batch_size=4,
    epochs=10,
    validation_data=(x_test, y_test),
)

Train on 404 samples, validate on 102 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f45e0b9cb38>

In [0]:
# Same setup as the baseline run, with Sparser layers of width 128 as the hidden layers.
model = get_model(layer=Sparser, lsize=128)
model.fit(
    x_train,
    y_train,
    batch_size=4,
    epochs=10,
    validation_data=(x_test, y_test),
)

Train on 404 samples, validate on 102 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10

Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10

Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f45e1e6bf98>

In [0]:
# Sum the leading dimension of every weight in the model, subtract a fixed
# offset of 129*14 and divide by 3 (get_model stacks three `layer` instances).
# NOTE(review): shape[0] is only the element count for single-axis weights;
# presumably this is meant as a per-layer parameter count -- confirm the offset.
(sum(map(lambda weight: int(weight.shape[0]), model.weights)) - 14 * 129) / 3


1024.0