In [1]:
from __future__ import print_function
import keras
import operator
import numpy as np
from matplotlib import pyplot as plt
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Lambda
from keras.activations import relu
from keras import backend as K

import tensorflow as tf

%load_ext autoreload
%autoreload 2
%matplotlib notebook

Using TensorFlow backend.
  return f(*args, **kwds)


#### Load the MNIST dataset and reshape it for Keras
Taken from the Keras example on GitHub

In [46]:
# Training hyper-parameters
batch_size = 128
num_classes = 10
epochs = 5

# Height and width of one input image
img_rows, img_cols = 28, 28

# Load the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Per-sample shape: the single channel axis goes first or last depending on
# the image data format reported by the Keras backend
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

# Add the channel axis and convert to float32 (pixel values are not rescaled)
x_train = x_train.reshape((x_train.shape[0],) + input_shape).astype('float32')
x_test = x_test.reshape((x_test.shape[0],) + input_shape).astype('float32')

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Turn the integer class labels into binary (one-hot) class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


#### Our training model
We train our model with ReLU hidden activations, a sigmoid output layer, and floating-point weights.  
The model trains fast and gets good accuracy (better than that reported in Yann LeCun's paper!)

In [47]:
# Float model used for training: two conv/pool stages followed by three dense
# layers. Hidden layers use ReLU; only the output layer uses a sigmoid.
model = Sequential([
    Conv2D(6, kernel_size=(5, 5), input_shape=input_shape),
    Activation(relu),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(16, (5, 5)),
    Activation(relu),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(120),
    Activation(relu),
    Dense(84),
    Activation(relu),
    Dense(num_classes, activation='sigmoid'),
])

model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adadelta(),
    metrics=['accuracy'],
)

# Print the layer-by-layer summary
model.summary()

# Train, validating against the test split after every epoch
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)

# Final loss and accuracy on the test split
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_18 (Conv2D)           (None, 24, 24, 6)         156       
_________________________________________________________________
activation_13 (Activation)   (None, 24, 24, 6)         0         
_________________________________________________________________
max_pooling2d_17 (MaxPooling (None, 12, 12, 6)         0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 8, 8, 16)          2416      
_________________________________________________________________
activation_14 (Activation)   (None, 8, 8, 16)          0         
_________________________________________________________________
max_pooling2d_18 (MaxPooling (None, 4, 4, 16)          0         
_________________________________________________________________
flatten_9 (Flatten)          (None, 256)               0         
__________

In [48]:
# Print the value range of every layer of the float model that owns a
# kernel/bias pair, as: max(kernel) min(kernel) max(bias) min(bias).
for layer in model.layers:
    params = layer.get_weights()
    if len(params) < 2:
        # Layers without both a kernel and a bias are skipped explicitly,
        # instead of relying on a bare except to swallow the IndexError.
        continue
    kernel, bias = params[0], params[1]
    print(np.amax(kernel), np.amin(kernel), np.amax(bias), np.amin(bias))

0.15961 -0.182806 0.00300721 -0.036846
0.154745 -0.167102 0.0134561 -0.0279432
0.182576 -0.180867 0.0173579 -0.0208082
0.229306 -0.202454 0.0185709 -0.0194909
0.247613 -0.281095 -0.00152354 -0.0298678


#### Same model, discrete weights, without the sigmoid activation
This model will make the same predictions without the sigmoid activation, and with discrete weights. We guard against overflow on the FPGA by dividing by 256 (and rounding) after each hidden convolution and dense layer.

In [51]:
# Divisor applied to the activations after every hidden layer
val = K.variable(256, name="val")


def lfunc(x):
    """Divide the tensor `x` by `val` and round the result."""
    scaled = x / val
    return K.round(scaled)


# Discrete counterpart of the float model: same layer order, with a rescaling
# Lambda in place of each standalone activation layer, bias-free convolutions
# and no activation on the output layer.
# (A clipped ReLU, relu(x, max_value=65200), after each Lambda is left disabled.)
model_discrete = Sequential([
    Conv2D(6, kernel_size=(5, 5), input_shape=input_shape,
           activation='relu', use_bias=False),
    Lambda(lfunc),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(16, (5, 5), activation='relu', use_bias=False),
    Lambda(lfunc),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(120, activation='relu'),
    Lambda(lfunc),
    Dense(84, activation='relu'),
    Lambda(lfunc),
    Dense(num_classes),
])

In [52]:
#manually evaluate our model on a test set
def evaluate_model(model, test_x, test_y):
    """Count the samples in ``test_x`` that ``model`` classifies correctly.

    A sample counts as correct when its label row in ``test_y`` holds a 1 at
    the index of the model's largest output (the first index wins on ties).

    Args:
        model: object exposing ``predict(batch)`` that returns one row of
            class scores per sample.
        test_x: array-like of samples. It is handed to ``predict`` as one
            batch, so it must already be in the layout the model expects
            (no fixed 28x28x1 reshape is applied any more).
        test_y: array-like of per-sample label rows (one-hot encoded).

    Returns:
        int: number of correctly predicted samples (0 for empty input).
    """
    samples = np.asarray(test_x)
    labels = np.asarray(test_y)
    # Mirror zip(): only as many pairs as the shorter input provides.
    count = min(len(samples), len(labels))
    if count == 0:
        return 0
    # One batched call instead of one predict() call per sample.
    scores = np.asarray(model.predict(samples[:count])).reshape(count, -1)
    best = np.argmax(scores, axis=1)
    # Label value found at the predicted class index of every sample.
    chosen = labels[np.arange(count), best]
    return int(np.count_nonzero(chosen.astype(int) == 1))

#applies an operation to every element of a numpy ndarray, returning nested lists
def mod_ndarray(array, operation):
    """Return `operation` applied element-wise to `array` as nested lists.

    The nesting of the returned lists follows the axes of `array`; arrays with
    more than one dimension are handled by recursing over the first axis.
    """
    if array.ndim != 1:
        return [mod_ndarray(sub_array, operation) for sub_array in array]
    return list(map(operation, array))

#scales a value by 255 and rounds it (used on signed weights, so results may be negative)
def mult_255(val):
    """Return `val` multiplied by 255.0 and rounded with the built-in round()."""
    scaled = val * 255.0
    return round(scaled)

In [55]:
# Copy the trained float parameters into the discrete model, scaling every
# value by 255 and rounding it (mult_255). Layers are paired by position.
for float_layer, discrete_layer in zip(model.layers, model_discrete.layers):
    trained = float_layer.get_weights()
    expected = len(discrete_layer.get_weights())
    if not trained or expected == 0:
        # Nothing to copy from, or nothing to copy into, for this pair.
        continue
    # A discrete layer built with use_bias=False expects the kernel only, so
    # take just as many arrays as the target layer holds. Any remaining
    # mismatch (count or shape) now surfaces as the error raised by
    # set_weights instead of being silently swallowed.
    quantized = [np.asarray(mod_ndarray(param, mult_255))
                 for param in trained[:expected]]
    discrete_layer.set_weights(quantized)

num_correct = evaluate_model(model_discrete, x_test, y_test)
print(num_correct, "predicted correctly")
# Percentage relative to the actual number of test samples.
print(100.0 * num_correct / len(x_test), "% predicted correctly")

9726 predicted correctly
97.26 % predicted correctly


#### Ok looks good. Let's save these weights and inputs.
Float weights for OpenCL, discrete weights for the FPGA.

In [171]:
# Write every test pixel, one value per line, in flattened order.
# The with-blocks guarantee the files are closed even if a write fails.
with open("inputs.txt", "w") as fh:
    fh.writelines(str(inp) + "\n" for inp in x_test.flatten())

# Write the float model's parameters. For every layer that owns weights:
#   layer name
#   shape of the transposed kernel
#   transposed kernel values, flattened
#   bias shape
#   bias values
# followed by a blank line. Every number is followed by a single space.
with open("flat_weights_float.txt", "w") as fh:
    for layer in model.layers:
        wgt = layer.get_weights()
        if not wgt:
            continue  # layer has no parameters to export
        kernel_t = wgt[0].T  # .T reverses the axis order
        bias = wgt[1]
        fh.write(layer.get_config()['name'] + "\n")
        fh.write("".join(str(s) + " " for s in kernel_t.shape) + "\n")
        fh.write("".join(str(weight) + " " for weight in kernel_t.flatten()) + "\n")
        fh.write("".join(str(s) + " " for s in bias.shape) + "\n")
        fh.write("".join(str(term) + " " for term in bias.flatten()) + "\n\n")

In [90]:
# Write the discrete model's parameters to a text file, one section per layer
# that owns weights: name, kernel shape, kernel values, bias shape, bias
# values, then a blank line. Every number is followed by a single space.
# The with-block guarantees the file is closed even if a write fails.
with open("flat_weights_discrete.txt", "w") as fh:
    for i, layer in enumerate(model_discrete.layers):
        wgt = layer.get_weights()
        if not wgt:
            continue  # layer has no parameters to export
        weights = wgt[0]
        layer_name = layer.get_config()['name']
        is_conv = 'conv' in layer_name

        fh.write(layer_name + "\n")
        fh.write("".join(str(s) + " " for s in weights.shape) + "\n")

        if is_conv:
            # NOTE(review): matrix[0] keeps only the first slice of each
            # axis-reversed kernel. For a kernel with more than one input
            # channel this writes fewer values than the shape line announces.
            # Left as-is because the intended layout is not visible here;
            # confirm against the consumer of this file.
            for matrix in weights.T:
                fh.write("".join(str(weight) + " " for weight in matrix[0].T.flatten()))
        else:
            fh.write("".join(str(weight) + " " for weight in weights.T.flatten()))
        fh.write("\n")

        if is_conv:
            # The bias shape is taken from the same-index layer of the float
            # model, and a zero is written for every bias entry.
            bias = model.layers[i].get_weights()[1]
            fh.write("".join(str(s) + " " for s in bias.shape) + "\n")
            fh.write("0 " * bias.size)
        else:
            bias = wgt[1]
            fh.write("".join(str(s) + " " for s in bias.shape) + "\n")
            fh.write("".join(str(term) + " " for term in bias.flatten()))
        fh.write("\n\n")

In [180]:
# Show the kernel (first weight array) of the discrete model's layer at index 7
print(model_discrete.layers[7].get_weights()[0])

[[-11.   9.   3. ..., -28. -36.  -7.]
 [ -9.   9.  24. ...,  31.  25. -12.]
 [ 20.   8. -25. ..., -13.  13.  20.]
 ..., 
 [ 24.  -1.  15. ..., -22. -21.   7.]
 [-19.  -5.  36. ..., -19.   6.   6.]
 [-28.  -4.  -0. ..., -28.  -8.  10.]]


In [182]:
# Length of the first axis of the layer-7 kernel in the discrete model
print(len(model_discrete.layers[7].get_weights()[0]))

256


In [184]:
# Run the float model over the whole test set and keep its raw outputs
res = model.predict(x_test)

In [197]:
# Show the float-model outputs stored in res
print(res)

[[  5.19486356e-08   1.39446669e-07   4.76224962e-08 ...,   9.99417067e-01
    1.13776544e-08   2.33410677e-08]
 [  3.31773359e-11   7.70668612e-06   5.19435763e-01 ...,   2.20586160e-16
    1.01457542e-09   1.76390261e-13]
 [  1.51103279e-06   8.23440731e-01   2.09005589e-06 ...,   5.47471864e-05
    2.64673028e-04   1.09076118e-05]
 ..., 
 [  5.85141812e-14   1.29158799e-08   6.53425014e-12 ...,   1.14739555e-06
    2.78659996e-07   7.70947190e-06]
 [  4.48449899e-09   1.20941779e-09   1.61213098e-10 ...,   4.26159289e-07
    1.98695078e-01   8.69585051e-07]
 [  3.79240639e-10   1.95763697e-14   3.98833722e-09 ...,   7.04337847e-14
    9.61708158e-10   1.07173706e-11]]


In [14]:
def keras_get_layer_output(model, layer, test_input):
    """
    Evaluate `layer` for `test_input` and return its output array.

    A backend function is built from the input tensor of the model's first
    layer to the output tensor of `layer`, then called on `test_input`.
    """
    network_input = model.layers[0].input
    run_to_layer = K.function([network_input], [layer.output])
    results = run_to_layer([test_input])
    return results[0]


In [65]:
# Print arrays without scientific notation and without eliding elements
# (up to 100000 entries)
np.set_printoptions(suppress=True, threshold=100000)

# Collects, for every layer traced below, the max followed by the min output
maxmin = []
for tst in x_test[0:1]:
    testin = np.reshape(tst, (1, 28, 28, 1))
    print("input = ")
    print(tst)
    # Walk the discrete model layer by layer, numbering layers from 1
    for i, layer in enumerate(model_discrete.layers, start=1):
        print("layer", i,  "out:")
        out = keras_get_layer_output(model_discrete, layer, testin)
        print(out)
        maxmin.extend([np.amax(out), np.amin(out)])

input = 
[[[   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]]

 [[   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]]

 [[   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]]

 [[   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [   0.]
  [  

[[[[  0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.]]

  [[  0.   0.   0.   0.   0.   0.]
   [  0.   1.   8.   0.   1.  34.]
   [  0.   0.  30.   0.   0.  53.]
   [  0.   0.  39.   0.   2.  27.]
   [  0.   0.  26.  13.  16.   0.]
   [  0.   0.   6.   7.   7.   0.]
   [  0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.]
   [  0.   0.   0.   0.   0.   0.]]

  [[  0.   0.   0.   0.   0.   0.]
   [  0.   9.   7.   0.   4.  53.]
   [  0.   0.  28.   0.   0.  83.]
   [  0.   0.  48.   0.   0.  80.]
   [  0.   0.  5

[[[[  0.   0.  26.   0.   2.  25.   0.   0.   0.   0.  21.   0.   0.   0.
      0.   0.]
   [  0.   0.  29.   0.   0.  20.   0.   0.   0.   0.  21.   0.   0.   0.
      0.   0.]
   [  0.   0.  22.   3.   0.  17.   0.   0.   3.   0.  16.   0.   0.   0.
      0.   0.]
   [  0.   0.  22.  15.   0.  19.   0.   0.  12.   0.  10.   0.   0.   0.
      0.   0.]
   [  0.   0.   2.  22.   2.  24.   0.   0.  21.   0.   7.   0.   0.   0.
      0.   0.]
   [  0.   0.   0.  24.   0.  14.   0.   0.  31.   0.  12.   0.   0.   0.
      0.   0.]
   [  0.   0.   0.  25.   5.  20.   0.   0.   3.   0.  24.   0.   0.   0.
      0.   0.]
   [  0.   0.   0.  28.   0.  10.   0.   0.   0.   4.   3.   0.   0.   0.
      0.   0.]]

  [[  0.   6.  37.   0.   0.   0.   0.   0.   0.   1.   3.   0.   0.   5.
      0.   0.]
   [  0.   1.  39.   0.   0.   0.   0.   0.   0.   0.  22.   0.   0.   0.
      0.   0.]
   [  0.   0.  38.   0.   0.   0.   0.   0.   0.   0.  16.   0.   0.   0.
      0.   0.]
   [  0.   0.  27. 

[[  0.   5.   0.  33.   0.   8.  20.   0.   2.  18.   0.   7.   6.   0.
   28.  10.   0.  10.   0.   7.   0.   0.  29.  11.   0.   0.   0.   0.
   20.   0.   0.  23.   0.   0.   0.   0.   0.  17.  13.   0.  12.   0.
   38.   0.   0.   0.   6.   0.   7.   0.   3.   9.   0.  17.  18.  20.
   14.   0.  15.  21.  18.  36.   0.   8.   0.   5.  10.   0.   0.   0.
   31.  30.   0.   0.   0.   0.   0.   0.  16.   0.   8.   0.  16.   3.
    0.   0.   8.  13.  21.  36.   0.   0.   0.   0.  22.   0.   9.   4.
    0.   4.   0.   0.   0.   0.   3.   0.   0.   0.   0.   0.  17.   0.
   11.  10.   3.   0.   0.   0.  33.   0.]]
layer 10 out:
[[  3302.   3668.    331.   1244.    152.      0.      0.      0.   5950.
       0.    434.   3038.   4456.      0.   7320.      0.      0.     21.
       0.      0.   7476.   1570.   4547.   3855.      0.     14.      0.
    5591.      0.    497.   1388.      0.   2674.      0.      0.   1361.
       0.      0.   5287.   5894.   4712.      0.   1833.   5714.   15

In [39]:
# Largest and smallest of the per-layer max/min values collected in maxmin
print(max(maxmin), min(maxmin))

43459.0 -11748.0


In [64]:
# Save the discrete model to model_discrete.h5
model_discrete.save("model_discrete.h5")

In [79]:
# Feed the first test image, as a batch of one, through the discrete model's
# first layer only
first_layer = model_discrete.layers[0]
testin = x_test[0].reshape((1, 28, 28, 1))
out = keras_get_layer_output(model_discrete, first_layer, testin)

In [80]:
# Shape of the array currently stored in out
print(out.shape)

(1, 24, 24, 6)


In [82]:
# Kernel (first weight array) of the discrete model's first layer
weights1 = model_discrete.layers[0].get_weights()[0]

In [92]:
# weights1.T reverses the axis order, so [0] picks the first slice along the
# kernel's original last axis
print(weights1.T[0])

[[[-10.   0. -12. -43. -28.]
  [ -9.   6. -34.   3. -25.]
  [ -9.  41.  40. -22. -43.]
  [  0. -22.  19.  -7. -26.]
  [ 30.  -9. -14. -19. -35.]]]


In [100]:
# Print the first test image as a 28x28 grid: one image row per output line,
# every value followed by a space
for i in range(28):
    print(*[x_test[0][i][j][0] for j in range(28)], end=' \n')

0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 84.0 185.0 159.0 151.0 60.0 36.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 222.0 254.0 254.0 254.0 254.0 241.0 198.0 198.0 198.0 198.0 198

In [104]:
# Print a 24x24 grid of out.T[0][i][j][0], one value of i per output line.
# out.T reverses the axes, so this equals out[0][j][i][0]: with the
# (1, 24, 24, 6) shape printed earlier, that is the first channel of the
# first-layer output with rows and columns swapped.
for i in range(24):
    print(*[out.T[0][i][j][0] for j in range(24)], end=' \n')

0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6057.0 2010.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 5120.0 3420.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 4453.0 9080.0 5213.0 1557.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 2137.0 8035.0 5802.0 1791.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 12.0 489.0 2546.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 961.0 0.0 3843.0 510.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 2529.0 0.0 3439.0 1980.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 248.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 5637.0 3355.0 1219.0 267.0 0.0 0.0 0.0 

In [105]:
# Kernel shape of the discrete model's layer at index 0
model_discrete.layers[0].get_weights()[0].shape

(5, 5, 1, 6)

In [107]:
# Kernel shape of the discrete model's layer at index 3
model_discrete.layers[3].get_weights()[0].shape

(5, 5, 6, 16)

In [109]:
# Write the discrete model's parameters to a text file. For every layer that
# owns weights:
#   layer name
#   kernel shape
#   kernel values in axis-reversed order
#   (non-conv layers only) bias shape, then bias values
# followed by a blank line. Every number is followed by a single space.
# The with-block guarantees the file is closed even if a write fails.
with open("flat_weights_discrete.txt", "w") as fh:
    for layer in model_discrete.layers:
        wgt = layer.get_weights()
        if not wgt:
            continue  # layer has no parameters to export
        weights = wgt[0]
        layer_name = layer.get_config()['name']

        fh.write(layer_name + "\n")
        fh.write("".join(str(s) + " " for s in weights.shape) + "\n")

        # weights.T reverses the axes, so flattening it walks the last axis
        # outermost and the first axis innermost. That is exactly the order of
        # the former four nested index loops for conv kernels
        # (weights[l][k][j][i]), so conv and dense layers share one code path
        # and no loop index shadows another.
        fh.write("".join(str(weight) + " " for weight in weights.T.flatten()) + "\n")

        # Conv layers get no bias section in this format.
        if 'conv' not in layer_name:
            bias = wgt[1]
            fh.write("".join(str(s) + " " for s in bias.shape) + "\n")
            fh.write("".join(str(term) + " " for term in bias.flatten()))
        fh.write("\n\n")