In [8]:
from __future__ import print_function
import keras
import operator
import numpy as np
from matplotlib import pyplot as plt
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Lambda
from keras.activations import relu
from keras import backend as K

import tensorflow as tf

%load_ext autoreload
%autoreload 2
%matplotlib notebook

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


#### Load mnist dataset and shape it to be supported by Keras
Taken from Keras example on github

In [149]:
#set up data for training
batch_size = 128
num_classes = 10
epochs = 10

# input image dimensions
img_rows, img_cols = 28, 28

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


#### Our training model
We train our model with a sigmoid activation and floating point weights.  
The model trains fast and gets good accuracy (better than that reported in Yann LeCun's paper!)

In [150]:
#model for training with sigmoid activation
model = Sequential()
model.add(Conv2D(6, kernel_size=(5, 5),
                 input_shape=input_shape))
model.add(Activation(lambda x: relu(x)))#, max_value=1)))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(16, (5, 5)))
model.add(Activation(lambda x: relu(x)))#, max_value=1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(120))
model.add(Activation(lambda x: relu(x)))#, max_value=1)))
model.add(Dense(84))
model.add(Activation(lambda x: relu(x)))#, max_value=1)))
model.add(Dense(num_classes, activation='sigmoid'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

#show summary of our model
model.summary()

#train and test our model
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_75 (Conv2D)           (None, 24, 24, 6)         156       
_________________________________________________________________
activation_112 (Activation)  (None, 24, 24, 6)         0         
_________________________________________________________________
max_pooling2d_69 (MaxPooling (None, 12, 12, 6)         0         
_________________________________________________________________
conv2d_76 (Conv2D)           (None, 8, 8, 16)          2416      
_________________________________________________________________
activation_113 (Activation)  (None, 8, 8, 16)          0         
_________________________________________________________________
max_pooling2d_70 (MaxPooling (None, 4, 4, 16)          0         
_________________________________________________________________
flatten_34 (Flatten)         (None, 256)               0         
__________

In [151]:
for layer in model.layers:
    try:
        print(np.amax(layer.get_weights()[0]), np.amin(layer.get_weights()[0]),
              np.amax(layer.get_weights()[1]), np.amin(layer.get_weights()[1]))
    except:
        continue

0.206849 -0.209384 0.0176228 -0.0864162
0.162782 -0.198778 0.0790347 -0.0681564
0.190349 -0.198106 0.0420867 -0.0362527
0.224855 -0.217727 0.0473939 -0.0331164
0.245287 -0.316267 0.0253375 -0.0318747


#### Same model, discrete weights, without the sigmoid activation
This model will make the same predicitions without the sigmoid activation, and with discrete weights. We ensure we don't cause overflow on the FPGA by dividing by 512 after convolution and dense layers.

In [152]:
val = K.variable(512, name="val")
def lfunc(x):
    return x/val

model_discrete = Sequential()
model_discrete.add(Conv2D(6, kernel_size=(5, 5),
                 input_shape=input_shape, activation='relu'))
model_discrete.add(Lambda(lfunc))
#model_discrete.add(Activation(lambda x: relu(x, max_value=65200)))
model_discrete.add(MaxPooling2D(pool_size=(2,2)))
model_discrete.add(Conv2D(16, (5, 5), activation='relu'))
model_discrete.add(Lambda(lfunc))
#model_discrete.add(Activation(lambda x: relu(x, max_value=65200)))
model_discrete.add(MaxPooling2D(pool_size=(2, 2)))
model_discrete.add(Flatten())
model_discrete.add(Dense(120, activation='relu'))
model_discrete.add(Lambda(lfunc))
#model_discrete.add(Activation(lambda x: relu(x, max_value=65200)))
model_discrete.add(Dense(84, activation='relu'))
model_discrete.add(Lambda(lfunc))
#model_discrete.add(Activation(lambda x: relu(x, max_value=65200)))
model_discrete.add(Dense(num_classes))

In [153]:
#manually evaluate our model on a test set
def evaluate_model(model, test_x, test_y):
    num_correct = 0
    for inp, outp in zip(test_x, test_y):
        pred = model.predict(np.reshape(inp, (1, 28, 28, 1)))
        max_index, max_value = max(enumerate(pred[0]), key=operator.itemgetter(1))
        if int(outp[max_index]) == 1:
            num_correct += 1
    return num_correct #return # correctly predicted

#magic function that applies an operation to every element in a numpy ndarray
def mod_ndarray(array, operation):
    if array.ndim == 1:
        return [operation(x) for x in array]
    else:
        return [mod_ndarray(x, operation) for x in array]

#turns a value from 0 to 1 into uint(8)
def mult_255(val):
    return round(val*255.0)

In [154]:
for i in range(0, len(model.layers)):
    try:
        model_discrete.layers[i].set_weights([np.asarray(mod_ndarray(model.layers[i].get_weights()[0], mult_255)), np.asarray(mod_ndarray(model.layers[i].get_weights()[1], mult_255))])
    except IndexError:
        continue #print("index error at i =", i)
        
num_correct = evaluate_model(model_discrete, x_test, y_test)
print(num_correct, "predicted correctly")
print(num_correct/10000, "% predicted correctly")

9804 predicted correctly
0.9804 % predicted correctly


#### Ok looks good. Let's save these weights and inputs.
Float weights for OpenCL, discrete weights for the FPGA.

In [171]:
fh = open("inputs.txt", "w")
for inp in x_test.flatten():
    fh.write(str(inp)+"\n")
fh.close()

fh = open("flat_weights_float.txt", "w")
for layer in model.layers:
    wgt = layer.get_weights()
    if wgt:
        weights = wgt[0]
        bias = wgt[1]
        fh.write(layer.get_config()['name']+"\n")
        for s in weights.T.shape:
            fh.write(str(s) + " ")
        fh.write("\n")
        for weight in weights.T.flatten():
            fh.write(str(weight)+" ")
        fh.write("\n")
        for s in bias.shape:
            fh.write(str(s) + " ")
        fh.write("\n")
        for term in bias.flatten():
            fh.write(str(term)+" ")
        fh.write("\n\n")
fh.close()

In [172]:
fh = open("flat_weights_discrete.txt", "w")
for layer in model_discrete.layers:
    wgt = layer.get_weights()
    if wgt:
        weights = wgt[0]
        bias = wgt[1]
        fh.write(layer.get_config()['name']+"\n")
        for s in weights.shape:
            fh.write(str(s) + " ")
        fh.write("\n")
        if 'conv' in layer.get_config()['name']:
            for weight in weights.T.flatten():
                fh.write(str(weight)+" ")
        else:
            for weight in weights.T.flatten():
                fh.write
            for i in range(0, len(weights)):
                for j in range(0, len(weights[0])):
                    fh.write(str(weights[i][j]) + " ")
        fh.write("\n")
        for s in bias.shape:
            fh.write(str(s) + " ")
        fh.write("\n")
        for term in bias.flatten():
            fh.write(str(term)+" ")
        fh.write("\n\n")
fh.close()

In [180]:
print(model_discrete.layers[7].get_weights()[0])

[[-11.   9.   3. ..., -28. -36.  -7.]
 [ -9.   9.  24. ...,  31.  25. -12.]
 [ 20.   8. -25. ..., -13.  13.  20.]
 ..., 
 [ 24.  -1.  15. ..., -22. -21.   7.]
 [-19.  -5.  36. ..., -19.   6.   6.]
 [-28.  -4.  -0. ..., -28.  -8.  10.]]


In [182]:
print(len(model_discrete.layers[7].get_weights()[0]))

256
