In [5]:
from __future__ import print_function
import keras
import operator
import numpy as np
from matplotlib import pyplot as plt
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

%load_ext autoreload
%autoreload 2
%matplotlib notebook

#### Load mnist dataset and shape it to be supported by Keras
Taken from Keras example on github

In [2]:
#set up data for training
batch_size = 128
num_classes = 10
epochs = 10

# input image dimensions
img_rows, img_cols = 28, 28

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


#### Our training model
We train our model with a sigmoid activation and floating point weights.  
The model trains fast and gets good accuracy (better than that reported in Yann LeCun's paper!)

In [4]:
#model for training with sigmoid activation
model = Sequential()
model.add(Conv2D(6, kernel_size=(5, 5),
                 activation='relu',
                 input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(16, (5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(120, activation='relu'))
model.add(Dense(84, activation='relu'))
model.add(Dense(num_classes, activation='sigmoid'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

#show summary of our model
model.summary()

#train and test our model
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 24, 24, 6)         156       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 6)         0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 8, 8, 16)          2416      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 4, 4, 16)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 120)               30840     
_________________________________________________________________
dense_2 (Dense)              (None, 84)                10164     
__________

#### Compile the same model without the sigmoid activation
This model will make the same predicitions without the sigmoid activation.  
Keras can't evaluate a model unless outputs come between -1 and 1 so we'll have to create our own evaluate function.

In [6]:
#model for predicion without sigmoid activation
model2 = Sequential()
model2.add(Conv2D(6, kernel_size=(5, 5),
                 activation='relu',
                 input_shape=input_shape))
model2.add(MaxPooling2D(pool_size=(2,2)))
model2.add(Conv2D(16, (5, 5), activation='relu'))
model2.add(MaxPooling2D(pool_size=(2, 2)))
model2.add(Flatten())
model2.add(Dense(120, activation='relu'))
model2.add(Dense(84, activation='relu'))
model2.add(Dense(num_classes, activation='linear'))

model2.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

In [7]:
#manually evaluate our model on a test set
def evaluate_model(model, test_x, test_y):
    num_correct = 0
    for inp, outp in zip(test_x, test_y):
        pred = model2.predict(np.reshape(inp, (1, 28, 28, 1)))
        max_index, max_value = max(enumerate(pred[0]), key=operator.itemgetter(1))
        if int(outp[max_index]) == 1:
            num_correct += 1
    return num_correct #return # correctly predicted

def copy_model_weights(model1, model2):
    for i in range(0, len(model.layers)):
        try:
            model2.layers[i].set_weights(model1.layers[i].get_weights())
        except:
            continue


#### Now I prove that we don't need the sigmoid activation.
I'll predict with model2 (linear activation) using the same weights as our trained model, and the same test set, and we'll get the exact same result!

In [8]:
copy_model_weights(model, model2)
num_correct = evaluate_model(model2, x_test, y_test)
print(num_correct, "predicted correctly")
print(num_correct/10000, "precent predicted correctly")

9879 predicted correctly
0.9879 precent predicted correctly


#### Ok, great, now let's do this with integer weights.
We'll have to create some cool function that allows us to convert our float weights to integer weights

In [9]:
#magic function that applies an operation to every element in a numpy ndarray
def mod_ndarray(array, operation):
    if array.ndim == 1:
        return [operation(x) for x in array]
    else:
        return [mod_ndarray(x, operation) for x in array]

#our first operation on the weights
def mult_256(val):
    return int(val*256)

In [10]:
#let's convert all of our model2 weights to integers!
for i in range(0, len(model.layers)):
    try:
        model2.layers[i].set_weights([np.asarray(mod_ndarray(model.layers[i].get_weights()[0], mult_256), dtype=int), np.asarray(mod_ndarray(model.layers[i].get_weights()[1], mult_256), dtype=int)])
    except:
        continue

In [11]:
#using floating point inputs
num_correct = evaluate_model(model2, x_test, y_test)
print(num_correct, "predicted correctly")
print(num_correct/10000, "precent predicted correctly")

9879 predicted correctly
0.9879 precent predicted correctly


In [12]:
#this may look too good to be true, but it's real!
#here's proof that our weights are in fact integers:
#I'll print the bias terms for a 2d
print(model2.layers[2].get_config)
print(model2.layers[2].get_weights()[1])

<bound method Conv2D.get_config of <keras.layers.convolutional.Conv2D object at 0x123069dd8>>
[ -2.   4.   3.  -1.   4.   1.  -2.   6.  -4.   0.  -6.   0.  -3.  -1.  14.
   8.]


In [13]:
#using integer inputs
num_correct = evaluate_model(model2, np.asarray(mod_ndarray(x_test, mult_256), dtype=int), y_test)
print(num_correct, "predicted correctly")
print(num_correct/10000, "precent predicted correctly")

9878 predicted correctly
0.9878 precent predicted correctly


#### Ok looks good. Let's save these flattened weights.

In [14]:
fh = open("flat_weights.txt", "w")
for layer in model2.layers:
    wgt = layer.get_weights()
    if wgt:
        weights = wgt[0]
        bias = wgt[1]
        fh.write(layer.get_config()['name']+"\n")
        for s in weights.shape:
            fh.write(str(s) + " ")
        fh.write("\n")
        for weight in weights.flatten():
            fh.write(str(weight)+" ")
        fh.write("\n")
        for s in bias.shape:
            fh.write(str(s) + " ")
        fh.write("\n")
        for term in bias.flatten():
            fh.write(str(term)+" ")
        fh.write("\n\n")
fh.close() 