In [1]:
import tensorflow as tf
import numpy as np

from tensorflow.keras import Model
from tensorflow.keras.layers import Conv2D, MaxPool2D, Input, ReLU, Flatten, Dense, Activation, Dropout, Softmax, GlobalAveragePooling2D

In [6]:
"""
quantize numpy array
return:
q_x: quantized array
fp_x: reverse quantized array to float
"""
def quantize_array(x, bit_depth=16):
    min_x = x.min() 
    max_x = x.max()

    #find number of integer bits to represent this range
    int_bits = int(np.ceil(np.log2(max(abs(min_x),abs(max_x)))))
    
    if int_bits < 0: int_bits = 0

    frac_bits = bit_depth - 1 - int_bits #remaining bits are fractional bits (1-bit for sign)

    #floating point weights are scaled and rounded to [-128,127], which are used in 
    #the fixed-point operations on the actual hardware (i.e., microcontroller)
    q_x = np.round(x*(2**frac_bits))

    #To quantify the impact of quantized weights, scale them back to
    # original range to run inference using quantized weights
    fp_x = q_x/(2**frac_bits)
    
    return q_x, fp_x, int_bits, frac_bits


"""
compute frac_bits based on max and min list
used for activation quantization
"""
def min_max_quantize(max_list, min_list, bit_depth=16):
    frac_list = []
    for i in range(len(max_list)):
        int_bits = int(np.ceil(np.log2(max(abs(min_list[i]),abs(max_list[i])))))
        if int_bits < 0: int_bits = 0
#         if int_bits > bit_depth-1: int_bits = bit_depth-1    
        
        frac_bits = bit_depth - 1 - int_bits #remaining bits are fractional bits (1-bit for sign)
        frac_list.append(frac_bits)
    return frac_list

In [19]:
# original model
def CNN():
    """Build the reference CNN: three conv stages, a 64-unit dense layer and a 10-way softmax.

    Returns:
        A Keras ``Model`` taking a (28, 28, 1) input.
    """
    inputs = Input(shape=(28,28,1))

    # First two stages: 3x3 'same' convolution -> ReLU -> 2x2 max pooling.
    x = inputs
    for filters, layer_name in ((16, 'conv1'), (32, 'conv2')):
        x = Conv2D(filters, 3, padding='same', name=layer_name)(x)
        x = ReLU()(x)
        x = MaxPool2D(2)(x)

    # Third stage is reduced with global average pooling instead of max pooling.
    x = Conv2D(64, 3, padding='same', name='conv3')(x)
    x = ReLU()(x)
    x = GlobalAveragePooling2D()(x)

    # Classifier head.
    x = Flatten()(x)
    x = Dense(64)(x)
    x = ReLU()(x)

    logits = Dense(10)(x)
    outputs = Softmax()(logits)

    return Model(inputs, outputs)

# define a new model with shifting
def CNN_Q(shift=True, bit=16, shift_list=(0, 0, 0, 0, 0)):
    """Build the CNN with an optional requantization stage after every weighted layer.

    Args:
        shift: when True, the output of conv1, conv2, conv3, fc1 and fc2 is
            divided by ``2**shift_list[k]``, floored and clipped to the signed
            ``bit``-bit range, and the model returns the requantized fc2
            values. When False no such stage is inserted and the model ends
            in a Softmax.
        bit: word length that defines the clipping range
            ``[-2**(bit-1), 2**(bit-1) - 1]``.
        shift_list: five per-layer shift amounts, in the order conv1, conv2,
            conv3, fc1, fc2. Only read when ``shift`` is True.

    Returns:
        A Keras ``Model`` taking a (28, 28, 1) input and producing 10 values.
    """
    if shift:
        assert len(shift_list) == 5

    def requantize(t, idx):
        # floor(t / 2**s) followed by saturation to the signed `bit`-bit range.
        if not shift:
            return t
        # Use a Python float divisor: `2**s` raises for a negative NumPy
        # integer, while `2.0 ** int(s)` is defined for any integer shift.
        t = t / (2.0 ** int(shift_list[idx]))
        t = tf.floor(t)
        return tf.clip_by_value(t, -2**(bit-1), 2**(bit-1)-1)

    inputs = Input(shape=(28,28,1), name='input')

    x = Conv2D(16, 3, padding='same', name='conv1')(inputs)
    x = requantize(x, 0)
    x = ReLU()(x)

    x = MaxPool2D(2)(x)

    x = Conv2D(32, 3, padding='same', name='conv2')(x)
    x = requantize(x, 1)
    x = ReLU()(x)

    x = MaxPool2D(2)(x)

    x = Conv2D(64, 3, padding='same', name='conv3')(x)
    x = requantize(x, 2)
    x = ReLU()(x)

    x = GlobalAveragePooling2D()(x)

    x = Flatten()(x)
    x = Dense(64, name='fc1')(x)
    x = requantize(x, 3)
    x = ReLU()(x)

    x = Dense(10, name='fc2')(x)
    x = requantize(x, 4)

    # The shifted model exposes the requantized fc2 values directly; the
    # float model ends in a softmax.
    if shift:
        outputs = x
    else:
        outputs = Softmax()(x)

    return Model(inputs, outputs)

def evaluation(model, inputs, target, input_shift=None):
    """Measure top-1 accuracy of ``model``, one sample at a time.

    Args:
        model: callable that maps a batch ``inputs[i:i+1]`` to a batch of
            scores; the first row of its result is used.
        inputs: array of samples, indexed along the first axis.
        target: array whose row ``i`` holds the scores/one-hot label of
            sample ``i``; the label is its argmax.
        input_shift: when not None, inputs are multiplied by
            ``2**input_shift`` and rounded before being fed to the model.

    Returns:
        The fraction of samples whose predicted argmax equals the label
        argmax. Returns 0.0 when ``target`` is empty. The value is also
        printed.
    """
    num = target.shape[0]

    if input_shift is not None:
        inputs = np.round(inputs*(2**input_shift))

    cnt = 0
    for i in range(num):
        predict = np.argmax(model(inputs[i:i+1])[0])
        label = np.argmax(target[i])
        if predict == label:
            cnt += 1

    # Guard the empty case instead of failing with ZeroDivisionError.
    acc = cnt / num if num else 0.0
    print("accuracy: ", acc)
    return acc

In [3]:
# Load the test set and rescale it with 2*x - 1.
# NOTE(review): presumably maps [0, 1] pixels to [-1, 1] -- confirm against
# how the .npy files were produced.
x_test = np.load('./dataset/x_test.npy')
y_test = np.load('./dataset/y_test.npy')
x_test = 2*x_test - 1

# Float baseline: CNN_Q(shift=False) omits the shift/floor/clip stages and
# ends in a Softmax. evaluation() prints its top-1 accuracy.
model = CNN_Q(shift=False)
model.summary()
model.load_weights("./model/model.h5")
evaluation(model, x_test, y_test)

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 28, 28, 16)        160       
_________________________________________________________________
re_lu (ReLU)                 (None, 28, 28, 16)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 16)        0         
_________________________________________________________________
conv2 (Conv2D)               (None, 14, 14, 32)        4640      
_________________________________________________________________
re_lu_1 (ReLU)               (None, 14, 14, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 32)         

In [5]:
# Names of the layers that hold weights, in model order.
layer_names = [
    layer.name for layer in model.layers if len(layer.get_weights()) != 0
]
print(layer_names)
layer_num = len(layer_names)

# Probe model: the network input followed by the output of every weighted layer.
intermediate_output = [model.get_layer("input").output]
intermediate_output.extend(model.get_layer(name).output for name in layer_names)

intermediate_layer_model = Model(inputs = model.input, outputs = intermediate_output)

['conv1', 'conv2', 'conv3', 'fc1', 'fc2']


In [9]:
length = x_test.shape[0]

# Running per-tensor extrema over the whole test set; entry j corresponds to
# output j of intermediate_layer_model.
max_list = []
min_list = []

for i in range(length):
    pred = intermediate_layer_model(x_test[i: i+1])
    for j, tensor in enumerate(pred):
        pred_np = tensor.numpy()
        max_np, min_np = pred_np.max(), pred_np.min()

        # The lists are still being filled while processing the first sample.
        if len(max_list) != len(pred):
            max_list.append(max_np)
            min_list.append(min_np)
            continue

        max_list[j] = max(max_list[j], max_np)
        min_list[j] = min(min_list[j], min_np)

In [15]:
frac_bits_activation_list = min_max_quantize(max_list, min_list, bit_depth=16)

# Entry 0 describes the network input; entries 1.. follow layer_names.
print("input: ", frac_bits_activation_list[0])
for idx, name in enumerate(layer_names, start=1):
    print(name, ":  ", frac_bits_activation_list[idx])

input:  15
conv1 :   12
conv2 :   9
conv3 :   7
fc1 :   11
fc2 :   9


In [16]:
bit_depth = 16

# Per-layer results, in layer_names order.
frac_bits_weight_list = []
frac_bits_bias_list = []
q_weight_list = []
q_bias_list = []

for name in layer_names:
    # Kernel and bias are quantized independently, each with its own Q format.
    weight, bias = model.get_layer(name).get_weights()
    q_weight, f_weight, int_bits_weight, frac_bits_weight = quantize_array(weight, bit_depth=bit_depth)
    q_bias, f_bias, int_bits_bias, frac_bits_bias = quantize_array(bias, bit_depth=bit_depth)

    q_weight_list.append(q_weight)
    q_bias_list.append(q_bias)
    frac_bits_weight_list.append(frac_bits_weight)
    frac_bits_bias_list.append(frac_bits_bias)

    print("**********")
    print("{} - - weight - - Q{}.{}".format(name, int_bits_weight, frac_bits_weight))
    print("{} - - bias - - Q{}.{}".format(name, int_bits_bias, frac_bits_bias))

**********
conv1 - - weight - - Q0.15
conv1 - - bias - - Q0.15
**********
conv2 - - weight - - Q1.14
conv2 - - bias - - Q0.15
**********
conv3 - - weight - - Q1.14
conv3 - - bias - - Q0.15
**********
fc1 - - weight - - Q1.14
fc1 - - bias - - Q0.15
**********
fc2 - - weight - - Q1.14
fc2 - - bias - - Q0.15


In [17]:
# Fractional bits of each layer's weights, bias, input activation and output activation.
frac_w = np.array(frac_bits_weight_list)
frac_b = np.array(frac_bits_bias_list)
frac_in = np.array(frac_bits_activation_list[0:-1])
frac_out = np.array(frac_bits_activation_list[1:])

# A weight*activation product carries frac_w + frac_in fractional bits:
# the bias shift lifts the bias to that position, and the output shift
# brings the sum down to the output activation format.
bias_shift_list = frac_w + frac_in - frac_b
output_shift_list = frac_w + frac_in - frac_out
print(bias_shift_list)
print(output_shift_list)

[15 11  8  6 10]
[18 17 16 10 16]


In [18]:
# Build the shifted graph: each weighted layer's output is divided by
# 2**output_shift_list[k], floored and clipped (bit keeps its default of 16).
q_model = CNN_Q(shift_list=output_shift_list)

# Load the quantized parameters. Each bias is multiplied by 2**bias_shift so
# that it has frac_w + frac_in fractional bits, matching the products it is
# added to.
for i in range(layer_num):
    q_model.get_layer(layer_names[i]).set_weights([q_weight_list[i], q_bias_list[i]*(2**bias_shift_list[i])])

In [20]:
# Score the quantized model; evaluation() scales the inputs by
# 2**frac_bits_activation_list[0] and rounds them before inference.
evaluation(q_model, x_test, y_test, input_shift=frac_bits_activation_list[0])

accuracy:  0.9912


In [21]:
# Display the parameters loaded into q_model for a visual check.
# NOTE(review): this prints every array in full, so the output is very long.
q_model.get_weights()

[array([[[[  3805.,  -5958., -19357., -25462.,  17702.,  -4173.,
            -7702.,  22197.,   5664., -10594.,   6790.,  -8613.,
            -5282.,  -8296.,  15614.,    181.]],
 
         [[ -4213.,  14597.,  -4920.,  12246.,  10889., -26768.,
            13048.,  18280.,   8663.,  -8045.,  -8811.,    445.,
            -5404.,  -8906.,  16863.,  15067.]],
 
         [[-23831.,   7545.,  22822.,  10542.,   9010., -20788.,
             3047.,  11497.,   6381.,  -6576., -12929.,  15176.,
            -3646.,  -8591.,  11336.,  17138.]]],
 
 
        [[[  9568., -16465.,  -7097.,  12701.,  -7625.,  25360.,
           -20016.,  17745.,  15052.,   -227.,   9371.,  -5114.,
            -4341.,  -2947.,   6308.,   1838.]],
 
         [[ 22940.,  18937.,   1137.,  20921.,  -6855.,  -1374.,
             3727.,  17453.,  17579.,  -5755.,    697.,  -6362.,
             -881.,  -2954.,  10200., -13213.]],
 
         [[  3554.,  -6191.,  14487.,  -9557.,  -7431.,  -1407.,
            14149.,  -1881.

### save weights

In [23]:
# 4-D kernels get axis 2 moved to the front; all other weight arrays get
# axis 1 moved to the front.
reordered_weight_list = [
    np.moveaxis(w, 2, 0) if w.ndim == 4 else np.moveaxis(w, 1, 0)
    for w in q_weight_list
]

# Flatten for export: column-major ('F') order for 4-D kernels, default
# row-major order otherwise.
reordered_weight_flatten_list = [
    w.flatten('F') if w.ndim == 4 else w.flatten()
    for w in reordered_weight_list
]

In [24]:
f = open("./weight.h", "w")
for i in range(len(q_weight_list)):
    print("********" + layer_names[i] + "********")
    print("Bias Shift: ", bias_shift_list[i])
    print("Output Shift:  ", output_shift_list[i])
    print("Bias:  ", q_bias_list[i].astype(np.int16))
    print("Weight:  ", reordered_weight_flatten_list[i].astype(np.int16))
    
    f.write("#define " + layer_names[i].upper()+ "_WT {" + str(reordered_weight_flatten_list[i].astype(np.int16).tolist())[1:-1] + "}\n")
    f.write("#define " + layer_names[i].upper() + "_BIAS {" + str(q_bias_list[i].astype(np.int16).tolist())[1:-1] + "}\n")
    f.write("#define " + layer_names[i].upper() + "_BIAS_LSHIFT " + str(bias_shift_list[i]) + "\n")
    f.write("#define " + layer_names[i].upper() + "_OUT_RSHIFT " + str(output_shift_list[i]) + "\n\n")
    
f.close()

********conv1********
Bias Shift:  15
Output Shift:   18
Bias:   [   34  8424  7169  7528 -4077  6493  3365 11945  4238 -4259  -107 -1954
 -4130    66  8828 -1501]
Weight:   [  3805   9568 -16052  -4213  22940   7086 -23831   3554   4424  -5958
 -16465   1733  14597  18937  20895   7545  -6191  -1796 -19357  -7097
   4178  -4920   1137  12309  22822  14487   7658 -25462  12701  15027
  12246  20921   4332  10542  -9557 -29017  17702  -7625  -7271  10889
  -6855  -6480   9010  -7431  -4163  -4173  25360  12017 -26768  -1374
  14156 -20788  -1407  11817  -7702 -20016  -9221  13048   3727   1673
   3047  14149   5913  22197  17745  19735  18280  17453   5465  11497
  -1881 -28805   5664  15052 -24021   8663  17579 -26740   6381  13122
  -6729 -10594   -227   9584  -8045  -5755  11803  -6576  -2092   4470
   6790   9371  13784  -8811    697   7296 -12929 -14443   -934  -8613
  -5114  -9823    445  -6362  -9560  15176   7532  18558  -5282  -4341
  -4869  -5404   -881   9515  -3646   5315   

In [26]:
# Probe model on the quantized network: the input followed by the output of
# every weighted layer (taken before the shift/floor/clip stage).
intermediate_output = [q_model.get_layer('input').output]
intermediate_output.extend(q_model.get_layer(name).output for name in layer_names)

q_intermediate_layer_model = Model(inputs = q_model.input, outputs = intermediate_output)

# Quantize the first test sample. The result is later exported as int16, so
# saturate it to that range first: a value scaled to exactly 2**15 is not
# representable, and the reference outputs must be computed from the same
# input that is written to the header.
int16_info = np.iinfo(np.int16)
sample_input = np.clip(
    np.round(x_test[0: 1]*(2**frac_bits_activation_list[0])),
    int16_info.min,
    int16_info.max,
)
sample_output = q_intermediate_layer_model.predict(sample_input)

In [27]:
f = open("./sample_input_output.h", "w")
temp = np.squeeze(sample_output[0], axis=0)
temp = temp.transpose(2, 0, 1)
temp = temp.flatten("F")
f.write("#define " + "INPUT_DATA {" + str(temp.astype(np.int16).tolist())[1:-1] + "}\n")

for i in range(layer_num):
    temp = np.squeeze(sample_output[i+1], axis=0)
    print(temp.shape)
    if len(temp.shape) == 3:
        temp = temp.transpose(2, 0, 1)
        temp = temp.flatten("F")
    
    temp = temp.astype(np.int32) >> output_shift_list[i]
    f.write("#define " + layer_names[i].upper()+ " {" + str(temp.tolist())[1:-1] + "}\n") 

f.close()

(28, 28, 16)
(14, 14, 32)
(7, 7, 64)
(64,)
(10,)
