In [1]:
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow
import matplotlib
from tensorflow import keras
from tensorflow.keras.datasets import cifar10,mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import backend as K
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import sys
sys.path.insert(0, '..')
from binarization_utils import *
from model_architectures import get_model

from tensorflow.keras import activations

In [2]:
dataset='CIFAR-10'
Train=True
Evaluate=False
batch_size=100
epochs=200

In [3]:
if dataset=="MNIST":
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    # convert class vectors to binary class matrices
    X_train = X_train.reshape(-1,784)
    X_test = X_test.reshape(-1,784)
    use_generator=False
elif dataset=="CIFAR-10":
    use_generator=True
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()

In [5]:
X_train=X_train.astype(np.float32)
X_test=X_test.astype(np.float32)
Y_train = to_categorical(y_train, 10)
Y_test = to_categorical(y_test, 10)
X_train /= 255
X_test /= 255
X_train=2*X_train-1
X_test=2*X_test-1


print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')


X_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


In [6]:
# the following cell block defines the activation layer that simulates the errors

# prob stores the probability of a sign flipping (1 -> -1 or -1 -> 1)
prob = 0
    
class Nonideal_sign(Layer):
    def __init__(self, levels=1,**kwargs):
        self.levels=levels
        super(Nonideal_sign, self).__init__(**kwargs)
    def build(self, input_shape):
        ars=np.arange(self.levels)+1.0
        ars=ars[::-1]
        means=ars/np.sum(ars)
        self.means=[K.variable(m) for m in means]
        self._trainable_weights = self.means
    def call(self, x, mask=None):
        resid = x
        out_bin=0
        for l in range(self.levels):
            out=binarize(resid)*(K.abs(self.means[l]))*((2*tf.cast(tf.random.uniform(self.means[l].shape) > prob, tf.float32)) - 1)
            out_bin=out_bin+out
            resid=resid-out
        return out_bin
    
        # the following lines were an idea to implement flips using tensor operations
        '''positive_mask = tf.cast(out_bin > 0, tf.float32)
        negative_mask = tf.cast(out_bin < 0, tf.float32)
        
        positive_flips = tf.random.uniform(out_bin.shape) < p[1]
        positives = tf.math.multiply(positive_mask, (tf.cast(tf.random.uniform(out_bin.shape) < p[1], tf.float32) - 1))
        negatives = tf.math.multiply(negative_mask, (tf.cast(tf.random.uniform(out_bin.shape) < p[0], tf.float32) - 1))
        return'''

    def get_output_shape_for(self,input_shape):
        return input_shape
    def compute_output_shape(self,input_shape):
        return input_shape
    def set_means(self,X):
        means=np.zeros((self.levels))
        means[0]=1
        resid=np.clip(X,-1,1)
        approx=0
        for l in range(self.levels):
            m=np.mean(np.absolute(resid))
            out=np.sign(resid)*m
            approx=approx+out
            resid=resid-out
            means[l]=m
            err=np.mean((approx-np.clip(X,-1,1))**2)

        means=means/np.sum(means)
        sess=K.get_session()
        sess.run(self.means.assign(means))

In [10]:
# probability of weights flipping sign
prob = 0

# enter the model name
model_name = "newmodel"

if not(os.path.exists('models')):
    os.mkdir('models')
if not(os.path.exists('models/'+model_name)):
    os.mkdir('models/'+model_name)
resid_levels=1
sess=tf.compat.v1.keras.backend.get_session()

resid_levels=1
batch_norm_eps=1e-4
batch_norm_alpha=0.1#(this is same as momentum)

if dataset=="MNIST":
    model=Sequential()
    model.add(binary_dense(n_in=784,n_out=256,input_shape=[784]))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_dense(n_in=int(model.output.get_shape()[1]),n_out=256))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_dense(n_in=int(model.output.get_shape()[1]),n_out=256))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_dense(n_in=int(model.output.get_shape()[1]),n_out=256))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_dense(n_in=int(model.output.get_shape()[1]),n_out=10))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Activation('softmax'))
elif dataset=="CIFAR-10":
    model=Sequential()
    model.add(binary_conv(nfilters=64,ch_in=3,k=3,padding='valid',input_shape=[32,32,3]))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_conv(nfilters=64,ch_in=64,k=3,padding='valid'))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(MaxPooling2D(pool_size=(2, 2),strides=(2,2)))

    model.add(binary_conv(nfilters=128,ch_in=64,k=3,padding='valid'))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_conv(nfilters=128,ch_in=128,k=3,padding='valid'))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(MaxPooling2D(pool_size=(2, 2),strides=(2,2)))

    model.add(binary_conv(nfilters=256,ch_in=128,k=3,padding='valid'))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_conv(nfilters=256,ch_in=256,k=3,padding='valid'))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    #model.add(MaxPooling2D(pool_size=(2, 2),strides=(2,2)))

    model.add(my_flat())

    model.add(binary_dense(n_in=int(model.output.get_shape()[1]),n_out=512))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_dense(n_in=int(model.output.get_shape()[1]),n_out=512))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_dense(n_in=int(model.output.get_shape()[1]),n_out=10))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Activation(activations.softmax))

In [None]:
#gather all binary dense and binary convolution layers:
binary_layers=[]
for l in model.layers:
    if isinstance(l,binary_dense) or isinstance(l,binary_conv):
        binary_layers.append(l)

#gather all residual binary activation layers:
resid_bin_layers=[]
for l in model.layers:
    if isinstance(l,Residual_sign):
        resid_bin_layers.append(l)
lr=0.01
opt = keras.optimizers.Adam(lr=lr,decay=1e-6)#SGD(lr=lr,momentum=0.9,decay=1e-5)
model.compile(loss='sparse_categorical_crossentropy',optimizer=opt,metrics=['accuracy'])


weights_path='models/'+model_name+'.h5'
cback=keras.callbacks.ModelCheckpoint(weights_path, monitor='val_acc', save_best_only=True)
if use_generator:
    if dataset=="CIFAR-10":
        horizontal_flip=True
    datagen = ImageDataGenerator(
        width_shift_range=0.15,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.15,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=horizontal_flip)  # randomly flip images
    if keras.__version__[0]=='2':
        history=model.fit_generator(datagen.flow(X_train, y_train,batch_size=batch_size),steps_per_epoch=X_train.shape[0]/batch_size,
        epochs=epochs,validation_data=(X_test, y_test),verbose=2,callbacks=[cback])
    if keras.__version__[0]=='1':
        history=model.fit_generator(datagen.flow(X_train, y_train,batch_size=batch_size), samples_per_epoch=X_train.shape[0], 
        epochs=epochs, verbose=2,validation_data=(X_test,y_test),callbacks=[cback])

else:
    if keras.__version__[0]=='2':
        history=model.fit(X_train, y_train,batch_size=batch_size,validation_data=(X_test, y_test), verbose=2,epochs=epochs,callbacks=[cback])
    if keras.__version__[0]=='1':
        history=model.fit(X_train, y_train,batch_size=batch_size,validation_data=(X_test, y_test), verbose=2,nb_epoch=epochs,callbacks=[cback])
dic={'hard':history.history}
foo=open('models/'+model_name+'.pkl','wb')
pickle.dump(dic,foo)
foo.close()

2021-11-06 02:26:41.442630: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)


Epoch 1/200
500/500 - 216s - loss: 1.8124 - accuracy: 0.0929 - val_loss: 1.6679 - val_accuracy: 0.1238
Epoch 2/200
500/500 - 218s - loss: 1.5256 - accuracy: 0.0943 - val_loss: 1.3532 - val_accuracy: 0.0896
Epoch 3/200
500/500 - 236s - loss: 1.3786 - accuracy: 0.0977 - val_loss: 1.2761 - val_accuracy: 0.0813
Epoch 4/200
500/500 - 217s - loss: 1.2856 - accuracy: 0.1004 - val_loss: 1.2691 - val_accuracy: 0.1194
Epoch 5/200
500/500 - 213s - loss: 1.2088 - accuracy: 0.1014 - val_loss: 1.1942 - val_accuracy: 0.1091
Epoch 6/200
500/500 - 212s - loss: 1.1563 - accuracy: 0.1007 - val_loss: 1.1552 - val_accuracy: 0.1003
Epoch 7/200
500/500 - 215s - loss: 1.0960 - accuracy: 0.1006 - val_loss: 0.9865 - val_accuracy: 0.1024
Epoch 8/200
500/500 - 215s - loss: 1.0578 - accuracy: 0.1003 - val_loss: 1.0711 - val_accuracy: 0.1121
Epoch 9/200
500/500 - 213s - loss: 1.0244 - accuracy: 0.1000 - val_loss: 0.9530 - val_accuracy: 0.1097
Epoch 10/200
500/500 - 214s - loss: 0.9999 - accuracy: 0.1020 - val_loss:

500/500 - 214s - loss: 0.7505 - accuracy: 0.1008 - val_loss: 0.7604 - val_accuracy: 0.1062
Epoch 47/200
500/500 - 214s - loss: 0.7475 - accuracy: 0.1008 - val_loss: 0.7456 - val_accuracy: 0.1099
Epoch 48/200
500/500 - 214s - loss: 0.7463 - accuracy: 0.1006 - val_loss: 0.7759 - val_accuracy: 0.0850
Epoch 49/200
500/500 - 214s - loss: 0.7486 - accuracy: 0.1018 - val_loss: 0.7545 - val_accuracy: 0.0977
Epoch 50/200
500/500 - 214s - loss: 0.7436 - accuracy: 0.1008 - val_loss: 0.7808 - val_accuracy: 0.0875
Epoch 51/200
500/500 - 215s - loss: 0.7375 - accuracy: 0.1013 - val_loss: 0.7507 - val_accuracy: 0.1170
Epoch 52/200
500/500 - 214s - loss: 0.7372 - accuracy: 0.1011 - val_loss: 0.7104 - val_accuracy: 0.1171
Epoch 53/200
500/500 - 213s - loss: 0.7361 - accuracy: 0.1016 - val_loss: 0.7160 - val_accuracy: 0.0967
Epoch 54/200
500/500 - 213s - loss: 0.7378 - accuracy: 0.1023 - val_loss: 0.7469 - val_accuracy: 0.0919
Epoch 55/200
500/500 - 213s - loss: 0.7361 - accuracy: 0.1006 - val_loss: 0.6