In [62]:
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow
import matplotlib
from tensorflow import keras
from tensorflow.keras.datasets import cifar10,mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import backend as K
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import sys
sys.path.insert(0, '..')
from binarization_utils import *
from model_architectures import get_model

from tensorflow.keras import activations

In [118]:
print(tf.__version__)
print(keras.__version__)

2.4.4
2.4.0


In [None]:
## this function computes the probabiblities of errors, given the parameters of the resistive memory and the threshold
from scipy.stats import norm


def computeP(n, mu_L, mu_H, var_L, var_H, threshold):
    n_dists = n + 1
    p01 = []
    p10 = []
    means = [m * mu_L + (n_dists - 1 - m) * mu_H for m in range(0, n_dists)]
    variances = [m**2 * var_L + (n_dists - 1 - m)**2 * var_H for m in range(0, n_dists)]
    for t in range(0,n_dists):
        if t < n_dists - 1 - t:
            p01 = p01 + [1 - norm.cdf(threshold, means[t], np.sqrt(variances[t]))]
        if t > n_dists - 1 - t:
            p10 = p10 + [norm.cdf(threshold, means[t], np.sqrt(variances[t]))]
    return p01, p10

In [64]:
dataset='MNIST'
Train=True
Evaluate=False
batch_size=100
epochs=200

In [65]:
if dataset=="MNIST":
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    # convert class vectors to binary class matrices
    X_train = X_train.reshape(-1,784)
    X_test = X_test.reshape(-1,784)
    use_generator=False
elif dataset=="CIFAR-10":
    use_generator=True
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()

In [66]:
X_train=X_train.astype(np.float32)
X_test=X_test.astype(np.float32)
Y_train = to_categorical(y_train, 10)
Y_test = to_categorical(y_test, 10)
X_train /= 255
X_test /= 255
X_train=2*X_train-1
X_test=2*X_test-1


print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')


X_train shape: (60000, 784)
60000 train samples
10000 test samples


In [67]:
# the following cell block defines the activation layer that simulates the errors

# prob stores the probability of a sign flipping (1 -> -1 or -1 -> 1)
prob = 0
    
class Nonideal_sign(Layer):
    def __init__(self, levels=1,**kwargs):
        self.levels=levels
        super(Nonideal_sign, self).__init__(**kwargs)
    def build(self, input_shape):
        ars=np.arange(self.levels)+1.0
        ars=ars[::-1]
        means=ars/np.sum(ars)
        self.means=[K.variable(m) for m in means]
        self._trainable_weights = self.means
    def call(self, x, mask=None):
        resid = x
        out_bin=0
        for l in range(self.levels):
            out=binarize(resid)*(K.abs(self.means[l])) *((2*tf.cast(tf.random.uniform(self.means[l].shape) > prob, tf.float32)) - 1)
            out_bin=out_bin+out
            resid=resid-out
        return out_bin
    
        # the following lines were an idea to implement flips using tensor operations
        '''positive_mask = tf.cast(out_bin > 0, tf.float32)
        negative_mask = tf.cast(out_bin < 0, tf.float32)
        
        positive_flips = tf.random.uniform(out_bin.shape) < p[1]
        positives = tf.math.multiply(positive_mask, (tf.cast(tf.random.uniform(out_bin.shape) < p[1], tf.float32) - 1))
        negatives = tf.math.multiply(negative_mask, (tf.cast(tf.random.uniform(out_bin.shape) < p[0], tf.float32) - 1))
        return'''

    def get_output_shape_for(self,input_shape):
        return input_shape
    def compute_output_shape(self,input_shape):
        return input_shape
    def set_means(self,X):
        means=np.zeros((self.levels))
        means[0]=1
        resid=np.clip(X,-1,1)
        approx=0
        for l in range(self.levels):
            m=np.mean(np.absolute(resid))
            out=np.sign(resid)*m
            approx=approx+out
            resid=resid-out
            means[l]=m
            err=np.mean((approx-np.clip(X,-1,1))**2)

        means=means/np.sum(means)
        sess=K.get_session()
        sess.run(self.means.assign(means))
    def get_config(self):

        config = super().get_config().copy()
        config.update({
            'levels': self.levels
        })
        return config


In [115]:
# binary dense layer with introduction of errors
class binary_dense_error(Layer):
    def __init__(self,n_in,n_out,p,**kwargs):
        self.n_in=n_in
        self.n_out=n_out
        self.p=p
        super(binary_dense_error,self).__init__(**kwargs)
    def build(self, input_shape):
        stdv=1/np.sqrt(self.n_in)
        w = np.random.normal(loc=0.0, scale=stdv,size=[self.n_in,self.n_out]).astype(np.float32)
        self.w=K.variable(w)
        self.gamma=K.variable(1.0)
        self._trainable_weights=[self.w,self.gamma]

    def call(self, x,mask=None):
        constraint_gamma=K.abs(self.gamma)#K.clip(self.gamma,0.01,10)
        self.clamped_w=constraint_gamma*binarize(self.w)*((2*tf.cast(tf.random.uniform(self.w.shape) > self.p, tf.float32)) - 1)
        self.out=K.dot(x,self.clamped_w)
        return self.out
    def  get_output_shape_for(self,input_shape):
        return (input_shape[0], self.n_out)
    def compute_output_shape(self,input_shape):
        return (input_shape[0], self.n_out)
    
    def get_config(self):

        config = super().get_config().copy()
        config.update({
            'n_in': self.n_in,
            'n_out': self.n_out,
            'p': self.p
        })
        return config
    
class binary_dense_errordot(Layer):
    def __init__(self,n_in,n_out,**kwargs):
        self.n_in=n_in
        self.n_out=n_out
        super(binary_dense_errordot,self).__init__(**kwargs)
    def build(self, input_shape):
        stdv=1/np.sqrt(self.n_in)
        w = np.random.normal(loc=0.0, scale=stdv,size=[self.n_in,self.n_out]).astype(np.float32)
        self.w=K.variable(w)
        self.gamma=K.variable(1.0)
        self._trainable_weights=[self.w,self.gamma]

    def call(self, x,mask=None):
        constraint_gamma=K.abs(self.gamma)#K.clip(self.gamma,0.01,10)
        self.clamped_w=constraint_gamma*binarize(self.w)
        self.out=K.dot(x,self.clamped_w)
        self.out=self.out*((2*tf.cast(tf.random.uniform(self.out.shape) > self.p, tf.float32)) - 1)
        return self.out
    def  get_output_shape_for(self,input_shape):
        return (input_shape[0], self.n_out)
    def compute_output_shape(self,input_shape):
        return (input_shape[0], self.n_out)
    
    def get_config(self):

        config = super().get_config().copy()
        config.update({
            'n_in': self.n_in,
            'n_out': self.n_out,
            'p': self.p
        })
        return config

In [112]:
# probability of weights flipping sign
prob = 0.01

# enter the model name
model_name = "newmodel"

if not(os.path.exists('models')):
    os.mkdir('models')
if not(os.path.exists('models/'+model_name)):
    os.mkdir('models/'+model_name)
resid_levels=1
sess=tf.compat.v1.keras.backend.get_session()

resid_levels=1
batch_norm_eps=1e-4
batch_norm_alpha=0.1#(this is same as momentum)

if dataset=="MNIST":
    model=Sequential()
    model.add(binary_dense_error(n_in=784,n_out=256,input_shape=[784],p=prob))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_dense_error(n_in=int(model.output.get_shape()[1]),n_out=256,p=prob))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_dense_error(n_in=int(model.output.get_shape()[1]),n_out=256,p=prob))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_dense_error(n_in=int(model.output.get_shape()[1]),n_out=256,p=prob))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_dense_error(n_in=int(model.output.get_shape()[1]),n_out=10,p=prob))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Activation('softmax'))
elif dataset=="CIFAR-10":
    model=Sequential()
    model.add(binary_conv(nfilters=64,ch_in=3,k=3,padding='valid',input_shape=[32,32,3]))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_conv(nfilters=64,ch_in=64,k=3,padding='valid'))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(MaxPooling2D(pool_size=(2, 2),strides=(2,2)))

    model.add(binary_conv(nfilters=128,ch_in=64,k=3,padding='valid'))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_conv(nfilters=128,ch_in=128,k=3,padding='valid'))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(MaxPooling2D(pool_size=(2, 2),strides=(2,2)))

    model.add(binary_conv(nfilters=256,ch_in=128,k=3,padding='valid'))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_conv(nfilters=256,ch_in=256,k=3,padding='valid'))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    #model.add(MaxPooling2D(pool_size=(2, 2),strides=(2,2)))

    model.add(my_flat())

    model.add(binary_dense(n_in=int(model.output.get_shape()[1]),n_out=512))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_dense(n_in=int(model.output.get_shape()[1]),n_out=512))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Nonideal_sign(levels=resid_levels))
    model.add(binary_dense(n_in=int(model.output.get_shape()[1]),n_out=10))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Activation(activations.softmax))
    
# the following is a workaround so that the model weights can be saved
# https://github.com/tensorflow/tensorflow/issues/46871
j = 0
for w in model.weights:
    w._handle_name = 'model_' + str(j) + w.name
    j = j + 1
    

In [108]:
#gather all binary dense and binary convolution layers:
binary_layers=[]
for l in model.layers:
    if isinstance(l,binary_dense) or isinstance(l,binary_conv):
        binary_layers.append(l)

#gather all residual binary activation layers:
resid_bin_layers=[]
for l in model.layers:
    if isinstance(l,Residual_sign):
        resid_bin_layers.append(l)
lr=0.01
opt = keras.optimizers.Adam(lr=lr,decay=1e-6)#SGD(lr=lr,momentum=0.9,decay=1e-5)
model.compile(loss='sparse_categorical_crossentropy',optimizer=opt,metrics=['accuracy'])


weights_path='models/'+model_name+'.h5'
cback=keras.callbacks.ModelCheckpoint(weights_path, monitor='val_accuracy', save_best_only=True)
if use_generator:
    if dataset=="CIFAR-10":
        horizontal_flip=True
    datagen = ImageDataGenerator(
        width_shift_range=0.15,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.15,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=horizontal_flip)  # randomly flip images
    if keras.__version__[0]=='2':
        history=model.fit_generator(datagen.flow(X_train, y_train,batch_size=batch_size),steps_per_epoch=X_train.shape[0]/batch_size,
        epochs=epochs,validation_data=(X_test, y_test),verbose=2,callbacks=[cback])
    if keras.__version__[0]=='1':
        history=model.fit_generator(datagen.flow(X_train, y_train,batch_size=batch_size), samples_per_epoch=X_train.shape[0], 
        epochs=epochs, verbose=2,validation_data=(X_test,y_test),callbacks=[cback])

else:
    if keras.__version__[0]=='2':
        history=model.fit(X_train, y_train,batch_size=batch_size,validation_data=(X_test, y_test), verbose=2,epochs=epochs,callbacks=[cback])
    if keras.__version__[0]=='1':
        history=model.fit(X_train, y_train,batch_size=batch_size,validation_data=(X_test, y_test), verbose=2,nb_epoch=epochs,callbacks=[cback])
dic={'hard':history.history}
foo=open('models/'+model_name+'.pkl','wb')
pickle.dump(dic,foo)
foo.close()

Epoch 1/200


KeyboardInterrupt: 

In [117]:
# model with binary dense and batchnorm layers
prob = 0.01

model=Sequential()
model.add(binary_dense_error(n_in=784,n_out=256,input_shape=[784],p=prob))
model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
model.add(binary_dense_error(n_in=int(model.output.get_shape()[1]),n_out=256,p=prob))
model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
model.add(binary_dense_error(n_in=int(model.output.get_shape()[1]),n_out=256,p=prob))
model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
model.add(binary_dense_error(n_in=int(model.output.get_shape()[1]),n_out=256,p=prob))
model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
model.add(Activation('softmax'))

j = 0
for w in model.weights:
    w._handle_name = 'model_' + str(j) + w.name
    j = j + 1

lr=0.01
opt = keras.optimizers.Adam(lr=lr,decay=1e-6)#SGD(lr=lr,momentum=0.9,decay=1e-5)
model.compile(loss='sparse_categorical_crossentropy',optimizer=opt,metrics=['accuracy'])


weights_path='models/'+model_name+'.h5'
cback=keras.callbacks.ModelCheckpoint(weights_path, monitor='val_accuracy', save_best_only=True)

history=model.fit(X_train, y_train,batch_size=batch_size,validation_data=(X_test, y_test), verbose=2,epochs=epochs,callbacks=[cback])

dic={'hard':history.history}
foo=open('models/'+model_name+'.pkl','wb')
pickle.dump(dic,foo)
foo.close()


ValueError: in user code:

    /var/folders/6n/905d1knd5gl5czcs5q6mx57c0000gn/T/ipykernel_50917/435656699.py:52 call  *
        self.out=self.out*((2*tf.cast(tf.random.uniform(self.out.shape) > self.p, tf.float32)) - 1)
    /Users/justinshao/opt/anaconda3/envs/rebnet/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper  **
        return target(*args, **kwargs)
    /Users/justinshao/opt/anaconda3/envs/rebnet/lib/python3.8/site-packages/tensorflow/python/ops/random_ops.py:289 random_uniform
        shape = tensor_util.shape_tensor(shape)
    /Users/justinshao/opt/anaconda3/envs/rebnet/lib/python3.8/site-packages/tensorflow/python/framework/tensor_util.py:1035 shape_tensor
        return ops.convert_to_tensor(shape, dtype=dtype, name="shape")
    /Users/justinshao/opt/anaconda3/envs/rebnet/lib/python3.8/site-packages/tensorflow/python/profiler/trace.py:163 wrapped
        return func(*args, **kwargs)
    /Users/justinshao/opt/anaconda3/envs/rebnet/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:1540 convert_to_tensor
        ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
    /Users/justinshao/opt/anaconda3/envs/rebnet/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py:355 _tensor_shape_tensor_conversion_function
        raise ValueError(

    ValueError: Cannot convert a partially known TensorShape to a Tensor: (None, 256)
