In [7]:
import numpy as np
import scipy as scp
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split

#Keras Imports
from tensorflow import keras
from tensorflow.keras import backend as K

from tensorflow.keras.datasets import cifar10, mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Layer, Activation, Flatten, BatchNormalization
from tensorflow.keras.models import Sequential, Model,load_model
from tensorflow.keras import activations

import os
import sys


#Utility
# from utils import binarize
sys.path.insert(0, '..')
from binarization_utils import *
# from model_architectures import get_model

#Probability
import tensorflow_probability as tfp


In [8]:
# Report the TensorFlow and Keras versions this notebook is running against.
for _module in (tf, keras):
    print(_module.__version__)

2.6.0
2.6.0


In [9]:
# Name of the dataset this notebook works with.
dataset = "MNIST"
# Train=True
# Evaluate=False

In [10]:
# Load MNIST and flatten every image into a 784-long vector.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)

# Restructure data: cast to float32, divide by 255 and map x -> 2x - 1,
# so pixel values in [0, 255] end up in [-1, 1].
X_train = 2 * (X_train.astype(np.float32) / 255) - 1
X_test = 2 * (X_test.astype(np.float32) / 255) - 1

# Labels are kept as integer class ids; the one-hot conversion is left disabled.
# Y_train = to_categorical(y_train, 10)
# Y_test = to_categorical(y_test, 10)

# Train/validation split. The seed is fixed so that the split (and therefore
# every validation metric derived from it) is identical on every run.
SPLIT_SEED = 42
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.10, random_state=SPLIT_SEED)

print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(y_train.shape[0], 'train samples')

print(X_val.shape[0], 'val samples')
print(y_val.shape[0], 'val samples')

print(X_test.shape[0], 'test samples')
print(y_test.shape[0], 'test samples')


X_train shape: (54000, 784)
54000 train samples
54000 train samples
6000 val samples
6000 val samples
10000 test samples
10000 test samples


In [22]:

# def computeP(n,k, mu_L, mu_H, var_L, var_H, threshold):
#     """
#     Computes probability of error for a specific input with final inner product k
    
#     n : Length of vector
#     k : input
#     mu : Mean
#     var : Variance
    
#     _h: High state
#     _l: low state
#     """

#     p01 = []
#     p10 = []
#     means = [m * mu_L + (n_dists - 1 - m) * mu_H for m in range(0, n_dists)]
#     variances = [m**2 * var_L + (n_dists - 1 - m)**2 * var_H for m in range(0, n_dists)]
#     for t in range(0,n_dists):
#         if t < n_dists - 1 - t:
#             p01 = p01 + [1 - norm.cdf(threshold, means[t], np.sqrt(variances[t]))]
#         if t > n_dists - 1 - t:
#             p10 = p10 + [norm.cdf(threshold, means[t], np.sqrt(variances[t]))]
#     return p01, p10


class Sign_layer(Layer):
    """Residual multi-level binarization activation.

    The input is approximated by a sum of ``levels`` binarized terms. Term
    ``l`` is ``binarize(residual) * |means[l]|`` where the residual is the
    input minus all terms emitted so far. ``binarize`` is imported from
    ``binarization_utils`` (not visible here; presumably a sign function with
    a straight-through gradient -- TODO confirm).

    Parameters
    ----------
    levels : int
        Number of binarization levels, i.e. number of trainable scale factors.
    **kwargs
        Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, levels=1,**kwargs):
        self.levels=levels
        super(Sign_layer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create one trainable scale per level.

        The initial scales are ``levels, levels-1, ..., 1`` normalised so that
        they sum to one.
        """
        ars=np.arange(self.levels)+1.0
        ars=ars[::-1]
        means=ars/np.sum(ars)
        self.means=[K.variable(m) for m in means]
        # Register the scale variables as the layer's trainable weights.
        self._trainable_weights = self.means

    def call(self, x, mask=None):
        """Return the multi-level binarized approximation of ``x``."""
        resid = x
        out_bin=0
        for l in range(self.levels):
            # Scale magnitude only: the sign of the output comes from binarize().
            out=binarize(resid)*(K.abs(self.means[l]))
            out_bin=out_bin+out
            resid=resid-out

        # NOTE: the original file carried an unfinished (and unreachable) idea
        # for implementing flips with tensor operations at this point. It
        # referenced a name `p` that is not defined in this layer:
        #   positive_mask = tf.cast(out_bin > 0, tf.float32)
        #   negative_mask = tf.cast(out_bin < 0, tf.float32)
        #   positive_flips = tf.random.uniform(out_bin.shape) < p[1]
        #   positives = tf.math.multiply(positive_mask, (tf.cast(tf.random.uniform(out_bin.shape) < p[1], tf.float32) - 1))
        #   negatives = tf.math.multiply(negative_mask, (tf.cast(tf.random.uniform(out_bin.shape) < p[0], tf.float32) - 1))
        return out_bin

    def get_output_shape_for(self,input_shape):
        """Output shape equals the input shape (element-wise activation)."""
        return input_shape

    def compute_output_shape(self,input_shape):
        """Output shape equals the input shape (element-wise activation)."""
        return input_shape

    def set_means(self,X):
        """Set the level scales from sample data ``X``.

        ``X`` is clipped to [-1, 1]; the scale of each level is the mean
        absolute value of the current residual, and the scales are finally
        normalised to sum to one and written into the layer's variables.

        The layer must already be built, because the variables are created in
        ``build``.

        Raises
        ------
        ValueError
            If every computed scale is zero (e.g. ``X`` is all zeros), since
            the normalisation would otherwise produce NaN scales.
        """
        means=np.zeros((self.levels))
        resid=np.clip(X,-1,1)
        for l in range(self.levels):
            m=np.mean(np.absolute(resid))
            resid=resid-np.sign(resid)*m
            means[l]=m

        total=np.sum(means)
        if total == 0:
            raise ValueError("set_means: all level scales are zero, cannot normalise")
        means=means/total

        # self.means is a list of variables, so each one is assigned separately.
        # K.set_value works in TF2 eager mode (no session handling required).
        for variable, value in zip(self.means, means):
            K.set_value(variable, value)

    def get_config(self):
        """Return the layer configuration, including ``levels``."""
        config = super().get_config().copy()
        config.update({
            'levels': self.levels
        })
        return config

class binary_dense_error_var(Layer):
    """Dense layer with binarized weights and an optional error model.

    The layer is designed to be followed by batch normalization; the
    binarization of its output is done afterwards by a separate layer.
    ``binarize`` is imported from ``binarization_utils`` (not visible here;
    presumably it maps values to +/-1 -- TODO confirm).

    Parameters
    ----------
    n_in : int
        Number of input features (rows of the weight matrix).
    n_out : int
        Number of output units (columns of the weight matrix).
    error_type : str
        One of ``"NoError"``, ``"Flip"`` or ``"InnerDot"``:

        * ``"NoError"`` -- plain product of the input with the binarized weights.
        * ``"Flip"``    -- on every call, each binarized weight is multiplied
          by -1 wherever a fresh uniform sample is not greater than ``p``.
        * ``"InnerDot"`` -- the popcount of the binarized product is replaced
          by a sample from a normal distribution whose mean and variance
          depend on the popcount.
    error_dict : dict
        Parameters of the error model: ``{"p": ...}`` for ``"Flip"``;
        ``{"ul": ..., "uh": ..., "sigl": ..., "sigh": ...}`` for ``"InnerDot"``
        (means ``ul``/``uh`` and standard deviations ``sigl``/``sigh`` of the
        per-position contribution for the "low" and "high" state). Ignored for
        ``"NoError"``.
    **kwargs
        Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self,n_in,n_out,error_type,error_dict,**kwargs):
        self.n_in=n_in
        self.n_out=n_out
        self.error_type=error_type
        self.error_dict= error_dict
        assert self.error_type in ["NoError", "Flip","InnerDot"], "error_type of {0} is invalid".format(error_type)
        super(binary_dense_error_var,self).__init__(**kwargs)

        if(error_type == "InnerDot"):
            max_pop = self.n_in
            self.uh = self.error_dict["uh"]
            self.ul = self.error_dict["ul"]
            self.sigl = self.error_dict["sigl"]
            self.sigh = self.error_dict["sigh"]

            # Not used by call(); kept for information only. One distribution
            # per possible popcount m in 0..n_in.
            means = [m * self.ul + (max_pop - m) * self.uh for m in range(0, max_pop+1)]
            stdvs = [np.sqrt(m * (self.sigl**2) + (max_pop - m)*(self.sigh**2)) for m in range(0, max_pop+1)]
            self.distributions = [tfp.distributions.Normal(loc= m, scale = sig) for m,sig in zip(means,stdvs)]

        elif(error_type == "Flip"):
            self.p = self.error_dict["p"]

    def build(self, input_shape):
        """Create the real-valued weights and the trainable output scale."""
        # Weights are drawn from N(0, 1/n_in).
        stdv=1/np.sqrt(self.n_in)
        w = np.random.normal(loc=0.0, scale=stdv,size=[self.n_in,self.n_out]).astype(np.float32)
        self.w=K.variable(w)
        self.gamma_w=K.variable(1.0)
        self._trainable_weights=[self.w,self.gamma_w]

    def call(self, x,mask=None):
        """Apply the binarized dense transform with the configured error model.

        A variant without batch normalization (thresholding against a learned
        offset and binarizing inside the layer) was prototyped in an earlier
        revision and has been removed from this block.
        """
        #Designed with Batch norm in mind, Binarization done after.
        if(self.error_type == "NoError"):
            self.clamped_w=binarize(self.w)
            self.prod=K.abs(self.gamma_w)*K.dot(x,self.clamped_w)
            self.out = self.prod
            return self.out
        elif(self.error_type == "Flip"):
            # The factor is +1 where the uniform sample exceeds p and -1
            # otherwise; a new mask is drawn on every call.
            self.clamped_w=binarize(self.w)*((2*tf.cast(tf.random.uniform(self.w.shape) > self.p, tf.float32)) - 1)
            self.prod=K.abs(self.gamma_w)*K.dot(x,self.clamped_w)
            self.out = self.prod
            return self.out
        elif(self.error_type == "InnerDot"):
            #No Error Model first
            self.clamped_w=binarize(self.w)
            self.clamped_x=binarize(x)
            # Largest absolute value over the whole input tensor (no axis is
            # given to the reductions), used to rescale the output.
            gamma_x = tf.math.maximum(K.abs(tf.math.reduce_min(x)),K.abs(tf.math.reduce_max(x)))
            # Convert the binarized dot product into a popcount:
            # pop = (dot + n_in) / 2.
            self.pop = (K.dot(self.clamped_x,self.clamped_w)+self.n_in)/2

            #Add Error
            # Each of the `pop` positions contributes N(ul, sigl^2) and each of
            # the remaining n_in - pop positions contributes N(uh, sigh^2).
            self.dist = tfp.distributions.Normal(loc = self.pop * self.ul + (self.n_in - self.pop) * self.uh,
                                                 scale= tf.math.sqrt(self.pop * (self.sigl**2) + (self.n_in - self.pop)*(self.sigh**2)))
            # sample(1) adds a leading axis of size one, removed again here.
            self.samps = self.dist.sample(1)
            self.samps = tf.squeeze(self.samps, axis = 0)
            # Map the sampled popcount back to dot-product scale (2*pop - n_in).
            self.out = K.abs(self.gamma_w)*gamma_x*(2*self.samps-self.n_in)
            return self.out

    def get_output_shape_for(self,input_shape):
        """Alias of ``compute_output_shape``."""
        return self.compute_output_shape(input_shape)

    def compute_output_shape(self,input_shape):
        """Return the input shape with its last axis replaced by ``n_out``."""
        return tuple(input_shape)[:-1] + (self.n_out,)

    def get_config(self):
        """Return the layer configuration including all constructor arguments."""
        config = super().get_config().copy()
        config.update({
            'n_in': self.n_in,
            'n_out': self.n_out,
            'error_type': self.error_type,
            'error_dict': self.error_dict
        })
        return config

In [41]:
#Testing the Code: construct a single layer in isolation.
#Model Parameters (error models available):
# "NoError"  : No error, params ignored
# "Flip"     : Flip error, only p
# "InnerDot" : InnerProd error

# error_type = "NoError"
# error_dict = {}

#Flip Probability
p=0.1
# error_type = "Flip"
# error_dict = {"p": p }

#InnerProd params
ul = 1
uh = 0.01
sigl = 0.01
sigh = 0.01

error_type = "InnerDot"
error_dict = {"ul" : ul , "sigl" : sigl, "uh" : uh, "sigh" : sigh}

# input_shape must agree with n_in: the weight matrix is (n_in, n_out), so the
# layer can only be applied to inputs with n_in features.
layer = binary_dense_error_var(n_in=700,n_out=256,input_shape=[700],error_type = error_type, error_dict = error_dict)

In [13]:
#Model Construction

def make_MNIST_model(error_type, p = 0.1, ul = 1, sigl = 0.01, uh=0.01, sigh = 0.01 ):
    """Build the binarized MLP for MNIST with the selected error model.

    The network is five ``binary_dense_error_var`` layers
    (784 -> 256 -> 256 -> 256 -> 256 -> 10), each followed by batch
    normalization. The first four are followed by a ``Sign_layer`` and the
    last one by a softmax. The returned model is not compiled.

    Parameters
    ----------
    error_type : int or str
        ``0`` / ``"NoError"``  -- no error, the remaining parameters are ignored;
        ``1`` / ``"Flip"``     -- flip error, only ``p`` is used;
        ``2`` / ``"InnerDot"`` -- inner-product error, ``ul``, ``uh``,
        ``sigl`` and ``sigh`` are used.
    p : float
        Flip probability for the flip error model.
    ul, uh : float
        Means passed to the inner-product error model.
    sigl, sigh : float
        Standard deviations passed to the inner-product error model.

    Returns
    -------
    keras.models.Sequential
        The uncompiled model.

    Raises
    ------
    ValueError
        If ``error_type`` is not one of the supported codes or names.
    """
    if error_type in (0, "NoError"):
        #No Error
        error_type = "NoError"
        error_dict = {}
    elif error_type in (1, "Flip"):
        #Flip Error
        error_type = "Flip"
        error_dict = {"p": p }
    elif error_type in (2, "InnerDot"):
        #InnerDot error
        error_type = "InnerDot"
        error_dict = {"ul" : ul , "sigl" : sigl, "uh" : uh, "sigh" : sigh}
    else:
        # Fail early instead of hitting an unbound `error_dict` further down.
        raise ValueError(
            "error_type must be 0/'NoError', 1/'Flip' or 2/'InnerDot', got {0!r}".format(error_type))

    #Fixed Parameters
    batch_norm_eps=1e-4
    batch_norm_alpha=0.1#(this is same as momentum)

    #Model Def
    #5 Binary dense layers,final layer is softmax
    model=Sequential()
    #First Layer Group
    model.add(binary_dense_error_var(
        n_in=784,n_out=256,input_shape=[784],error_type = error_type, error_dict = error_dict))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Sign_layer(levels=1))

    #Subsequent Dense Layers
    #Change Layer Sizes
#     layer_sizes = [1024,1024]
    layer_sizes = [256,256,256]
    for n_units in layer_sizes:
        # n_in is read from the current model output so the sizes chain up.
        model.add(binary_dense_error_var(
            n_in=int(model.output.get_shape()[1]),n_out=n_units,error_type = error_type, error_dict = error_dict))
        model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
        model.add(Sign_layer(levels=1))

    #Final Layer
    model.add(binary_dense_error_var(
            n_in=int(model.output.get_shape()[1]),n_out=10,error_type = error_type, error_dict = error_dict))
    model.add(BatchNormalization(axis=-1, momentum=batch_norm_alpha, epsilon=batch_norm_eps))
    model.add(Activation('softmax'))

    # the following is a workaround so that the model weights can be saved
    # https://github.com/tensorflow/tensorflow/issues/46871
    for j, w in enumerate(model.weights):
        w._handle_name = 'model_' + str(j) + w.name

    return model

# Training Script

In [23]:
# Error model used for training:
#   0 -> no error (params ignored)
#   1 -> flip error (only p used)
#   2 -> inner-product error
error_type = 2

# Flip probability
p = 0.1

# Inner-product error parameters ("l" = low means bits match)
ul, uh = 1, 0.1
sigl = sigh = 0.01

# Build the network and print its layer summary
model = make_MNIST_model(error_type, p=p, ul=ul, sigl=sigl, uh=uh, sigh=sigh)
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
binary_dense_error_var_20 (b (None, 256)               200705    
_________________________________________________________________
batch_normalization_20 (Batc (None, 256)               1024      
_________________________________________________________________
sign_layer_16 (Sign_layer)   (None, 256)               1         
_________________________________________________________________
binary_dense_error_var_21 (b (None, 256)               65537     
_________________________________________________________________
batch_normalization_21 (Batc (None, 256)               1024      
_________________________________________________________________
sign_layer_17 (Sign_layer)   (None, 256)               1         
_________________________________________________________________
binary_dense_error_var_22 (b (None, 256)              

In [24]:
#Training Script

#Training parameters
batch_size=50
epochs=100
lr=0.01

#Makes Model Name (encodes the error model and its parameters)
if(error_type == 0):
    #No Error
    model_name = "error_{0}".format(error_type)
elif(error_type == 1):
    model_name = "error_{0}_p_{1}".format(error_type,p)
elif(error_type == 2):
    #InnerDot error
    model_name = "error_{0}_ul_{1}_uh_{2}_sigl_{3}_sigh_{4}".format(error_type,ul,uh,sigl,sigh)
else:
    # Without this branch an unknown code surfaces later as a NameError on model_name.
    raise ValueError("error_type must be 0, 1 or 2, got {0!r}".format(error_type))

# Creates 'models' and 'models/<model_name>' if missing; no error if they exist.
os.makedirs('models/'+model_name, exist_ok=True)

#Train
opt = keras.optimizers.Adam(learning_rate=lr,decay=1e-6)#SGD(lr=lr,momentum=0.9,decay=1e-5)
model.compile(loss='sparse_categorical_crossentropy',optimizer=opt,metrics=['accuracy'])

# Only the weights of the epoch with the best validation accuracy are kept.
weights_path='models/'+model_name+'.h5'
cback=keras.callbacks.ModelCheckpoint(weights_path, monitor='val_accuracy', save_best_only=True,save_weights_only=True)

#Training
# Called unconditionally: the old branching on keras.__version__ left `history`
# undefined for any major version other than 1 or 2.
history=model.fit(X_train, y_train,batch_size=batch_size,validation_data=(X_val, y_val), verbose=2,epochs=epochs,callbacks=[cback])

#Save history
dic={'hard':history.history}
# The context manager closes the file even if pickling fails.
with open('models/'+model_name+'.pkl','wb') as history_file:
    pickle.dump(dic,history_file)

Epoch 1/10
1080/1080 - 8s - loss: 1.2929 - accuracy: 0.5614 - val_loss: 1.2386 - val_accuracy: 0.5762
Epoch 2/10
1080/1080 - 5s - loss: 1.0170 - accuracy: 0.6586 - val_loss: 0.8043 - val_accuracy: 0.7297
Epoch 3/10
1080/1080 - 5s - loss: 0.7902 - accuracy: 0.7361 - val_loss: 0.7695 - val_accuracy: 0.7448
Epoch 4/10
1080/1080 - 6s - loss: 0.7917 - accuracy: 0.7347 - val_loss: 0.7533 - val_accuracy: 0.7480
Epoch 5/10
1080/1080 - 5s - loss: 0.7843 - accuracy: 0.7404 - val_loss: 0.7845 - val_accuracy: 0.7402
Epoch 6/10
1080/1080 - 5s - loss: 0.7863 - accuracy: 0.7378 - val_loss: 0.8164 - val_accuracy: 0.7292
Epoch 7/10
1080/1080 - 5s - loss: 0.7839 - accuracy: 0.7383 - val_loss: 0.7453 - val_accuracy: 0.7533
Epoch 8/10
1080/1080 - 5s - loss: 0.7869 - accuracy: 0.7396 - val_loss: 0.7663 - val_accuracy: 0.7463
Epoch 9/10
1080/1080 - 5s - loss: 0.8205 - accuracy: 0.7266 - val_loss: 0.8979 - val_accuracy: 0.6967
Epoch 10/10
1080/1080 - 6s - loss: 0.9067 - accuracy: 0.6935 - val_loss: 0.8610 - 

# Testing Script

In [None]:
#Test Based on Error Type
#Model Parameters
# 0 : No error, params ignored
# 1 : Flip error, only p used
# 2 : InnerProd error
error_type = 0

#Flip Probability
p=0.1

#InnerProd params
ul = 1
uh = 0.01
sigl = 0.001
sigh = 0.001

weights_path = "" #To Specify


def _load_and_evaluate(error_type, **error_params):
    """Build the model, load ``weights_path`` and evaluate it on the test set.

    Returns the model and the ``[loss, accuracy]`` list from ``evaluate``.
    """
    if not weights_path:
        raise ValueError("Set weights_path to a saved weights file before running the tests")
    model = make_MNIST_model(error_type, **error_params)
    # evaluate() needs a compiled model; loss and metric match the training setup.
    model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.load_weights(weights_path)
    score = model.evaluate(X_test, y_test, verbose=0)
    return model, score


if(error_type == 0):
    #No Error
    model, score = _load_and_evaluate(error_type, p = p, ul = ul, sigl = sigl, uh=uh, sigh = sigh)
    print("No Error, test loss was %0.4f, test accuracy was %0.4f"%(score[0],score[1]))

elif(error_type == 1):
    #Flip Error: sweep the flip probability
    probs = np.logspace(-4, -1, 20)

    acc_1 = []
    for flip_p in probs:
        model, score = _load_and_evaluate(error_type, p = flip_p, ul = ul, sigl = sigl, uh=uh, sigh = sigh)
        # %g instead of %d: the probabilities are below 1 and would all print as 0.
        print("With p= %g, test loss was %0.4f, test accuracy was %0.4f"%(flip_p,score[0],score[1]))
        acc_1.append(score[1])

elif(error_type == 2):
    #InnerDot Error: sweep the ratio sigh / sigl
    ratio_l_h = np.linspace(1,10,20)
    acc_2 = []
    for ratio in ratio_l_h:
        # error_type stays the integer code expected by make_MNIST_model.
        model, score = _load_and_evaluate(error_type, p = p, ul = ul, sigl = sigl, uh=uh, sigh = ratio*sigl)
        # %0.2f instead of %d: the ratios are non-integer.
        print("With ratio %0.2f, test loss was %0.4f, test accuracy was %0.4f"%(ratio,score[0],score[1]))
        acc_2.append(score[1])
