In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from matplotlib import colors

Model will take a decimal number and return the binary(ones and zeros) for that number.
</br>
ex:
</br>
    [0,1,0,1] -> 10
</br>
    [1,0,1,0] -> 5
</br>
    [0,1,1,0] -> 6
</br>
    [0,0,0,0] -> 0

The loss function will take the model input and the model output and give a score.

In [364]:
def getBinArray(num,bitDepth):
    trueBinArray = np.zeros(bitDepth)
    for i in range(1,bitDepth+1):
        trueBinArray[bitDepth-i] = num%2
        num = num//2
    return trueBinArray

def binaryLossFunction(binaryArray_pred,number_true):

    l = len(binaryArray_pred)
    trueBinArray = getBinArray(number_true,l)

    loss = 0
    gradient = np.ones(l)
    for i in range(l): 
        y_hat = binaryArray_pred[i]
        y = trueBinArray[i]
        if(y == y_hat):
            loss += 0
        else:
            y_hat = np.clip(y_hat,0.01,.99)
            loss += -(y * np.log(y_hat)) - ((1-y) * (np.log(1-y_hat)))
            gradient[i] = (-2*y + 1)/(y_hat)
    
    #print("\tLossRaw:",loss)

    loss = abs((1/l) * loss)
    gradient = (1/l) * gradient

    #print("\tloss:",loss)



    return loss,gradient



In [369]:
bitDepth = 4
ar = np.random.random(size = bitDepth)
n = np.random.randint(0,2**bitDepth)
print(ar,n)
l1,g = binaryLossFunction(ar,n)

print("loss:",l1)
print("Gradient:",g)
print()

ar -= g
ar = np.clip(ar,0,1)
print(ar,n)
l2,g = binaryLossFunction(ar,n)

print("loss:",l2)
print("diff: ",l1-l2)
print("Gradient:",g)



[0.29710664 0.12555679 0.03463331 0.53428973] 2
loss: 1.1534622326937687
Gradient: [ 0.84144871  1.99113086 -7.21848411  0.46791092]

[0.         0.         1.         0.06637881] 2
loss: 0.017171124389289208
diff:  1.1362911083044795
Gradient: [0.25       0.25       0.25       3.76626235]


In [338]:
#define model
def BuildModel(bitDepth):
    weightInitializer = tf.keras.initializers.RandomNormal(-2,3,seed=0)
    Input_x = tf.keras.layers.Input(shape=(1),name="x")

    denseLayer = tf.keras.layers.Dense(bitDepth,kernel_initializer=weightInitializer)(Input_x)
    #denseLayer = tf.keras.layers.Dense(degree,kernel_initializer=weightInitializer)(denseLayer)
    #denseLayer = tf.keras.layers.Dense(degree)(denseLayer)
    #denseLayer = tf.keras.layers.Dense(degree)(denseLayer)
    outputLayer = tf.keras.layers.Dense(bitDepth,activation='sigmoid',kernel_initializer=weightInitializer)(denseLayer)
    
    return tf.keras.Model(inputs= Input_x,outputs=outputLayer)

class Model_m9(tf.keras.Model):
    def __init__(self,degree):
        super(Model_m9, self).__init__()
        self.model = BuildModel(degree)

    def call(self,data,training = False):
        return self.model(data)

In [330]:
numPoints = 10
data_x = np.random.randint(0,2**bitDepth,size=numPoints)

In [340]:
model = Model_m9(bitDepth)#add one for the constant
model.compile()#, loss= CustomTensorFlowLoss())
print(model.model.summary())
print()
print(model.trainable_weights)
lossArray = []


Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 x (InputLayer)              [(None, 1)]               0         
                                                                 
 dense_6 (Dense)             (None, 4)                 8         
                                                                 
 dense_7 (Dense)             (None, 4)                 20        
                                                                 
Total params: 28
Trainable params: 28
Non-trainable params: 0
_________________________________________________________________
None

[<tf.Variable 'dense_6/kernel:0' shape=(1, 4) dtype=float32, numpy=array([[-7.091102 , -3.4441302, -3.9867108, -1.7389665]], dtype=float32)>, <tf.Variable 'dense_6/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>, <tf.Variable 'dense_7/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[-

In [None]:
decaySteps = len(data_x)
print(decaySteps)
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=.01,decay_steps=decaySteps,decay_rate=0.8,staircase=False)
#lr_schedule = 0.000001
#optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)#lr_schedule)
optimizer = tf.keras.optimizers.SGD(1e-8)


In [357]:
i = np.random.randint(0,len(data_x))
x = np.reshape(data_x[i],(1,1))
y_pred = model.predict(x)
print(x)
print(y_pred)

[[14]]
[[1. 1. 1. 1.]]


In [370]:
@tf.custom_gradient
def loss_fn(modelIn_num,modelOut_bin):
    #create the vectors to hold the values for loss and gradient

    loss_array = np.zeros(modelIn_num.shape)
    grad_array = np.zeros(modelOut_bin.shape)
    for i in range(len(modelIn_num)):
        trueNum = float(modelIn_num[i])
        predictedBin = np.array(modelOut_bin[i])
        lossVal, gradVal = binaryLossFunction(predictedBin,trueNum)

        loss_array[i] = lossVal
        grad_array[i] = gradVal
    

    lossTensor = tf.convert_to_tensor(loss_array,dtype='float32')
    gradTensor = tf.convert_to_tensor(grad_array,dtype='float32')
    
    def grad(upstream):
        pred_grad = (upstream * gradTensor)
        return pred_grad,pred_grad
    
    #print("lossVal = ",lossVal)
    return lossTensor, grad

In [374]:


# Open a GradientTape to record the operations run
# during the forward pass, which enables auto-differentiation.
with tf.GradientTape(persistent=True) as tape:

    # Run the forward pass of the layer.
    # The operations that the layer applies
    # to its inputs are going to be recorded
    # on the GradientTape.
    i = np.random.randint(0,len(data_x))
    x = np.reshape(data_x[i],(1,1))

    y_pred = model.model(x, training=True)  # Logits for this minibatch
    print("\nypred:",y_pred)

    loss_value = loss_fn(x,y_pred)
    print("\nabs loss:",loss_value)

    y_true = np.reshape(getBinArray(x[0],bitDepth),y_pred.shape)
    loss_value_mse = tf.keras.losses.binary_crossentropy(y_true,y_pred)
    print("\nmseLoss:",loss_value_mse)

layerGrad = tape.gradient(loss_value,y_pred,unconnected_gradients='zero')
layerGrad2 = tape.gradient(loss_value_mse,y_pred,unconnected_gradients='zero')

print("\nLoss gradient:",layerGrad)
print()
print("\nLoss gradient MSE:",layerGrad2)

# Use the gradient tape to automatically retrieve
# the gradients of the trainable variables with respect to the loss.
grads = tape.gradient(loss_value, model.trainable_weights)#,output_gradients=loss_gradient,unconnected_gradients='zero')
print("\nGradients:",grads)

grads = tape.gradient(loss_value_mse, model.trainable_weights)#,output_gradients=loss_gradient,unconnected_gradients='zero')
print("\nGradients MSE:",grads)

del tape


ypred: tf.Tensor([[1. 1. 1. 1.]], shape=(1, 4), dtype=float32)

abs loss: tf.Tensor([[1.1512926]], shape=(1, 1), dtype=float32)

mseLoss: tf.Tensor([61.576153], shape=(1,), dtype=float32)

Loss gradient: tf.Tensor([[0.25       0.25       0.25       0.25252524]], shape=(1, 4), dtype=float32)


Loss gradient MSE: tf.Tensor([[0. 0. 0. 0.]], shape=(1, 4), dtype=float32)

Gradients: [<tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[-0.,  0.,  0.,  0.]], dtype=float32)>, <tf.Tensor: shape=(4,), dtype=float32, numpy=array([-0.,  0.,  0.,  0.], dtype=float32)>, <tf.Tensor: shape=(4, 4), dtype=float32, numpy=
array([[-0., -0., -0., -0.],
       [-0., -0., -0., -0.],
       [-0., -0., -0., -0.],
       [-0., -0., -0., -0.]], dtype=float32)>, <tf.Tensor: shape=(4,), dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>]

Gradients MSE: [<tf.Tensor: shape=(1, 4), dtype=float32, numpy=
array([[ -6.086383 ,  -4.7575665,   9.057434 , -21.932959 ]],
      dtype=float32)>, <tf.Tensor: sha

In [None]:
class CustomTensorflowLoss(tf.keras.losses.Loss):
    def __init__(self):
        super(CustomTensorflowLoss,self).__init__()
    
    def call(self,y_true,y_pred):
        return loss_fn(y_true,y_pred)

In [None]:

epochs = 500 
batchSize = numPoints//epochs
#loss_fn = CustomTensorFlowLoss()
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    batchLoss = []
    batchGradients = []
    for batchNum in range(batchSize):
        dataPoint_x = np.reshape(x_forModel[epoch*batchSize + batchNum,:],(1,PolyDegree+1))
        dataPoint_y = y_forModel[epoch*batchSize + batchNum]

        #print(dataPoint_x)
        #print(dataPoint_y)

        # Iterate over the batches of the dataset.

        # Open a GradientTape to record the operations run
        # during the forward pass, which enables auto-differentiation.
        with tf.GradientTape(persistent=True) as tape:

            # Run the forward pass of the layer.
            # The operations that the layer applies
            # to its inputs are going to be recorded
            # on the GradientTape.
            y_pred = model.model(dataPoint_x, training=True)  # Logits for this minibatch
            
            #with tape.stop_recording(): 
            #print("\nypred:",y_pred)
            loss_value = loss_fn(dataPoint_y,y_pred)
            #loss_value = custom_operation(y_forModel,y_pred)
            #loss_value = foo_custom_grad(y_pred)
            #print("\nabs loss:",loss_value)
            #loss_value = tf.keras.losses.MSE(y_forModel,y_pred)
            batchLoss.append(float(np.mean(loss_value)))
                
            #print("\nmseLoss:",loss_value)
            #print(loss_gradient)

        #layerGrad = tape.gradient(loss_value,y_pred,unconnected_gradients='zero')
        #layerGrad_var = tf.Variable(layerGrad)
        #layerGrad_var.assign(loss_gradient)
        #layerGrad[0] = loss_gradient
        #print(layerGrad_var)
        #print(layerGrad)
        
        # Use the gradient tape to automatically retrieve
        # the gradients of the trainable variables with respect to the loss.
        #grads = tape.gradient(loss_value, model.trainable_weights)#,output_gradients=loss_gradient,unconnected_gradients='zero')
        if(batchNum == 0):
            grads = tape.gradient(loss_value, model.trainable_weights,unconnected_gradients='zero')
        else:
            grads += tape.gradient(loss_value, model.trainable_weights,unconnected_gradients='zero')
        #print()
        #print("\nGradients:",grads)
        #print()
        # Run one step of gradient descent by updating
        # the value of the variables to minimize the loss.
        
    optimizer.apply_gradients(zip(grads,model.trainable_weights))
    #print(model.trainable_weights)

    # Log every 200 batches.
    lossArray.append(float(np.mean(batchLoss)))
    print(
        "Training loss at epoch {}: {:.4f}".format(epoch, lossArray[-1]))  
    
    #print("Seen so far: %s samples" % ((step + 1) * batch_size))
del tape

In [None]:
print(model.trainable_weights)

In [None]:
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-8),loss=tf.keras.losses.MSE)
hist = model.fit(x_forModel,y_forModel,
                 epochs=1,
                 #validation_split=.1,
                 shuffle=True)

In [None]:
def plotHistory(hist):
    fig,ax = plt.subplots(1,1)
    ax.set_title("")
    keys = hist.history.keys()
    y = np.arange(1,hist.params['epochs']+1)
    for key in keys:
        if('output' in key):
            minVal = min(hist.history[key])
            meanVal = np.mean(hist.history[key])
            maxVal = max(hist.history[key])
            if(minVal != maxVal):
                print("{}:\n\tmin:{}\n\tmean:{}\n\tmax:{}".format(key,minVal,meanVal,maxVal))
                #ax.plot(y,hist.history[key],linewidth=0.5,label=key)
        else:
            ax.plot(y,hist.history[key],label=key)
            minVal = min(hist.history[key])
            meanVal = np.mean(hist.history[key])
            maxVal = max(hist.history[key])
            print("{}:\n\tmin:{}\n\tmean:{}\n\tmax:{}".format(key,minVal,meanVal,maxVal))
    
    plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
    plt.show()

In [None]:
plotHistory(hist)

In [None]:
plt.plot(np.arange(len(lossArray)),lossArray)
plt.yscale('log')
plt.show()
plt.plot(np.arange(len(lossOfAllTrainings)),lossOfAllTrainings)
plt.yscale('log')
plt.show()

In [None]:
#visualizeOutputs

y_pred = model.predict(np.polynomial.polynomial.polyvander(x,PolyDegree))
print(y_pred.shape)
plt.plot(x,y_pred)