# Logic gates using neural networks via tf.keras
https://machinelearningmastery.com/tutorial-first-neural-network-python-keras/

Here we are using 'tf.keras' as recommended. 

https://www.pyimagesearch.com/2019/10/21/keras-vs-tf-keras-whats-the-difference-in-tensorflow-2-0/

This essentially means that Keras is included as a submodule inside TensorFlow itself.



In [19]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import optimizers
from tensorflow.keras import backend as K

## Inputs

In [20]:
# One row per training sample, one column per gate input: all four
# combinations of two binary inputs.
inputs = np.array([
    [0., 0.],
    [0., 1.],
    [1., 0.],
    [1., 1.],
])
print(inputs)

[[0. 0.]
 [0. 1.]
 [1. 0.]
 [1. 1.]]


## Expected outputs


In [21]:
# Truth tables for each gate, stored as (4, 1) column vectors. Row k is the
# target for row k of `inputs`, i.e. for (0,0), (0,1), (1,0), (1,1).
# AND function
outputAND = np.array([0., 0., 0., 1.]).reshape(-1, 1)
# OR function
outputOR = np.array([0., 1., 1., 1.]).reshape(-1, 1)
# NAND function
outputNAND = np.array([1., 1., 1., 0.]).reshape(-1, 1)
# XOR function: true only when exactly one input is 1, so (1,1) -> 0.
outputXOR = np.array([0., 1., 1., 0.]).reshape(-1, 1)

## Set initial weights and biases

In [22]:
# Initial guesses for weights
w1 = 0.30
w2 = 0.55
w3 = 0.20
w4 = 0.45
w5 = 0.50
w6 = 0.35
w7 = 0.15
w8 = 0.40
w9 = 0.25

# Initial guesses for biases
b1 = 0.60
b2 = 0.05

# A plain list is used instead of a single numpy array because the weight
# matrices of the different layers do not have the same dimensions.
weights = [
    # Weights for layer 1 --> 2: shape (3, 2)
    np.array([[w1, w4], [w2, w5], [w3, w6]]),
    # Weights for layer 2 --> 3: shape (1, 3)
    np.array([[w7, w8, w9]]),
]
# List of biases at each layer; the three units of the first layer all
# start from the same bias b1.
biases = [
    np.array([b1, b1, b1]),
    np.array([b2]),
]

# Keep independent snapshots of the starting parameters. A plain
# `weightsOriginal = weights` would only alias the same list and arrays, so
# any in-place update of `weights`/`biases` would change the "originals" too.
weightsOriginal = [w.copy() for w in weights]
biasesOriginal = [b.copy() for b in biases]

print('Weights matrices: ',weights)
print('Biases: ',biases)





Weights matrices:  [array([[0.3 , 0.45],
       [0.55, 0.5 ],
       [0.2 , 0.35]]), array([[0.15, 0.4 , 0.25]])]
Biases:  [array([0.6, 0.6, 0.6]), array([0.05])]


## Some more settings

In [23]:
# Settings shared by the cells below.
eeta = 0.5    # presumably the learning rate (eta); 0.5 is also the rate the SGD optimizer is compiled with
nSamples = 4  # number of training samples (rows of `inputs`)
nLayers = 2   # number of layers iterated over when inspecting the model

## Define model

In [24]:
# Define the Keras model: 2 inputs -> 3 sigmoid units -> 1 sigmoid output,
# with a bias term on every layer.
model = Sequential([
    Dense(3, input_dim=2, activation='sigmoid', use_bias=True),
    Dense(1, activation='sigmoid', use_bias=True),
])

## Model summary

In [25]:
# Print the layer-by-layer table of output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 3)                 9         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 4         
Total params: 13
Trainable params: 13
Non-trainable params: 0
_________________________________________________________________


## Check the initial weights and biases for each layer

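Note how the weights matrix is not 3x2 but rather 2x3, i.e. Keras stores it as (inputs, units). This is why our own 3x2 matrix is transposed before being passed to `set_weights` below.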

In [26]:
# Dump the current parameters of every layer. For these Dense layers
# get_weights() returns the weight matrix first and the bias vector second.
for i in range(nLayers):
    layerNumber = i + 1
    kernel, bias = model.layers[i].get_weights()
    print('\n Weights for layer ', layerNumber)
    print(kernel)
    print('\n Biases for layer ', layerNumber)
    print(bias)


 Weights for layer  1
[[-0.45773363 -0.84572554  0.38600314]
 [-0.53165615 -0.2992524   0.24187028]]

 Biases for layer  1
[0. 0. 0.]

 Weights for layer  2
[[ 0.65574324]
 [ 0.8888172 ]
 [-1.047615  ]]

 Biases for layer  2
[0.]


## Change initial weights and biases for each layer

In [27]:
# Overwrite the randomly initialised parameters of layer 1 and layer 2 with
# our own starting values. Each weight matrix is transposed to match the
# (inputs, units) layout that Keras expects.
for layer, layerWeights, layerBiases in zip(model.layers, weightsOriginal, biasesOriginal):
    layer.set_weights([layerWeights.T, layerBiases])

## Compile model

In [28]:
# compile the keras model

# Passing the optimizer by name does not let us set its learning rate:
# model.compile(loss='mse', optimizer='sgd', metrics=['mse'])

# So build the optimizer explicitly. The learning rate comes from `eeta`
# (defined in the settings cell) rather than a second hard-coded 0.5, so the
# value only has to be changed in one place.
model.compile(loss='mse', optimizer=optimizers.SGD(learning_rate=eeta), metrics=['mse'])



## Forward feed

Note: `batch_size=4` means that all four training examples are processed in a single batch (full-batch gradient descent). This makes it equivalent to my own implementation, which does not yet support stochastic/mini-batch gradient descent.

References: https://machinelearningmastery.com/how-to-control-the-speed-and-stability-of-training-neural-networks-with-gradient-descent-batch-size/

In [29]:
# Forward pass only, on the AND targets; the returned list (loss followed by
# the mse metric) is displayed as the cell result.
model.evaluate(x=inputs, y=outputAND, batch_size=4)



[0.3434138894081116, 0.3434139]

## Does `model.evaluate` change the weights and biases?

In [30]:
# Dump the parameters again to check whether evaluate() modified them.
for i in range(nLayers):
    layerNumber = i + 1
    kernel, bias = model.layers[i].get_weights()
    print('\n Weights for layer ', layerNumber)
    print(kernel)
    print('\n Biases for layer ', layerNumber)
    print(bias)


 Weights for layer  1
[[0.3  0.55 0.2 ]
 [0.45 0.5  0.35]]

 Biases for layer  1
[0.6 0.6 0.6]

 Weights for layer  2
[[0.15]
 [0.4 ]
 [0.25]]

 Biases for layer  2
[0.05]


From the above, we can be sure that it does not change the weights and biases

## Fit 1 epoch  (forward feed, backpropagation, updating the weights, biases)

Let us just try to see and compare the error after just 1 epoch. 

We should expect the model to perform forward feed, calculate loss/error,
perform backpropagation,
and adjust the weights and biases based on the learning rate.

Note: `batch_size=4` means that all four training examples are processed in a single batch (full-batch gradient descent). This makes it equivalent to my own implementation, which does not yet support stochastic/mini-batch gradient descent.

References: https://machinelearningmastery.com/how-to-control-the-speed-and-stability-of-training-neural-networks-with-gradient-descent-batch-size/

In [31]:
# Train for a single epoch on the AND targets, with all four samples in one batch.
model.fit(x=inputs, y=outputAND, batch_size=4, epochs=1)

Train on 4 samples


<tensorflow.python.keras.callbacks.History at 0x141d234d0>

## Now the weights and biases must have been updated

In [32]:
# Dump the parameters after one training epoch to see the update.
for i in range(nLayers):
    layerNumber = i + 1
    kernel, bias = model.layers[i].get_weights()
    print('\n Weights for layer ', layerNumber)
    print(kernel)
    print('\n Biases for layer ', layerNumber)
    print(bias)


 Weights for layer  1
[[0.29931322 0.54829705 0.1988662 ]
 [0.4493876  0.49822634 0.34898397]]

 Biases for layer  1
[0.5969832 0.5921526 0.5948988]

 Weights for layer  2
[[0.08719039]
 [0.33587077]
 [0.1880536 ]]

 Biases for layer  2
[-0.04234041]


## Now let us do a forward feed again and calculate the loss/error

Note: `batch_size=4` means that all four training examples are processed in a single batch (full-batch gradient descent). This makes it equivalent to my own implementation, which does not yet support stochastic/mini-batch gradient descent.

References: https://machinelearningmastery.com/how-to-control-the-speed-and-stability-of-training-neural-networks-with-gradient-descent-batch-size/

In [33]:
# Re-evaluate the loss/metric on the AND targets after the single training epoch.
out = model.evaluate(x=inputs, y=outputAND, batch_size=4)
print(out)

[0.30411168932914734, 0.3041117]


### The above result compares well with the PyTorch result as well as the result from my own implementation (when biases are updated independently)

## Now let us let the model train for 10^4 epochs

Note: `batch_size=4` means that all four training examples are processed in a single batch (full-batch gradient descent). This makes it equivalent to my own implementation, which does not yet support stochastic/mini-batch gradient descent.

References: https://machinelearningmastery.com/how-to-control-the-speed-and-stability-of-training-neural-networks-with-gradient-descent-batch-size/

In [34]:
%%time
# fit the keras model on the dataset
history = model.fit(inputs, outputAND, epochs=10**4, batch_size=4, verbose=0)

CPU times: user 38.6 s, sys: 6.03 s, total: 44.7 s
Wall time: 26.6 s


It turns out that this was quite slow. At first, I thought the problem was that it was printing at each epoch, so I set verbose=0 (silent). But it was still incredibly slow compared to my own implementation as well as PyTorch, even though we used our own for loop in both of those cases.

In [17]:
# Per-epoch values recorded during fit() (here the 'loss' list), displayed as the cell result.
history.history

{'loss': [0.29656221345067024,
  0.23853549733757973,
  0.2210054136812687,
  0.2138444073498249,
  0.21007107011973858,
  0.20859628729522228,
  0.20775542315095663,
  0.20478377677500248,
  0.20751046016812325,
  0.2044722056016326,
  0.20712593756616116,
  0.20543506834656,
  0.20548744313418865,
  0.20535280648618937,
  0.20622337143868208,
  0.20352253038436174,
  0.20606463961303234,
  0.2045597555115819,
  0.20542436838150024,
  0.2050496507436037,
  0.20461109932512045,
  0.2036317829042673,
  0.20374681241810322,
  0.2023001816123724,
  0.2039965558797121,
  0.2046814002096653,
  0.20429792441427708,
  0.20171229168772697,
  0.20431876368820667,
  0.20386942476034164,
  0.2025942075997591,
  0.20337327476590872,
  0.200950532220304,
  0.20339590031653643,
  0.20306825265288353,
  0.2028009295463562,
  0.20043280720710754,
  0.20047559216618538,
  0.2030652090907097,
  0.20181972905993462,
  0.2023759577423334,
  0.20189476292580366,
  0.20090340171009302,
  0.2015999285504222,

## Well, now let us check the output as well as the new weights and biases

In [18]:
# Evaluate the trained model on the AND targets (with the progress output enabled).
out = model.evaluate(x=inputs, y=outputAND, batch_size=4, verbose=1)
print(out)

[3.8288468203973025e-05, 3.828847e-05]


In [85]:

# Dump the trained parameters of every layer.
for i in range(nLayers):
    layerNumber = i + 1
    kernel, bias = model.layers[i].get_weights()
    print('\n Weights for layer ', layerNumber)
    print(kernel)
    print('\n Biases for layer ', layerNumber)
    print(bias)


 Weights for layer  1
[[-0.06779319  3.6265917  -1.9713066 ]
 [ 0.05656172  3.6560497  -1.9379442 ]]

 Biases for layer  1
[ 0.7420185 -5.2531877  2.5809631]

 Weights for layer  2
[[-0.3004699]
 [ 8.487757 ]
 [-4.293654 ]]

 Biases for layer  2
[-2.5681367]


## Now also let us have a look at the predictions for the sake of the tutorial

In [86]:
# Run the trained network on the four input combinations and show its
# (sigmoid) output for each.
predictions = model.predict(x=inputs)
print(predictions)

[[0.00120628]
 [0.01536182]
 [0.01545319]
 [0.97878754]]


## We have seen how to initialize custom weights/biases, perform forward feed, train/optimize the model, and finally how to check the updated parameters.


## Let us now see how we can see the input/output at each layer for debugging purposes.

In [87]:

# Print the symbolic input/output tensor attached to each layer. These are
# tensor descriptions (name, shape, dtype), not numerical values.
for i in range(nLayers):
    layer = model.layers[i]
    layerNumber = i + 1
    print('\n Inputs for layer ', layerNumber)
    print(layer.input)
    print('\n Outputs for layer ', layerNumber)
    print(layer.output)


 Inputs for layer  1
Tensor("dense_4_input:0", shape=(None, 2), dtype=float32)

 Outputs for layer  1
Tensor("dense_4/Identity:0", shape=(None, 3), dtype=float32)

 Inputs for layer  2
Tensor("dense_4/Identity:0", shape=(None, 3), dtype=float32)

 Outputs for layer  2
Tensor("dense_5/Identity:0", shape=(None, 1), dtype=float32)


In [89]:

# Print the symbolic input/output tensor attached to each layer once more.
for i in range(nLayers):
    layer = model.layers[i]
    layerNumber = i + 1
    print('\n Inputs for layer ', layerNumber)
    print(layer.input)
    print('\n Outputs for layer ', layerNumber)
    print(layer.output)


 Inputs for layer  1

 Outputs for layer  1
Tensor("dense_4/Identity:0", shape=(None, 3), dtype=float32)

 Inputs for layer  2

 Outputs for layer  2
Tensor("dense_5/Identity:0", shape=(None, 1), dtype=float32)


## We can't really see any numbers in the above output
## So we need to do something more

In [98]:
# To get actual numbers, build a backend function from the network's input
# tensor to each layer's input/output tensor and run it on the training data.
networkInput = model.get_layer(index=0).input
for i in range(nLayers):
    layer = model.get_layer(index=i)
    layerNumber = i + 1

    print('\n Inputs for layer ', layerNumber)
    layerInput = K.function([networkInput], layer.input)([inputs])  # `inputs` is a numpy array
    print(layerInput)

    print('\n Outputs for layer ', layerNumber)
    layerOutput = K.function([networkInput], layer.output)([inputs])  # `inputs` is a numpy array
    print(layerOutput)





 Inputs for layer  1
[[0. 0.]
 [0. 1.]
 [1. 0.]
 [1. 1.]]

 Outputs for layer  1
[[0.67743707 0.0052036  0.9296263 ]
 [0.6896707  0.16838202 0.65543556]
 [0.66244864 0.1642972  0.64786243]
 [0.67497796 0.88385504 0.20944276]]

 Inputs for layer  2
[[0.67743707 0.0052036  0.9296263 ]
 [0.6896707  0.16838202 0.65543556]
 [0.66244864 0.1642972  0.64786243]
 [0.67497796 0.88385504 0.20944276]]

 Outputs for layer  2
[[0.00120628]
 [0.01536182]
 [0.01545319]
 [0.97878754]]


## While the outputs match my own results, the inputs don't seem to be making much sense