Using TensorFlow 2.0 to build a simple feed-forward network trained on toy data, using `tf.GradientTape` for backpropagation.

In [1]:
import numpy as np
import tensorflow as tf
import time

The toy data consists of three blobs of 2-D points, each drawn from a Gaussian distribution around a different centre.

In [2]:
# Seed NumPy's global generator so the data drawn here is reproducible.
np.random.seed(42)

# Number of points drawn for each of the three classes.
samples_per_class = 100

# Each class is a unit-variance Gaussian blob around its own (x1, x2) centre:
# label 0 -> (1, 1), label 1 -> (5, 4), label 2 -> (8, 0).
# The draw order (x1 then x2, class 0 to class 2) is kept fixed so the seeded
# random stream produces the same points as before.
x1_label0 = np.random.normal(1, 1, (samples_per_class, 1))
x2_label0 = np.random.normal(1, 1, (samples_per_class, 1))
x1_label1 = np.random.normal(5, 1, (samples_per_class, 1))
x2_label1 = np.random.normal(4, 1, (samples_per_class, 1))
x1_label2 = np.random.normal(8, 1, (samples_per_class, 1))
x2_label2 = np.random.normal(0, 1, (samples_per_class, 1))

# Combine the two coordinates per class, then stack the classes row-wise.
xs_label0 = np.hstack((x1_label0, x2_label0))
xs_label1 = np.hstack((x1_label1, x2_label1))
xs_label2 = np.hstack((x1_label2, x2_label2))
xs = np.vstack((xs_label0, xs_label1, xs_label2))

# One-hot labels as a plain float ndarray (np.matrix is discouraged by NumPy).
# Row k of the 3x3 identity is the one-hot code of class k; it is repeated once
# per point of that class, in the same order the blobs were stacked above.
labels = np.repeat(
    np.eye(3),
    [len(xs_label0), len(xs_label1), len(xs_label2)],
    axis=0,
)

# Shuffle points and labels with the same permutation so they stay aligned.
arr = np.arange(xs.shape[0])
np.random.shuffle(arr)
xs = xs[arr, :]
labels = labels[arr, :]

In [3]:
import matplotlib.pyplot as plt

# Scatter the three training blobs, one marker style per class, with a title,
# axis labels and a legend so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(x1_label0, x2_label0, 'x', label='label 0')
ax.plot(x1_label1, x2_label1, 'o', label='label 1')
ax.plot(x1_label2, x2_label2, '_', label='label 2')
ax.set(title='Toy training data', xlabel='x1', ylabel='x2')
ax.legend()
plt.show()

<Figure size 640x480 with 1 Axes>

In [4]:
# --- Problem dimensions ---------------------------------------------------
# Rows of `xs` are training points, columns are input features.
(train_size, num_features) = xs.shape
num_labels = 3          # number of one-hot output classes
hidden_size = 2         # units in the single hidden layer

# --- Optimisation settings ------------------------------------------------
epochs = 100            # full passes over the training set
mini_batch = 10         # rows consumed per gradient step
learning_rate = 0.01    # step size of the plain gradient-descent update

In [5]:
def Dense(x, W, b, activation):
    """Apply one fully connected layer: ``activation(x @ W + b)``.

    Args:
        x: Input passed as the left operand of ``tf.matmul``.
        W: Weight matrix passed as the right operand of ``tf.matmul``.
        b: Bias added to the matrix product.
        activation: Callable applied to the affine result.

    Returns:
        Whatever ``activation`` returns for the affine result.
    """
    affine = tf.matmul(x, W) + b
    return activation(affine)

In [6]:
# Activations: ReLU for the hidden layer, softmax for the output layer.
ach = tf.nn.relu
aco = tf.nn.softmax

# Parameters are initialised from a standard normal distribution. The draws
# happen in the order Wh, bh, Wo, bo so the seeded random stream is consumed
# in a fixed sequence.

# Hidden layer: num_features -> hidden_size.
Wh = tf.Variable(np.random.randn(num_features, hidden_size))
bh = tf.Variable(np.random.randn(hidden_size))

# Output layer: hidden_size -> num_labels.
Wo = tf.Variable(np.random.randn(hidden_size, num_labels))
bo = tf.Variable(np.random.randn(num_labels))

In [7]:
# Mini-batch gradient descent with manually applied updates.
#
# Differences from a naive "one tape.gradient call per variable" loop:
#   * A single non-persistent tape and one `t.gradient` call return all four
#     gradients from one backward pass; no persistent tape has to be deleted.
#   * The cross-entropy is computed from the output layer's logits by
#     `tf.nn.softmax_cross_entropy_with_logits`. Mathematically this equals
#     -sum(y * log(softmax(logits))), but it avoids 0 * log(0) = NaN when a
#     class probability underflows to zero. The softmax (`aco`) is therefore
#     folded into the loss here; it is still applied explicitly at inference.
params = [Wh, bh, Wo, bo]

for i in range(epochs):
    print("EPOCH", i)
    start = time.time()
    for j in range(0, train_size, mini_batch):
        # The slice end may run past the array; NumPy truncates the last batch.
        x = xs[j:j+mini_batch, :]
        y = labels[j:j+mini_batch, :]
        # Trainable tf.Variables are watched automatically by the tape.
        with tf.GradientTape() as t:
            hidden = Dense(x, Wh, bh, ach)
            # tf.identity as activation yields the raw (pre-softmax) logits.
            logits = Dense(hidden, Wo, bo, tf.identity)
            # Summed (not averaged) cross-entropy over the mini-batch.
            loss = tf.reduce_sum(
                tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
            )
        grads = t.gradient(loss, params)
        # Plain SGD step: p <- p - learning_rate * dL/dp.
        for param, grad in zip(params, grads):
            param.assign_sub(learning_rate * grad)
    end = time.time()
    # NOTE: this is the summed loss of the LAST mini-batch of the epoch only,
    # not an average over the whole epoch.
    print("loss at end of EPOCH:", loss.numpy())
    print("time taken: {:0.2f}s".format(end-start))
    print("Done with EPOCH")

EPOCH 0
loss at end of EPOCH: 4.66676420592313
time taken: 3.66s
Done with EPOCH
EPOCH 1
loss at end of EPOCH: 2.685293991497298
time taken: 0.23s
Done with EPOCH
EPOCH 2
loss at end of EPOCH: 1.9574193022850985
time taken: 0.22s
Done with EPOCH
EPOCH 3
loss at end of EPOCH: 1.568502035961147
time taken: 0.22s
Done with EPOCH
EPOCH 4
loss at end of EPOCH: 1.2838349501332416
time taken: 0.24s
Done with EPOCH
EPOCH 5
loss at end of EPOCH: 1.0567982332509342
time taken: 0.27s
Done with EPOCH
EPOCH 6
loss at end of EPOCH: 0.8774572923983797
time taken: 0.23s
Done with EPOCH
EPOCH 7
loss at end of EPOCH: 0.7390653971089624
time taken: 0.20s
Done with EPOCH
EPOCH 8
loss at end of EPOCH: 0.6361638409329212
time taken: 0.25s
Done with EPOCH
EPOCH 9
loss at end of EPOCH: 0.5466095345575687
time taken: 0.25s
Done with EPOCH
EPOCH 10
loss at end of EPOCH: 0.4759186620685819
time taken: 0.24s
Done with EPOCH
EPOCH 11
loss at end of EPOCH: 0.4172409931239781
time taken: 0.21s
Done with EPOCH
EPOCH 

loss at end of EPOCH: 0.02123284432476672
time taken: 0.39s
Done with EPOCH
EPOCH 98
loss at end of EPOCH: 0.021057187299612964
time taken: 0.39s
Done with EPOCH
EPOCH 99
loss at end of EPOCH: 0.02088566125470299
time taken: 0.30s
Done with EPOCH


In [8]:
# Forward pass over the full training set with the trained parameters.
hidden_train = Dense(xs, Wh, bh, ach)
y_model = Dense(hidden_train, Wo, bo, aco)

# A prediction counts as correct when the arg-max over the class axis matches
# the arg-max of the one-hot label.
predicted_train = tf.argmax(y_model, 1)
expected_train = tf.argmax(labels, 1)
correct_prediction = tf.equal(predicted_train, expected_train)

# Casting the booleans to floats and averaging gives the fraction correct.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print("Final training accuracy:", accuracy.numpy())

Final training accuracy: 0.99


In [9]:
# Number of held-out points drawn per class.
test_samples_per_class = 10

# Fresh points drawn with the same class centres and unit variance as the
# training blobs. The draw order (x1 then x2, class 0 to class 2) is kept
# fixed so the global random stream is consumed in the same sequence.
test_x1_label0 = np.random.normal(1, 1, (test_samples_per_class, 1))
test_x2_label0 = np.random.normal(1, 1, (test_samples_per_class, 1))
test_x1_label1 = np.random.normal(5, 1, (test_samples_per_class, 1))
test_x2_label1 = np.random.normal(4, 1, (test_samples_per_class, 1))
test_x1_label2 = np.random.normal(8, 1, (test_samples_per_class, 1))
test_x2_label2 = np.random.normal(0, 1, (test_samples_per_class, 1))

# Combine the two coordinates per class, then stack the classes row-wise.
test_xs_label0 = np.hstack((test_x1_label0, test_x2_label0))
test_xs_label1 = np.hstack((test_x1_label1, test_x2_label1))
test_xs_label2 = np.hstack((test_x1_label2, test_x2_label2))
test_xs = np.vstack((test_xs_label0, test_xs_label1, test_xs_label2))

# One-hot labels as a plain float ndarray (np.matrix is discouraged by NumPy).
# Row k of the 3x3 identity is repeated once per test point of class k, in the
# same order the blobs were stacked above.
test_labels = np.repeat(
    np.eye(3),
    [len(test_xs_label0), len(test_xs_label1), len(test_xs_label2)],
    axis=0,
)

# Forward pass with the trained parameters, then the fraction of points whose
# arg-max prediction matches the arg-max of the one-hot label.
y_test = Dense(Dense(test_xs, Wh, bh, ach), Wo, bo, aco)
correct_prediction = tf.equal(tf.argmax(y_test, 1), tf.argmax(test_labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print("Final test accuracy:", accuracy.numpy())

Final test accuracy: 1.0
