<a href="https://colab.research.google.com/github/kaiserkonok/deep_neural_net_from_scratch/blob/master/Neural_Nets_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

In [2]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [3]:
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

In [4]:
W1 = tf.Variable(tf.random.normal([784, 256]))
b1 = tf.Variable(tf.zeros([256]))
W2 = tf.Variable(tf.random.normal([256, 128]))
b2 = tf.Variable(tf.zeros([128]))
W3 = tf.Variable(tf.random.normal([128, 10]))
b3 = tf.Variable(tf.zeros([10]))

In [5]:
x_train.shape

(60000, 28, 28)

In [6]:
x_test.shape

(10000, 28, 28)

In [7]:
x_train = x_train.reshape(-1, 784)
x_test = x_test.reshape(-1, 784)

In [8]:
x_train.shape

(60000, 784)

In [9]:
x_test.shape

(10000, 784)

In [10]:
def forward(x):
  Z1 = tf.matmul(x, W1) + b1
  A1 = tf.nn.sigmoid(Z1)
  Z2 = tf.matmul(A1, W2) + b2
  A2 = tf.nn.sigmoid(Z2)
  Z3 = tf.matmul(A2, W3) + b3
  output = tf.nn.softmax(Z3)

  return output

In [11]:
forward(x_train)

<tf.Tensor: shape=(60000, 10), dtype=float32, numpy=
array([[1.7164561e-01, 4.4948339e-01, 1.0457551e-03, ..., 3.5112008e-01,
        2.5770089e-05, 2.6560992e-02],
       [2.2762241e-03, 1.3402306e-02, 1.7749768e-03, ..., 2.9034346e-01,
        1.0448974e-04, 6.9206852e-01],
       [5.0363196e-03, 3.9597534e-02, 2.8712670e-03, ..., 7.9839617e-01,
        3.4222790e-05, 1.5405853e-01],
       ...,
       [1.5831707e-06, 7.7868273e-05, 9.7998727e-06, ..., 9.2274678e-01,
        1.7926710e-06, 7.7159166e-02],
       [1.2402186e-03, 1.5468192e-01, 3.1013584e-03, ..., 8.3831775e-01,
        2.8875380e-05, 2.6282109e-03],
       [8.1761088e-03, 5.8183455e-01, 1.8597567e-01, ..., 1.3921287e-02,
        3.8138609e-03, 2.0447516e-01]], dtype=float32)>

In [12]:
def compute_loss(output, labels):
  loss = tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(labels, output))
  return loss

In [13]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

In [18]:
def train(x, y):
  with tf.GradientTape() as tape:
    output = forward(x)
    loss = compute_loss(output, y)

  gradients = tape.gradient(loss, [W1, b1, W2, b2, W3, b3])
  optimizer.apply_gradients(zip(gradients, [W1, b1, W2, b2, W3, b3]))

In [19]:
list(zip([1, 2, 3], [4, 5, 6]))

[(1, 4), (2, 5), (3, 6)]

In [20]:
# prompt: implement the training with epochs part using the train function

epochs = 10
batch_size = 64

for epoch in range(epochs):
  for batch in range(x_train.shape[0] // batch_size):
    start = batch * batch_size
    end = (batch + 1) * batch_size
    train(x_train[start:end], y_train[start:end])

  output = forward(x_test)
  predictions = tf.argmax(output, axis=1)
  accuracy = accuracy_score(y_test, predictions)
  print("Epoch:", epoch, "Accuracy:", accuracy)

Epoch: 0 Accuracy: 0.8655
Epoch: 1 Accuracy: 0.8981
Epoch: 2 Accuracy: 0.9151
Epoch: 3 Accuracy: 0.9242
Epoch: 4 Accuracy: 0.9311
Epoch: 5 Accuracy: 0.9365
Epoch: 6 Accuracy: 0.94
Epoch: 7 Accuracy: 0.941
Epoch: 8 Accuracy: 0.9432
Epoch: 9 Accuracy: 0.9467
