### Logistic Regression algorithm from scratch in Tensorflow 2.0

In [1]:
import tensorflow as tf
import numpy as np

#### We will be using the MNIST handwritten digits dataset that comes with tensorflow.

In [2]:
#Import the dataset
from tensorflow.keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
print(x_train.dtype)
print(x_test.dtype)

uint8
uint8


In [4]:
#Convert the float32.
x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32)

In [5]:
print(x_train.dtype)
print(x_test.dtype)

float32
float32


In [6]:
print(x_train.shape)
print(x_test.shape)

(60000, 28, 28)
(10000, 28, 28)


In [7]:
#Flatten the images to 1-D vecotr of 28*28 i.e. 784 features.
num_features = 28*28
x_train, x_test = x_train.reshape([-1, num_features]), x_test.reshape([-1, num_features])

In [8]:
print(x_train.shape)
print(x_test.shape)

(60000, 784)
(10000, 784)


In [9]:
#Normalize the values from 0-255 to 0-1.
x_train, x_test = x_train/255., x_test/255.

In [10]:
#0-9 digits
num_classes = 10

#Parameters
learning_rate = 0.01

training_steps = 1000

batch_size = 256

display_step = 50

In [11]:
#Shuffle and batch the data

train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))

train_data = train_data.repeat().shuffle(5000).batch(batch_size).prefetch(1)

In [12]:
#Initialize weights and biases.

#We have 784 features and 10 classes. So the Weight vector will be of dimension (784,10).
W = tf.Variable(tf.ones([num_features, num_classes]), name = "weight")

#Bias will have a shape of 10, the total number of classes.
b = tf.Variable(tf.zeros([num_classes]), name = 'bias')

#### Define the Logistic Regression and Cost Function
[Link to understand Cross-Entropy](https://machinelearningmastery.com/cross-entropy-for-machine-learning/)

In [13]:
#Logistic Regression (Wx + b)
#Since this is a multiclass classification we are using softmax instead of sigmoid.
#And if you are wondering what is softmax doing and where are those fancy exponents then, have a read on Softmax Regression.

def logistic_regression(x):
  return tf.nn.softmax(tf.matmul(x, W) + b)

#Cross-Entropy Loss function.
def cross_entropy(y_pred, y_true):
  #One-hot encoding
  y_true = tf.one_hot(y_true, depth = num_classes)

  #Clip prediction values to avoid log(0) error.
  y_pred = tf.clip_by_value(y_pred, 1e-9, 1.)

  return tf.reduce_mean(-tf.reduce_sum(y_true * tf.math.log(y_pred)))

#### Define Optimizers and Accuracy Metrics

In [17]:
def accuracy(y_pred, y_true):
  #Predicted class is the index of highest score in prediction vector i.e. argmax.
  correct_pred = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))

  return tf.reduce_mean(tf.cast(correct_pred, tf.float32))

#Optimizer
optimizer = tf.optimizers.Adam(learning_rate=learning_rate)

In [15]:
#Calculate gradients and update weights and biases.

def optimize(x, y):

  with tf.GradientTape() as tape:
    #Prediction
    y_pred = logistic_regression(x)
    #Calculate loss
    loss = cross_entropy(y_pred, y)
  
  #Compute Gradients
  gradients = tape.gradient(loss, [W, b])

  #Update W and b.
  optimizer.apply_gradients(zip(gradients, [W, b]))

In [18]:
#The training loop

for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):

  optimize(batch_x, batch_y)

  if step % display_step == 0:
    pred = logistic_regression(batch_x)
    
    loss = cross_entropy(pred, batch_y)

    acc = accuracy(pred, batch_y)

    print("Step: {}, loss: {}, accuracy: {}".format(step, loss, acc))

Step: 50, loss: 93.64014434814453, accuracy: 0.88671875
Step: 100, loss: 66.70635986328125, accuracy: 0.95703125
Step: 150, loss: 92.08629608154297, accuracy: 0.90234375
Step: 200, loss: 52.10447692871094, accuracy: 0.93359375
Step: 250, loss: 86.71768188476562, accuracy: 0.89453125
Step: 300, loss: 70.26834106445312, accuracy: 0.9296875
Step: 350, loss: 79.16162872314453, accuracy: 0.91796875
Step: 400, loss: 76.02119445800781, accuracy: 0.91796875
Step: 450, loss: 57.410770416259766, accuracy: 0.94921875
Step: 500, loss: 95.61672973632812, accuracy: 0.9296875
Step: 550, loss: 59.11628723144531, accuracy: 0.92578125
Step: 600, loss: 80.53221130371094, accuracy: 0.91015625
Step: 650, loss: 81.55431365966797, accuracy: 0.91796875
Step: 700, loss: 58.65564727783203, accuracy: 0.9453125
Step: 750, loss: 102.43087768554688, accuracy: 0.9140625
Step: 800, loss: 67.85317993164062, accuracy: 0.91015625
Step: 850, loss: 71.95440673828125, accuracy: 0.91015625
Step: 900, loss: 62.60881042480469