# ML Toolbox 5 - Logistic Regression with Keras

In this notebook, we will use Keras for both binary and multiclass classification on the MNIST dataset.

In [13]:
import numpy as np
import matplotlib.pyplot as plt
import keras
from sklearn.datasets import fetch_openml

from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

import tensorflow as tf

# Seed NumPy's global random number generator so NumPy-based randomness is repeatable across runs.
np.random.seed(40)

In [7]:
# Load the MNIST training/testing splits that ship with Keras.
# A single call is enough: the previous version invoked load_data() twice
# (once discarding the result) and mixed the `keras` and `tf.keras` namespaces.
# `path` is the cache file name Keras uses for the downloaded archive.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data(path="mnist.npz")

# Sanity-check the expected split sizes and image dimensions.
assert x_train.shape == (60000, 28, 28)
assert x_test.shape == (10000, 28, 28)
assert y_train.shape == (60000,)
assert y_test.shape == (10000,)

In [15]:
# Load the MNIST dataset
# Fetches the "mnist_784" dataset from OpenML through scikit-learn.
# cache=False is passed, which per scikit-learn's documentation of the `cache`
# parameter means the download is not stored in the local data home.
# The cells below read the "data" and "target" entries of the returned object.
mnist = fetch_openml('mnist_784', cache=False)

In [31]:
# Assign the data and target matrices
X = mnist["data"].astype('float64')

# Binary target: 1 if the handwritten number is 9, else 0.
# Use the builtin `int` instead of `np.int`: the alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24 (it was only ever an alias of `int`,
# so the resulting dtype is unchanged).
y = (mnist["target"] == '9').astype(int)

# NOTE(review): the features are passed on as loaded, without any rescaling
# in this cell; confirm whether that is intended for the SGD-trained models
# further down.

print("\nNo. of Samples: ", X.shape)
print("No. of Labels: ", y.shape)


No. of Samples:  (70000, 784)
No. of Labels:  (70000,)


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y = (mnist["target"] == '9').astype(np.int) # 1 if the handwritten number is 9, else 0


# Binary Classification

In [28]:
%%time

# Create the LNN

# Clear any Tensorflow sessions
tf.keras.backend.clear_session()


# To reproduce the same result by the model in each iteration, we use fixed seeds for random number generation. 
np.random.seed(40)
tf.random.set_seed(40)


# Create a Sequential model composed of a single stack of layers connected sequentially. 
model = tf.keras.models.Sequential(name="LNN_Binary_Classifier")
model.add(tf.keras.layers.InputLayer(input_shape=[784, ]))
model.add(tf.keras.layers.Dense(units=1, kernel_initializer="zeros", activation="sigmoid", use_bias=True))


# Display a summary of the model layers and its parameters
model.summary()

Model: "LNN_Binary_Classifier"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1)                 785       
Total params: 785
Trainable params: 785
Non-trainable params: 0
_________________________________________________________________
Wall time: 113 ms


In [29]:
# Compile and train the LNN model

# Training hyperparameters: number of passes over the data and mini-batch size.
no_of_epochs = 50
size_of_mini_batch = 200

# Plain stochastic gradient descent.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.9)

# Binary cross-entropy loss, tracking accuracy during training.
model.compile(
    optimizer=optimizer,
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Fit on (X, y); validation_split holds out that fraction of the data passed
# to fit() as validation data.
history = model.fit(
    x=X,
    y=y,
    epochs=no_of_epochs,
    batch_size=size_of_mini_batch,
    validation_split=0.1,
    verbose=1,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [30]:
# Evaluate the LNN model
numOfEpochs = len(history.history['loss'])
print("Epochs: ", numOfEpochs)

# NOTE(review): X, y here are the same arrays that were passed to model.fit(),
# so despite the label this is not a held-out test set.
print("\nTest Data: Loss & Accuracy: ", model.evaluate(X, y, verbose=0))


# Predicted probability of the positive class (digit 9) for every sample.
y_predicted_proba = model.predict(X)

print(len(y_predicted_proba))

# Turn probabilities into hard 0/1 labels with a 0.5 threshold.
# The comparison builds a NEW array, so y_predicted_proba keeps the
# probabilities. The previous version aliased the two names and overwrote the
# probabilities in place inside a 70,000-iteration Python loop.
y_predicted = (y_predicted_proba >= 0.5).astype(int).ravel()

accuracy = np.mean(y_predicted == y)
print("\nAccuracy: ", accuracy)



print("\nConfusion Matrix:")
print(confusion_matrix(y, y_predicted))

print("\nClassification Report:")
print(classification_report(y, y_predicted))

Epochs:  50

Test Data: Loss & Accuracy:  [9310.49609375, 0.9104571342468262]
70000

Accuracy:  0.9104571428571429

Confusion Matrix:
[[62979    63]
 [ 6205   753]]

Classification Report:
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     63042
           1       0.92      0.11      0.19      6958

    accuracy                           0.91     70000
   macro avg       0.92      0.55      0.57     70000
weighted avg       0.91      0.91      0.88     70000



# Multiclass Classification with Softmax Regression

In [34]:
# Features and integer class labels for the multiclass problem.
X1 = mnist["data"].astype('float64')
y1 = mnist["target"].astype('int64')

# Distinct label values, computed once and reused in the report below.
class_labels = np.unique(y1)

print("\nNo. of Samples: ", X1.shape)
print("No. of Labels: ", y1.shape)
print("Number of classes: ", len(class_labels))
print("Class labels: ", class_labels)


No. of Samples:  (70000, 784)
No. of Labels:  (70000,)
Number of classes:  10
Class labels:  [0 1 2 3 4 5 6 7 8 9]


In [35]:
%%time


# Delete the TensorFlow info before creating a new model, otherwise memory overflow will occur.
tf.keras.backend.clear_session()


# We use the same random seeds as before 


# Create a Sequential model composed of a single stack of layers connected sequentially. 
model = tf.keras.models.Sequential(name="LNN_Multiclass_Classifier")
model.add(tf.keras.layers.InputLayer(input_shape=[784,]))
model.add(tf.keras.layers.Dense(units=10, kernel_initializer="zeros", activation="softmax", use_bias=True))


# Display a summary of the model layers and its parameters
model.summary()

Model: "LNN_Multiclass_Classifier"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                7850      
Total params: 7,850
Trainable params: 7,850
Non-trainable params: 0
_________________________________________________________________
Wall time: 39 ms


In [36]:
# Pull the kernel matrix and bias vector out of the model's first layer and
# print each one followed by its shape.
weights, biases = model.layers[0].get_weights()

for label, value in (
    ("Hidden Layer Matrix:\n", weights),
    ("\nHidden Layer Matrix Dimension:\n", weights.shape),
    ("\nHidden Layer Bias:\n", biases),
    ("\nHidden Layer Bias Dimension:\n", biases.shape),
):
    print(label, value)

Hidden Layer Matrix:
 [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]

Hidden Layer Matrix Dimension:
 (784, 10)

Hidden Layer Bias:
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

Hidden Layer Bias Dimension:
 (10,)


In [37]:
%%time
# Define the optimizer
optimizer = tf.keras.optimizers.SGD(learning_rate=0.3)

# Compile the model
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])

# Set the epochs and batch size
no_of_epochs1 = 300
size_of_mini_batch = 200

# Train the model
history = model.fit(X1, y1, 
                    batch_size=size_of_mini_batch, 
                    epochs=no_of_epochs1,
                    validation_split=0.1, # Fraction of the training data to be used as validation data. 
                    verbose=1)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [39]:
# Evaluate the multiclass model
numOfEpochs = len(history.history['loss'])
print("Epochs: ", numOfEpochs)

# NOTE(review): X1, y1 here are the same arrays that were passed to
# model.fit(), so despite the label this is not a held-out test set.
print("\nTest Data: Loss & Accuracy: ", model.evaluate(X1, y1, verbose=0))

# Per-class probabilities, shape (n_samples, 10) given the 10-unit softmax layer.
y_predicted1 = model.predict(X1)
y_predicted1 = np.argmax(y_predicted1, axis=-1) # get the label/index of the highest probability class

# Score the multiclass predictions (y_predicted1). The previous version used
# `y_predicted`, which holds the 0/1 predictions of the binary model from the
# earlier section and only exists if that section was run first.
accuracy = np.mean(y_predicted1 == y1)
print("\nAccuracy: ", accuracy)

print("\nConfusion Matrix:")
print(confusion_matrix(y1, y_predicted1))

print("\nClassification Report:")
print(classification_report(y1, y_predicted1))

Epochs:  300

Test Data: Loss & Accuracy:  [1635.85546875, 0.9219285845756531]

Accuracy:  0.9219285714285714

Confusion Matrix:
[[6529    1  100   18   24   56   75   44   51    5]
 [   1 7551  140   15    8   12    2   38   95   15]
 [   7   41 6574   53   55   16   42   75  120    7]
 [   8   19  319 6342    6  139   14   82  165   47]
 [   3   23   71   12 6366   12   45   64   65  163]
 [  27   15   81  235   60 5537   75   34  217   32]
 [  14    8  104   11   50  126 6524    7   30    2]
 [   2   15   93   12   38    7    2 7002   17  105]
 [   4   78  177  166   20  168   34   54 6090   34]
 [  13   17   44   75  176   58    4  458   93 6020]]

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.95      0.97      6903
           1       0.97      0.96      0.97      7877
           2       0.85      0.94      0.89      6990
           3       0.91      0.89      0.90      7141
           4       0.94      0.93      0.93  