In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.utils import to_categorical

# Fetch the MNIST digits, already split into training and test sets
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Rescale pixel values into the 0-1 range
train_images, test_images = train_images / 255.0, test_images / 255.0

# Turn the integer class labels into one-hot vectors (the form the
# 'categorical_crossentropy' loss below is paired with)
train_labels, test_labels = to_categorical(train_labels), to_categorical(test_labels)

# Assemble the network one layer at a time
model = Sequential()
model.add(Flatten(input_shape=(28, 28)))    # unroll each 28x28 image into a flat vector
model.add(Dense(128, activation='relu'))    # hidden layer with 128 units
model.add(Dense(10, activation='softmax'))  # 10 outputs (one per class), softmax-activated

# Configure the optimizer, the loss and the metric reported during training
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit for 5 epochs, holding out 20% of the training data for validation
model.fit(
    x=train_images,
    y=train_labels,
    batch_size=32,
    epochs=5,
    validation_split=0.2,
)

# Measure loss and accuracy on the test set and report the accuracy
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)
print("Test accuracy:", test_acc)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test accuracy: 0.977400004863739


In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

# Fetch MNIST and rescale the pixel values by 1/255
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images, test_images = train_images / 255.0, test_images / 255.0

# Labels are passed to the loss exactly as loaded: SparseCategoricalCrossentropy
# is used below, so there is no one-hot encoding step.

# Assemble the network layer by layer. The last Dense layer has no activation,
# so the model emits raw scores (logits) instead of probabilities.
model = Sequential()
model.add(Flatten(input_shape=(28, 28)))
model.add(Dense(128, activation='relu'))
model.add(Dense(10))

# from_logits=True: the loss is told that the model outputs are unnormalised logits
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(
    loss=loss_fn,
    optimizer='adam',
    metrics=['accuracy'],
)

model.fit(x=train_images, y=train_labels, batch_size=32, epochs=5)

# Inspect a single test sample: slicing with [:1] keeps the leading batch axis
image = test_images[:1]
true_label = test_labels[0]  # ground-truth label of that same first test image

# Forward pass; the result is the model's raw, un-normalised output
logits = model.predict(image)

print("Raw output (logits) from the model:\n", logits)

# Manual softmax calculation
def softmax(x, axis=-1):
    """Convert raw scores (logits) into probabilities with a numerically stable softmax.

    The maximum is subtracted *along ``axis``* before exponentiating, so every
    slice is shifted by its own largest value. Softmax is invariant to a
    per-slice constant shift, so the result is unchanged mathematically, but
    the largest exponent in each slice becomes exp(0) = 1. That prevents
    overflow for large logits and prevents a whole slice from underflowing to
    zero (which would produce 0/0 = NaN) when a batch contains rows of very
    different magnitude.

    Args:
        x: Array-like of scores. Any number of dimensions >= 1 is accepted,
            e.g. shape (n_classes,) or (batch, n_classes).
        axis: Axis holding the class scores; probabilities sum to 1 along it.
            Defaults to the last axis, which matches the usual
            (batch, n_classes) layout.

    Returns:
        numpy.ndarray with the same shape as ``x``; floating-point inputs keep
        their dtype.
    """
    x = np.asarray(x)
    # Per-slice shift (keepdims so it broadcasts back against x).
    shifted = x - np.max(x, axis=axis, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=axis, keepdims=True)

# Turn the raw model outputs into probabilities
probs = softmax(logits)
print("\nProbabilities computed manually using softmax:\n", probs)

# The class with the highest probability is the predicted class.
# `probs` is 2-D (one row per input image), so take the arg-max along the class
# axis and then pick row 0. A bare np.argmax(probs) would index into the
# *flattened* array, which is only a valid class id while the batch has one row.
predicted_label = np.argmax(probs, axis=-1)[0]

print(f"\nTrue Label: {true_label}, Predicted Label: {predicted_label}")


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Raw output (logits) from the model:
 [[ -3.5402524   -7.05632     -0.92576164   2.434422   -11.252341
   -4.0176864  -18.556313    12.153505    -0.869607    -1.1841999 ]]

Probabilities computed manually using softmax:
 [[1.5284760e-07 4.5420294e-09 2.0879393e-06 6.0121070e-05 6.8381960e-11
  9.4822553e-08 4.6011534e-14 9.9993360e-01 2.2085403e-06 1.6124238e-06]]

True Label: 7, Predicted Label: 7
