In [1]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import optimizers, losses

In [2]:
# Define the AlexNet model
class AlexNet(tf.keras.Model):
    """AlexNet-style CNN for single-channel images that emits 10 class logits.

    The network is split into two sub-models:

    * ``features``   - five convolutions, each followed by ReLU, with three
      max-pooling stages and a final dropout.
    * ``classifier`` - flatten, two 4096-unit dense layers with ReLU (dropout
      after the first), and a 10-unit output layer with no activation, so
      the model returns raw logits.
    """

    def __init__(self):
        super().__init__()

        # Feature extraction layers
        self.features = models.Sequential([
            # Declare the input with an explicit Input object instead of an
            # `input_shape=` kwarg on the first layer. Height and width are
            # left unspecified so the same network accepts any spatial size;
            # only the single grayscale channel is fixed.
            layers.Input(shape=(None, None, 1)),
            layers.Conv2D(64, kernel_size=5, strides=1, padding="same"),
            layers.ReLU(),
            layers.MaxPooling2D(pool_size=3, strides=2),
            layers.Conv2D(192, kernel_size=5, padding="same"),
            layers.ReLU(),
            layers.MaxPooling2D(pool_size=3, strides=2),
            layers.Conv2D(384, kernel_size=3, padding="same"),
            layers.ReLU(),
            layers.Conv2D(256, kernel_size=3, padding="same"),
            layers.ReLU(),
            layers.Conv2D(256, kernel_size=3, padding="same"),
            layers.ReLU(),
            layers.MaxPooling2D(pool_size=3, strides=2),
            layers.Dropout(0.5)
        ])

        # Classification layers
        self.classifier = models.Sequential([
            layers.Flatten(),
            layers.Dense(4096),
            layers.ReLU(),
            layers.Dropout(0.5),
            layers.Dense(4096),
            layers.ReLU(),
            layers.Dense(10)  # 10 output classes for MNIST
        ])

    def call(self, x, training=None):
        """Run the forward pass and return the class logits.

        Args:
            x: Batch of input images with a single channel in the last axis.
            training: Optional flag forwarded to both sub-models so their
                Dropout layers are active only when training. ``None`` lets
                Keras decide from the surrounding call context.

        Returns:
            Tensor with one row of 10 unnormalised class scores per image.
        """
        # Forward `training` explicitly rather than relying on implicit
        # propagation, so Dropout reliably follows the caller's mode.
        x = self.features(x, training=training)
        x = self.classifier(x, training=training)
        return x

# Instantiate the model
model = AlexNet()

# Build the model by passing a dummy input tensor through it. A real forward
# pass creates the weights of every layer; calling `build()` with only a shape
# is not guaranteed to do so for a subclassed model. The dummy batch is one
# 32x32 single-channel image, the size the preprocessing step resizes to.
model(tf.zeros((1, 32, 32, 1)))

# Show the model summary
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [3]:
# Choose the compute device: the first GPU if TensorFlow can see one,
# otherwise fall back to the CPU.
if tf.config.list_physical_devices('GPU'):
    device = "/GPU:0"
else:
    device = "/CPU:0"
print(f"Using device: {device}")

# Create a fresh model inside the device scope. This rebinds `model`, so any
# instance created earlier under that name is replaced. Only operations
# executed inside this `with` block are pinned to `device`.
with tf.device(device):
    model = AlexNet()

Using device: /GPU:0


In [4]:
# Load and preprocess the MNIST dataset
(train_images, train_labels), (val_images, val_labels) = datasets.mnist.load_data()

# Preprocessing steps
# Add a trailing channel axis for grayscale, resize to (32, 32), and scale
# pixel values by 1/255
train_images = tf.image.resize(train_images[..., tf.newaxis], [32, 32]) / 255.0
val_images = tf.image.resize(val_images[..., tf.newaxis], [32, 32]) / 255.0

# Normalize images to have values between -1 and 1
train_images = (train_images - 0.5) / 0.5
val_images = (val_images - 0.5) / 0.5

# Create batched datasets; only the training split is shuffled
batch_size = 2048

# Use a shuffle buffer that covers the whole training set. A buffer smaller
# than the dataset only mixes samples within a sliding window, so each epoch
# would otherwise see the data in nearly the same order.
train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_dataset = (
    train_dataset
    .shuffle(buffer_size=len(train_labels))
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)  # prepare the next batch while the current one is in use
)

val_dataset = tf.data.Dataset.from_tensor_slices((val_images, val_labels))
val_dataset = val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step


In [5]:
# Optimizer: Adam with a learning rate of 1e-3
optimizer = optimizers.Adam(learning_rate=1e-3)
# Loss: sparse categorical cross-entropy over integer labels; the model
# outputs are treated as raw logits (from_logits=True)
criterion = losses.SparseCategoricalCrossentropy(from_logits=True)

In [7]:
# Set training parameters
num_epochs = 5

# Training loop: each epoch makes one optimisation pass over `train_dataset`,
# then one accuracy pass over `val_dataset`.
for epoch in range(num_epochs):
    # Training phase
    total_loss = 0.0
    num_batches = 0
    for idx, (images, labels) in enumerate(train_dataset):
        # Record the forward pass so gradients can be taken afterwards
        with tf.GradientTape() as tape:
            outputs = model(images, training=True)
            loss = criterion(labels, outputs)

        # Backward pass and optimization
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        # Copy the scalar loss to the host once and reuse it below
        batch_loss = float(loss.numpy())
        total_loss += batch_loss
        num_batches += 1

        # Print batch loss every 5 batches
        if idx % 5 == 0:
            print(f"Batch: {idx}, Batch loss: {batch_loss:.4f}")

    # Validation phase
    correct = 0
    total = 0
    for images, labels in val_dataset:
        outputs = model(images, training=False)
        predictions = tf.argmax(outputs, axis=1, output_type=tf.int32)

        # Cast labels to int32 to match predictions type
        labels = tf.cast(labels, tf.int32)

        # Count matches as integers so the tally stays exact
        correct += int(tf.math.count_nonzero(tf.equal(predictions, labels)).numpy())
        total += int(labels.shape[0])

    # Calculate accuracy and average loss; report NaN rather than raising
    # ZeroDivisionError when a dataset yields no batches
    avg_loss = total_loss / num_batches if num_batches else float("nan")
    accuracy = correct / total * 100 if total else float("nan")
    # The value printed is the mean of the per-batch losses, so label it as
    # an average rather than a total
    print(f"Epoch [{epoch + 1}/{num_epochs}], Avg loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")


print("Training complete!")


Batch: 0, Batch loss: 0.4930
Batch: 5, Batch loss: 0.4388
Batch: 10, Batch loss: 0.2606
Batch: 15, Batch loss: 0.2232
Batch: 20, Batch loss: 0.1796
Batch: 25, Batch loss: 0.1663
Epoch [1/5], Total loss: 0.2703, Accuracy: 96.54%
Batch: 0, Batch loss: 0.1175
Batch: 5, Batch loss: 0.1020
Batch: 10, Batch loss: 0.0810
Batch: 15, Batch loss: 0.0771
Batch: 20, Batch loss: 0.0935
Batch: 25, Batch loss: 0.0963
Epoch [2/5], Total loss: 0.0905, Accuracy: 98.05%
Batch: 0, Batch loss: 0.0760
Batch: 5, Batch loss: 0.0808
Batch: 10, Batch loss: 0.0431
Batch: 15, Batch loss: 0.0668
Batch: 20, Batch loss: 0.0779
Batch: 25, Batch loss: 0.0580
Epoch [3/5], Total loss: 0.0592, Accuracy: 98.74%
Batch: 0, Batch loss: 0.0524
Batch: 5, Batch loss: 0.0398
Batch: 10, Batch loss: 0.0278
Batch: 15, Batch loss: 0.0427
Batch: 20, Batch loss: 0.0354
Batch: 25, Batch loss: 0.0476
Epoch [4/5], Total loss: 0.0422, Accuracy: 98.91%
Batch: 0, Batch loss: 0.0399
Batch: 5, Batch loss: 0.0416
Batch: 10, Batch loss: 0.0332
