Tenserflow and Keras

In [1]:

import tensorflow as tf

from tensorflow.keras import layers, models, datasets



# Step 1: Load and Preprocess Data

(train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()

train_images, test_images = train_images / 255.0, test_images / 255.0



# Step 2: Define the Neural Network Architecture

model = models.Sequential([

    layers.Flatten(input_shape=(28, 28)),

    layers.Dense(128, activation='relu'),

    layers.Dropout(0.2),

    layers.Dense(10)

])



# Step 3: Compile the Model

model.compile(optimizer='adam',

              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),

              metrics=['accuracy'])



# Step 4: Train the Model

model.fit(train_images, train_labels, epochs=5)



# Step 5: Evaluate the Model

test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)

print(f'Test accuracy: {test_acc}')


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 6us/step


  super().__init__(**kwargs)


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.8577 - loss: 0.4912
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9538 - loss: 0.1555
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9675 - loss: 0.1111
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9728 - loss: 0.0887
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9771 - loss: 0.0750
313/313 - 1s - 2ms/step - accuracy: 0.9772 - loss: 0.0780
Test accuracy: 0.9771999716758728


Pytorch

In [2]:

import torch

import torchvision

import torch.nn as nn

import torch.optim as optim

import torchvision.transforms as transforms



# Step 1: Load and Preprocess Data

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_set = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)



# Step 2: Define the Neural Network Architecture

class Net(nn.Module):

    def __init__(self):

        super(Net, self).__init__()

        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)

        self.fc1 = nn.Linear(1600, 128)

        self.fc2 = nn.Linear(128, 10)



    def forward(self, x):

        x = torch.relu(self.conv1(x))

        x = torch.max_pool2d(x, kernel_size=2, stride=2)

        x = torch.relu(self.conv2(x))

        x = torch.max_pool2d(x, kernel_size=2, stride=2)

        x = x.view(-1, 1600)

        x = torch.relu(self.fc1(x))

        x = self.fc2(x)

        return x



model = Net()



# Step 3: Define Loss Function and Optimizer

criterion = nn.CrossEntropyLoss()

optimizer = optim.SGD(model.parameters(), lr=0.01)



# Step 4: Train the Model

for epoch in range(5):

    running_loss = 0.0

    for i, data in enumerate(train_loader, 0):

        inputs, labels = data

        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

        if i % 100 == 99:

            print(f'Epoch: {epoch + 1}, Batch: {i + 1}, Loss: {running_loss / 100}')

            running_loss = 0.0



print('Finished Training')
     

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|████████████████████████████████████████████████████████████████████| 9912422/9912422 [00:18<00:00, 544907.95it/s]


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|████████████████████████████████████████████████████████████████████████| 28881/28881 [00:00<00:00, 117390.70it/s]


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|████████████████████████████████████████████████████████████████████| 1648877/1648877 [00:07<00:00, 211343.35it/s]


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|█████████████████████████████████████████████████████████████████████████| 4542/4542 [00:00<00:00, 4591595.27it/s]


Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw

Epoch: 1, Batch: 100, Loss: 2.2529763865470884
Epoch: 1, Batch: 200, Loss: 2.052003548145294
Epoch: 1, Batch: 300, Loss: 1.4005225563049317
Epoch: 1, Batch: 400, Loss: 0.7444566258788109
Epoch: 1, Batch: 500, Loss: 0.49590682715177536
Epoch: 1, Batch: 600, Loss: 0.402982544451952
Epoch: 1, Batch: 700, Loss: 0.3645714870095253
Epoch: 1, Batch: 800, Loss: 0.31477760449051856
Epoch: 1, Batch: 900, Loss: 0.28590532541275027
Epoch: 2, Batch: 100, Loss: 0.2620733288675547
Epoch: 2, Batch: 200, Loss: 0.2306802911311388
Epoch: 2, Batch: 300, Loss: 0.23794006183743477
Epoch: 2, Batch: 400, Loss: 0.2216922351717949
Epoch: 2, Batch: 500, Loss: 0.19419811762869357
Epoch: 2, Batch: 600, Loss: 0.17479369558393956
Epoch: 2, Batch: 700, Loss: 0.1830508702993393
Epoch: 2, Batch: 800, Loss: 0.1671824785321951
Epoch: 2, Batch: 900, Loss: 0.1569905024021864
Epoch: 3, Batch: 100, Loss: 0.14940424928441642
Epoch: 3, Batch: 200, Loss:

This code snippet demonstrates how to create and train neural networks to classify digits from the MNIST dataset, using both TensorFlow/Keras and PyTorch frameworks. Here's an explanation of each step for both implementations.

### TensorFlow/Keras Implementation
#### Step 1: Load and Preprocess Data
- `import tensorflow as tf`: Imports TensorFlow for building and training neural networks.
- `from tensorflow.keras import layers, models, datasets`: Imports specific modules for defining layers, models, and datasets.
- `datasets.mnist.load_data()`: Loads the MNIST dataset, containing 60,000 training images and 10,000 test images, with 28x28 grayscale images of handwritten digits.
- `train_images, train_labels, test_images, test_labels`: Assigns the loaded training and testing images and labels.
- `train_images, test_images = train_images / 255.0, test_images / 255.0`: Normalizes pixel values to be in the range [0, 1] by dividing by 255.0, making training more stable.

#### Step 2: Define the Neural Network Architecture
- `model = models.Sequential([ ... ])`: Creates a sequential model, where layers are added in sequence.
  - `layers.Flatten(input_shape=(28, 28))`: Flattens the 28x28 image into a 1D array of 784 elements.
  - `layers.Dense(128, activation='relu')`: Adds a dense (fully connected) layer with 128 units, using the ReLU activation function.
  - `layers.Dropout(0.2)`: Adds a dropout layer that randomly deactivates 20% of the neurons during training, reducing overfitting.
  - `layers.Dense(10)`: Adds an output dense layer with 10 units (one for each digit 0-9), representing the final output layer.

#### Step 3: Compile the Model
- `model.compile(...)`: Compiles the model with specified configurations:
  - `optimizer='adam'`: Uses the Adam optimizer for training, which is robust and adaptive.
  - `loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)`: Uses sparse categorical cross-entropy as the loss function because the output layer returns raw logits (not probabilities).
  - `metrics=['accuracy']`: Tracks accuracy during training.

#### Step 4: Train the Model
- `model.fit(train_images, train_labels, epochs=5)`: Trains the model on the training data for 5 epochs.

#### Step 5: Evaluate the Model
- `model.evaluate(test_images, test_labels, verbose=2)`: Evaluates the model on the test data to get the loss and accuracy. `verbose=2` indicates a moderate level of output detail.
- `print(f'Test accuracy: {test_acc}')`: Prints the test accuracy, indicating the model's performance on the test data.

### PyTorch Implementation
#### Step 1: Load and Preprocess Data
- `import torch, torchvision, torch.nn as nn, torch.optim as optim, torchvision.transforms as transforms`: Imports PyTorch libraries and utilities for neural networks, optimizers, and data processing.
- `transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])`: Defines a transformation to convert images to tensors and normalize them to mean 0.5 and standard deviation 0.5.
- `train_set = torchvision.datasets.MNIST(...)`: Loads the MNIST dataset for training, applying the specified `transform`.
- `train_loader = torch.utils.data.DataLoader(...)`: Creates a data loader to batch and shuffle the training data. This enables efficient data loading and batching.

#### Step 2: Define the Neural Network Architecture
- `class Net(nn.Module):`: Defines a neural network class inheriting from `nn.Module`.
  - `__init__()`: Constructor to initialize the network architecture.
    - `self.conv1 = nn.Conv2d(1, 32, kernel_size=3)`: Adds a 2D convolution layer with 32 filters and a 3x3 kernel size, with 1 input channel (grayscale).
    - `self.conv2 = nn.Conv2d(32, 64, kernel_size=3)`: Adds a second 2D convolution layer with 64 filters.
    - `self.fc1 = nn.Linear(1600, 128)`: Adds a fully connected (dense) layer with 128 units, receiving a flattened input of 1600 elements.
    - `self.fc2 = nn.Linear(128, 10)`: Adds a final fully connected layer with 10 output units, representing the 10 digits.
  - `forward(self, x)`: Defines the forward propagation through the network.
    - `x = torch.relu(self.conv1(x))`: Applies ReLU activation after the first convolution.
    - `x = torch.max_pool2d(x, kernel_size=2, stride=2)`: Applies a 2x2 max-pooling operation to reduce the spatial dimensions.
    - `x = torch.relu(self.conv2(x))`: Applies ReLU activation after the second convolution.
    - `x = torch.max_pool2d(x, kernel_size=2, stride=2)`: Applies max-pooling again.
    - `x = x.view(-1, 1600)`: Flattens the tensor to prepare for fully connected layers.
    - `x = torch.relu(self.fc1(x))`: Applies ReLU activation in the first fully connected layer.
    - `x = self.fc2(x)`: The final output layer, returning the raw logits.
- `model = Net()`: Creates an instance of the defined neural network.

#### Step 3: Define Loss Function and Optimizer
- `criterion = nn.CrossEntropyLoss()`: Defines the cross-entropy loss function for multi-class classification.
- `optimizer = optim.SGD(model.parameters(), lr=0.01)`: Sets up the Stochastic Gradient Descent (SGD) optimizer with a learning rate of 0.01 to update the network's parameters during training.

#### Step 4: Train the Model
- `for epoch in range(5):`: Runs the training loop for 5 epochs.
  - `running_loss = 0.0`: Initializes a variable to accumulate loss for reporting.
  - `for i, data in enumerate(train_loader, 0):`: Loops through the training data in batches.
    - `inputs, labels = data`: Unpacks the batch into `inputs` (images) and `labels` (corresponding targets).
    - `optimizer.zero_grad()`: Resets the gradients to zero.
    - `outputs = model(inputs)`: Forwards the batch through the network to get predictions.
    - `loss = criterion(outputs, labels)`: Calculates the loss based on the predictions and the labels.
    - `loss.backward()`: Performs backpropagation to compute gradients.
    - `optimizer.step()`: Updates the model's parameters based on the computed gradients.
    - `running_loss += loss.item()`: Adds the loss for this batch to `running_loss`.
    - `if i % 100 == 99:`: Outputs progress every 100 batches.
      - `print(f'Epoch: {epoch + 1}, Batch: {i + 1}, Loss: {running_loss / 100}')`: Prints the epoch, batch number, and average loss for the last 100 batches.
      - `running_loss = 0.0`: Resets `running_loss` for the next set of batches.

#### Step 5: Print Training Completion
- `print('Finished Training')`: Prints a message indicating the training is complete.

### Summary
This code snippet demonstrates the implementation and training of simple neural networks on the MNIST dataset using both TensorFlow/Keras and PyTorch. The code includes data loading and preprocessing, defining neural network architectures, compiling/training, and evaluating the model. It shows how to work with two popular deep learning frameworks to achieve similar outcomes.