# Lab 03: TensorFlow vs. PyTorch
- Train a model on MNIST in both TensorFlow and PyTorch, convert to TFLite and ONNX.  
- Use `tf.GradientTape` to write a custom TensorFlow training loop.



## TensorFlow Implementation

In [9]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import time

# Load MNIST and rescale the 8-bit pixel values into [0, 1].
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255, x_test / 255
# One-hot encode the digit labels to match the categorical cross-entropy loss.
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

# Small MLP: flatten the 28x28 image -> 64 ReLU units -> 10-way softmax.
# NOTE(review): the PyTorch model and the later TensorFlow cells in this notebook
# use 128 hidden units, so the reported training times are not like-for-like.
model = tf.keras.Sequential(
    [
        tf.keras.layers.Input(shape=(28, 28)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ]
)

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Only the fit() call is timed; evaluation is excluded from the reported figure.
start = time.time()
model.fit(x_train, y_train, epochs=5)
end = time.time()
print(f"TF Training time: {end-start:.2f} seconds")
# Last expression of the cell, so the notebook displays [test loss, test accuracy].
model.evaluate(x_test, y_test)

Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8634 - loss: 0.4835
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9554 - loss: 0.1509
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9685 - loss: 0.1069
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9760 - loss: 0.0808
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9794 - loss: 0.0679
TF Training time: 32.64 seconds
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9681 - loss: 0.1041


[0.08877000212669373, 0.9721999764442444]

## Convert TensorFlow model to TFLite

In [10]:
# Convert the trained Keras model to TFLite; convert() returns the serialized bytes.
tflite_model = tf.lite.TFLiteConverter.from_keras_model(model).convert()

# Write the serialized model into the current working directory.
with open("model.tflite", mode="wb") as tflite_file:
    tflite_file.write(tflite_model)

Saved artifact at '/tmp/tmptutc89re'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 28, 28), dtype=tf.float32, name='keras_tensor_8')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  132513763736464: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132513763734928: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132513763734736: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132520146821648: TensorSpec(shape=(), dtype=tf.resource, name=None)


## PyTorch Implementation

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import time  # Added missing import

# Convert each image to a tensor and flatten it to a 1-D vector so it can be fed
# directly into the fully connected network below.
transform = transforms.Compose([transforms.ToTensor(), transforms.Lambda(lambda x: x.view(-1))])

# Reshuffle the training set every epoch. Keras `fit` shuffles by default and the
# tf.data pipelines in this notebook call `.shuffle(...)`, so without this the
# PyTorch run would be the only one trained on a fixed sample order.
train_loader = DataLoader(
    datasets.MNIST(root='./data', train=True, transform=transform, download=True),
    batch_size=32,
    shuffle=True,
)
# Sample order does not affect the accuracy count, so the test set stays unshuffled.
test_loader = DataLoader(
    datasets.MNIST(root='./data', train=False, transform=transform, download=True),
    batch_size=1000,
)

class Net(nn.Module):
    """Fully connected classifier with one hidden ReLU layer.

    Args:
        in_features: Length of each flattened input vector (784 = 28 * 28).
        hidden_units: Width of the hidden layer.
        num_classes: Number of output scores produced per sample.

    The defaults reproduce the original fixed 784 -> 128 -> 10 architecture, so
    ``Net()`` behaves exactly as before.
    """

    def __init__(self, in_features: int = 784, hidden_units: int = 128, num_classes: int = 10):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_units)
        self.fc2 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return unnormalized class scores for ``x``.

        No softmax is applied here: the scores are meant to be passed to
        ``nn.CrossEntropyLoss``, which works on raw logits.
        """
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

model = Net()
optimizer = optim.Adam(model.parameters())  # Adam with its default hyperparameters
loss_fn = nn.CrossEntropyLoss()             # applied to the raw scores returned by Net

# Time five full passes over the training loader.
start = time.time()
for epoch in range(5):
    for x, y in train_loader:
        optimizer.zero_grad()   # drop gradients left over from the previous step
        loss = loss_fn(model(x), y)
        loss.backward()
        optimizer.step()
end = time.time()
print(f"PyTorch Training time: {end - start:.2f} seconds")

# Count correct top-1 predictions on the test set, in eval mode and without autograd.
model.eval()
with torch.no_grad():
    correct = sum((model(x).argmax(1) == y).sum().item() for x, y in test_loader)
print(f"Test accuracy: {correct / len(test_loader.dataset):.4f}")

PyTorch Training time: 67.24 seconds
Test accuracy: 0.9722


## Convert PyTorch model to ONNX

In [12]:
# Install ONNX
!pip install onnx



In [13]:
# Example input used to drive the export: a single flattened 28x28 image.
dummy_input = torch.randn(1, 784)
# Declare dimension 0 of the input and output as dynamic so the exported model
# accepts any batch size instead of being fixed to the dummy input's batch of 1.
torch.onnx.export(model, dummy_input, "model.onnx",
                  input_names=["input"], output_names=["output"],
                  dynamic_axes={"input": {0: "batch"}, "output": {0: "batch"}})

## TensorFlow custom training loop using tf.GradientTape

In [14]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import time

# --- Data: load MNIST, rescale pixels to [0, 1], one-hot encode the labels ---
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

# --- Input pipelines ---
batch_size = 32  # same batch size as the PyTorch example
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(10000)
    .batch(batch_size)
)
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

# --- Model: flatten 28x28 -> 128 ReLU units -> 10-way softmax ---
model = tf.keras.Sequential(
    [
        tf.keras.layers.Input(shape=(28, 28)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ]
)

# --- Loss, optimizer, and one accuracy tracker per split ---
loss_fn = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
train_acc_metric = tf.keras.metrics.CategoricalAccuracy()
test_acc_metric = tf.keras.metrics.CategoricalAccuracy()

# --- Training: manual forward / backward / update for every batch ---
epochs = 5
start = time.time()
for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    for step, (x_batch, y_batch) in enumerate(train_dataset):
        # Record the forward pass so the tape can differentiate the loss.
        with tf.GradientTape() as tape:
            probs = model(x_batch, training=True)  # softmax outputs of the last layer
            loss = loss_fn(y_batch, probs)
        weights = model.trainable_variables
        optimizer.apply_gradients(zip(tape.gradient(loss, weights), weights))
        train_acc_metric.update_state(y_batch, probs)

        # Report the current batch loss and the running accuracy every 100 steps.
        if step % 100 == 0:
            print(f"Step {step}, Loss: {loss.numpy():.4f}, Accuracy: {train_acc_metric.result().numpy():.4f}")

    print(f"Training Accuracy for epoch {epoch+1}: {train_acc_metric.result().numpy():.4f}")
    # Start the next epoch with a fresh running accuracy.
    train_acc_metric.reset_state()
end = time.time()
print(f"\nTF Training time: {end - start:.2f} seconds")

# --- Evaluation: accumulate accuracy over the whole test set ---
for x_batch, y_batch in test_dataset:
    test_acc_metric.update_state(y_batch, model(x_batch, training=False))

print(f"Test Accuracy: {test_acc_metric.result().numpy():.4f}")


Epoch 1/5
Step 0, Loss: 2.4068, Accuracy: 0.0625
Step 100, Loss: 0.5937, Accuracy: 0.7720
Step 200, Loss: 0.2936, Accuracy: 0.8310
Step 300, Loss: 0.2247, Accuracy: 0.8556
Step 400, Loss: 0.2057, Accuracy: 0.8723
Step 500, Loss: 0.3599, Accuracy: 0.8812
Step 600, Loss: 0.3891, Accuracy: 0.8894
Step 700, Loss: 0.1735, Accuracy: 0.8963
Step 800, Loss: 0.0879, Accuracy: 0.9004
Step 900, Loss: 0.0517, Accuracy: 0.9046
Step 1000, Loss: 0.1653, Accuracy: 0.9079
Step 1100, Loss: 0.2062, Accuracy: 0.9116
Step 1200, Loss: 0.2912, Accuracy: 0.9143
Step 1300, Loss: 0.1972, Accuracy: 0.9167
Step 1400, Loss: 0.2843, Accuracy: 0.9189
Step 1500, Loss: 0.0444, Accuracy: 0.9209
Step 1600, Loss: 0.0476, Accuracy: 0.9233
Step 1700, Loss: 0.1609, Accuracy: 0.9254
Step 1800, Loss: 0.0397, Accuracy: 0.9270
Training Accuracy for epoch 1: 0.9283

Epoch 2/5
Step 0, Loss: 0.1016, Accuracy: 0.9375
Step 100, Loss: 0.0381, Accuracy: 0.9629
Step 200, Loss: 0.1785, Accuracy: 0.9624
Step 300, Loss: 0.2835, Accuracy:

## Performance Optimization with Graph Execution using @tf.function

In [15]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import time

# --- Data: load MNIST, rescale pixels to [0, 1], one-hot encode into 10 classes ---
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# --- Input pipelines: shuffled training batches, ordered test batches ---
batch_size = 32
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(10000)
    .batch(batch_size)
)
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

# --- Model: flatten 28x28 -> 128 ReLU units -> 10-way softmax ---
model = tf.keras.Sequential(
    [
        tf.keras.layers.Input(shape=(28, 28)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ]
)

# --- Loss, optimizer, and one accuracy tracker per split ---
loss_fn = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
train_acc_metric = tf.keras.metrics.CategoricalAccuracy()
test_acc_metric = tf.keras.metrics.CategoricalAccuracy()

@tf.function  # run the step as a TensorFlow graph instead of eagerly
def train_step(x_batch, y_batch):
    """Apply one optimizer update for a single batch and return the batch loss.

    Relies on the notebook-level ``model``, ``loss_fn``, ``optimizer`` and
    ``train_acc_metric``; the accuracy metric is updated as a side effect.
    """
    # Record the forward pass so the tape can differentiate the loss.
    with tf.GradientTape() as tape:
        probs = model(x_batch, training=True)  # softmax outputs of the last layer
        batch_loss = loss_fn(y_batch, probs)
    weights = model.trainable_variables
    optimizer.apply_gradients(zip(tape.gradient(batch_loss, weights), weights))
    train_acc_metric.update_state(y_batch, probs)
    return batch_loss

# --- Training: each batch is handled by the graph-compiled train_step ---
epochs = 5
start = time.time()
for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    for step, batch in enumerate(train_dataset):
        x_batch, y_batch = batch
        loss = train_step(x_batch, y_batch)

        # Report the current batch loss and the running accuracy every 100 steps.
        if step % 100 == 0:
            print(f"Step {step}, Loss: {loss.numpy():.4f}, Accuracy: {train_acc_metric.result().numpy():.4f}")

    print(f"Training Accuracy for epoch {epoch+1}: {train_acc_metric.result().numpy():.4f}")
    # Start the next epoch with a fresh running accuracy.
    train_acc_metric.reset_state()
end = time.time()
print(f"\nTF Training time: {end - start:.2f} seconds")

# --- Evaluation: runs eagerly, accumulating accuracy over the whole test set ---
for x_batch, y_batch in test_dataset:
    test_acc_metric.update_state(y_batch, model(x_batch, training=False))

print(f"Test Accuracy: {test_acc_metric.result().numpy():.4f}")


Epoch 1/5
Step 0, Loss: 2.3434, Accuracy: 0.0938
Step 100, Loss: 0.4305, Accuracy: 0.7788
Step 200, Loss: 0.4286, Accuracy: 0.8296
Step 300, Loss: 0.4435, Accuracy: 0.8544
Step 400, Loss: 0.1264, Accuracy: 0.8702
Step 500, Loss: 0.3251, Accuracy: 0.8790
Step 600, Loss: 0.3530, Accuracy: 0.8869
Step 700, Loss: 0.3370, Accuracy: 0.8935
Step 800, Loss: 0.0713, Accuracy: 0.8987
Step 900, Loss: 0.2815, Accuracy: 0.9020
Step 1000, Loss: 0.1706, Accuracy: 0.9064
Step 1100, Loss: 0.1594, Accuracy: 0.9091
Step 1200, Loss: 0.2079, Accuracy: 0.9118
Step 1300, Loss: 0.0710, Accuracy: 0.9145
Step 1400, Loss: 0.0611, Accuracy: 0.9168
Step 1500, Loss: 0.0600, Accuracy: 0.9189
Step 1600, Loss: 0.2710, Accuracy: 0.9212
Step 1700, Loss: 0.0876, Accuracy: 0.9232
Step 1800, Loss: 0.0764, Accuracy: 0.9250
Training Accuracy for epoch 1: 0.9262

Epoch 2/5
Step 0, Loss: 0.0425, Accuracy: 1.0000
Step 100, Loss: 0.1759, Accuracy: 0.9567
Step 200, Loss: 0.0967, Accuracy: 0.9577
Step 300, Loss: 0.0234, Accuracy: