In [1]:
import torch
from torch import nn
import pandas as pd
import matplotlib.pyplot as plt
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [2]:
train_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
    target_transform=None
)
test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
    target_transform=None
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:10<00:00, 957193.55it/s] 


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 159633.14it/s]


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:01<00:00, 1511512.33it/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4464618.88it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw






In [3]:
from torch.utils.data import DataLoader
BATCH_SIZE = 32
train_dataloader = DataLoader(dataset=train_data,
                              batch_size=BATCH_SIZE,
                              shuffle=True)
test_dataloader = DataLoader(dataset=test_data,
                             batch_size=BATCH_SIZE,
                             shuffle=False)

In [4]:
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item() # torch.eq() calculates where two tensors are equal
    acc = (correct / len(y_pred)) * 100
    return acc

In [5]:
def train_step(model,
               data_loader,
               loss_fn,
               optimizer,
               accuracy_fn,
               device=device):
    train_loss, train_acc = 0, 0
    model.train()
    for batch, (X, y) in enumerate(data_loader):
        X, y = X.to(device), y.to(device)
        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        train_loss += loss
        acc = accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))
        train_acc += acc
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    train_loss /= len(data_loader)
    train_acc /= len(data_loader)
    print(f"Train Loss: {train_loss:.5f} | Train Acc: {train_acc:.2f}%")

In [6]:
def test_step(model,
              data_loader,
              loss_fn,
              optimizer,
              accuracy_fn,
              device=device):
    test_loss, test_acc = 0, 0
    model.eval()
    with torch.inference_mode():
        for batch, (X,y) in enumerate(data_loader):
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            loss = loss_fn(y_pred, y)
            test_loss += loss
            acc = accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))
            test_acc += acc
        test_loss /= len(data_loader)
        test_acc /= len(data_loader)
        print(f"Test Loss: {test_loss:.5f} | Test Acc: {test_acc:.2f}%")

In [7]:
class MNIST_CNN(nn.Module):
    def __init__(self, input_shape, hidden_units, output_shape):
        super().__init__()
        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=hidden_units*7*7,
                      out_features=output_shape)
        )
    def forward(self, x):
        x=self.conv_block_1(x)
        x=self.conv_block_2(x)
        x=self.classifier(x)
        return x

In [12]:
def _approximated_ReLU(x):
    return 0.117071 * x**2 + 0.5 * x + 0.375373

class Square(torch.nn.Module):
    def forward(self, x):
        return x**2

class ApproxReLU(torch.nn.Module):
    def forward(self, x):
        return _approximated_ReLU(x)

class Flatten(torch.nn.Module):
    def forward(self, x):
        return torch.flatten(x, 1)

In [13]:
class approx_tinyVGG(nn.Module):
    def __init__(self, input_shape, hidden_units, output_shape):
        super().__init__()
        self.conv1_1 = nn.Conv2d(in_channels=input_shape,
                                 out_channels=hidden_units,
                                 kernel_size=3,
                                 stride=1,
                                 padding=1)
        self.relu1_1 = ApproxReLU()
        self.conv1_2 = nn.Conv2d(in_channels=hidden_units,
                                 out_channels=hidden_units,
                                 kernel_size=3,
                                 stride=1,
                                 padding=1)
        self.relu1_2 = ApproxReLU()
        self.maxpool1 = nn.AvgPool2d(kernel_size=2) #nn.MaxPool2d(kernel_size=2)

        self.conv2_1 = nn.Conv2d(in_channels=hidden_units,
                                 out_channels=hidden_units,
                                 kernel_size=3,
                                 stride=1,
                                 padding=1)
        self.relu2_1 = ApproxReLU()
        self.conv2_2 = nn.Conv2d(in_channels=hidden_units,
                                 out_channels=hidden_units,
                                 kernel_size=3,
                                 stride=1,
                                 padding=1)
        self.relu2_2 = ApproxReLU()
        self.maxpool2 = nn.AvgPool2d(kernel_size=2)

        self.flatten = nn.Flatten()
        self.fc = nn.Linear(in_features=hidden_units*7*7,
                            out_features=output_shape)

    def forward(self, x):
        x = self.conv1_1(x)
        x = self.relu1_1(x)
        x = self.conv1_2(x)
        x = self.relu1_2(x)
        x = self.maxpool1(x)

        x = self.conv2_1(x)
        x = self.relu2_1(x)
        x = self.conv2_2(x)
        x = self.relu2_2(x)
        x = self.maxpool2(x)

        x = self.flatten(x)
        x = self.fc(x)
        return x


In [9]:
class_names = train_data.classes
class_names

['0 - zero',
 '1 - one',
 '2 - two',
 '3 - three',
 '4 - four',
 '5 - five',
 '6 - six',
 '7 - seven',
 '8 - eight',
 '9 - nine']

In [10]:
model = MNIST_CNN(input_shape=1,
                  hidden_units=10,
                  output_shape=len(class_names)).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(),
                            lr=0.1)

In [11]:
torch.manual_seed(42)
torch.cuda.manual_seed(42)
epochs = 10
for epoch in range(epochs):
    print(f"Epoch {epoch}\n --------------------------")
    train_step(model=model,
            accuracy_fn=accuracy_fn,
            loss_fn=loss_fn,
            data_loader=train_dataloader,
            optimizer=optimizer)
    test_step(model=model,
              accuracy_fn=accuracy_fn,
              loss_fn=loss_fn,
              data_loader=test_dataloader,
              optimizer=optimizer)

Epoch 0
 --------------------------
Train Loss: 0.29150 | Train Acc: 90.17%
Test Loss: 0.06661 | Test Acc: 97.96%
Epoch 1
 --------------------------
Train Loss: 0.06825 | Train Acc: 97.87%
Test Loss: 0.05069 | Test Acc: 98.34%
Epoch 2
 --------------------------
Train Loss: 0.05269 | Train Acc: 98.39%
Test Loss: 0.04888 | Test Acc: 98.51%
Epoch 3
 --------------------------
Train Loss: 0.04474 | Train Acc: 98.60%
Test Loss: 0.04513 | Test Acc: 98.57%
Epoch 4
 --------------------------
Train Loss: 0.03900 | Train Acc: 98.80%
Test Loss: 0.03755 | Test Acc: 98.68%
Epoch 5
 --------------------------
Train Loss: 0.03589 | Train Acc: 98.87%
Test Loss: 0.03873 | Test Acc: 98.71%
Epoch 6
 --------------------------
Train Loss: 0.03311 | Train Acc: 98.95%
Test Loss: 0.04767 | Test Acc: 98.43%
Epoch 7
 --------------------------
Train Loss: 0.02999 | Train Acc: 99.02%
Test Loss: 0.03848 | Test Acc: 98.71%
Epoch 8
 --------------------------
Train Loss: 0.02814 | Train Acc: 99.12%
Test Loss: 0

In [14]:
model = approx_tinyVGG(input_shape=1,
                  hidden_units=10,
                  output_shape=len(class_names)).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(),
                            lr=0.1)

In [15]:
torch.manual_seed(42)
torch.cuda.manual_seed(42)
epochs = 10
for epoch in range(epochs):
    print(f"Epoch {epoch}\n --------------------------")
    train_step(model=model,
            accuracy_fn=accuracy_fn,
            loss_fn=loss_fn,
            data_loader=train_dataloader,
            optimizer=optimizer)
    test_step(model=model,
              accuracy_fn=accuracy_fn,
              loss_fn=loss_fn,
              data_loader=test_dataloader,
              optimizer=optimizer)

Epoch 0
 --------------------------
Train Loss: nan | Train Acc: 11.19%
Test Loss: nan | Test Acc: 9.79%
Epoch 1
 --------------------------
Train Loss: nan | Train Acc: 9.87%
Test Loss: nan | Test Acc: 9.79%
Epoch 2
 --------------------------
Train Loss: nan | Train Acc: 9.87%
Test Loss: nan | Test Acc: 9.79%
Epoch 3
 --------------------------
Train Loss: nan | Train Acc: 9.87%
Test Loss: nan | Test Acc: 9.79%
Epoch 4
 --------------------------
Train Loss: nan | Train Acc: 9.87%
Test Loss: nan | Test Acc: 9.79%
Epoch 5
 --------------------------
Train Loss: nan | Train Acc: 9.87%
Test Loss: nan | Test Acc: 9.79%
Epoch 6
 --------------------------
Train Loss: nan | Train Acc: 9.87%
Test Loss: nan | Test Acc: 9.79%
Epoch 7
 --------------------------
Train Loss: nan | Train Acc: 9.87%
Test Loss: nan | Test Acc: 9.79%
Epoch 8
 --------------------------
Train Loss: nan | Train Acc: 9.87%
Test Loss: nan | Test Acc: 9.79%
Epoch 9
 --------------------------
Train Loss: nan | Train Acc