In [3]:
import torch
from torch import nn
from torchvision import transforms, datasets
from torch.utils.data import DataLoader

In [15]:
# Report the PyTorch build and, when a GPU is present, the properties of device 0.
print(torch.__version__)
# Querying device properties without a CUDA device raises, so guard the call
# to keep this cell runnable on CPU-only machines.
if torch.cuda.is_available():
    print(torch.cuda.get_device_properties(0))
else:
    print("CUDA is not available; no GPU properties to report.")

2.5.1+cu121
_CudaDeviceProperties(name='Tesla T4', major=7, minor=5, total_memory=15102MB, multi_processor_count=40, uuid=d12f8d69-20e6-8436-f0ca-f9ae2c416fcc, L2_cache_size=4MB)


In [4]:
# Training split of FashionMNIST, stored under ./data and converted to tensors.
# train=True is the torchvision default; it is spelled out here to mirror the test split.
train_dataset = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Held-out test split, same storage location and tensor conversion.
test_dataset = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Number of samples in each split.
len(train_dataset), len(test_dataset)

(60000, 10000)

In [5]:
# Batches of 32 for both splits; only the training batches are shuffled.
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32)

# Number of batches per split.
len(train_dataloader), len(test_dataloader)

(1875, 313)

In [6]:
class FashionMNISTModelV3(nn.Module):
    """Small CNN: two (conv-ReLU-conv-ReLU-maxpool) blocks followed by a linear classifier.

    Args:
        input_shape: Number of channels in the input images (passed to the
            first ``nn.Conv2d`` as ``in_channels``).
        output_shape: Number of output logits produced by the classifier.
        hidden_units: Number of channels used by every convolution.
        image_size: Height/width of the (square) input images. Defaults to 28,
            which reproduces the original ``hidden_units * 49`` classifier input.

    Raises:
        ValueError: If ``image_size`` is smaller than 4, because the two 2x2
            max-pools would leave no spatial positions for the classifier.
    """

    def __init__(self, input_shape: int, output_shape: int, hidden_units: int,
                 image_size: int = 28):
        super().__init__()
        if image_size < 4:
            raise ValueError(
                f"image_size must be at least 4 to survive two 2x2 max-pools, got {image_size}"
            )

        # Blocks are created in the same order (and with the same attribute
        # names) as before, so state_dict keys and seeded initialisation match.
        self.conv_block_0 = self._conv_block(input_shape, hidden_units)
        self.conv_block_1 = self._conv_block(hidden_units, hidden_units)

        # The 3x3 / padding=1 / stride=1 convolutions keep the spatial size;
        # each MaxPool2d(2) halves it (floor), so two pools give image_size // 4.
        pooled_size = image_size // 4
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=hidden_units * pooled_size * pooled_size,
                      out_features=output_shape)
        )

    @staticmethod
    def _conv_block(in_channels: int, out_channels: int) -> nn.Sequential:
        """Build one conv-ReLU-conv-ReLU-maxpool stack that halves the spatial size."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels,
                      out_channels=out_channels,
                      kernel_size=3,
                      padding=1,
                      stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=out_channels,
                      out_channels=out_channels,
                      kernel_size=3,
                      padding=1,
                      stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

    def forward(self, x):
        """Run both convolutional blocks, then the classifier, and return the logits."""
        features = self.conv_block_0(x)
        features = self.conv_block_1(features)
        return self.classifier(features)

In [7]:
# Seed before construction so the initial weights are reproducible.
torch.manual_seed(42)
model = FashionMNISTModelV3(input_shape=1, hidden_units=32, output_shape=10)

# Show which device the freshly built model's first parameter lives on.
first_param = next(iter(model.parameters()))
first_param.device

device(type='cpu')

In [8]:
def accuracy_fn(y_true, y_pred):
    """Return the percentage (0-100) of positions where ``y_pred`` equals ``y_true``."""
    n_matches = (y_true == y_pred).sum().item()
    return (n_matches / len(y_pred)) * 100
# model.parameters() returns a one-shot generator; materialise it so the
# `parameters` name still holds the tensors after the optimizer has consumed it.
parameters = list(model.parameters())
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=parameters, lr=0.1)

In [9]:
def test_speed(device, epochs=3, random_seed=42):
    """Train and evaluate the module-level ``model`` on ``device``, printing per-epoch metrics.

    Uses the module-level ``model``, ``loss_fn``, ``optimizer``, ``accuracy_fn``,
    ``train_dataloader`` and ``test_dataloader``. The model is moved to ``device``
    and trained in place, so repeated calls continue from the current weights.

    Args:
        device: Device the model and every batch are moved to (e.g. "cpu" or "cuda").
        epochs: Number of train + test passes to run.
        random_seed: Seed passed to ``torch.manual_seed`` before training starts.
    """
    torch.manual_seed(random_seed)
    model.to(device)
    for _ in range(epochs):
        print("---------------")
        train_loss, train_acc = 0.0, 0.0
        model.train()
        for X, y in train_dataloader:
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            loss = loss_fn(y_pred, y)
            # Accumulate a plain Python float: adding the loss tensor itself would
            # chain every batch's autograd history onto the running total.
            train_loss += loss.item()
            train_acc += accuracy_fn(y_true=y, y_pred=torch.argmax(y_pred, dim=1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Average the per-batch metrics over the number of batches.
        train_loss /= len(train_dataloader)
        train_acc /= len(train_dataloader)
        print(f"Train Loss: {train_loss:.2f} | Train Acc: {train_acc:.2f}%")

        # Testing
        test_loss, test_acc = 0.0, 0.0
        model.eval()
        with torch.inference_mode():
            for X, y in test_dataloader:
                X, y = X.to(device), y.to(device)
                test_pred = model(X)
                test_loss += loss_fn(test_pred, y).item()
                test_acc += accuracy_fn(y_true=y, y_pred=torch.argmax(test_pred, dim=1))
        test_loss /= len(test_dataloader)
        test_acc /= len(test_dataloader)
        print(f"Test Loss: {test_loss:.2f} | Test Acc: {test_acc:.2f}%")

In [10]:
%%timeit

# NOTE: %%timeit executes this cell body repeatedly, and test_speed trains the
# module-level `model` in place, so each repetition resumes from the weights
# left by the previous one rather than from a fresh initialisation.
test_speed(device="cpu", epochs=1)

---------------
Train Loss: 0.51 | Train Acc: 81.45%
Test Loss: 0.34 | Test Acc: 87.80%
---------------
Train Loss: 0.31 | Train Acc: 88.95%
Test Loss: 0.30 | Test Acc: 89.35%
---------------
Train Loss: 0.27 | Train Acc: 90.31%
Test Loss: 0.27 | Test Acc: 90.15%
---------------
Train Loss: 0.25 | Train Acc: 91.15%
Test Loss: 0.26 | Test Acc: 90.51%
---------------
Train Loss: 0.23 | Train Acc: 91.73%
Test Loss: 0.25 | Test Acc: 90.87%
---------------
Train Loss: 0.21 | Train Acc: 92.35%
Test Loss: 0.25 | Test Acc: 90.76%
---------------
Train Loss: 0.20 | Train Acc: 92.71%
Test Loss: 0.26 | Test Acc: 90.57%
---------------
Train Loss: 0.19 | Train Acc: 93.11%
Test Loss: 0.26 | Test Acc: 90.72%
1min 58s ± 5.23 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
%%timeit

# NOTE: test_speed moves the module-level `model` to "cuda" and keeps training
# it in place; %%timeit repeats the cell body, so the metrics printed here
# continue from whatever state earlier runs left the model in.
test_speed(device="cuda", epochs=1)

---------------
Train Loss: 0.18 | Train Acc: 93.36%
Test Loss: 0.26 | Test Acc: 90.75%
---------------
Train Loss: 0.18 | Train Acc: 93.60%
Test Loss: 0.27 | Test Acc: 90.51%
---------------
Train Loss: 0.17 | Train Acc: 93.92%
Test Loss: 0.28 | Test Acc: 90.37%
---------------
Train Loss: 0.16 | Train Acc: 94.04%
Test Loss: 0.27 | Test Acc: 90.54%
---------------
Train Loss: 0.16 | Train Acc: 94.31%
Test Loss: 0.28 | Test Acc: 90.60%
---------------
Train Loss: 0.15 | Train Acc: 94.41%
Test Loss: 0.29 | Test Acc: 90.43%
---------------
Train Loss: 0.15 | Train Acc: 94.70%
Test Loss: 0.30 | Test Acc: 90.50%
---------------
Train Loss: 0.14 | Train Acc: 94.72%
Test Loss: 0.29 | Test Acc: 90.65%
13 s ± 104 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
