In [None]:
import torch
from torch import nn

import torchvision as tv
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor

import matplotlib.pyplot as plt

from torch.utils.data import DataLoader
from tqdm.auto import tqdm

from timeit import default_timer as timer

from torch.utils.data import DataLoader

In [None]:
# Training split of FashionMNIST.
train_data = datasets.FashionMNIST(
    root="data",            # directory the dataset files live in / are downloaded to
    train=True,             # select the training split
    download=True,          # download the files when they are not already under `root`
    transform=ToTensor(),   # convert each image to a tensor
    target_transform=None,  # labels are used as-is
)

# Held-out test split; same storage location and transforms, only `train` differs.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,            # select the test split
    download=True,
    transform=ToTensor(),
    target_transform=None,
)

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
# The model and every batch are moved to this device before use.
device = "cuda" if torch.cuda.is_available() else "cpu"
device  # bare last expression so the notebook echoes the selected device

'cuda'

In [None]:
# Label metadata taken from the dataset object:
#   class_dict - the dataset's `class_to_idx` attribute (class name -> label index)
#   class_list - the dataset's `classes` attribute; its length sets the model's output size below
class_dict= train_data.class_to_idx
class_list = train_data.classes

In [None]:
BATCH_SIZE = 32  # samples per mini-batch

# Shuffle only the training split; the test split keeps a fixed order.
train_dataloader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)

# Report dataset size / number of batches for each split. The ratio for the test
# split is slightly below BATCH_SIZE because its final batch is only partially filled.
print(
    f"Original size / batched data sets : {len(train_data)}/{len(train_dataloader)} "
    f"so batch size is {len(train_data)/len(train_dataloader)} \n"
    f"Similarly test data is {len(test_data)}/{len(test_dataloader)} : {len(test_data)/len(test_dataloader)}"
)

Original size / batched data sets : 60000/1875 so batch size is 32.0 
Similarly training data is 10000/313 : 31.94888178913738


In [None]:
### Loss and accuracy helpers (the optimizer itself is created inside train_model)

# Module-level cross-entropy criterion. Note that train_model builds its own
# CrossEntropyLoss locally, so this global is not the object used during training.
loss_fn = nn.CrossEntropyLoss().to(device)

def acc_fn(y_true, y_pred):
    """Return classification accuracy as a percentage in the range [0, 100].

    Args:
        y_true: Tensor of ground-truth class indices.
        y_pred: Tensor of predicted class indices, compared element-wise with
            ``y_true``.

    Returns:
        float: Percentage of positions where the two tensors are equal, or
        ``0.0`` when ``y_pred`` is empty (instead of raising ``ZeroDivisionError``).
    """
    total = len(y_pred)
    if total == 0:
        # Nothing to score: avoid dividing by zero on an empty batch.
        return 0.0
    correct = torch.eq(y_true, y_pred).sum().item()
    return (correct / total) * 100

In [None]:
# Duplicate of the tqdm import in the first cell.
from tqdm.auto import tqdm

# Seeds PyTorch's global RNG once, when this cell is executed; train_model itself
# does not re-seed, so repeated calls continue from the current RNG state.
torch.manual_seed(42)
def train_model(epochs:int, model:torch.nn.Module, train_loader=None, test_loader=None, lr:float=0.001):
    """Train ``model`` with Adam and cross-entropy, evaluating on the test data after every epoch.

    Progress and metrics are printed; nothing is returned. The model is moved to
    the notebook-level ``device`` and its parameters are updated in place.

    Reported losses and accuracy are averaged per *sample*, not per batch, so a
    partially filled final batch does not get the same weight as a full one.

    Args:
        epochs: Number of full passes over the training loader.
        model: Network that maps a batch of images to per-class logits.
        train_loader: DataLoader yielding ``(X, y)`` training batches. Defaults to
            the notebook-level ``train_dataloader`` (looked up at call time).
        test_loader: DataLoader yielding ``(X, y)`` evaluation batches. Defaults to
            the notebook-level ``test_dataloader`` (looked up at call time).
        lr: Learning rate passed to ``torch.optim.Adam``.
    """
    if train_loader is None:
        train_loader = train_dataloader
    if test_loader is None:
        test_loader = test_dataloader

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss().to(device)
    model.to(device)  # Ensure model is on the correct device
    start_time = timer()

    for epoch in tqdm(range(epochs)):
        print(f"Epoch: {epoch + 1}/{epochs}\n---")

        # ---- Training pass ----
        # The evaluation pass below switches to eval mode, so train mode has to be
        # restored once at the start of every epoch (not once per batch).
        model.train()
        train_loss_sum, train_seen = 0.0, 0
        for batch, (X, y) in enumerate(train_loader):
            X, y = X.to(device), y.to(device)  # Move to device

            y_pred = model(X)
            loss = loss_fn(y_pred, y)
            # The criterion returns the batch mean; scale by the batch size so the
            # epoch figure is a true per-sample average.
            train_loss_sum += loss.item() * len(X)
            train_seen += len(X)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch % 400 == 0:
                print(f"Looked at {batch * len(X)}/{len(train_loader.dataset)} samples")

        train_loss = train_loss_sum / max(train_seen, 1)  # max() guards an empty loader
        print(f"Avg Train Loss: {train_loss:.4f}")

        # ---- Evaluation pass ----
        model.eval()
        test_loss_sum, test_correct, test_seen = 0.0, 0, 0
        with torch.inference_mode():
            for X_test, y_test in test_loader:
                X_test, y_test = X_test.to(device), y_test.to(device)
                test_logits = model(X_test)
                batch_size = len(y_test)
                test_loss_sum += loss_fn(test_logits, y_test).item() * batch_size
                # Count correct predictions instead of averaging per-batch accuracies.
                test_correct += (test_logits.argmax(dim=1) == y_test).sum().item()
                test_seen += batch_size

        test_loss = test_loss_sum / max(test_seen, 1)
        test_acc = 100 * test_correct / max(test_seen, 1)
        print(f"Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.4f}\n")

    end_timer = timer()
    print(f"Training complete in {end_timer - start_time:.2f} seconds.")


In [None]:
"""
  Creating a tinyVGG from CNN github website
"""

class FashionMNISTModelV1(nn.Module):
    """TinyVGG-style CNN: two convolutional blocks followed by a dropout + linear classifier.

    Each block is Conv3x3 -> ReLU -> Conv3x3 -> ReLU -> MaxPool(2). The
    convolutions use stride 1 and padding 1, so they keep height and width;
    each pooling layer halves them (rounding down).

    Args:
        input_shape: Number of channels in the input images.
        hidden_units: Number of feature maps produced by every convolution.
        output_shape: Number of classes (length of the returned logit vector).
        image_size: Height/width of the square input images. The default of 28
            reproduces the original ``hidden_units * 7 * 7`` classifier input.

    Raises:
        ValueError: If ``image_size`` is too small to survive two 2x2 poolings.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int, image_size: int = 28):
        super().__init__()
        self.conv_block_1 = self._make_conv_block(input_shape, hidden_units)
        self.conv_block_2 = self._make_conv_block(hidden_units, hidden_units)

        # Two MaxPool2d(kernel_size=2) layers each floor-halve the spatial size.
        pooled_size = image_size // 2 // 2
        if pooled_size < 1:
            raise ValueError(f"image_size must be at least 4, got {image_size}")

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=hidden_units * pooled_size * pooled_size,
                      out_features=output_shape)
        )

    @staticmethod
    def _make_conv_block(in_channels: int, out_channels: int) -> nn.Sequential:
        """Build one Conv-ReLU-Conv-ReLU-MaxPool block (layer order fixes the state_dict keys)."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels,
                      out_channels=out_channels,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=out_channels,
                      out_channels=out_channels,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

    def forward(self, x):
        """Return class logits for a batch of images laid out as (batch, channels, height, width)."""
        x = self.conv_block_1(x)
        x = self.conv_block_2(x)
        return self.classifier(x)


# Initialize the model and move it to the selected device
model_1 = FashionMNISTModelV1(input_shape=1, # 1 color channel
                              hidden_units=64, # feature maps produced by each convolution
                              output_shape=len(class_list)).to(device) # one logit per class
print(model_1)  # show the layer structure


FashionMNISTModelV1(
  (conv_block_1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_2): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Dropout(p=0.5, inplace=False)
    (2): Linear(in_features=3136, out_features=10, bias=True)
  )
)


In [None]:
# Train model_1 for 50 epochs; train_model evaluates on the test set after each epoch.
# NOTE(review): the recorded output below already starts at a train loss of ~0.095 in
# epoch 1, which suggests model_1 had been trained before this run - re-create model_1
# first if a from-scratch run is intended. TODO confirm.
train_model(50,model_1)

  0%|          | 0/50 [00:00<?, ?it/s]

Epoch: 1/50
---
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples
Avg Train Loss: 0.0950
Test Loss: 0.2343 | Test Acc: 92.8215

Epoch: 2/50
---
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples
Avg Train Loss: 0.0952
Test Loss: 0.2153 | Test Acc: 93.2808

Epoch: 3/50
---
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples
Avg Train Loss: 0.0927
Test Loss: 0.2326 | Test Acc: 93.1210

Epoch: 4/50
---
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples
Avg Train Loss: 0.0940
Test Loss: 0.2249 | Test Acc: 92.8914

Epoch: 5/50
---
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Lo

In [None]:
from pathlib import Path

# Where the trained weights are written: <MODEL_PATH>/<MODEL_NAME>.
MODEL_PATH = Path("Models")
MODEL_NAME = "O1_fashionMNIST_model.pth"
MODEL_SAVE_PATH = MODEL_PATH.joinpath(MODEL_NAME)

# Create the target directory (and any missing parents); no error if it already exists.
MODEL_SAVE_PATH.parent.mkdir(parents=True, exist_ok=True)

# Persist only the parameters (state_dict), not the whole module object.
torch.save(model_1.state_dict(), MODEL_SAVE_PATH)

In [None]:
# Rebuild the architecture with the same hyperparameters used for model_1, then restore the saved weights.
loaded_model = FashionMNISTModelV1(input_shape=1, hidden_units=64, output_shape=len(class_list))

# weights_only=True limits unpickling to tensors and plain containers (the file holds only a
# state_dict), avoiding arbitrary code execution from the checkpoint file.
# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only machine; the freshly
# built model lives on the CPU here and train_model moves it to `device` when it is called.
loaded_model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location="cpu", weights_only=True))
loaded_model.eval()  # Set model to evaluation mode

  loaded_model.load_state_dict(torch.load(MODEL_SAVE_PATH))


FashionMNISTModelV1(
  (conv_block_1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_2): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Dropout(p=0.5, inplace=False)
    (2): Linear(in_features=3136, out_features=10, bias=True)
  )
)

In [None]:
# Run one more epoch on the reloaded model; train_model prints test loss/accuracy afterwards.
# NOTE(review): presumably a sanity check that the checkpoint was restored correctly - be aware
# that this call also updates loaded_model's weights before the test metrics are computed.
train_model(1,loaded_model)

  0%|          | 0/1 [00:00<?, ?it/s]

Epoch: 1/1
---
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples
Avg Train Loss: 0.0837
Test Loss: 0.2266 | Test Acc: 93.1909

Training complete in 13.61 seconds.
