<a href="https://colab.research.google.com/github/nreyesh/pytorch_initials/blob/main/05_1_Custom_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [38]:
! pip install wget

Collecting wget
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9655 sha256=adba3fd2a4a5b030eb4a38760a5622a6825bfa3c3d5ef9d48e18f05b69dda6a4
  Stored in directory: /root/.cache/pip/wheels/8b/f1/7f/5c94f0a7a505ca1c81cd1d9208ae2064675d97582078e6c769
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2


# 1. Getting the Dataset

In [39]:
import wget

url = 'https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip'
wget.download(url)

'pizza_steak_sushi.zip'

In [40]:
import os
import zipfile
from pathlib import Path

data_path = Path('dataset/')
zip_path = '/content/pizza_steak_sushi.zip'

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    if not os.path.isdir(data_path):
        os.mkdir(data_path)
    zip_ref.extractall(data_path)

# 2. Loading the Data
### Option 1: Image Loader

In [41]:
from torchvision.datasets import ImageFolder
from torchvision import transforms

train_dir = data_path / 'train'
test_dir = data_path / 'test'

train_transforms = transforms.Compose([
        transforms.Resize(size=(128,128)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor()
      ])
test_transforms = transforms.Compose([
        transforms.Resize(size=(128,128)),
        transforms.ToTensor()
      ])

train_imageFolder = ImageFolder(train_dir,
                                transform=train_transforms)

test_imageFolder = ImageFolder(test_dir,
                               transform=test_transforms)

In [42]:
train_imageFolder

Dataset ImageFolder
    Number of datapoints: 225
    Root location: dataset/train
    StandardTransform
Transform: Compose(
               Resize(size=(128, 128), interpolation=bilinear, max_size=None, antialias=warn)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
           )

In [43]:
train_imageFolder.classes

['pizza', 'steak', 'sushi']

In [44]:
train_imageFolder.class_to_idx

{'pizza': 0, 'steak': 1, 'sushi': 2}

In [45]:
train_imageFolder[0]

(tensor([[[0.1137, 0.1137, 0.1059,  ..., 0.1098, 0.1098, 0.1137],
          [0.1137, 0.1098, 0.1059,  ..., 0.1216, 0.1216, 0.1255],
          [0.1098, 0.1059, 0.1020,  ..., 0.1294, 0.1294, 0.1333],
          ...,
          [0.0824, 0.0863, 0.0902,  ..., 0.1647, 0.1686, 0.1765],
          [0.0902, 0.0863, 0.0824,  ..., 0.1647, 0.1686, 0.1765],
          [0.0863, 0.0824, 0.0824,  ..., 0.1569, 0.1569, 0.1608]],
 
         [[0.0706, 0.0745, 0.0706,  ..., 0.0549, 0.0549, 0.0588],
          [0.0745, 0.0706, 0.0745,  ..., 0.0627, 0.0627, 0.0627],
          [0.0745, 0.0745, 0.0745,  ..., 0.0627, 0.0627, 0.0667],
          ...,
          [0.1059, 0.1098, 0.1098,  ..., 0.2275, 0.2275, 0.2314],
          [0.1059, 0.1020, 0.1020,  ..., 0.2275, 0.2314, 0.2314],
          [0.1020, 0.0980, 0.0980,  ..., 0.2353, 0.2275, 0.2275]],
 
         [[0.0941, 0.0980, 0.0902,  ..., 0.0196, 0.0196, 0.0196],
          [0.0941, 0.0941, 0.0902,  ..., 0.0235, 0.0235, 0.0235],
          [0.0941, 0.0941, 0.0902,  ...,

In [87]:
from torch.utils.data import DataLoader

BATCH_SIZE = 16
train_loader = DataLoader(train_imageFolder,
                          batch_size=BATCH_SIZE,
                          shuffle=True)
test_loader = DataLoader(test_imageFolder,
                         batch_size=BATCH_SIZE,
                         shuffle=True)

In [47]:
imgs, labels = next(iter(train_loader))
len(imgs), len(labels)

(8, 8)

In [70]:
len(train_loader)

29

## Option 2 Custom Image Loader

In [49]:
aux = Path('dataset/train/').glob('*/*.jpg')
aux = list(aux)
aux

[PosixPath('dataset/train/sushi/1552504.jpg'),
 PosixPath('dataset/train/sushi/2871052.jpg'),
 PosixPath('dataset/train/sushi/2175561.jpg'),
 PosixPath('dataset/train/sushi/3107839.jpg'),
 PosixPath('dataset/train/sushi/268990.jpg'),
 PosixPath('dataset/train/sushi/686426.jpg'),
 PosixPath('dataset/train/sushi/121940.jpg'),
 PosixPath('dataset/train/sushi/2797464.jpg'),
 PosixPath('dataset/train/sushi/2980779.jpg'),
 PosixPath('dataset/train/sushi/1551817.jpg'),
 PosixPath('dataset/train/sushi/542188.jpg'),
 PosixPath('dataset/train/sushi/17704.jpg'),
 PosixPath('dataset/train/sushi/2813454.jpg'),
 PosixPath('dataset/train/sushi/1280119.jpg'),
 PosixPath('dataset/train/sushi/3579071.jpg'),
 PosixPath('dataset/train/sushi/3737197.jpg'),
 PosixPath('dataset/train/sushi/390178.jpg'),
 PosixPath('dataset/train/sushi/2641778.jpg'),
 PosixPath('dataset/train/sushi/1138695.jpg'),
 PosixPath('dataset/train/sushi/14046.jpg'),
 PosixPath('dataset/train/sushi/794647.jpg'),
 PosixPath('dataset/tra

In [50]:
from PIL import Image
from torch.utils.data import Dataset

class CustomImageLoader(Dataset):
  def __init__(self, root_directory, transform=None):
    super().__init__()
    self.paths = list(Path(root_directory).glob('*/*.jpg'))
    self.transforms = transform
    self.classes = sorted([x.parts[-1] for x in Path(root_directory).iterdir() if x.is_dir()])
    self.class_to_idx = {key:idx for idx,key in enumerate(self.classes)}

  def __len__(self):
    return len(self.paths)

  def __getitem__(self, idx):
    img = Image.open(self.paths[idx])
    label_name = self.paths[idx].parts[-2]
    label = self.class_to_idx[label_name]

    if transforms:
      img = self.transforms(img)

    return img, label


In [88]:
train_customImageLoader = CustomImageLoader('dataset/train/',
                                            transform=train_transforms)
test_customImageLoader = CustomImageLoader('dataset/test/',
                                            transform=test_transforms)

In [68]:
train_customImageLoader.paths

<__main__.CustomImageLoader at 0x785a38d06e60>

In [53]:
train_customImageLoader.classes

['pizza', 'steak', 'sushi']

In [54]:
train_customImageLoader.class_to_idx

{'pizza': 0, 'steak': 1, 'sushi': 2}

In [55]:
img, label = train_customImageLoader[0]
img.shape, label

(torch.Size([3, 128, 128]), 2)

In [89]:
from torch.utils.data import DataLoader

train_customDataLoader = DataLoader(train_customImageLoader,
                                    batch_size=BATCH_SIZE,
                                    shuffle=True)
test_customDataLoader = DataLoader(test_customImageLoader,
                                   batch_size=BATCH_SIZE,
                                   shuffle=True)

In [57]:
imgs, labels = next(iter(train_customDataLoader))
imgs.shape, labels.shape

(torch.Size([8, 3, 128, 128]), torch.Size([8]))

In [67]:
len(test_customDataLoader)

10

# 3. Visualization

# Model

In [90]:
import torch
from torch import nn

class MiniVGG(nn.Module):
  def __init__(self,
               in_channels,
               im_size,
               hidden_channels,
               n_classes ):
    super().__init__()

    self.block_1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels,
                      out_channels=hidden_channels,
                      kernel_size=(3,3),
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(hidden_channels),
            nn.Conv2d(in_channels=hidden_channels,
                      out_channels=hidden_channels,
                      kernel_size=(3,3),
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(hidden_channels),
            nn.MaxPool2d(kernel_size=(2,2),
                          stride=2,
                          padding=0),
            nn.Dropout(0.1)
          )

    self.block_2 = nn.Sequential(
            nn.Conv2d(in_channels=hidden_channels,
                      out_channels=hidden_channels*2,
                      kernel_size=(3,3),
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(hidden_channels*2),
            nn.Conv2d(in_channels=hidden_channels*2,
                      out_channels=hidden_channels*2,
                      kernel_size=(3,3),
                      stride=1,
                      padding=0),
            nn.ReLU(),
            nn.BatchNorm2d(hidden_channels*2),
            nn.MaxPool2d(kernel_size=(2,2),
                          stride=2,
                          padding=1),
            nn.Dropout(0.1)
            )

    self.linear = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=hidden_channels*2*32*32,
                  out_features=512),
        nn.Linear(in_features=512,
                  out_features=n_classes),
        nn.Softmax()
        )

  def forward(self, x):
    x = self.block_1(x)
    x = self.block_2(x)
    x = self.linear(x)
    return x

device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = MiniVGG(3,128,32,3).to(device)
model

MiniVGG(
  (block_1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Dropout(p=0.1, inplace=False)
  )
  (block_2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
    (4): ReLU()
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): MaxPool2d(kernel_size=(2, 2), stride=2, padding=1, dilation=1, ceil_mode=False)
    (7): Dropout(

In [59]:
img = torch.randn(1,3,128,128).to(device)
pass_1 = model.block_1(img)
pass_1.shape

torch.Size([1, 32, 64, 64])

In [60]:
pass_2 = model.block_2(pass_1)
pass_2.shape

torch.Size([1, 64, 32, 32])

In [61]:
pass_3 = model.linear(pass_2)
pass_3.shape

  return self._call_impl(*args, **kwargs)


torch.Size([1, 3])

In [62]:
pass_3

tensor([[0.3478, 0.1978, 0.4545]], device='cuda:0', grad_fn=<SoftmaxBackward0>)

# Training

In [63]:
!pip install torcheval



In [91]:
from torch.optim import Adam

optimizer = Adam(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()

In [93]:
import math
from torcheval.metrics import MulticlassAccuracy

epochs = 10
metric = MulticlassAccuracy()

model = MiniVGG(3,128,32,3).to(device)

for epoch in range(epochs):
  print(f'# Epoch: {epoch}')
  # Train Initialization
  model.train()
  loss_acum, acc_acum = 0, 0
  for X,y in train_customDataLoader:
    X = X.to(device)
    y = y.to(device)

    # 1. Predictions
    y_pred = model(X)

    # 2. Loss calculation
    loss = loss_fn(y_pred, y)
    loss_acum += loss

    metric.update(y_pred,y)
    acc_acum += metric.compute()

    # 3. Gradient reset
    optimizer.zero_grad()

    # 4. Gradient Calculation
    loss.backward()

    # 5. Updating weights
    optimizer.step()

  loss_acum /= len(train_customDataLoader)
  acc_acum /= len(train_customDataLoader)

  print(f'Train loss: {loss_acum:.5f} | Train accuracy: {acc_acum:.2f}')

  model.eval()
  with torch.inference_mode():
    loss_acum, acc_acum = 0, 0
    for X,y in test_customDataLoader:
      X = X.to(device)
      y = y.to(device)

      # 1. Predictions
      y_pred = model(X)

      # 2. Loss calculation
      loss = loss_fn(y_pred, y)
      loss_acum += loss

      metric.update(y_pred,y)
      acc_acum += metric.compute()

    loss_acum /= len(train_customDataLoader)
    acc_acum /= len(train_customDataLoader)

    print(f'Test loss: {loss_acum:.5f} | Test accuracy: {acc_acum:.2f}')

# Epoch: 0


  return self._call_impl(*args, **kwargs)


Train loss: 1.11160 | Train accuracy: 0.39
Test loss: 0.36580 | Test accuracy: 0.12
# Epoch: 1
Train loss: 1.10796 | Train accuracy: 0.36
Test loss: 0.36575 | Test accuracy: 0.12
# Epoch: 2
Train loss: 1.09951 | Train accuracy: 0.36
Test loss: 0.36510 | Test accuracy: 0.12
# Epoch: 3
Train loss: 1.08521 | Train accuracy: 0.36
Test loss: 0.36453 | Test accuracy: 0.12
# Epoch: 4
Train loss: 1.10857 | Train accuracy: 0.37
Test loss: 0.36645 | Test accuracy: 0.12
# Epoch: 5
Train loss: 1.09222 | Train accuracy: 0.36
Test loss: 0.36754 | Test accuracy: 0.12
# Epoch: 6
Train loss: 1.09366 | Train accuracy: 0.36
Test loss: 0.36721 | Test accuracy: 0.12
# Epoch: 7
Train loss: 1.08303 | Train accuracy: 0.36
Test loss: 0.36778 | Test accuracy: 0.12
# Epoch: 8
Train loss: 1.09899 | Train accuracy: 0.36
Test loss: 0.36684 | Test accuracy: 0.12
# Epoch: 9
Train loss: 1.10106 | Train accuracy: 0.36
Test loss: 0.36877 | Test accuracy: 0.12


In [83]:
len(train_customDataLoader)

29