<h3> Datasets preparation

In [16]:
# import glob
# import numpy as np
# import torch
# from torch.utils.data import Dataset, DataLoader
# import albumentations as A
# import matplotlib.pyplot as plt


# # CUDA for PyTorch
# use_cuda = torch.cuda.is_available()
# device = torch.device("cuda:0" if use_cuda else "cpu")
# torch.backends.cudnn.benchmark = True


# class BottleDataset(Dataset):
#     def __init__(self, folder_path):
#         super().__init__()
#         # Image transformation
#         self.aug = A.Compose({
#             A.Resize(200, 300),
#             A.CenterCrop(100, 100),
#             A.RandomCrop(80, 80),
#             A.HorizontalFlip(p=0.5),
#             A.Rotate(limit=(-90, 90)),
#             A.VerticalFlip(p=0.5),
#             A.Normalize((0.5,), (0.5,)), # mean 0, std 1
#         })
#         self.image_list = glob.glob(folder_path)
#         # image list has all path names and labels
#         self.image_list = [(i, i.rsplit('\\', 2)[-2]) for i in self.image_list]

#     def train_test_split(self, train_size=0.8):
#         np.random.shuffle(self.image_list)
#         train_length = round(len(self.image_list) * train_size)
#         training_list = self.image_list[:train_length]
#         val_list = self.image_list[train_length:]
#         return training_list, val_list

#     def __len__(self):
#         return len(self.image_list)

#     def __getitem__(self, i):
#         # Returns the image and its label
#         image = plt.imread(self.image_list[0][i])
#         image = self.aug(image=np.array(image))['image']
#         return torch.tensor(image, dtype=torch.float32), torch.tensor(self.image_list[1][i], dtype=torch.float32)


In [17]:
# # DataLoader Parameters
# params = {'batch_size': 32,
#           'shuffle': True,
#           'num_workers': 6}
# max_epochs = 100
# path = 'Bottle images\*\*'
# bottle = BottleDataset(path)
# training_set, val_set = bottle.train_test_split()
# training_generator = torch.utils.data.DataLoader(training_set, **params)
# validation_generator = torch.utils.data.DataLoader(val_set, **params)


Reference
<br> https://towardsdatascience.com/how-to-tune-pytorch-lightning-hyperparameters-80089a281646
<br> https://towardsdatascience.com/supercharge-your-ai-research-with-pytorch-lightning-337948a99eec
<br> https://towardsdatascience.com/pytorch-lightning-machine-learning-zero-to-hero-in-75-lines-of-code-7892f3ba83c0
<br> https://towardsdatascience.com/from-pytorch-to-pytorch-lightning-a-gentle-introduction-b371b7caaf09
<br> https://www.youtube.com/watch?v=e47f__x7KSE
<br> https://dev.to/krypticmouse/pytorch-lightning-datamodules-callbacks-tpu-and-loggers-4nhb


In [18]:
import os
import glob
import torch
import numpy as np
from PIL import Image
from torch import nn, optim
import pytorch_lightning as pl
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import random_split, DataLoader


class BottleDataModule(pl.LightningDataModule):
    def __init__(self, data_dir: str = None, batch_size: int = 32, num_workers: int = 4):
        super().__init__()
        self.data_dir = data_dir or os.getcwd()
        self.num_workers = num_workers
        self.batch_size = batch_size
        self.transforms = transforms.Compose(
            [transforms.Scale(128),
             transforms.RandomHorizontalFlip(p=0.5),
             transforms.RandomVerticalFlip(p=0.5),
             transforms.Grayscale(num_output_channels=1),
             transforms.ToTensor(),
             transforms.Normalize((0.5,), (0.5,))]
        )

    def prepare_data(self):
        # Get data
        self.image_list = glob.glob(self.data_dir)
        label_count = -1
        self.labels = dict()
        # image list has all path names and labels
        for index, val in enumerate(self.image_list):
            string_label = val.rsplit('\\', 2)[-2]
            if not(string_label in self.labels):
                label_count += 1
            self.labels[string_label] = label_count
            # Add encoded label to the image list
            image = self.transforms(Image.open(val))
            self.image_list[index] = (torch.tensor(image, dtype=torch.float32), torch.tensor(
                self.labels[string_label],  dtype=torch.float32))

    def setup(self, train_ratio: float = 0.8, stage=None):
        train_amount = int(len(self.image_list) * train_ratio)
        self.train_data, self.val_data = random_split(
            self.image_list, [train_amount, len(self.image_list) - train_amount])

    def __len__(self):
        return len(self.image_list)

    def train_dataloader(self):
        return DataLoader(self.train_data, shuffle=True, batch_size=self.batch_size, num_workers=self.num_workers)

    def val_dataloader(self):
        return DataLoader(self.val_data, batch_size=self.batch_size, num_workers=self.num_workers)


In [19]:
class BottleClassifier(pl.LightningModule):
    def __init__(self):
        super(BottleClassifier, self).__init__()
        '''
        Conv: 
        W = Image width
        H = Image height
        F = Kernel width or height
        P = Kernel padding
        S = Stride
        ((W-F+2*P)/S)+1 * ((H-F+2*P)/S)+1
       
        Pooling: 
        n: input size
        f: filter size
        s: stride
        (n - f) / s + 1 * (n - f) / s + 1
        '''
        self.layer1 = nn.Sequential(
            # Input : (1, 128, 128)
            nn.Conv2d(in_channels=1, out_channels=4,
                      kernel_size=3, stride=1),
            nn.BatchNorm2d(8),
            nn.ReLU()
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=8, out_channels=8,
                      kernel_size=3, stride=1),
            nn.BatchNorm2d(16),
            nn.ReLU()
        )
        self.fc1 = nn.Sequential(
            nn.Linear(61, 32),
            nn.BatchNorm1d(32))
        self.fc2 = nn.Sequential(
            nn.Linear(32, 16),
            nn.BatchNorm1d(16))
        self.out = nn.Linear(16, 8)
        self.loss = nn.CrossEntropyLoss()

    def forward(self, X):
        X = self.layer1(X)
        X = self.layer2(X)
        X = nn.MaxPool2d(2)(X) # -> (16, 61, 61)
        # batch_size, _, _, _ = X.size()
        X = X.view(32, -1)
        X = self.fc1(X)
        X = self.fc2(X)
        X = self.out(X)
        return F.log_softmax(X)

    def configure_optimizers(self):
        return optim.Adam(self.parameters())

    def training_step(self, train_batch, batch_idx):
        x, y = train_batch
        logits = self.forward(x)
        loss = self.loss(logits, y)
        # Logging the loss
        self.log('train/loss', loss, on_epoch=True)
        return loss

    def validation_step(self, valid_batch, batch_idx):
        x, y = valid_batch
        logits = self.forward(x)
        loss = self.loss(logits, y)

        # Logging the loss
        self.log('valid/loss', loss, on_epoch=True)
        return loss


In [20]:
path = 'Bottle images\*\*'
data_module = BottleDataModule(path)
model = BottleClassifier()
trainer = pl.Trainer(fast_dev_run=True, auto_lr_find=True,
                     auto_scale_batch_size=True, gpus=0,)
trainer.fit(model, data_module)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Running in fast_dev_run mode: will run a full train, val, test and prediction loop using 1 batch(es).
  self.image_list[index] = (torch.tensor(image, dtype=torch.float32), torch.tensor(
