In [4]:
import torch 
from torchvision import datasets
from torch import nn
import platform
from torchinfo import summary

In [21]:
if platform.system().lower() == 'windows':
    DATA_PATH = 'C:\\Users\\MK9\\Projects\\python__object-detection\\raw\\training_data_48x48'

In [17]:
class CNNBubbleDetector(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.flatten = nn.Flatten()
        
        self.feature = nn.Sequential(
            # input: rgb 3 channels
            
            # [bs, 3, 48, 48] --> [bs, 8, 44, 44]
            nn.Conv2d(3, 8, 5), 
            
            # [bs, 8, 44, 44] --> [bs, 8, 22, 22]
            nn.MaxPool2d(2),
            
            # [bs, 8, 22, 22] --> [bs, 16, 18, 18]
            nn.Conv2d(8, 16, 5),
            
            # [bs, 16, 18, 18] --> [bs, 16, 9, 9]
            nn.MaxPool2d(2)
        )
        
        self.fc = nn.Sequential(
            nn.Linear(16*9*9, 75), 
            nn.ReLU(), 
            nn.Linear(75, 1)
        )
        
        
    def forward(self, x):
        x = self.feature(x)
        x = self.flatten(x)
        x = self.fc(x)
        return torch.sigmoid(x)

In [18]:
cnn_bubble_detector = CNNBubbleDetector()
summary(cnn_bubble_detector, input_size=[10, 3, 48, 48])

Layer (type:depth-idx)                   Output Shape              Param #
CNNBubbleDetector                        --                        --
├─Sequential: 1-1                        [10, 16, 9, 9]            --
│    └─Conv2d: 2-1                       [10, 8, 44, 44]           608
│    └─MaxPool2d: 2-2                    [10, 8, 22, 22]           --
│    └─Conv2d: 2-3                       [10, 16, 18, 18]          3,216
│    └─MaxPool2d: 2-4                    [10, 16, 9, 9]            --
├─Flatten: 1-2                           [10, 1296]                --
├─Sequential: 1-3                        [10, 1]                   --
│    └─Linear: 2-5                       [10, 75]                  97,275
│    └─ReLU: 2-6                         [10, 75]                  --
│    └─Linear: 2-7                       [10, 1]                   76
Total params: 101,175
Trainable params: 101,175
Non-trainable params: 0
Total mult-adds (M): 23.16
Input size (MB): 0.28
Forward/backward pass size

In [22]:
from torchvision.datasets import ImageFolder
import torchvision.transforms as T

img_transform = {
    'train': T.Compose([
        T.RandomRotation(degrees=15), 
        T.RandomHorizontalFlip(), 
        T.ToTensor()
    ]), 
    
    'valid': T.Compose([
        T.ToTensor()
    ]), 
    
    'test': T.Compose([
        T.ToTensor()
    ])
}

bubble_dataset = ImageFolder(DATA_PATH, transform=T.ToTensor())
print(len(bubble_dataset))

1393


In [28]:
train_len = int(0.8 * len(bubble_dataset))
valid_len = len(bubble_dataset) - train_len
train_data, valid_data = torch.utils.data.random_split(bubble_dataset, [train_len, valid_len])
print(len(train_data), len(valid_data), len(bubble_dataset))
train_data.dataset.transform = img_transform['train']

1114 279 1393


In [39]:
sum([train_data[i][1] for i in range(len(train_data))])

79

In [55]:
from torch.utils.data import DataLoader
train_dataloader = DataLoader(train_data, batch_size=32)
valid_dataloader = DataLoader(valid_data, batch_size=32)

In [43]:
X = torch.rand(10, 3, 48, 48)
pred = cnn_bubble_detector(X)

In [45]:
pred

tensor([[0.4865],
        [0.4833],
        [0.4870],
        [0.4890],
        [0.4856],
        [0.4889],
        [0.4835],
        [0.4861],
        [0.4824],
        [0.4862]], grad_fn=<SigmoidBackward0>)

In [58]:
from torch import optim
import numpy as np

loss_func = nn.BCELoss()
optimizer = optim.Adam(cnn_bubble_detector.parameters(), lr=0.001)


def train_loop(dataloader, model, loss_fn, optimizer):  
    data_size = len(dataloader.dataset) 
    
    # Set to training mode
    model.train()
    
    for batch, (X, y) in enumerate(dataloader):
        
        # forward pass
        pred = model(X)
        y = y.unsqueeze(1).float()
        loss = loss_fn(pred, y)
        
        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # display progress
        if batch % 10 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}, [{current:>d}]/[{data_size:>d}]")
            
            
def valid_loop(dataloader, model, loss_fn):
    
    # Set to evaluation mode
    model.eval()
    
    data_size = len(dataloader.dataset)
    valid_loss, correct = 0, 0
    
    with torch.no_grad():
        for (X, y) in dataloader:
            pred = model(X)
            
            valid_loss += loss_fn(pred, y.unsqueeze(1).float()).item()
            correct += (pred[:,0] > .5).type(torch.float64).sum().item()
    
    valid_loss /= data_size
    correct /= data_size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {valid_loss:>8f}\n")
        
        
# Start trainning
epochs = 300
for epoch in range(epochs):
    print(f"Epoch: {epoch+1}/{epochs}\n------------------------------------------")
    train_loop(train_dataloader, cnn_bubble_detector, loss_func, optimizer)
    valid_loop(valid_dataloader, cnn_bubble_detector, loss_func)


Epoch: 1/300
------------------------------------------
loss: 0.003167, [0]/[1114]
loss: 0.008933, [320]/[1114]
loss: 0.014678, [640]/[1114]
loss: 0.073124, [960]/[1114]
Test Error: 
 Accuracy: 5.4%, Avg loss: 0.005915

Epoch: 2/300
------------------------------------------
loss: 0.001743, [0]/[1114]
loss: 0.010941, [320]/[1114]
loss: 0.011032, [640]/[1114]
loss: 0.022165, [960]/[1114]
Test Error: 
 Accuracy: 5.0%, Avg loss: 0.006616

Epoch: 3/300
------------------------------------------
loss: 0.064267, [0]/[1114]
loss: 0.003051, [320]/[1114]
loss: 0.017033, [640]/[1114]
loss: 0.033196, [960]/[1114]
Test Error: 
 Accuracy: 6.1%, Avg loss: 0.005030

Epoch: 4/300
------------------------------------------
loss: 0.041819, [0]/[1114]
loss: 0.013377, [320]/[1114]
loss: 0.009731, [640]/[1114]
loss: 0.098691, [960]/[1114]
Test Error: 
 Accuracy: 6.1%, Avg loss: 0.004456

Epoch: 5/300
------------------------------------------
loss: 0.007378, [0]/[1114]
loss: 0.001734, [320]/[1114]
loss: 0.