Perform image augmentation and build a custom CNN model

In [1]:
!pip install torch torchvision matplotlib tqdm torchsummary


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from torchsummary import summary 

In [3]:
# Switch read by the transform-definition cell: True selects the random
# crop / flip / colour-jitter / rotation training pipeline, False selects the
# plain resize-only pipeline.
augmentation = True

In [4]:
class SimpleCNN(nn.Module):
    """Small two-stage convolutional classifier for images of arbitrary size.

    Two conv -> ReLU -> max-pool stages are followed by global average
    pooling, dropout and a two-layer fully connected head. Because the
    spatial dimensions are collapsed to 1x1 before the head, the network
    accepts any input height/width that survives the two 2x2 poolings.

    Args:
        num_classes: Number of output scores produced per sample.
        in_channels: Number of channels in the input images.
        dropout_p: Drop probability applied to the pooled features.

    The defaults reproduce the original fixed architecture
    (3 input channels, 2 classes, dropout 0.5).
    """

    def __init__(self, num_classes=2, in_channels=3, dropout_p=0.5):
        super().__init__()
        # Submodule names and creation order are kept stable: the names are
        # the state-dict keys, and the order determines which random draws
        # each layer receives when a seed has been set.
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Adaptive pooling reduces every feature map to a single value, so the
        # classifier head sees 64 features regardless of the input image size.
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))

        self.dropout = nn.Dropout(dropout_p)

        self.fc = nn.Sequential(
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        """Return unnormalised class scores of shape (batch, num_classes).

        Args:
            x: Image batch of shape (batch, in_channels, height, width).
        """
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = self.global_pool(x)
        # (batch, 64, 1, 1) -> (batch, 64)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        return self.fc(x)

In [5]:
# Seed PyTorch's global random number generator before the model and data
# loaders are created, so repeated runs start from the same random state.
torch.manual_seed(42)

<torch._C.Generator at 0x106063db0>

In [6]:
# Define data transformations and normalizations

# Every pipeline ends with the same tensor conversion and channel-wise
# normalisation, so that tail is defined once and appended below.
# (The mean/std values match the widely used ImageNet statistics.)
tensor_and_normalize = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]

if not augmentation:
    # Deterministic preprocessing: squash every image to 224x224.
    train_steps = [transforms.Resize((224, 224))]
    val_steps = [transforms.Resize((224, 224))]
else:
    # Random training-time augmentation; order matters because each step
    # operates on the output of the previous one.
    train_steps = [
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
        transforms.RandomRotation(degrees=15),
    ]
    # NOTE(review): validation preprocessing differs from the non-augmented
    # branch (resize-to-256 + centre crop vs. direct 224x224 resize), so
    # validation numbers are not strictly comparable across the two settings.
    val_steps = [
        transforms.Resize(256),
        transforms.CenterCrop(224),
    ]

data_transforms = {
    'train': transforms.Compose(train_steps + tensor_and_normalize),
    'val': transforms.Compose(val_steps + tensor_and_normalize),
}

In [7]:
# Dataset roots; the 'val' split is read from the test directory.
data_dir_train = '../final_dataset/train'
data_dir_test = '../final_dataset/test'

# Map each split name to the folder it is loaded from.
split_roots = {'train': data_dir_train, 'val': data_dir_test}

# One ImageFolder per split, each using that split's transform pipeline.
image_datasets = {
    split: datasets.ImageFolder(root=root, transform=data_transforms[split])
    for split, root in split_roots.items()
}

# Batches of 32 with 4 worker processes; only the training split is shuffled.
dataloaders = {
    split: DataLoader(dataset, batch_size=32, shuffle=(split == 'train'), num_workers=4)
    for split, dataset in image_datasets.items()
}

In [8]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Network, loss and optimiser used by the training loop.
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Final expression of the cell: moves the parameters to `device` and echoes
# the module structure as the cell output.
model.to(device)

SimpleCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1): ReLU()
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (global_pool): AdaptiveAvgPool2d(output_size=(1, 1))
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Sequential(
    (0): Linear(in_features=64, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=2, bias=True)
  )
)

In [9]:
# Print a layer-by-layer table of output shapes and parameter counts for a
# single 3x224x224 input.
summary(model, input_size=(3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 224, 224]             896
              ReLU-2         [-1, 32, 224, 224]               0
         MaxPool2d-3         [-1, 32, 112, 112]               0
            Conv2d-4         [-1, 64, 112, 112]          18,496
              ReLU-5         [-1, 64, 112, 112]               0
         MaxPool2d-6           [-1, 64, 56, 56]               0
 AdaptiveAvgPool2d-7             [-1, 64, 1, 1]               0
            Linear-8                  [-1, 128]           8,320
              ReLU-9                  [-1, 128]               0
           Linear-10                    [-1, 2]             258
Total params: 27,970
Trainable params: 27,970
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 41.35
Params size (MB): 0.11
Estimated To

In [10]:
num_epochs = 50
for epoch in range(num_epochs):
    print("Epoch {}...".format(epoch))
    # Every epoch runs one optimisation pass over 'train' followed by one
    # gradient-free evaluation pass over 'val'.
    for phase in ['train', 'val']:
        is_train = phase == 'train'
        # train(True) enables dropout; train(False) is equivalent to eval().
        model.train(is_train)

        running_loss = 0.0
        # Kept as a plain Python int: avoids accumulating a device tensor and
        # the later `.double()` call that fails if no batch was processed.
        corrects = 0

        for inputs, labels in dataloaders[phase]:
            inputs, labels = inputs.to(device), labels.to(device)

            # Autograd is only recorded during the training phase.
            with torch.set_grad_enabled(is_train):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                if is_train:
                    # Gradients only need clearing when a backward pass follows.
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

            # The loss is a per-batch mean, so weight it by the batch size to
            # get a correct per-sample average at the end of the epoch.
            running_loss += loss.item() * inputs.size(0)
            preds = outputs.argmax(dim=1)
            corrects += (preds == labels).sum().item()

        num_samples = len(image_datasets[phase])
        epoch_loss = running_loss / num_samples
        epoch_acc = corrects / num_samples

        print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

Epoch 0...
train Loss: 0.6832 Acc: 0.5628
val Loss: 0.6890 Acc: 0.5613
Epoch 1...
train Loss: 0.6568 Acc: 0.6129
val Loss: 0.6703 Acc: 0.5943
Epoch 2...
train Loss: 0.6470 Acc: 0.6360
val Loss: 0.6602 Acc: 0.6048
Epoch 3...
train Loss: 0.6444 Acc: 0.6387
val Loss: 0.6718 Acc: 0.5907
Epoch 4...
train Loss: 0.6462 Acc: 0.6324
val Loss: 0.6471 Acc: 0.6237
Epoch 5...
train Loss: 0.6376 Acc: 0.6406
val Loss: 0.6428 Acc: 0.6328
Epoch 6...
train Loss: 0.6262 Acc: 0.6530
val Loss: 0.6335 Acc: 0.6384
Epoch 7...
train Loss: 0.6247 Acc: 0.6554
val Loss: 0.6219 Acc: 0.6657
Epoch 8...
train Loss: 0.6232 Acc: 0.6436
val Loss: 0.6329 Acc: 0.6370
Epoch 9...
train Loss: 0.6229 Acc: 0.6481
val Loss: 0.6354 Acc: 0.6300
Epoch 10...
train Loss: 0.6169 Acc: 0.6655
val Loss: 0.6009 Acc: 0.6776
Epoch 11...
train Loss: 0.6111 Acc: 0.6645
val Loss: 0.6402 Acc: 0.6230
Epoch 12...
train Loss: 0.5990 Acc: 0.6736
val Loss: 0.5815 Acc: 0.6910
Epoch 13...
train Loss: 0.6005 Acc: 0.6749
val Loss: 0.5941 Acc: 0.6861
Ep