Perform Image Augmentation and build custom CNN model

In [1]:
pip install torch torchvision matplotlib tqdm torchsummary

Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from torchsummary import summary
import torchvision.models as models

In [3]:
augmentation = True

In [21]:
import torch
import torch.nn as nn

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Add the third set of convolutional, activation, and pooling layers
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # using adaptive pooling layer because image sizes are not fixed. 
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1)) 
        
        self.dropout = nn.Dropout(0.2)
        self.flatten = torch.nn.Flatten()
        self.fc = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 2)
        )

    def forward(self, x):
        x = self.pool1(self.conv1(x))
        x = self.pool2(self.conv2(x))
        x = self.pool3(self.conv3(x))
        x = self.global_pool(x)
        x = x.view(x.size(0), -1)
        x = self.flatten(x)
        x = self.dropout(x)  # You can choose to include or exclude dropout based on your requirements
        x = self.fc(x)
        return x


In [22]:
torch.manual_seed(42)

<torch._C.Generator at 0x7f7858180c10>

In [23]:
# Define data transformations and normalizations

if augmentation == False:
    data_transforms = {
        'train': transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]),
    }
else:
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
            transforms.RandomRotation(degrees=15),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]),
    }

In [24]:
data_dir_train = '../final_dataset/train'
data_dir_test = '../final_dataset/test'

image_datasets = {
    'train': datasets.ImageFolder(root=data_dir_train, transform=data_transforms['train']),
    'val': datasets.ImageFolder(root=data_dir_test, transform=data_transforms['val'])
}

dataloaders = {
    'train': DataLoader(image_datasets['train'], batch_size=16, shuffle=True, num_workers=4),
    'val': DataLoader(image_datasets['val'], batch_size=32, shuffle=False, num_workers=4)
}

In [25]:
dataloaders['train']

<torch.utils.data.dataloader.DataLoader at 0x7f788cb5f670>

In [26]:
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

SimpleCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1): ReLU()
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu3): ReLU()
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (global_pool): AdaptiveAvgPool2d(output_size=(1, 1))
  (dropout): Dropout(p=0.2, inplace=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc): Sequential(
    (0): Linear(in_features=128, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=2, bias=True)
  )
)

In [27]:
num_epochs = 10 
for epoch in range(num_epochs):
    print("Epoch {}...".format(epoch))
    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()
        else:
            model.eval()

        running_loss = 0.0
        corrects = 0

        for inputs, labels in dataloaders[phase]:
            inputs, labels = inputs.to(device), labels.to(device)
#             labels = labels.unsqueeze(1).float()

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / len(image_datasets[phase])
        epoch_acc = corrects.double() / len(image_datasets[phase])

        print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

Epoch 0...
train Loss: 0.6628 Acc: 0.6099
val Loss: 0.6611 Acc: 0.6076
Epoch 1...
train Loss: 0.6587 Acc: 0.6287
val Loss: 0.6562 Acc: 0.6181
Epoch 2...
train Loss: 0.6454 Acc: 0.6378
val Loss: 0.6584 Acc: 0.6307
Epoch 3...
train Loss: 0.6388 Acc: 0.6409
val Loss: 0.6181 Acc: 0.6826
Epoch 4...
train Loss: 0.6368 Acc: 0.6463
val Loss: 0.6339 Acc: 0.6545
Epoch 5...
train Loss: 0.6291 Acc: 0.6463
val Loss: 0.6343 Acc: 0.6433
Epoch 6...
train Loss: 0.6300 Acc: 0.6524
val Loss: 0.6313 Acc: 0.6440
Epoch 7...
train Loss: 0.6267 Acc: 0.6488
val Loss: 0.6409 Acc: 0.6713
Epoch 8...


KeyboardInterrupt: 

In [19]:
class ResNet18(nn.Module):
    def __init__(self, num_classes=2):
        super(ResNet18, self).__init__()
        # Load the pre-trained ResNet-32 model
        self.resnet18 = models.resnet18(pretrained=True)
        
        # Modify the final fully connected layer to match the number of classes in your problem
        in_features = self.resnet18.fc.in_features
        self.resnet18.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        return self.resnet18(x)

# Create an instance of the ResNet32 model
resnet_model = ResNet18()

In [20]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam([
    {'params': resnet_model.resnet18.conv1.parameters(), 'lr': 0.0001},
    {'params': resnet_model.resnet18.layer1.parameters(), 'lr': 0.0001},
    {'params': resnet_model.resnet18.layer2.parameters(), 'lr': 0.0001},
    {'params': resnet_model.resnet18.layer3.parameters(), 'lr': 0.0001},
    {'params': resnet_model.resnet18.layer4.parameters(), 'lr': 0.0001},
    {'params': resnet_model.resnet18.fc.parameters(), 'lr': 0.001},
], lr=0.001)


num_epochs = 10
for epoch in range(num_epochs):
    print("Epoch {}...".format(epoch))
    for phase in ['train', 'val']:
        if phase == 'train':
            resnet_model.train()
        else:
            resnet_model.eval()

        running_loss = 0.0
        corrects = 0

        for inputs, labels in dataloaders[phase]:
            inputs, labels = inputs.to(device), labels.to(device)
            

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = resnet_model(inputs)
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / len(image_datasets[phase])
        epoch_acc = corrects.double() / len(image_datasets[phase])

        print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

Epoch 0...
train Loss: 0.4583 Acc: 0.7878
val Loss: 0.1789 Acc: 0.9306
Epoch 1...
train Loss: 0.3662 Acc: 0.8358
val Loss: 0.1813 Acc: 0.9306
Epoch 2...
train Loss: 0.3451 Acc: 0.8476
val Loss: 0.1864 Acc: 0.9383
Epoch 3...


RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/Users/vansh/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
  File "/Users/vansh/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch
    return self.collate_fn(data)
  File "/Users/vansh/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 265, in default_collate
    return collate(batch, collate_fn_map=default_collate_fn_map)
  File "/Users/vansh/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 142, in collate
    return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]  # Backwards compatibility.
  File "/Users/vansh/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 142, in <listcomp>
    return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]  # Backwards compatibility.
  File "/Users/vansh/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 119, in collate
    return collate_fn_map[elem_type](batch, collate_fn_map=collate_fn_map)
  File "/Users/vansh/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 160, in collate_tensor_fn
    storage = elem._typed_storage()._new_shared(numel, device=elem.device)
  File "/Users/vansh/opt/anaconda3/lib/python3.9/site-packages/torch/storage.py", line 866, in _new_shared
    untyped_storage = torch.UntypedStorage._new_shared(size * self._element_size(), device=device)
  File "/Users/vansh/opt/anaconda3/lib/python3.9/site-packages/torch/storage.py", line 260, in _new_shared
    return cls._new_using_filename_cpu(size)
RuntimeError: torch_shm_manager at "/Users/vansh/opt/anaconda3/lib/python3.9/site-packages/torch/bin/torch_shm_manager": could not generate a random directory for manager socket
