Image augmentation, a custom CNN, and fine-tuned ResNet-18 / ShuffleNet V2 classifiers

In [None]:
pip install torch torchvision matplotlib tqdm torchsummary

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from torchsummary import summary
import torchvision.models as models

In [2]:
# Switch read by the transform-definition cell: True selects the random
# crop / flip / colour-jitter / rotation training pipeline, False selects the
# plain resize-only pipeline.
augmentation = True

In [3]:
# Seed PyTorch's global random number generator for repeatable runs.
# NOTE(review): only torch is seeded here; Python's `random` and NumPy are not.
torch.manual_seed(42)

<torch._C.Generator at 0x7f8e78b0cbb0>

In [4]:
# Define data transformations and normalizations

# Per-channel mean / std used by every pipeline below. These are the values
# torchvision documents for its pre-trained classification models.
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]


def _to_normalized_tensor():
    """Return fresh ToTensor + Normalize steps, the common tail of every pipeline."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
    ]


if not augmentation:
    # No augmentation: both splits are simply resized to 224x224.
    data_transforms = {
        'train': transforms.Compose([
            transforms.Resize((224, 224)),
            *_to_normalized_tensor(),
        ]),
        'val': transforms.Compose([
            transforms.Resize((224, 224)),
            *_to_normalized_tensor(),
        ]),
    }
else:
    # Augmentation: random geometric and colour perturbations on the training
    # split only; validation uses a deterministic resize + centre crop.
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
            transforms.RandomRotation(degrees=15),
            *_to_normalized_tensor(),
        ]),
        'val': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            *_to_normalized_tensor(),
        ]),
    }

In [5]:
# Locations of the two image folders (one sub-directory per class).
data_dir_train = '../final_dataset/train'
data_dir_test = '../final_dataset/test'

# The 'val' split is backed by the test directory.
image_datasets = {
    split: datasets.ImageFolder(root=folder, transform=data_transforms[split])
    for split, folder in (('train', data_dir_train), ('val', data_dir_test))
}

# Training batches are shuffled and smaller; validation batches keep their order.
dataloaders = {
    split: DataLoader(image_datasets[split], num_workers=4, **loader_options)
    for split, loader_options in (
        ('train', {'batch_size': 16, 'shuffle': True}),
        ('val', {'batch_size': 32, 'shuffle': False}),
    )
}

In [12]:
def train_model(model, optimizer, num_epochs, loaders=None, device=None):
    """Train ``model`` and evaluate it after every epoch, printing loss and accuracy.

    Every epoch runs a 'train' pass (gradients enabled, one optimizer step per
    batch) followed by a 'val' pass (gradients disabled). The loss is
    cross-entropy. For each phase the sample-weighted mean loss and the
    accuracy are printed.

    Args:
        model: ``nn.Module`` mapping a batch of inputs to per-class scores.
        optimizer: Optimizer already constructed over ``model``'s parameters.
        num_epochs: Number of epochs to run; must be an integer.
        loaders: Optional mapping with 'train' and 'val' entries, each an
            iterable of ``(inputs, labels)`` batches. Defaults to the
            notebook-level ``dataloaders`` dict.
        device: Optional device the batches are moved to. Defaults to the
            device of the model's first parameter (CPU if it has none), so the
            function no longer depends on a global ``device`` being defined.

    Raises:
        TypeError: If ``num_epochs`` cannot be used as an integer epoch count
            (for example when an optimizer is passed by mistake).
    """
    try:
        epochs = range(num_epochs)
    except TypeError as exc:
        raise TypeError(
            f"num_epochs must be an integer, got {type(num_epochs).__name__}"
        ) from exc

    if loaders is None:
        loaders = dataloaders  # notebook-level default
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    criterion = nn.CrossEntropyLoss()
    for epoch in epochs:
        print("Epoch {}...".format(epoch))
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            # train(True) enables dropout / batch-norm updates; train(False) == eval().
            model.train(is_train)

            running_loss = 0.0
            corrects = 0
            seen = 0

            for inputs, labels in loaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)

                # Gradients only need clearing when a backward pass follows.
                if is_train:
                    optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                batch_size = inputs.size(0)
                # criterion returns the batch mean; re-weight by batch size so the
                # epoch figure is a per-sample mean even if the last batch is short.
                running_loss += loss.item() * batch_size
                _, preds = torch.max(outputs, 1)
                corrects += (preds == labels).sum().item()
                seen += batch_size

            # Divide by the number of samples actually processed; report NaN
            # instead of crashing when a loader yields nothing.
            epoch_loss = running_loss / seen if seen else float('nan')
            epoch_acc = corrects / seen if seen else float('nan')
            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

In [None]:
# CNN Model

class SimpleCNN(nn.Module):
    """Small three-stage convolutional classifier.

    Each stage is Conv2d(3x3, padding 1) -> ReLU -> MaxPool2d(2), widening the
    channels 3 -> 32 -> 64 -> 128. A global average pool reduces the feature
    map to one value per channel, followed by dropout and a two-layer
    fully-connected head.

    Args:
        num_classes: Number of output scores per sample (default 2).
    """

    def __init__(self, num_classes=2):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Third set of convolutional, activation, and pooling layers
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Adaptive pooling collapses any spatial size to 1x1, so the linear head
        # always receives 128 features even when image sizes are not fixed.
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))

        self.dropout = nn.Dropout(0.2)
        self.flatten = torch.nn.Flatten()
        self.fc = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        """Return a (batch, num_classes) tensor of class scores for image batch ``x``."""
        # Apply the ReLU after every convolution. Without it the three
        # convolutions have no non-linearity between them.
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = self.pool3(self.relu3(self.conv3(x)))
        x = self.global_pool(x)
        x = self.flatten(x)  # (batch, 128, 1, 1) -> (batch, 128)
        x = self.dropout(x)  # only active in training mode
        x = self.fc(x)
        return x


In [None]:
# CNN Model Hyperparameters

num_epochs = 10

# Resolve the device before anything that needs it.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

cnn_model = SimpleCNN()
cnn_model.to(device)

# Optimise the CNN's own parameters. The previous `model.parameters()` referred
# to a name this notebook never defines.
cnn_optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)

train_model(cnn_model, cnn_optimizer, num_epochs)

In [None]:
# Resnet Model

class ResNet18(nn.Module):
    """Pre-trained torchvision ResNet-18 whose final layer is replaced for this task.

    Args:
        num_classes: Number of outputs of the replacement fully connected layer.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        # Load the pre-trained ResNet-18 model.
        backbone = models.resnet18(pretrained=True)

        # Swap the final fully connected layer for one with `num_classes`
        # outputs, keeping the input width of the layer it replaces.
        head_width = backbone.fc.in_features
        backbone.fc = nn.Linear(head_width, num_classes)

        self.resnet18 = backbone

    def forward(self, x):
        """Run ``x`` through the wrapped network and return its output."""
        scores = self.resnet18(x)
        return scores

# Create an instance of the ResNet-18 wrapper (default: 2 output classes)
resnet_model = ResNet18()

In [None]:
# Resnet Hyperparameters

resnet_num_epochs = 10
# Kept so the name stays defined for other cells; train_model builds its own criterion.
criterion = nn.CrossEntropyLoss()

# The pre-trained layers are fine-tuned with a 10x smaller learning rate than
# the freshly initialised classification head.
backbone_lr = 0.0001
head_lr = 0.001

# NOTE(review): `bn1` is in none of the groups below, so its parameters are
# never updated by this optimizer. Confirm whether that is intended.
resnet_optimizer = optim.Adam(
    [
        {'params': getattr(resnet_model.resnet18, layer_name).parameters(), 'lr': backbone_lr}
        for layer_name in ('conv1', 'layer1', 'layer2', 'layer3', 'layer4')
    ]
    + [{'params': resnet_model.resnet18.fc.parameters(), 'lr': head_lr}],
    lr=head_lr,
)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
resnet_model.to(device)

# Pass the epoch count as the third argument. The optimizer was previously
# passed twice, which fails when train_model calls range() on it.
train_model(resnet_model, resnet_optimizer, resnet_num_epochs)

In [None]:
# Shuffle Net Model

shufflenet_model = models.shufflenet_v2_x1_0(pretrained=True)
# Replace the classifier with a 2-class head. Its input width is read from the
# layer it replaces rather than hard-coded, matching how the ResNet head is built.
shufflenet_model.fc = nn.Linear(shufflenet_model.fc.in_features, 2)

shufflenet_model.to(device)

Downloading: "https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth" to /Users/vansh/.cache/torch/hub/checkpoints/shufflenetv2_x1-5666bf0f80.pth
100%|██████████████████████████████████████| 8.79M/8.79M [00:00<00:00, 22.9MB/s]

Epoch 0...





train Loss: 0.4612 Acc: 0.7869
val Loss: 0.2782 Acc: 0.8900
Epoch 1...
train Loss: 0.3868 Acc: 0.8227
val Loss: 0.1746 Acc: 0.9397
Epoch 2...
train Loss: 0.3847 Acc: 0.8306
val Loss: 0.1975 Acc: 0.9131
Epoch 3...
train Loss: 0.3712 Acc: 0.8455
val Loss: 0.1928 Acc: 0.9222
Epoch 4...
train Loss: 0.3466 Acc: 0.8482
val Loss: 0.1634 Acc: 0.9439
Epoch 5...
train Loss: 0.3500 Acc: 0.8424
val Loss: 0.1941 Acc: 0.9432
Epoch 6...


In [None]:
# Fine-tune every ShuffleNet parameter with Adam at a single learning rate.
shufflenet_num_epochs = 10
shufflenet_optimizer = optim.Adam(
    shufflenet_model.parameters(),
    lr=0.001,
)

train_model(
    shufflenet_model,
    shufflenet_optimizer,
    shufflenet_num_epochs,
)