Import the required libraries.

In [1]:
%matplotlib inline
#a script from https://github.com/Bjarten/early-stopping-pytorch#:~:text=Early%20stopping%20is%20a%20form,a%20row%20the%20training%20stops.
from early_stopping_script import EarlyStopping
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

In [2]:
#preprocessing pipeline applied to every image, in this order:
#  1. convert the image to a tensor
#  2. normalize each of the 3 color channels with mean 0.5 and std 0.5 (for quicker learning)
#  3. resize to 300x300 (per the original note: the largest image size in the dataset)
channel_mean = (0.5,0.5,0.5)
channel_std = (0.5,0.5,0.5)
pipeline_steps = [
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
    transforms.Resize((300,300)),
]
transform = transforms.Compose(pipeline_steps)

In [3]:
#build the training dataset from the image files
trainset = torchvision.datasets.ImageFolder(
    root='/kaggle/input/intel-image-classification/seg_train/seg_train', transform=transform)

#shuffle the sample indices with a fixed seed, then keep the first 90% for training
#and the remaining 10% for validation (the validation split is used for early stopping)
indices = list(range(len(trainset)))
np.random.seed(42)
np.random.shuffle(indices)
split_at = round(len(trainset)*0.9)
train_indices = indices[:split_at]
val_indices = indices[split_at:]

#each sampler restricts its loader to one of the two index subsets
train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_indices)
val_sampler = torch.utils.data.sampler.SubsetRandomSampler(val_indices)

#both loaders read from the same dataset object; only the sampler differs
loader_kwargs = dict(batch_size=100, num_workers=2)
trainloader = DataLoader(trainset, sampler=train_sampler, **loader_kwargs)
valloader = DataLoader(trainset, sampler=val_sampler, **loader_kwargs)

In [4]:
#ImageFolder creates its own labels; keep just the class names in a list so a
#label index can be turned into a name by plain indexing
#(relies on class_to_idx listing the classes in index order)
classes = list(trainset.class_to_idx)
classes

In [6]:
#print a batch of training images to test the dataloader
data_iter = iter(trainloader)
#use the built-in next(): the iterator's .next() method is not available in recent PyTorch releases
images, labels = next(data_iter)

def print_image(image):
    """Display a normalized image tensor with matplotlib.

    Args:
        image: tensor in channel-first layout (it is transposed with (1, 2, 0)
            before plotting) whose values were normalized with mean 0.5 / std 0.5.
    """
    x = image/2 +0.5 #reverse normalization
    #detach and copy to host memory first so tensors that live on the GPU can be plotted as well
    np_image = x.detach().cpu().numpy() #convert to numpy for plt viz
    #move the channel axis last before handing the array to plt.imshow
    plt.imshow(np.transpose(np_image, (1, 2, 0)))
    
# show the first five images of the batch as one grid
preview_grid = torchvision.utils.make_grid(images[:5, :, :])
print_image(preview_grid)
# print the class name of each of those five images
preview_names = ['%5s' % classes[labels[j]] for j in range(5)]
print(' '.join(preview_names))

The training and validation dataloaders are created. Next, create the test dataloader.

In [7]:
#build the test dataset from its image folder, reusing the training transform
test_root = '/kaggle/input/intel-image-classification/seg_test/seg_test'
testset = torchvision.datasets.ImageFolder(root=test_root, transform=transform)
#serve it in shuffled batches of 100 using 2 loader workers
testloader = DataLoader(testset, batch_size=100, shuffle=True, num_workers=2)

In [8]:
#print a batch of testing images
data_iter = iter(testloader)
#built-in next() works on any iterator; .next() does not exist on recent PyTorch
#loader iterators nor on the generator returned by a DeviceDataLoader
images, labels = next(data_iter)

# show images
print_image(torchvision.utils.make_grid(images[:5, :, :]))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(5)))

The dataloaders are prepared. Next, create a basic CNN.

In [9]:
class AlexNet(nn.Module):
    """AlexNet-style convolutional network for image classification.

    Replicates AlexNet; filter layers taken from
    https://www.analyticsvidhya.com/blog/2021/03/introduction-to-the-architecture-of-alexnet/

    Five convolutional layers (with max pooling after the first, second and
    fifth) feed three fully connected layers. The final adaptive pooling
    always produces a 6x6 map, so the first fully connected layer receives
    6*6*256 features per image.

    Args:
        num_classes: number of output scores produced per image. Defaults to 6,
            the value that was previously hard-coded.
    """
    def __init__(self, num_classes=6):
        #initialise inherited methods and properties from nn Module
        super().__init__()
        #first convolutional layer - 96 filters, 11x11, stride 4
        self.conv1 = nn.Conv2d(3, 96, 11, stride=4)
        #max pooling layer - 3x3, stride 2
        self.max1 = nn.MaxPool2d(3, stride=2)
        #second convolutional layer - 256 filters, 5x5, stride 1
        self.conv2 = nn.Conv2d(96, 256, 5)
        #second max pooling
        self.max2 = nn.MaxPool2d(3, stride=2)
        #third conv - 384 filters, 3x3
        self.conv3 = nn.Conv2d(256, 384, 3)
        #fourth conv - 384 filters, 3x3
        self.conv4 = nn.Conv2d(384, 384, 3)
        #fifth conv - 256 filters, 3x3
        self.conv5 = nn.Conv2d(384, 256, 3)
        #max pool 3 - adaptive, always outputs a 6x6 map
        self.max3 = nn.AdaptiveMaxPool2d(6)
        #dropout
        self.drop1 = nn.Dropout(p=0.5)
        #flatten
        self.flat = nn.Flatten()
        #FC1
        self.full1 = nn.Linear(6*6*256, 4096)
        #dropout
        self.drop2 = nn.Dropout(p=0.5)
        #FC2
        self.full2 = nn.Linear(4096, 1000)
        #using 1000 since number of classes is much lower than in original AlexNet

        #FC3 - one output per class
        self.full3 = nn.Linear(1000, num_classes)

    def forward(self, x):
        """Return the raw class scores (no softmax applied) for a batch of images.

        Args:
            x: image batch with 3 channels in dimension 1 (conv1 expects 3 input channels).

        Returns:
            Tensor with one row per image and `num_classes` columns.
        """
        #convolutional feature extractor
        x = F.relu(self.conv1(x))
        x = self.max1(x)
        x = F.relu(self.conv2(x))
        x = self.max2(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))
        x = self.max3(x)
        #classifier head: dropout, flatten, then the fully connected layers
        x = self.drop1(x)
        x = self.flat(x)
        x = F.relu(self.full1(x))
        x = self.drop2(x)
        x = F.relu(self.full2(x))
        x = self.full3(x)
        return x

In [10]:
#set up the network, the loss function and the optimizer
#initialize CNN
cnn = AlexNet()
#cross entropy loss for classification
lossFunction = nn.CrossEntropyLoss()
#stochastic gradient descent on mini batches, with momentum: part of the previous
#step is added to the current one to accelerate gradient descent
#https://paperswithcode.com/method/sgd-with-momentum
sgd_options = {'lr': 0.001, 'momentum': 0.9}
optimizer = optim.SGD(cnn.parameters(), **sgd_options)

In [11]:
#display the network so its layers can be inspected
cnn

In [12]:
#training function
def train(cnn, lossFunction, optimizer, trainloader, valloader, max_epochs=50):
    """Train `cnn` with early stopping driven by the validation loss.

    Every epoch performs one pass over `trainloader` (updating the parameters)
    and one pass over `valloader` (without gradients). The mean validation loss
    is passed to an `EarlyStopping` object; once it sets `early_stop`, training
    ends. Finally the weights stored in 'checkpoint.pt' are loaded into `cnn`.

    Args:
        cnn: model to train; it is updated in place.
        lossFunction: callable invoked as lossFunction(output, labels).
        optimizer: optimizer that steps the parameters of `cnn`.
        trainloader: iterable of (images, labels) training batches.
        valloader: iterable of (images, labels) validation batches.
        max_epochs: upper bound on the number of epochs.

    Returns:
        Tuple (cnn, avg_trainlosses, avg_vallosses). Both lists hold one mean
        loss per epoch that was run and therefore always have equal length.
    """
    print('training started')
    avg_vallosses = []
    avg_trainlosses = []

    # initialize the early_stopping object
    early_stopping = EarlyStopping(verbose=True)

    for epoch in range(max_epochs):
        #report epochs 1-based, consistent with the summary line below
        print(f'epoch: {epoch+1}')

        #training phase, set to training mode so dropout layers work properly
        cnn.train()
        train_losses = []

        #iterate through training batches from data loader
        for images, labels in trainloader:
            #reset gradient of optimizer to 0 to limit gradients to each batch only
            optimizer.zero_grad()
            #pass batch through CNN
            output = cnn(images)
            #get loss
            loss = lossFunction(output, labels)
            #backward pass onto parameters
            loss.backward()
            #take one step
            optimizer.step()
            #add to train_losses list
            train_losses.append(loss.item())
        avg_trainloss = np.average(train_losses)
        #record the training loss right away so that both histories stay the
        #same length even when early stopping ends the loop in this epoch
        avg_trainlosses.append(avg_trainloss)

        #evaluate the loss on the validation set
        cnn.eval()
        val_losses = []
        with torch.no_grad(): #do not calculate gradients, for inference
            for images, labels in valloader:
                output = cnn(images)
                #get validation loss
                valloss = lossFunction(output, labels)
                val_losses.append(valloss.item())
        avg_valloss = np.average(val_losses)
        avg_vallosses.append(avg_valloss)

        print(f'epoch: {epoch+1}, train loss: {avg_trainloss}, val loss: {avg_valloss}')

        #check for early stopping
        # early_stopping needs the validation loss to check if it has decreased,
        # and if it has, it will make a checkpoint of the current model
        early_stopping(avg_valloss, cnn)

        if early_stopping.early_stop:
            print("Early stopping")
            break

    # load the last checkpoint with the best model
    # NOTE(review): assumes EarlyStopping saves to 'checkpoint.pt' - confirm in early_stopping_script
    cnn.load_state_dict(torch.load('checkpoint.pt'))

    return cnn, avg_trainlosses, avg_vallosses

In [13]:
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)
    
def to_device(data, device):
    """Move a tensor, or every tensor in a (nested) list/tuple, to `device`.

    A list or tuple is processed element by element and always comes back as a
    list; anything else is moved with its own `.to(device, non_blocking=True)`.
    """
    if not isinstance(data, (list,tuple)):
        return data.to(device, non_blocking=True)
    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader:
    """Thin wrapper around a dataloader that puts every batch on a given device."""

    def __init__(self, dl, device):
        #remember the wrapped loader and the target device for later iteration
        self.device = device
        self.dl = dl

    def __len__(self):
        """Return the number of batches reported by the wrapped loader."""
        batch_count = len(self.dl)
        return batch_count

    def __iter__(self):
        """Iterate over the wrapped loader, yielding each batch after moving it to the device."""
        yield from (to_device(batch, self.device) for batch in self.dl)

In [14]:
#select the compute device (GPU when available, otherwise CPU) and display it
device = get_default_device()
device

In [15]:
#wrap every loader so that its batches are moved to the selected device while iterating
trainloader, valloader, testloader = [
    DeviceDataLoader(loader, device)
    for loader in (trainloader, valloader, testloader)
]

In [16]:
#move the network to the selected device
cnn = to_device(cnn, device)

In [17]:
#run the training loop; keeps the returned model and both loss histories
model, avg_trainlosses, avg_vallosses = train(
    cnn, lossFunction, optimizer, trainloader, valloader
)

In [18]:
#measure classification accuracy on the test set
#switch to evaluation mode explicitly so dropout is disabled regardless of earlier cells
cnn.eval()
correct = 0
total = 0
with torch.no_grad(): #do not calculate gradients, for inference
    for images, labels in testloader:
        outputs = cnn(images)
        #along each example get the max position (the second returned value)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

#report the number of images actually evaluated instead of a hard-coded count
print('Accuracy of the network on the %d test images: %d %%' % (
    total, 100 * correct / total))

TODO: add a training curve using the cross-validation loss, then the early stopping function.