In [1]:
# ***************************************************************************
# --------------------------------- Imports ---------------------------------
# ***************************************************************************

import numpy as np # linear algebra

import os # file handling
import json # file handling
import pickle # file handling
import zipfile # file handling

import torch # deep learning
import torch.nn as nn # model architecture components
import torchvision # deep learning for computer vision
from torch.utils.data import Dataset # shortcuts for writing dataset

import tqdm # progress bar

import PIL # image processing

import random # for data loader

import cv2 # image processing

In [2]:
# ***************************************************************************
# --------------- Image Classification Dataset Class ------------------------
# ***************************************************************************

class PortClassificationDataset(Dataset):
    """Class-balanced image classification dataset whose images are read lazily from a zip archive."""

    def __init__(self,root,pkl_images,pkl_labels,image_size=(128,128),max_len=None,augment=True):
        '''
        dataset for Port

        arguments
            root: the path of the zip file containing the data
            pkl_images: the path of the pickled (list) version of the image filenames within the zip folder
            pkl_labels: the path of the pickled (list) version of the image classes within the zip folder
                (every label must expose `.item()`, e.g. a 0-d tensor)
            image_size: target size handed to the first Resize transform
            max_len: optional cap on the number of samples; a random subset of this size is kept
                after class balancing
            augment: when True (the default, and the historical behaviour) RandomAffine and ColorJitter
                are part of the transform pipeline; pass False for a deterministic pipeline
                (e.g. for validation or test data)

        Note that roboflow did all the transforming before we downloaded the data. If we need more transformations, we can go back and download the unedited version, then implement our own transformations.
        '''
        self.root=root
        self.image_size=image_size
        print(f'opening zipped data...')
        # the archive stays open for the lifetime of the dataset because __getitem__ reads from it;
        # call close() once the dataset is no longer needed
        self.zf=zipfile.ZipFile(self.root,'r')
        # get other parameters from zipped outputs of port-dataset-for-classification
        # NOTE(review): pickle.load can execute arbitrary code; only open archives you trust
        print(f'loading labels...')
        with self.zf.open(pkl_labels,'r') as labels:
            self.labels=pickle.load(labels)
        print(f'loading images...')
        with self.zf.open(pkl_images,'r') as images:
            self.images=pickle.load(images)

        # -------------- balance classes -------------- #
        # sample positions grouped by class; labels other than 0, 1 and 2 are dropped
        balanced_classes=(0,1,2)
        idx_by_class={c:[] for c in balanced_classes}
        print(f'length of labels: {len(self.labels)}')
        for i,label in enumerate(self.labels):
            value=label.item()
            if value in idx_by_class:
                idx_by_class[value].append(i)
        # every class is down-sampled to the size of the rarest class
        # (if one of the classes is absent this is 0 and the dataset ends up empty)
        num_per_class=min(len(idx_by_class[c]) for c in balanced_classes)
        keep_idx=[]
        for c in balanced_classes:
            keep_idx.extend(random.sample(idx_by_class[c],k=num_per_class))
        # reconstruct data lists (class 0 samples first, then class 1, then class 2)
        self.labels=[self.labels[i] for i in keep_idx]
        self.images=[self.images[i] for i in keep_idx]

        # ---------------- enforce max length constraint ------------------- #
        if max_len is not None and max_len<len(self.labels):
            subsample_idx=random.sample(range(len(self.labels)),k=max_len)
            self.labels=[self.labels[i] for i in subsample_idx]
            self.images=[self.images[i] for i in subsample_idx]

        # ---------------- transforms ------------------- #
        steps=[
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Resize(self.image_size),
        ]
        if augment:
            # random perturbations: a fresh draw is made every time a sample is fetched
            steps+=[
                torchvision.transforms.RandomAffine(degrees=2.5,translate=(0.01,0.01),shear=1.5),
                torchvision.transforms.ColorJitter(brightness=0.1,contrast=0.1,saturation=0.1),
            ]
        steps+=[
            torchvision.transforms.CenterCrop(size=(72,72)),
            torchvision.transforms.Resize(size=(64,64)),
        ]
        self.transforms=torchvision.transforms.Compose(steps)

    def __len__(self):
        """Number of samples left after balancing and the optional max_len cap."""
        return len(self.images)

    def __getitem__(self,idx):
        """
        Load, transform and return the sample at position idx as (image,label).

        Raises ValueError when idx is not an integer (Python int or NumPy integer; bool is rejected)
        and IndexError when idx is out of range.
        """
        if isinstance(idx,bool) or not isinstance(idx,(int,np.integer)):
            raise ValueError(f'expected idx to be an integer, got {type(idx)}')
        idx=int(idx)
        with self.zf.open(self.images[idx],'r') as img:
            # the pixel data is copied into a fresh array before the transforms run
            # (presumably so ToTensor receives a writable array - TODO confirm)
            image=self.transforms(np.copy(np.asarray(PIL.Image.open(img))))
        label=self.labels[idx]
        return image,label

    def get_label_counts(self):
        """Return a dict mapping each class value to the number of samples carrying it."""
        counts={}
        for l in self.labels:
            key=l.item()
            counts[key]=counts.get(key,0)+1
        return counts

    def close(self):
        """Close the underlying zip archive; samples can no longer be fetched afterwards."""
        self.zf.close()

# The three splits live in the same archive; only the pickled index files differ.
train_dataset=PortClassificationDataset(
    root='/kaggle/input/port-dataset-for-classification/_output_.zip',
    pkl_images='train_images.pkl',
    pkl_labels='train_labels.pkl',
)
# the test split is used in full (no max_len cap)
test_dataset=PortClassificationDataset(
    root='/kaggle/input/port-dataset-for-classification/_output_.zip',
    pkl_images='test_images.pkl',
    pkl_labels='test_labels.pkl',
)
# the validation split is capped at 400 samples
valid_dataset=PortClassificationDataset(
    root='/kaggle/input/port-dataset-for-classification/_output_.zip',
    pkl_images='valid_images.pkl',
    pkl_labels='valid_labels.pkl',
    max_len=400,
)

opening zipped data...
loading labels...
loading images...
length of labels: 131119
opening zipped data...
loading labels...
loading images...
length of labels: 1235
opening zipped data...
loading labels...
loading images...
length of labels: 2308


In [3]:
# ***************************************************************************
# ---------------------- Custom Data Loader ---------------------------------
# ***************************************************************************
# PyTorch's data loader class did not work for the original object detection task we trained for.
# Thus, this class was created as a custom dataloader, and was never replaced once we switched to classification.
# As such, the training loop is subtly different from the typical PyTorch training loop.

class PortClassificationDataLoader():
    """
    Minimal shuffling batch loader around an indexable dataset of (image,label) tensor pairs.

    Batches are pulled with get_batch(); once the remaining samples of the current shuffle cannot
    fill a whole batch, a new shuffle is drawn and epochs_completed is incremented (a trailing
    partial batch is therefore never served).
    """
    def __init__(self,dataset,batch_size):
        """
        arguments
            dataset: object supporting len() and integer indexing that returns (image,label)
            batch_size: number of samples per batch (must be positive)
        """
        if batch_size<=0:
            raise ValueError(f'batch_size must be positive, got {batch_size}')
        self.dataset=dataset
        self.batch_size=batch_size
        self.epochs_completed=0
        # a random permutation of all sample positions
        self.current_batch_index_order=random.sample(range(len(self.dataset)),k=len(self.dataset))
        self.batch_index=0

    def _get_new_batch_order(self):
        """Draw a fresh shuffle, rewind to its start and count one more completed epoch."""
        self.current_batch_index_order=random.sample(range(len(self.dataset)),k=len(self.dataset))
        self.batch_index=0
        self.epochs_completed+=1

    def get_batch(self):
        """Return the next (images,labels) batch, both stacked along a new leading dimension."""
        # strictly greater: when exactly batch_size samples remain they still form a full batch
        if self.batch_index+self.batch_size>len(self.dataset):
            self._get_new_batch_order()
        batch_positions=self.current_batch_index_order[self.batch_index:self.batch_index+self.batch_size]
        # fetch every sample once; indexing the dataset separately for image and label
        # would load and transform each image twice
        samples=[self.dataset[idx] for idx in batch_positions]
        images=torch.stack([sample[0] for sample in samples],dim=0)
        labels=torch.stack([sample[1] for sample in samples],dim=0)
        self.batch_index+=self.batch_size
        return images,labels

    def reset(self):
        """Reshuffle, rewind to the first batch and set epochs_completed back to 0."""
        self._get_new_batch_order()
        # _get_new_batch_order() counted an epoch; a reset starts over from zero
        self.epochs_completed=0

    def get_percent_of_epoch_used(self):
        """Fraction (not a percentage) of the current shuffle that has already been handed out."""
        return self.batch_index/len(self.dataset)

In [4]:
# ***************************************************************************
# ---------------------- ResNet18 Implementation ----------------------------
# ***************************************************************************
# Inspired heavily by PyTorch's implementation, but customized for our use-case.

def conv3x3(in_planes:int,out_planes:int,stride:int=1,groups:int=1,dilation:int=1)->nn.Conv2d:
    """Bias-free 3x3 convolution whose padding is set equal to its dilation."""
    conv_options=dict(
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        groups=groups,
        bias=False,
    )
    return nn.Conv2d(in_planes,out_planes,**conv_options)


def conv1x1(in_planes:int,out_planes:int,stride:int=1)->nn.Conv2d:
    """Bias-free 1x1 (pointwise) convolution."""
    pointwise=nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
    return pointwise
    

class BottleneckBlock(nn.Module): # size halves iff we multiply channels by 2 (architecture decision, not code enforced)
    def __init__(self,in_channels,out_channels,stride):
        """
        Basic residual building block of the resnet.

        Main path: 1x1 conv (in_channels -> in_channels), 3x3 conv (in_channels -> out_channels,
        carrying the stride), 1x1 conv (out_channels -> out_channels). Each conv is followed by
        batch norm; ReLU follows the first two. The shortcut is added before the final ReLU.

        arguments
            in_channels: number of channels of the input
            out_channels: number of channels of the output
            stride: stride of the 3x3 convolution (and of the shortcut projection, when there is one)
        """
        super().__init__()
        self.conv1=conv1x1(in_channels,in_channels)
        self.bn1=nn.BatchNorm2d(in_channels)
        self.conv2=conv3x3(in_channels,out_channels,stride) # Use the Bottleneck approach (downsample input on 3x3 conv)
        self.bn2=nn.BatchNorm2d(out_channels)
        self.conv3=conv1x1(out_channels,out_channels)
        self.bn3=nn.BatchNorm2d(out_channels)
        self.relu=nn.ReLU(inplace=True)
        self.stride=stride
        if stride!=1 or in_channels!=out_channels:
            # reshape residual connection to match outputs (need to downsample and/or change channels)
            self.identity=nn.Sequential(
                conv1x1(in_channels,out_channels,stride),
                nn.BatchNorm2d(out_channels)
            )
        else:
            # nn.Identity rather than a lambda: it has no parameters (state_dict keys are unchanged)
            # and, unlike a lambda attribute, it does not prevent pickling the module
            self.identity=nn.Identity()


    def forward(self,x:torch.Tensor)->torch.Tensor:
        """Apply the three conv/batch-norm stages, add the shortcut and finish with a ReLU."""
        out=self.conv1(x)
        out=self.bn1(out)
        out=self.relu(out)

        out=self.conv2(out)
        out=self.bn2(out)
        out=self.relu(out)

        out=self.conv3(out)
        out=self.bn3(out)
        # residual connection: the (possibly projected) input is added before the last activation
        out+=self.identity(x)
        out=self.relu(out)

        return out


class ResNet18(nn.Module):
    def __init__(self,num_classes:int):
        """
        Creates a ResNet18 module with num_classes classes.

        Layout: a 7x7 stride-2 convolution on 3-channel input, batch norm, ReLU and max pooling,
        followed by four layers of two BottleneckBlocks each (64, 128, 256 and 512 output channels),
        global average pooling and a linear classifier.
        """
        super().__init__()
        self.num_classes=num_classes
        self.in_channels=64 # update the in_channels for the next layer after we make a layer

        # NOTE: this stem convolution uses no padding and keeps its bias; changing either would
        # alter output shapes / state_dict keys and break previously saved weights
        self.conv1=nn.Conv2d(in_channels=3,out_channels=self.in_channels,kernel_size=(7,7),stride=2)
        self.bn1=nn.BatchNorm2d(self.in_channels)
        self.relu=nn.ReLU(inplace=True)
        self.maxpool=nn.MaxPool2d(kernel_size=3,stride=2,padding=1)

        # the trailing comments give self.in_channels *before* the layer is built;
        # _make_layer sets it to the layer's out_channels afterwards
        self.layer1=self._make_layer(out_channels=self.in_channels,stride=1) #in_channels=64
        self.layer2=self._make_layer(out_channels=self.in_channels*2,stride=2) #in_channels=64
        self.layer3=self._make_layer(out_channels=self.in_channels*2,stride=2) #in_channels=128
        self.layer4=self._make_layer(out_channels=self.in_channels*2,stride=2) #in_channels=256

        self.avgpool=nn.AdaptiveAvgPool2d(output_size=(1,1))
        self.fc=nn.Linear(in_features=self.in_channels,out_features=self.num_classes)

        # only used by predict(); forward() returns raw logits
        self.softmax=nn.Softmax(dim=1)

        # initialize weights using Kaiming initialization
        self.apply(self._init_weights)


    def forward(self,x:torch.Tensor)->torch.Tensor:
        """Return the raw class logits (no softmax applied) for a batch of images."""
        x=self.conv1(x)
        x=self.bn1(x)
        x=self.relu(x)
        x=self.maxpool(x)

        x=self.layer1(x)
        x=self.layer2(x)
        x=self.layer3(x)
        x=self.layer4(x)

        x=self.avgpool(x)
        x=torch.flatten(x,1)
        x=self.fc(x)

        return x


    def predict(self,x):
        """
        Return the index of the most probable class for every sample of the batch.

        Runs under torch.no_grad(): the returned indices are not differentiable, so recording an
        autograd graph would only cost memory. The train/eval mode of the module is left untouched.
        """
        with torch.no_grad():
            probs=self.softmax(self.forward(x))
            return torch.argmax(probs,dim=1)


    def _make_layer(self,out_channels:int,stride:int)->nn.Sequential:
        """
        Makes a block layer.
        The first block has stride 1 to preserve the dimension of the input, and the second block has stride "stride" to achieve the output dimension.
        The first block is also the one that changes the channel count from self.in_channels to out_channels.
        """
        layer=nn.Sequential(
            BottleneckBlock(self.in_channels,out_channels,1),
            BottleneckBlock(out_channels,out_channels,stride)
        )
        # the next layer built will consume this layer's output
        self.in_channels=out_channels
        return layer


    def _init_weights(self,m):
        """Kaiming-normal weights and zero biases for conv and linear modules; other modules are left alone."""
        if isinstance(m,(nn.Conv2d,nn.Linear)):
            # Kaiming initialization (good for ReLU-based nets)
            nn.init.kaiming_normal_(m.weight, nonlinearity="relu")
            if m.bias is not None:
                nn.init.constant_(m.bias,0.0)

In [6]:
# ***************************************************************************
# --------------------------- Training Function -----------------------------
# ***************************************************************************

def train(model,loader,val_loader,epochs,criterion,optim,device,initial_lr=0.1):
    """
    Train the model on the given loader

    Args:
        model (torch.nn.Module): classifier to train.
        loader (PortClassificationDataLoader): data loader for the training set.
        val_loader (PortClassificationDataLoader): data loader for the validation set.
        epochs (int): training stops once loader.epochs_completed reaches this value.
        criterion (callable): loss function.
        optim (torch.optim): optimizer class (NOT an instance of an optimizer. e.g. torch.optim.SGD or torch.optim.ADAM)
        device (torch.device or string): device to use for training and evaluation.
        initial_lr (float): learning rate the optimizer starts with.

    Returns:
        tuple[list[float],list[float],list[float]]: the training loss of every batch, and the
        validation losses and validation accuracies of every validation run (one per completed
        epoch plus one every 250 batches).

    Side effects:
        writes /kaggle/working/model_weights.path after every epoch and
        /kaggle/working/best_weights.pth whenever the end-of-epoch validation accuracy improves;
        resets `loader` before returning.
    """
    # imported here because a bare `import tqdm` does not load the `tqdm.notebook` submodule
    from tqdm.notebook import tqdm as notebook_tqdm

    eval_every_n_batches=250 # extra validation runs inside an epoch
    long_window,short_window=35,15 # sizes of the two validation-loss moving averages
    min_batches_between_lr_updates=100
    stagnation_tolerance=0.005 # averages closer than this count as "loss stagnates"

    learning_rate=initial_lr
    print(f'[INFO] Training model with learning rate {learning_rate}...')
    optimizer=optim(model.parameters(),lr=learning_rate)

    model=model.to(device)
    pbar=notebook_tqdm(total=len(loader.dataset),desc=f'Epoch {loader.epochs_completed+1}/{epochs}')
    # start from the loader's own counter so a loader that was used before
    # does not trigger a spurious end-of-epoch on the first batch
    cur_epoch=loader.epochs_completed
    # batches processed since the learning rate was last lowered
    # (assumed unit of the ">100" threshold; the counter was previously never advanced)
    batches_since_lr_update=0
    best_accuracy=0
    train_losses=[]
    val_losses=[]
    val_accuracies=[]

    while loader.epochs_completed<epochs:
        # evaluate() leaves the model in eval mode, so training mode is restored every iteration
        model.train()
        inputs,labels=loader.get_batch()
        inputs=inputs.to(device)
        labels=labels.to(device)

        optimizer.zero_grad()
        logits=model(inputs)
        loss=criterion(logits,labels)

        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())
        batches_since_lr_update+=1

        # end of epoch triggers (get_batch() bumped the loader's epoch counter)
        if loader.epochs_completed>cur_epoch:
            val_loss,val_accuracy=evaluate(model,val_loader,criterion,device)
            val_losses.append(val_loss)
            val_accuracies.append(val_accuracy)
            print(f'Epoch {loader.epochs_completed} Validation Loss = {val_loss:.5f}\tValidation Accuracy = {val_accuracy:.4f}')
            cur_epoch=loader.epochs_completed
            # save weights
            # NOTE(review): '.path' looks like a typo for '.pth'; kept because other code may read this file name
            torch.save(model.state_dict(),'/kaggle/working/model_weights.path')
            # update best weights
            if val_accuracy>best_accuracy:
                torch.save(model.state_dict(),'/kaggle/working/best_weights.pth')
                best_accuracy=val_accuracy
        # validate and print the loss every 250 batches
        batch_number=loader.batch_index//loader.batch_size
        if batch_number%eval_every_n_batches==0:
            val_loss,val_accuracy=evaluate(model,val_loader,criterion,device)
            val_losses.append(val_loss)
            val_accuracies.append(val_accuracy)
            print(f'\tBatch {batch_number} of Epoch {loader.epochs_completed+1}: Validation Loss = {val_loss:.5f}\tValidation Accuracy = {val_accuracy:.4f}')

        # update learning rate if loss stagnates; the comparison is only meaningful once the long
        # window is full (with fewer entries than the short window both averages are identical)
        if (len(val_losses)>=long_window
                and loader.epochs_completed>10
                and batches_since_lr_update>min_batches_between_lr_updates):
            long_moving_avg_loss=np.mean(val_losses[-long_window:])
            short_moving_avg_loss=np.mean(val_losses[-short_window:])
            if abs(long_moving_avg_loss-short_moving_avg_loss)<stagnation_tolerance:
                batches_since_lr_update=0
                print(f'\t\tEpoch {loader.epochs_completed+1}: decreasing learning rate from {learning_rate} to {learning_rate/10}')
                learning_rate/=10
                # lower the rate in place so the optimizer keeps its accumulated state
                for group in optimizer.param_groups:
                    group['lr']=learning_rate

        # update progress bar: set the position directly (an extra update() would add 1 on top)
        pbar.n=int(loader.get_percent_of_epoch_used()*len(loader.dataset))
        pbar.set_description(f'Epoch {loader.epochs_completed+1}/{epochs}\tLoss = {loss.item():.5f}')
    pbar.close()
    # reset loader for if we train after this
    loader.reset()

    return train_losses,val_losses,val_accuracies


def evaluate(model,test_loader,criterion,device):
    """
    Evaluate the classifier on every sample of the loader's dataset exactly once.

    The dataset is walked in index order in chunks of test_loader.batch_size instead of pulling
    batches with get_batch(): get_batch() drops the samples that do not fill a batch and hands out
    a batch of the *next* shuffle when it rolls over, which skipped some samples and counted
    others twice.

    Args:
        model: classifier to evaluate (left in eval mode on `device`).
        test_loader (PortClassificationDataLoader): loader exposing `.dataset`, `.batch_size` and `.reset()`.
        criterion (callable): Loss function to use for evaluation. A criterion whose `reduction`
            attribute is 'sum' is taken to return a per-batch total; anything else is treated as a
            per-batch mean.
        device (torch.device): Device to use for evaluation.

    Returns:
        float: Average per-sample loss on the dataset.
        float: Accuracy on the dataset.

    Raises:
        ValueError: if the dataset is empty.
    """
    model=model.to(device)
    model.eval()
    dataset=test_loader.dataset
    batch_size=test_loader.batch_size
    num_total=len(dataset)
    if num_total==0:
        raise ValueError('cannot evaluate on an empty dataset')
    loss_is_batch_sum=getattr(criterion,'reduction','mean')=='sum'

    total_loss = 0.0
    num_correct = 0
    num_samples = 0
    with torch.no_grad():
        for start in range(0,num_total,batch_size):
            stop=min(start+batch_size,num_total)
            samples=[dataset[i] for i in range(start,stop)]
            inputs=torch.stack([sample[0] for sample in samples],dim=0).to(device)
            labels=torch.stack([sample[1] for sample in samples],dim=0).to(device)
            # compute the logits and loss
            logits=model(inputs)
            batch_loss=criterion(logits,labels).item()
            # weight a mean loss by the batch size so the final division yields a per-sample average
            total_loss+=batch_loss if loss_is_batch_sum else batch_loss*len(inputs)
            # compute the accuracy
            _,predictions=torch.max(logits,dim=1)
            num_correct+=(predictions==labels).sum().item()
            num_samples+=len(inputs)

    # reset the validation loader (keeps the post-condition epochs_completed == 0 callers had before)
    test_loader.reset()
    # compute the average loss and accuracy
    avg_loss=total_loss/num_samples
    accuracy=num_correct/num_samples

    return avg_loss,accuracy

In [7]:
# ***************************************************************************
# ----------------------------- Run Training --------------------------------
# ***************************************************************************

# training variables
loader=PortClassificationDataLoader(train_dataset,256)
val_loader=PortClassificationDataLoader(valid_dataset,50)
model=ResNet18(3)
# use the GPU when there is one, otherwise fall back to the CPU so the notebook still runs
device='cuda' if torch.cuda.is_available() else 'cpu'
# map_location='cpu' lets weights that were saved from a GPU load in any session;
# train(), evaluate() and confusion_matrix() move the model to `device` themselves
state_dict = torch.load('/kaggle/input/port-classifier/best_weights.pth',map_location='cpu')
model.load_state_dict(state_dict)
criterion=torch.nn.CrossEntropyLoss()
optim=torch.optim.Adam # the optimizer *class*; train() instantiates it

# training periods (coarsely train, then fine tune with smaller learning rate)
# train(model,loader,val_loader,9,criterion,optim,device,initial_lr=0.1)
# train(model,loader,val_loader,7,criterion,optim,device,initial_lr=0.01)
# train(model,loader,val_loader,7,criterion,optim,device,initial_lr=0.001)

In [8]:
# ***************************************************************************
# ------------------------------ Test Model ---------------------------------
# ***************************************************************************

# Score the restored model on the held-out test split, 32 samples per batch.
test_loader=PortClassificationDataLoader(dataset=test_dataset,batch_size=32)
criterion=torch.nn.CrossEntropyLoss()
avg_loss,avg_accuracy=evaluate(
    model=model,
    test_loader=test_loader,
    criterion=criterion,
    device=device,
)
print(f'average test-set accuracy: {avg_accuracy}')

In [10]:
# ***************************************************************************
# -------------------------- Further Evaluation -----------------------------
# ***************************************************************************

def confusion_matrix(model,dataset,device,batch_size=256):
    """
    Collect the sample indices behind each confusion-matrix entry.

    Args:
        model: classifier exposing eval(), to(device) and predict(batch) -> class indices.
        dataset: indexable dataset returning (image,label) with a `.labels` list whose
            entries expose `.item()`.
        device: device the model and the image batches are moved to.
        batch_size (int): number of images pushed through the model at once, so memory use
            does not grow with the size of the dataset.

    Returns:
        dict with the keys 0, 1, 2 and -1. For a class c (0, 1 or 2) the value holds index lists
            'FN': label is c but something else was predicted
            'FP': c was predicted but the label is something else
            'TP': label and prediction are both c
        The key -1 treats class 0 as "negative" and classes 1 and 2 as "positive":
            'FN': 0 predicted for a label 1 or 2
            'FP': 1 or 2 predicted for a label 0
            'TN': 0 predicted for a label 0
            'TP': 1 or 2 predicted correctly
        (a 1 predicted as 2, or the reverse, appears in none of the -1 lists).
    """
    model.eval()
    model.to(device)
    # ground-truth labels
    gt=[l.item() for l in dataset.labels]

    # predicted labels, computed chunk by chunk without recording autograd history
    preds=[]
    with torch.no_grad():
        for start in range(0,len(gt),batch_size):
            stop=min(start+batch_size,len(gt))
            batch=torch.stack([dataset[i][0] for i in range(start,stop)],dim=0).to(device)
            preds.extend(p.item() for p in model.predict(batch))

    classes=(0,1,2)
    per_class={c:{'FN':[],'FP':[],'TP':[]} for c in classes}
    general={'FN':[],'FP':[],'TN':[],'TP':[]}

    for i,(pred,truth) in enumerate(zip(preds,gt)):
        # ---------- per-class stats ---------- #
        for c in classes:
            if truth==c and pred!=c:
                per_class[c]['FN'].append(i)
            elif truth!=c and pred==c:
                per_class[c]['FP'].append(i)
            elif truth==c and pred==c:
                per_class[c]['TP'].append(i)
        # ---------- general stats ---------- #
        if pred==0 and truth!=0:
            # all false negatives
            general['FN'].append(i)
        elif pred!=0 and truth==0:
            # all false positives
            general['FP'].append(i)
        elif pred==0 and truth==0:
            # all true negatives
            general['TN'].append(i)
        elif pred==truth and pred in (1,2):
            # all true positives (1 or 2)
            general['TP'].append(i)

    return {
        0:per_class[0],
        1:per_class[1],
        2:per_class[2],
        -1:general
    }

# use the notebook-wide `device` (set in the training-setup cell) instead of a second hard-coded 'cuda'
stats=confusion_matrix(model,test_dataset,device)

In [None]:
def analyze_confusion_matrix(m,dataset):
    """
    Print the counts stored in a confusion_matrix() result, each also as a percentage of len(dataset).

    Args:
        m: dict with the keys 0, 1 and 2 (each holding 'FN', 'FP' and 'TP' index lists)
            and -1 (holding 'FN', 'FP', 'TN' and 'TP' index lists).
        dataset: the dataset the statistics were computed on; only its length is used.
    """
    print('Analyzing confusion matrix. See the function confusion_matrix() for explanations.')
    total=len(dataset)

    def _report(stats,keys):
        # one line per statistic; an empty dataset reports 0% instead of dividing by zero
        for key in keys:
            count=len(stats[key])
            percent=100*count/total if total else 0.0
            print(f'\tNumber of {key}:\t{count},\t% of total:\t{percent:.3f}%')

    class_labels=['background','empty','connected']
    for c in [0,1,2]:
        print(f'Class number: {c}, Class label: {class_labels[c]}')
        _report(m[c],('FN','FP','TP'))

    print('General statistics:')
    _report(m[-1],('FN','FP','TN','TP'))

# Print the per-class and overall summary for the test split.
analyze_confusion_matrix(m=stats,dataset=test_dataset)