In [None]:
# ***************************************************************************
# --------------------------------- Imports ---------------------------------
# ***************************************************************************

import numpy as np # linear algebra

import os # file handling
import json # file handling
import pickle # file handling

import torch # deep learning
import torchvision # deep learning for computer vision
from torch.utils.data import Dataset, DataLoader # shortcuts for writing dataset objects

import matplotlib.pyplot as plt # image saving

In [None]:
# ***************************************************************************
# ----------------- --Object Detection Dataset Class ------------------------
# ***************************************************************************

class PortDataset(Dataset):
    def __init__(self,root,pkl_images,pkl_targets):
        '''
        Object detection dataset over the original Port images.

        Args:
            root (str): folder that each image filename is joined onto when the image is read
            pkl_images: indexable collection of image filenames (already loaded, e.g. an unpickled list)
            pkl_targets: indexable collection, aligned with pkl_images, whose entries provide
                'boxes' and 'labels' values that torch.tensor can convert

        Note that roboflow did all the transforming before we downloaded the data. If we need more
        transformations, we can go back and download the unedited version, then implement our own.
        '''
        self.root,self.filenames,self.targets=root,pkl_images,pkl_targets

    def __len__(self):
        '''Number of images in the dataset.'''
        return len(self.filenames)

    def __getitem__(self,idx):
        '''
        Read image number idx and build its detection target.

        Returns:
            tuple: (float32 image tensor, {'boxes': tensor, 'labels': tensor})

        Raises:
            ValueError: if idx is not exactly a built-in int
        '''
        if not (type(idx) is int):
            raise ValueError(f'expected idx to be an integer, got {type(idx)}')
        # Image tensor (read from disk, then cast to float32)
        image_path=os.path.join(self.root,self.filenames[idx])
        image=torchvision.io.read_image(image_path).to(torch.float32)
        # Targets dictionary built from the stored record
        record=self.targets[idx]
        return image,{'boxes':torch.tensor(record['boxes']),'labels':torch.tensor(record['labels'])}

# ***************************************************************************
# ------------------ Image Classification Dataset Class ---------------------
# ***************************************************************************

class PortClassificationDataset(Dataset):
    def __init__(self,root,pkl_images,pkl_s):
        '''
        dataset for Port classification (sub-images of original PortDataset).
        
        Args:
            root (str): the root path of the folder where the images live
            pkl_images (list): the image filenames; each one is joined onto root when it is read
            pkl_s (list): the image classes, one entry per image in the same order as pkl_images.
                An entry may be a plain label (int or 0-d tensor) or a dict with a 'labels' key.
        
        Note that roboflow did some transforming before we downloaded the data. If we need more transformations, we can implement our own through a data loader.
        '''
        self.root=root
        self.images=pkl_images
        # Use the parameter that was actually passed in; the previous body read an
        # undefined name (pkl_labels) and failed with NameError on a fresh kernel.
        self.labels=pkl_s

    def __len__(self):
        '''Number of images in the dataset.'''
        return len(self.images)

    def _label_tensor(self,idx):
        '''Return the label stored at idx as a tensor, accepting dict entries and bare labels.'''
        entry=self.labels[idx]
        if isinstance(entry,dict):
            entry=entry['labels']
        if torch.is_tensor(entry):
            # copy so callers cannot modify the stored label in place
            return entry.detach().clone()
        return torch.tensor(entry)

    def __getitem__(self,idx):
        '''
        Read image number idx and return it with its label.

        Returns:
            tuple: (float32 image tensor, label tensor)

        Raises:
            ValueError: if idx is not exactly a built-in int
        '''
        if type(idx) is not int:
            raise ValueError(f'expected idx to be an integer, got {type(idx)}')
        # Image tensor
        image=torchvision.io.read_image(os.path.join(self.root,self.images[idx])).to(torch.float32)
        # Labels tensor
        label=self._label_tensor(idx)
        
        return image,label

In [None]:
# ***************************************************************************
# ---------- Turn COCO json Data into Usable Format (Lists) -----------------
# ***************************************************************************
# for each image we make a dictionary:
        # boxes: N rows of (x1,y1,x2,y2) (note the raw COCO data is in (x,y,width,height) format and is converted below)
        # labels: (N) Tensor element=class label

def pickle_data(subset,root,out_dir='/kaggle/working'):
    '''
    turns the data from port-by-ds-on-robolab into lists of filepaths, boxes, and labels which can be used by pytorch.

    Reads {root}/{subset}/_annotations.coco.json and writes two pickles into out_dir:
        {subset}_images.pkl: list of image paths (root/subset/file_name), ordered by COCO image id
        {subset}_targets.pkl: list of {'boxes': [[x1,y1,x2,y2],...], 'labels': [category_id,...]},
            aligned with the image list
    Images without any annotation are left out of both lists.

    Args:
        subset (str): 'test', 'train', or 'valid'
        root (str): folder containing the subset folders
        out_dir (str): folder the two pickle files are written to (created if missing)

    Raises:
        ValueError: if subset is not one of the three names, if an annotation points at an
            unknown image id, or if a box has non-positive width or height
    '''
    if subset not in ['test','train','valid']:
        raise ValueError(f'please enter the string "test", "valid", or "train". Received {subset}')
    
    # Read in COCO json data
    fpath=os.path.join(root,subset,'_annotations.coco.json')
    with open(fpath,encoding='utf-8') as f:
        data=json.load(f)
        
    # Order images by id and remember each id's position, so ids need not be exactly 0..N-1
    ordered=sorted(data['images'],key=lambda entry:entry['id'])
    position={entry['id']:pos for pos,entry in enumerate(ordered)}
    images=[os.path.join(root,subset,entry['file_name']) for entry in ordered]
        
    # Make targets: a list of distinct dictionaries (one per image)
    targets=[{'boxes':[],'labels':[]} for _ in ordered]
    
    # Populate targets
    for note in data['annotations']:
        image_id=note['image_id']
        if image_id not in position:
            raise ValueError(f'annotation refers to unknown image id {image_id}')
        # change bounding box representation from (x,y,w,h) (upper-left and size) to (x1,y1,x2,y2) (upper-left and bottom-right)
        x,y,w,h=note['bbox']
        bbox=[x,y,x+w,y+h]
        if bbox[0]>=bbox[2] or bbox[1]>=bbox[3]:
            raise ValueError(f'expected x1,y1 to be less than x2,y2 respectively. Got box {bbox}')
        # A box and its label are appended together, so the two lists always stay the same length
        target=targets[position[image_id]]
        target['boxes'].append(bbox)
        target['labels'].append(note['category_id'])

    # Remove images with no labeled boxes (single pass instead of repeated pop)
    kept=[(path,target) for path,target in zip(images,targets) if target['boxes']]
    images=[path for path,_ in kept]
    targets=[target for _,target in kept]
    
    # Save data
    os.makedirs(out_dir,exist_ok=True)
    with open(os.path.join(out_dir,f'{subset}_images.pkl'),'wb') as f:
        pickle.dump(images, f)
    with open(os.path.join(out_dir,f'{subset}_targets.pkl'),'wb') as f:
        pickle.dump(targets, f)

# generate pickled data for every split, in the order test, train, valid
for split_name in ('test','train','valid'):
    pickle_data(split_name,'/kaggle/input/port-by-ds-on-robolab')

In [None]:
# ***************************************************************************
# --------- Generate Classification Data from Object Detection Data ---------
# ***************************************************************************
# The data are stored in '/kaggle/working/classifier_{subset}/', where {subset} is train, test, or valid.
# Both the positive and negative samples are stored in that directory.
# Ordered lists of the image pathways (in the zipped folder) and their labels are also stored in the working directory for use in the dataset class.

def _load_pickle(path):
    '''Load one pickled object from path, closing the file afterwards.'''
    # NOTE(review): pickle.load can run arbitrary code; only point this at files this notebook wrote.
    with open(path,'rb') as f:
        return pickle.load(f)


def _overlaps_too_much(candidate,boxes,threshold):
    '''Return True if candidate=(x1,y1,x2,y2) shares at least `threshold` of its own area, or of a box's area, with any box.'''
    x1,y1,x2,y2=candidate
    candidate_area=(x2-x1)*(y2-y1)
    for x3,y3,x4,y4 in boxes:
        if x1<=x4 and y1<=y4 and x2>=x3 and y2>=y3: # rectangles touch or intersect
            shared_area=(min(x2,x4)-max(x1,x3))*(min(y2,y4)-max(y1,y3))
            fractions=[shared_area/candidate_area]
            box_area=(x4-x3)*(y4-y3)
            if box_area>0: # skip the second ratio for degenerate boxes (avoids division by zero)
                fractions.append(shared_area/box_area)
            if max(fractions)>=threshold: # one of the boxes is too covered
                return True
    return False


def _save_crop(path,crop):
    '''Write a (C,H,W) image tensor to path with matplotlib, as uint8 in (H,W,C) layout.'''
    plt.imsave(path,crop.permute((1,2,0)).detach().numpy().astype(np.uint8))


def generate_samples(subset,*,input_root='/kaggle/input/port-by-ds-on-robolab',working_dir='/kaggle/working',
                     crop_h=132,crop_w=132,overlap_threshold=0.05,max_attempts=60,min_box_area=4096,shrink=0.5):
    """
    Given the port images from port-by-ds-on-robolab intended for object detection, generates a classification dataset.
    Extracts the positive images specified by the bounding boxes, and generates background images by random sampling of 
    the original image, checking each time that there is minimal overlap with existing boxes.

    Reads {working_dir}/{subset}_images.pkl and {working_dir}/{subset}_targets.pkl (as written by pickle_data),
    writes the crops into {working_dir}/classifier_{subset}/, and then writes
    {working_dir}/{subset}_images.pkl (crop paths relative to working_dir) and
    {working_dir}/{subset}_labels.pkl (0-d tensors; 0 for background, the box label otherwise).

    Args:
        subset (str): name of the split folder under input_root (e.g. 'train', 'test', 'valid')
        input_root (str): folder holding the split folders of the detection data
        working_dir (str): folder holding the input pickles and receiving all outputs
        crop_h, crop_w (int): size of each background crop
        overlap_threshold (float): a background crop is rejected when the area it shares with a box is
            at least this fraction of the crop or of the box
        max_attempts (int): number of random background crops tried per image
        min_box_area (int): boxes with at most this area are skipped as too small
        shrink (float): fraction of each box dimension removed (half from each side) before cropping

    Raises:
        RuntimeError: if the image and target lists differ in length (see the note below)

    NOTE(review): this function overwrites {subset}_images.pkl, the detection image list it reads.
    The file name is kept because later cells may rely on it, but pickle_data must be re-run before
    calling this function again for the same subset.
    """
    root=os.path.join(input_root,subset)
    det_images=_load_pickle(os.path.join(working_dir,f'{subset}_images.pkl'))
    det_targets=_load_pickle(os.path.join(working_dir,f'{subset}_targets.pkl'))
    if len(det_images)!=len(det_targets):
        raise RuntimeError(f'{subset}_images.pkl has {len(det_images)} entries but {subset}_targets.pkl has '
                           f'{len(det_targets)}; re-run pickle_data before generate_samples')

    # make image folder; names stored in the pickled list stay relative to working_dir
    image_list_path=f'classifier_{subset}'
    os.makedirs(os.path.join(working_dir,image_list_path),exist_ok=True)

    ##############################################################################
    # Background images (regions of images that do not overlap with bounding boxes)

    print(f'generating negative {subset} images...')

    neg_images=[]
    for i,path in enumerate(det_images):
        # read each image once rather than once per attempt
        image=torchvision.io.read_image(os.path.join(root,path))
        n,m=image.shape[1:] # [H,W] (rows,cols)
        if n<crop_h or m<crop_w: # image cannot contain a full crop
            continue

        # One generator per image: deterministic, leaves the global numpy RNG untouched, and gives every
        # attempt a different box (the old per-attempt seed i*(attempt+1) was always 0 for image 0).
        rng=np.random.default_rng(i)
        boxes=det_targets[i]['boxes']
        for attempt in range(max_attempts):
            # pick box in range (high is exclusive, so +1 allows the last valid position)
            x1=int(rng.integers(low=0,high=m-crop_w+1))
            y1=int(rng.integers(low=0,high=n-crop_h+1))
            x2,y2=x1+crop_w,y1+crop_h

            # check for collisions
            if _overlaps_too_much((x1,y1,x2,y2),boxes,overlap_threshold):
                continue

            # save image (the 'train' in the file name is kept for every subset so file names stay stable)
            name=os.path.join(image_list_path,f'negative_train_{i}_attempt_{attempt}_dims_{crop_h}x{crop_w}.jpg')
            _save_crop(os.path.join(working_dir,name),image[:,y1:y2,x1:x2])
            neg_images.append(name)

    neg_labels=[torch.tensor(0) for _ in neg_images]

    #############################################################################
    # Port images (extract sub-images defined by bounding boxes of original data)

    print(f'generating positive {subset} images')

    dataset=PortDataset(root,det_images,det_targets)

    pos_images=[]
    pos_labels=[]
    for i in range(len(dataset)):
        image,targets=dataset[i]
        for j,box in enumerate(targets['boxes']):
            x1,y1,x2,y2=[int(v) for v in box.tolist()]
            if (x2-x1)*(y2-y1)<=min_box_area: # image too small
                continue

            # Notes on the cropping variants that were tried:
            #  * Expanding each box by a multiplicative factor (scale=1.35, clipped to the image) to help the
            #    model learn the surroundings of ports did not improve performance on the object detection task.
            #  * Extracting the boxes with no modifications did not produce usable results when running the
            #    image classifier through the sliding window.
            #  * Shrinking the boxes (the variant used below) reduced false positives in the classification
            #    test, but the model behaved strangely during object detection, only identifying background.
            #    Probably need to investigate this more.

            # remove area from the sides; with shrink=0.5 this keeps the centre half of each
            # dimension, i.e. the centre 25% of the box area
            x_remove=shrink*(x2-x1)/2
            y_remove=shrink*(y2-y1)/2
            x_min=int(x1+x_remove)
            x_max=int(x2-x_remove)
            y_min=int(y1+y_remove)
            y_max=int(y2-y_remove)

            # get sub-image; a very thin box can shrink to nothing, which cannot be saved
            sub_image=image[:,y_min:y_max,x_min:x_max]
            if sub_image.shape[1]==0 or sub_image.shape[2]==0:
                continue

            # save first, then record, so the lists only ever name files that exist
            name=os.path.join(image_list_path,f'image_{i}_box_{j}.jpg')
            _save_crop(os.path.join(working_dir,name),sub_image)
            pos_images.append(name)
            pos_labels.append(targets['labels'][j])

    # ***************************************************************************
    # --------------------------- Save List of Data -----------------------------
    # ***************************************************************************

    images=neg_images+pos_images
    labels=neg_labels+pos_labels
    with open(os.path.join(working_dir,f'{subset}_images.pkl'),'wb') as f:
        pickle.dump(images, f)
    with open(os.path.join(working_dir,f'{subset}_labels.pkl'),'wb') as f:
        pickle.dump(labels, f)


# build the classification crops for every split, in the order test, train, valid
for split_name in ('test','train','valid'):
    generate_samples(split_name)