## Iterate through the training images and their corresponding bbox files to create a list of images where the target person overlaps with another person in the image.

In [1]:
import os
import json
import pickle
import cv2
import numpy as np
import torch
from itertools import combinations

from PIL import Image
from IPython.display import display

# Running total of image files seen across all processed directories
IMAGE_COUNT = 0
# Number of those files that are also listed in train_images
TRAIN = 0
# Accumulated "<filename>_<person_id>" names of people whose boxes overlap
OVERLAP_TRAIN_IMGS = []


In [8]:
## Load Training Dataset
# train_images is used by process_image_directory for membership tests
# on image file names.
# NOTE(review): pickle.load can execute arbitrary code -- only load a
# train_images.pkl that comes from a trusted source.
with open('train_images.pkl', 'rb') as f:
    train_images = pickle.load(f)

In [9]:
def process_image_directory(directory_input, iou_max=0.1):
    """Collect overlapping-person crop names for the train images of one folder.

    Every file in ``images/<directory_input>`` (except ``.DS_Store``) is
    counted in the global ``IMAGE_COUNT``.  Files that also appear in the
    global ``train_images`` are counted in ``TRAIN``, read from disk to
    validate their shape, and matched with their bounding-box annotations.
    The resulting ``<filename>_<person_id>`` names are appended to the
    global ``OVERLAP_TRAIN_IMGS``.

    Args:
        directory_input: Name of the sub-folder used under both ``images/``
            and ``bboxes/``.
        iou_max: IoU at or above which two boxes count as overlapping.

    Raises:
        OSError: If a train image cannot be read by OpenCV.
        ValueError: If a train image does not have shape (1024, 2048, 3).
    """
    global IMAGE_COUNT
    global TRAIN

    # '.DS_Store' is not an image, so it is left out of the count.
    images = [
        name for name in os.listdir('images/' + directory_input)
        if name != '.DS_Store'
    ]
    IMAGE_COUNT += len(images)

    for image_path in images:
        ## Find only train images
        if image_path not in train_images:
            continue
        TRAIN += 1

        ## Get Filename
        # Drops the last 16 characters of the file name
        # (presumably the '_leftImg8bit.png' suffix -- TODO confirm).
        filename = image_path[:-16]

        ## Read Image
        # The pixel data is only used for the sanity checks below.
        full_path = 'images/' + directory_input + '/' + image_path
        img = cv2.imread(full_path)
        # cv2.imread reports failure by returning None instead of raising.
        if img is None:
            raise OSError('Could not read image: ' + full_path)
        # Explicit raise instead of assert, which is stripped under -O.
        if img.shape != (1024, 2048, 3):
            raise ValueError(
                'Unexpected shape {} for image: {}'.format(img.shape, full_path)
            )

        ## Get Bounding Box Annotations
        bbox_list = get_bbox_annotations(directory_input, filename)

        ## Find overlapping bbox annotations
        overlap_list = find_overlap_single_image(bbox_list, iou_max)

        ## Get Filenames for images with overlapping bboxes
        overlap_imgs = get_overlap_img_filenames(overlap_list, filename)
        OVERLAP_TRAIN_IMGS.extend(overlap_imgs)


In [10]:
## Get Bounding Box Annotations
def get_bbox_annotations(directory_input, filename):
    """Return the ``bboxVis`` value of every pedestrian annotated for one image.

    Reads ``bboxes/<directory_input>/<filename>_gtBboxCityPersons.json`` and
    keeps, in file order, the ``bboxVis`` entry of each object whose
    ``label`` equals ``'pedestrian'``.
    """
    annotation_path = 'bboxes/' + directory_input + '/' + filename + '_gtBboxCityPersons.json'
    with open(annotation_path) as handle:
        annotations = json.load(handle)
        # Other labels are ignored; only pedestrians are kept.
        return [
            entry['bboxVis']
            for entry in annotations['objects']
            if entry['label'] == 'pedestrian'
        ]


def find_overlap_single_image(bbox_list, iou_max=0.1):
    """Return the sorted indices of boxes that overlap at least one other box.

    Args:
        bbox_list: Boxes in ``[x, y, width, height]`` form; each one is
            converted to corner form with ``wh_to_xy`` before comparison.
        iou_max: Two boxes count as overlapping when their IoU is greater
            than or equal to this value.

    Returns:
        Sorted list of unique indices into ``bbox_list``.
    """
    # Convert each box once instead of once per pair.
    corner_boxes = [wh_to_xy(bbox) for bbox in bbox_list]

    # A set gives O(1) membership tests and removes duplicates for free.
    overlapping = set()
    for first, second in combinations(range(len(corner_boxes)), 2):
        # If both boxes are already flagged, this pair cannot add anything.
        if first in overlapping and second in overlapping:
            continue
        if calc_iou(corner_boxes[first], corner_boxes[second]) >= iou_max:
            overlapping.update((first, second))

    return sorted(overlapping)



def wh_to_xy(bbox):
    """Convert an ``[x, y, width, height]`` box to ``[x0, y0, x1, y1]`` corners."""
    left, top = bbox[0], bbox[1]
    right = left + bbox[2]
    bottom = top + bbox[3]
    return [left, top, right, bottom]
    
    
    
def calc_iou(boxA, boxB):
    """Return the intersection-over-union of two ``[x0, y0, x1, y1]`` boxes.

    Every side length is measured as ``end - start + 1``, following the
    cited reference.

    citation: https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
    """
    ax0, ay0, ax1, ay1 = boxA[0], boxA[1], boxA[2], boxA[3]
    bx0, by0, bx1, by1 = boxB[0], boxB[1], boxB[2], boxB[3]

    # Side lengths of the intersection rectangle, clamped to zero when the
    # boxes do not overlap along that axis.
    overlap_w = max(0, min(ax1, bx1) - max(ax0, bx0) + 1)
    overlap_h = max(0, min(ay1, by1) - max(ay0, by0) + 1)
    intersection = overlap_w * overlap_h

    # Areas of the two input rectangles.
    area_a = (ax1 - ax0 + 1) * (ay1 - ay0 + 1)
    area_b = (bx1 - bx0 + 1) * (by1 - by0 + 1)

    # Union = both areas minus the part that was counted twice.
    union = float(area_a + area_b - intersection)
    return intersection / union


def get_overlap_img_filenames(overlap_list, filename):
    """Build the name ``<filename>_<person_id>`` for every id in ``overlap_list``.

    The names are returned in the same order as the ids.
    """
    ## Create filename for each skipped cropped img
    return [filename + '_' + str(person_id) for person_id in overlap_list]
    

In [11]:
# bbox_list = process_image_directory('demo', 0.01)

# OVERLAP_TRAIN_IMGS

## Run all images

In [12]:
## Reset the run-wide state so re-running this cell starts from scratch
OVERLAP_TRAIN_IMGS = []
TRAIN = 0
IMAGE_COUNT = 0

## Sub-folders of 'images/' that must not be scanned
ignore_directories = ['.DS_Store', 'demo', 'test']
# ignore_directories = ['.DS_Store']

## Process every sub-folder of 'images/' that is not listed in 'ignore_directories'
for directory in os.listdir('images/'):
    if directory in ignore_directories:
        continue
    print()
    print(directory)
    process_image_directory(directory, 0.1) #0.1 original

## Summary of the run
print()
print('------ COMPLETE ------')
print('Number of overlapping images:', len(OVERLAP_TRAIN_IMGS))
print()
print('IMAGE_COUNT', IMAGE_COUNT)
print('TRAIN', TRAIN)


zurich

strasbourg

weimar

munster

aachen

tubingen

jena

bochum

darmstadt

dusseldorf

hamburg

cologne

lindau

monchengladbach

frankfurt

krefeld

ulm

hanover

stuttgart

erfurt

bremen

------ COMPLETE ------
Number of overlapping images: 5039

IMAGE_COUNT 3475
TRAIN 2475


### Save list of filenames of overlapping train images

In [13]:
# Persist the collected names so they can be reloaded without re-running the scan
with open('overlap_train_images.pkl', 'wb') as f:
    pickle.dump(OVERLAP_TRAIN_IMGS, f)

In [8]:
# Reload the saved list of overlapping-image names from disk.
# NOTE(review): pickle.load can execute arbitrary code -- only load a
# file that comes from a trusted source.
with open('overlap_train_images.pkl', 'rb') as f:
    saved_overlap_images = pickle.load(f)

In [9]:
# Bare expression: the notebook renders the reloaded list as the cell output
saved_overlap_images

['aachen_000065_000019_4', 'aachen_000065_000019_5']

---
### Demo Example output

['demo_train/aachen_000065_000019_4', 'demo_train/aachen_000065_000019_5']
(4, 5)
boxA [1211, 406, 1241, 484]
boxB [1228, 405, 1262, 498]