In [1]:
import torch 
import torch.nn as nn 
import torch.optim as optim 

from PIL import Image, ImageFile 
ImageFile.LOAD_TRUNCATED_IMAGES = True

import cv2 

import os 
import numpy as np 
import pandas as pd 

import matplotlib.pyplot as plt 
import matplotlib.patches as patches 

from tqdm import tqdm


In [2]:
# Defining a function to calculate Intersection over Union (IoU) 
def iou(box1, box2, is_pred=True): 
	if is_pred: 
		# IoU score for prediction and label 
		# box1 (prediction) and box2 (label) are both in [x, y, width, height] format 
		
		# Box coordinates of prediction 
		b1_x1 = box1[..., 0:1] - box1[..., 2:3] / 2
		b1_y1 = box1[..., 1:2] - box1[..., 3:4] / 2
		b1_x2 = box1[..., 0:1] + box1[..., 2:3] / 2
		b1_y2 = box1[..., 1:2] + box1[..., 3:4] / 2

		# Box coordinates of ground truth 
		b2_x1 = box2[..., 0:1] - box2[..., 2:3] / 2
		b2_y1 = box2[..., 1:2] - box2[..., 3:4] / 2
		b2_x2 = box2[..., 0:1] + box2[..., 2:3] / 2
		b2_y2 = box2[..., 1:2] + box2[..., 3:4] / 2

		# Get the coordinates of the intersection rectangle 
		x1 = torch.max(b1_x1, b2_x1) 
		y1 = torch.max(b1_y1, b2_y1) 
		x2 = torch.min(b1_x2, b2_x2) 
		y2 = torch.min(b1_y2, b2_y2) 
		# Make sure the intersection is at least 0 
		intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0) 

		# Calculate the union area 
		box1_area = abs((b1_x2 - b1_x1) * (b1_y2 - b1_y1)) 
		box2_area = abs((b2_x2 - b2_x1) * (b2_y2 - b2_y1)) 
		union = box1_area + box2_area - intersection 

		# Calculate the IoU score 
		epsilon = 1e-6
		iou_score = intersection / (union + epsilon) 

		# Return IoU score 
		return iou_score 
	
	else: 
		# IoU score based on width and height of bounding boxes 
		
		# Calculate intersection area 
		intersection_area = torch.min(box1[..., 0], box2[..., 0]) * torch.min(box1[..., 1], box2[..., 1]) 

		# Calculate union area 
		box1_area = box1[..., 0] * box1[..., 1] 
		box2_area = box2[..., 0] * box2[..., 1] 
		union_area = box1_area + box2_area - intersection_area 

		# Calculate IoU score 
		iou_score = intersection_area / union_area 

		# Return IoU score 
		return iou_score

In [64]:
# Non-maximum suppression function to remove overlapping bounding boxes 
def nms(bboxes, iou_threshold, threshold): 
	# Filter out bounding boxes with confidence below the threshold. 
	bboxes = [box for box in bboxes if box[0] > threshold]

	# Sort the bounding boxes by confidence in descending order. 
	bboxes = sorted(bboxes, key=lambda x: x[0], reverse=True) 

	# Initialize the list of bounding boxes after non-maximum suppression. 
	bboxes_nms = [] 

	while bboxes: 
		# Get the first bounding box. 
		first_box = bboxes.pop(0) 

		# Iterate over the remaining bounding boxes. 
		for box in bboxes: 
		# If the bounding boxes do not overlap or if the first bounding box has 
		# a higher confidence, then add the second bounding box to the list of 
		# bounding boxes after non-maximum suppression. 
			print(box, first_box)
			if box[0] != first_box[0] or iou( 
				torch.tensor(first_box[2:]), 
				torch.tensor(box[2:]), 
			) < iou_threshold: 
				# Check if box is not in bboxes_nms 
				if box not in bboxes_nms: 
					# Add box to bboxes_nms 
					bboxes_nms.append(box) 

	# Return bounding boxes after non-maximum suppression. 
	return bboxes_nms

In [51]:
# Function to convert cells to bounding boxes 
def convert_cells_to_bboxes(predictions, anchors, s, is_predictions=True): 
	# Batch size used on predictions 
	batch_size = predictions.shape[0] 
	# Number of anchors 
	num_anchors = len(anchors) 
	# List of all the predictions 
	box_predictions = predictions[..., 1:] 

	# If the input is predictions then we will pass the x and y coordinate 
	# through sigmoid function and width and height to exponent function and 
	# calculate the score and best class. 
	if is_predictions: 
		anchors = anchors.reshape(1, len(anchors), 1, 1, 2) 
		box_predictions[..., 0:2] = torch.sigmoid(box_predictions[..., 0:2]) 
		box_predictions[..., 2:] = torch.exp( 
			box_predictions[..., 2:]) * anchors 
		scores = torch.sigmoid(predictions[..., 0:1])
	
	# Else we will just calculate scores and best class. 
	else: 
		scores = predictions[..., 0:1]

	# Calculate cell indices 
	cell_indices = ( 
		torch.arange(s) 
		.repeat(predictions.shape[0], 3, s, 1) 
		.unsqueeze(-1) 
		.to(predictions.device) 
	) 
 

	# Calculate x, y, width and height with proper scaling 
	x = 1 / s * (box_predictions[..., 0:1] + cell_indices) 
	y = 1 / s * (box_predictions[..., 1:2] + cell_indices.permute(0, 1, 3, 2, 4)) 
	width_height = 1 / s * box_predictions[..., 2:4] 

	# Concatinating the values and reshaping them in 
	# (BATCH_SIZE, num_anchors * S * S, 6) shape
	converted_bboxes = torch.cat( 
		(scores, x, y, width_height), dim=-1
	).reshape(batch_size, num_anchors * s * s, 5) 

	# Returning the reshaped and converted bounding box list 
	return converted_bboxes.tolist()

In [93]:
# Function to plot images with bounding boxes and class labels 
def plot_image(image, boxes): 

	# Reading the image with OpenCV 
	img = np.array(image) 
	# Getting the height and width of the image 
	h, w = img.shape 

	# Create figure and axes 
	fig, ax = plt.subplots(1) 

	# Add image to plot 
	ax.imshow(img, cmap='gray')

	# Plotting the bounding boxes and labels over the image 
	for box in boxes:
		# Get the center x and y coordinates 
		box = box[1:] 
		# Get the upper left corner coordinates 
		upper_left_x = box[0] - box[2] / 2
		upper_left_y = box[1] - box[3] / 2

		# Create a Rectangle patch with the bounding box 
		rect = patches.Rectangle( 
			(upper_left_x * w, upper_left_y * h), 
			box[2] * w, 
			box[3] * h, 
			linewidth=2, 
			edgecolor='red', 
			facecolor="none", 
		) 
		
		# Add the patch to the Axes 
		ax.add_patch(rect)

	# Display the plot 
	plt.show()

# Clustering

In [53]:
box_dims = []
with open('../data/AR-MOT/labels.csv', 'r', encoding='utf-8') as label_file:
    label_file.readline()
    for line in label_file.readlines():
        _, x, y, width, height = line.split(',')
        box_dims.append([float(width), float(height)])
box_dims = np.array(box_dims)
box_dims.shape

(4193, 2)

In [54]:
def one_vs_all_iou(anch_box, boxes):
    x = np.minimum(anch_box[0], boxes[:, 0])
    y = np.minimum(anch_box[1], boxes[:, 1])
    intersection = x * y
    anch_box_area = anch_box[0] * anch_box[1]
    boxes_area = boxes[:, 0] * boxes[:, 1]
    union = anch_box_area + boxes_area - intersection
    return intersection / union

In [55]:

def distance(point, points):
    return 1 - one_vs_all_iou(point, points)

In [56]:
def kmeans(samples, n_clusters, distance_func):
    n_samples = samples.shape[0]
    distances = np.empty((n_samples, n_clusters))
    last_clusters = np.zeros((n_samples))
    nearest_clusters = np.full((n_samples), -1)

    clusters = samples[np.random.choice(n_samples, n_clusters, replace=False)]

    while not (last_clusters == nearest_clusters).all():
        last_clusters = nearest_clusters
        for i in range(n_clusters):
            distances[:, i] = distance_func(clusters[i], samples)
        nearest_clusters = np.argmin(distances, axis=1)
        for i in range(n_clusters):
            clusters[i] = np.mean(samples[nearest_clusters == i], axis=0)

    return clusters, nearest_clusters, distances

In [57]:
clusters, nearest_clusters, distances = kmeans(box_dims, 9, distance_func=distance)
intercluster_mean_distance = np.mean(distances[np.arange(distances.shape[0]), nearest_clusters])

In [58]:
anchors = sorted(clusters, key=lambda x: x[0]*x[1])
anchors = np.array(anchors).reshape(3, 3, 2)

In [59]:
anchors

array([[[ 89.75458716, 106.92431193],
        [174.64      , 158.47703704],
        [289.96757458, 187.5693904 ]],

       [[221.38817481, 271.33161954],
        [358.9468599 , 263.64251208],
        [500.21971253, 321.72689938]],

       [[372.8490566 , 432.9509434 ],
        [639.91686461, 471.93349169],
        [961.578125  , 637.625     ]]])

In [65]:
# Create a dataset class to load the images and labels from the folder 
class Dataset(torch.utils.data.Dataset): 
	def __init__( 
		self, image_dir, labels_path, anchors, 
		image_size=416, grid_sizes=[13, 26, 52], original_image_size=4096
	):
		labels = []
		with open(labels_path, 'r', encoding='utf-8') as label_file:
			label_file.readline()
			label = []
			ix_prev = 1
			for line in label_file.readlines():
				ix, x1, y1, w, h = [float(a) / original_image_size for a in line.split(',')]
				if ix != ix_prev:
					labels.append(label)
					label = [[x1, y1, w, h]]
					ix_prev = ix
				else:
					label.append([x1, y1, w, h])
		self.labels = labels #np.array(labels, dtype=np.float32)
  
		# Image and label directories 
		self.image_dir = image_dir
		# Image size 
		self.image_size = image_size
		# Grid sizes for each scale 
		self.grid_sizes = grid_sizes 
		# Anchor boxes 
		self.anchors = anchors.reshape(-1, 2) / original_image_size
		# Number of anchor boxes 
		self.num_anchors = self.anchors.shape[0] 
		# Number of anchor boxes per scale 
		self.num_anchors_per_scale = self.num_anchors // 3
		# Ignore IoU threshold 
		self.ignore_iou_thresh = 0.5

	def __len__(self): 
		return len(self.label_list) 
	
	def __getitem__(self, idx):
		img_path = os.path.join(self.image_dir, str(idx+1).zfill(6) + '.jpg') 
		image = np.array(Image.open(img_path))
		targets = [torch.zeros((self.num_anchors_per_scale, s, s, 5)) 
				for s in self.grid_sizes]
		bboxes = self.labels[idx]
		for box in bboxes:
			iou_anchors = one_vs_all_iou(torch.tensor(box[2:4]), self.anchors)
			anchor_indices = iou_anchors.argsort(descending=True, dim=0) 
			x, y, width, height = box
			has_anchor = [False] * 3
			for anchor_idx in anchor_indices: 
				scale_idx = anchor_idx // self.num_anchors_per_scale 
				anchor_on_scale = anchor_idx % self.num_anchors_per_scale
				s = self.grid_sizes[scale_idx]
				i, j = int(s * y), int(s * x) 
				anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
				if not anchor_taken and not has_anchor[scale_idx]:
					targets[scale_idx][anchor_on_scale, i, j, 0] = 1
					x_cell, y_cell = s * x - j, s * y - i
					width_cell, height_cell = (width * s, height * s)
					box_coordinates = torch.tensor( 
										[x_cell, y_cell, width_cell, 
										height_cell] 
									)
					targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
					has_anchor[scale_idx] = True
				elif not anchor_taken and iou_anchors[anchor_idx] > self.ignore_iou_thresh:
					targets[scale_idx][anchor_on_scale, i, j, 0] = -1
		return image, tuple(targets)

In [66]:
dataset = Dataset('../data/AR-MOT/images', '../data/AR-MOT/labels.csv', anchors)

In [67]:
anchors = anchors / 4096

In [94]:
x, y = dataset[0]
boxes = [] 
for i in range(3): 
    anchor = anchors[i] 
    yy = torch.unsqueeze(y[i], 0)
    boxes += convert_cells_to_bboxes( 
               yy, is_predictions=False, s=y[i].shape[2], anchors=anchor 
             )[0] 
print(len(boxes))
# Applying non-maximum suppression 
# boxes = nms(boxes, iou_threshold=1, threshold=0.7) 

10647


In [None]:
plot_image(x, boxes * 4096)

KeyboardInterrupt: 