In [7]:
import cv2
import numpy as np
from pathlib import Path
import json

In [8]:
def read_json(path_file):
    """
    Load and return the JSON document stored at ``path_file``.

    The file is opened in binary mode so that ``json.load`` detects the
    encoding itself (UTF-8 with or without BOM, UTF-16, UTF-32) instead of
    relying on the platform's default text encoding.

    Params:
    ------
    + path_file - path of the JSON file to read
    Returns:
    -------
    + the decoded Python object (dict, list, ...)
    """
    with open(path_file, "rb") as file_ptr:
        data = json.load(file_ptr)
    return data
def write_json(path_file, data):
    """
    Serialise ``data`` as JSON into ``path_file``.

    The target file is created or overwritten, and the document is
    pretty-printed with an indentation of four spaces.
    """
    with open(path_file, "w") as out_stream:
        json.dump(obj=data, fp=out_stream, indent=4)

In [11]:
class ImagePatcher():
    '''
    Splits an image (and, optionally, its bounding boxes) into square,
    overlapping patches and stitches such patches back into the image.

    Assumes that the input images to the patcher will be of same size

    generate_patches() records the image geometry on the instance (height,
    width, channels, dtype, number of rows/columns, strides and the
    top/bottom/left/right padding), so it must be called before
    generate_img_from_patches().

    Bounding boxes are [xmin, ymin, xmax, ymax] lists in pixel coordinates.
    '''
    def __init__(self, patch_sz, overlap):
        '''
        Params:
        ------
        + patch_sz - side length of the square patch in pixels
        + overlap - overlap between two neighbouring patches in percentage
        '''
        self.window_sz = patch_sz
        self.overlap = overlap
    
    def calculate_padding_for_same_pad(self, input_sz, kernel_sz, stride):
        '''
        Calculate required padding such that kernel convolves on the
        entire input

        Returns:
        -------
        + pad for the low-priority side (gets the smaller half of the pad)
        + pad for the high-priority side (gets the remainder)
        '''
        output_sz = np.ceil(input_sz / stride)
        total_pad = (output_sz - 1) * stride - input_sz + kernel_sz
        
        low_priority_pad = total_pad // 2
        high_priority_pad = total_pad - low_priority_pad
        return low_priority_pad, high_priority_pad
    
    def get_num_windows_along_axis(self, axis_len, window_len, overlap):
        '''
        Params:
        ------
        + axis_len - number of pixels along an axis of image
        + window_len - number of pixels along an axis of window
        + overlap - overlap between two windows in percentage
        Returns:
        -------
        + number of windows along the axis
        + stride - number of pixels for the jump
        + number of pad with low priority (left pad along width, top pad along height)
        + number of pad with high priority (right pad along width, bottom pad along height)
        Raises:
        ------
        + ValueError - if window_len and overlap give a stride below one pixel
        '''
        stride = np.floor(window_len * (100 - overlap) * 0.01)
        if stride < 1:
            # A zero/negative stride would divide by zero below and never advance.
            raise ValueError(
                "window_len={} with overlap={}% gives a stride of {} px; the stride must be at least 1 px".format(
                    window_len, overlap, int(stride)))
        low_priority_pad, high_priority_pad = self.calculate_padding_for_same_pad(input_sz=axis_len, kernel_sz=window_len, stride=stride)
        num_windows = int((axis_len - window_len + low_priority_pad + high_priority_pad) // stride) + 1
        return int(num_windows), int(stride), int(low_priority_pad), int(high_priority_pad)
    
    def is_inside_bbox(self, point, bbox):
        '''
        Return True when point (x, y) lies inside bbox
        (xmin, ymin, xmax, ymax); the borders count as inside.
        '''
        xmin, ymin, xmax, ymax = bbox
        point_x, point_y = point
        return xmin <= point_x <= xmax and ymin <= point_y <= ymax
    
    def _iter_patch_windows(self):
        '''
        Yield (row_idx, col_idx, xmin, xmax, ymin, ymax) for every patch
        window, row by row, in padded-image pixel coordinates.
        '''
        for row_idx in range(self.num_rows):
            patch_ymin = row_idx * self.row_stride
            for col_idx in range(self.num_cols):
                patch_xmin = col_idx * self.col_stride
                yield (row_idx, col_idx,
                       patch_xmin, patch_xmin + self.window_sz,
                       patch_ymin, patch_ymin + self.window_sz)
    
    def get_patch_annotations(self, patch_xmin, patch_xmax, patch_ymin, patch_ymax, annotations):
        '''
        Collect the bounding boxes that intersect a patch.

        Params:
        ------
        + patch_xmin, patch_xmax, patch_ymin, patch_ymax - patch extent in
          padded-image coordinates
        + annotations - boxes [xmin, ymin, xmax, ymax] in original-image
          coordinates (expects xmin <= xmax and ymin <= ymax)
        Returns:
        -------
        + list of boxes clipped to the patch, in patch-local coordinates
        '''
        patch_bboxs = []
        for bbox in annotations:
            xmin, ymin, xmax, ymax = bbox
            # Shift from original-image to padded-image coordinates.
            xmin = xmin + self.left_pad
            xmax = xmax + self.left_pad
            ymin = ymin + self.top_pad
            ymax = ymax + self.top_pad
            
            # Rectangle-overlap test. Testing only whether a box corner lies
            # inside the patch misses boxes that cover the patch or cross it
            # without any corner falling inside.
            if xmin > patch_xmax or xmax < patch_xmin or \
               ymin > patch_ymax or ymax < patch_ymin:
                continue
            
            patch_bbox_xmin = max(xmin, patch_xmin) - patch_xmin
            patch_bbox_ymin = max(ymin, patch_ymin) - patch_ymin
            patch_bbox_xmax = min(xmax, patch_xmax) - patch_xmin
            patch_bbox_ymax = min(ymax, patch_ymax) - patch_ymin
            patch_bboxs.append([patch_bbox_xmin, patch_bbox_ymin, patch_bbox_xmax, patch_bbox_ymax])
        return patch_bboxs

    def generate_patches(self, path_img_file, path_json_file=None, path_patch_dir="dataset/patches/"):
        '''
        Generate patches from image provided

        The image is zero-padded so that the windows cover it completely,
        then every window is written to
        <path_patch_dir>/<image stem>_<row>_<col>.jpg. When path_json_file
        is given, the boxes intersecting each patch are written next to it
        as <image stem>_<row>_<col>.json.

        Params:
        ------
        + path_img_file - path of the image to split
        + path_json_file - optional path of a JSON list of
          [xmin, ymin, xmax, ymax] boxes for the image
        + path_patch_dir - output directory (created when missing)
        Raises:
        ------
        + FileNotFoundError - if the image cannot be read
        '''
        img = cv2.imread(str(path_img_file))
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError("Could not read image: {}".format(path_img_file))
        annotations = read_json(path_json_file) if path_json_file is not None else None

        self.height, self.width, self.channels = img.shape
        self.dtype = img.dtype
        # Rows step along the height, so that axis yields the top/bottom pads;
        # columns step along the width and yield the left/right pads.
        self.num_rows, self.row_stride, self.top_pad, self.bottom_pad = self.get_num_windows_along_axis(axis_len=self.height, window_len=self.window_sz, overlap=self.overlap)
        self.num_cols, self.col_stride, self.left_pad, self.right_pad = self.get_num_windows_along_axis(axis_len=self.width, window_len=self.window_sz, overlap=self.overlap)
        
        self.padded_img_height = self.height + self.top_pad + self.bottom_pad
        self.padded_img_width = self.width + self.left_pad + self.right_pad
        padded_img = np.zeros(shape=(self.padded_img_height, self.padded_img_width, self.channels), dtype=self.dtype)
        padded_img[self.top_pad:self.top_pad+self.height, self.left_pad:self.left_pad+self.width, :] = img

        path_patch_dir = Path(path_patch_dir)
        path_patch_dir.mkdir(parents=True, exist_ok=True)
        image_filename = Path(path_img_file).stem

        for row_idx, col_idx, patch_xmin, patch_xmax, patch_ymin, patch_ymax in self._iter_patch_windows():
            patch = padded_img[patch_ymin:patch_ymax, patch_xmin:patch_xmax, :] # slice
            
            patch_name = "{}_{}_{}".format(image_filename, row_idx, col_idx)
            if annotations is not None:
                patch_bboxs = self.get_patch_annotations(patch_xmin, patch_xmax, patch_ymin, patch_ymax, annotations)
                write_json(path_file="{}/{}.json".format(path_patch_dir, patch_name), data=patch_bboxs)
            cv2.imwrite("{}/{}.jpg".format(path_patch_dir, patch_name), patch)
    
    def generate_img_from_patches(self, path_patch_dir, img_name, path_annotation_dir=None):
        '''
        Stitch the patches of an image back together.

        Uses the geometry recorded by the last generate_patches() call.
        Writes padded_img_from_patches.png and original_img_from_patches.png
        to the working directory. When path_annotation_dir is given, the
        per-patch boxes are mapped back to original-image coordinates and
        original_img_bboxes.json, original_img_from_patches_with_bboxs.png
        and "mask from patches.png" are written as well.

        Params:
        ------
        + path_patch_dir - directory holding <img_name>_<row>_<col>.jpg
        + img_name - image stem used when the patches were generated
        + path_annotation_dir - optional directory holding the matching
          <img_name>_<row>_<col>.json files
        Raises:
        ------
        + FileNotFoundError - if a patch image cannot be read
        '''
        padded_img = np.zeros(shape=(self.padded_img_height, self.padded_img_width, self.channels), dtype=self.dtype)
        img_bboxs = []
        for row_idx, col_idx, patch_xmin, patch_xmax, patch_ymin, patch_ymax in self._iter_patch_windows():
            patch_name = "{}_{}_{}".format(img_name, row_idx, col_idx)
            path_patch_file = "{}/{}.jpg".format(path_patch_dir, patch_name)
            patch = cv2.imread(path_patch_file)
            if patch is None:
                raise FileNotFoundError("Could not read patch: {}".format(path_patch_file))
            
            # Overlapping regions are simply overwritten by the later patch.
            padded_img[patch_ymin:patch_ymax, patch_xmin:patch_xmax, :] = patch
            if path_annotation_dir is not None:
                patch_bboxs = read_json("{}/{}.json".format(path_annotation_dir, patch_name))
                for bbox in patch_bboxs:
                    xmin, ymin, xmax, ymax = bbox
                    # Patch-local -> padded image -> original image, clipped
                    # to the original image extent.
                    xmin = max(xmin + patch_xmin - self.left_pad, 0)
                    ymin = max(ymin + patch_ymin - self.top_pad, 0)
                    xmax = min(xmax + patch_xmin - self.left_pad, self.width)
                    ymax = min(ymax + patch_ymin - self.top_pad, self.height)
                    img_bboxs.append([xmin, ymin, xmax, ymax])
        
        cv2.imwrite("padded_img_from_patches.png", padded_img)
        # Copy the crop: the slice is a non-contiguous view, and the drawing
        # calls below need an array they can modify in place.
        original_img = padded_img[self.top_pad:self.top_pad+self.height, self.left_pad:self.left_pad+self.width, :].copy()
        cv2.imwrite("original_img_from_patches.png", original_img)
        if path_annotation_dir is not None:
            mask = np.zeros_like(original_img)
            for bbox in img_bboxs:
                original_img = cv2.rectangle(original_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 255), 1)
                mask = cv2.rectangle(mask, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), -1)
            write_json("./original_img_bboxes.json", img_bboxs)
            cv2.imwrite("original_img_from_patches_with_bboxs.png", original_img)
            cv2.imwrite("mask from patches.png", mask)
        
    

In [12]:
# Cut DJI_0367.JPG into 224-pixel square patches with 25 % overlap and write
# each patch together with the bounding boxes that fall into it.
# NOTE: DJI_0367.json is produced by the bbox-conversion cell further down
# (write_json("./DJI_0367.json", ...)), so that cell has to be run first.
image_patcher = ImagePatcher(patch_sz=224, overlap=25)
image_patcher.generate_patches(path_img_file="./DJI_0367.JPG", path_json_file="DJI_0367.json")
# Reassemble the image and its boxes from the patches written above.
image_patcher.generate_img_from_patches(path_patch_dir="dataset/patches/", img_name="DJI_0367", path_annotation_dir="dataset/patches/")

### Convert bbox from small image to large image

In [4]:
def convert_to_diagonal_points(bbox):
    """
    Turn a [top left x, top left y, width, height] box into its two
    diagonal corners.

    Returns the tuple ((xmin, ymin), (xmax, ymax)).
    """
    left, top, box_w, box_h = bbox
    return (left, top), (left + box_w, top + box_h)

def convert(im_w, im_h, x_min, x_max, y_min, y_max):
    """
    Express a corner-format box as a normalised centre-format box.

    Returns (x, y, w, h): the box centre and size, each scaled by the
    reciprocal of the image width (x, w) or image height (y, h).
    """
    # yolo format
    # https://mrtpk.github.io/deep-learning/2019/09/30/YOLO-annotation.html
    inv_w = 1./im_w
    inv_h = 1./im_h
    center_x = (x_min + x_max)/2.0
    center_y = (y_min + y_max)/2.0
    box_w = x_max - x_min
    box_h = y_max - y_min
    return (center_x*inv_w, center_y*inv_h, box_w*inv_w, box_h*inv_h)

def deconvert(im_w, im_h, x, y, w, h):
    """
    Map a normalised centre-format box back to pixel corners.

    x, y, w, h are converted with float() and scaled by the image width
    (x, w) or height (y, h). Returns [xmin, ymin, xmax, ymax], each
    truncated with int().
    """
    center_x = float(x)*im_w
    center_y = float(y)*im_h
    box_w = float(w)*im_w
    box_h = float(h)*im_h
    right = (((2*center_x)+box_w)/2)
    bottom = (((2*center_y)+box_h)/2)
    left = right-box_w
    top = bottom-box_h
    return [int(edge) for edge in (left, top, right, bottom)]

In [5]:
# Load the small annotated sample image together with its annotation file
# (COCO-style JSON, judging by the file name and the keys used below).
path_annotated_img = "./sample_dataset_with_annotations/DJI_0367_JPG.rf.d83afb6717d6b0fd215e19e8eec1267b.jpg"
path_annotated_json = "./sample_dataset_with_annotations/_annotations.coco.json"
annotaed_img = cv2.imread(path_annotated_img)
annotations = read_json(path_annotated_json)
# Show the image records from the JSON next to the loaded array's shape.
annotations['images'], annotaed_img.shape

([{'id': 0,
   'license': 1,
   'file_name': 'DJI_0367_JPG.rf.d83afb6717d6b0fd215e19e8eec1267b.jpg',
   'height': 1536,
   'width': 2048,
   'date_captured': '2021-06-02T17:58:51+00:00'}],
 (1536, 2048, 3))

In [6]:
# This code converts bbox in small image to bbox in large image.
# Each [x, y, width, height] box is normalised against the small image's size
# and then rescaled to the large image's size.
annotaed_img_viz = np.copy(annotaed_img).astype(np.uint8)
# Read the small image's size from the array instead of hard-coding it.
small_img_h, small_img_w = annotaed_img.shape[:2]
big_img = cv2.imread("DJI_0367.JPG")
if big_img is None:
    # cv2.imread signals a missing/unreadable file by returning None.
    raise FileNotFoundError("Could not read image: DJI_0367.JPG")
big_img_h, big_img_w = big_img.shape[:2]
bboxes_big = []
for annotation in annotations['annotations']:
    bbox = annotation['bbox'] #  [x,y,width,height]
    
    start_point, end_point = convert_to_diagonal_points(bbox=bbox)
    x,y,w,h = convert(im_w=small_img_w, im_h=small_img_h, x_min=start_point[0], x_max=end_point[0], y_min=start_point[1], y_max=end_point[1])
    xmin_big, ymin_big, xmax_big, ymax_big = deconvert(im_w=big_img_w, im_h=big_img_h, x=x, y=y, w=w, h=h)
    
    bboxes_big.append([xmin_big, ymin_big, xmax_big, ymax_big])
    # Box coordinates in the JSON may be floats; cv2.rectangle needs integer points.
    start_px = (int(start_point[0]), int(start_point[1]))
    end_px = (int(end_point[0]), int(end_point[1]))
    annotaed_img_viz = cv2.rectangle(annotaed_img_viz, start_px, end_px, (0, 255, 0), 1)
    big_img = cv2.rectangle(big_img, (xmin_big, ymin_big), (xmax_big, ymax_big), (0, 255, 0), 1)
cv2.imwrite("annoated_img_viz.png", annotaed_img_viz)
cv2.imwrite("annoated_big_img_viz.png", big_img)
write_json("./DJI_0367.json", bboxes_big)