In [2]:
import cv2
import numpy as np
import random
import torch
from PIL import Image
import json
import os

random.seed(42)

In [36]:
def image_preprocess(image_path, resized_height=1600, resized_width=1200):
    #resize img
    image = cv2.imread(image_path)
    overlay_image = cv2.resize(image, (resized_width, resized_height))

    # black background overlay
    resized_dimension = resized_height
    black_background = np.zeros((resized_dimension, resized_dimension, 3), dtype=np.uint8)
    overlay_height, overlay_width = overlay_image.shape[:2]
    black_background[0:overlay_height, 0:overlay_width] = overlay_image

    return black_background

In [3]:
def add_random_offset(point, max_offset):
        return point + random.randint(-max_offset, max_offset)

In [4]:
def random_rgb_color():
    r = random.randint(0, 175)
    g = random.randint(0, 175)
    b = random.randint(0, 175)
    return (r, g, b)

In [5]:
def draw_broken_box_with_subboxes(background, rect_start, width, height_in_lines, thickness, delta, l1, l2, max_offset=10, color=(0, 255, 0), opacity=0.5):
    """
    Draw a rectangular box broken into smaller subboxes with slight irregularities in width and height.
    
    Parameters:
        background (OpenCV Image): OpenCV Matrix representing the (already resized) image.
        rect_start (tuple): Starting coordinate (x, y) of the rectangle (top-left corner).
        width (int): Width of the overall rectangle.
        height (int): Height of the overall rectangle.
        thickness (int): Height of each smaller subbox.
        delta (int): Reduction in height for each smaller subbox.
        max_offset (int): Maximum random offset for width irregularity (in pixels).
        color (tuple): Color of the rectangle (BGR format).
        opacity (float): Opacity of the shape (0: fully transparent, 1: fully opaque).
        
    Returns:
        result_img (ndarray): Image with the broken box and irregular subboxes.
        bbox (dict): Metadata of the bounding box for YOLO.
    """
    # height in pixels
    height_in_pixels = height_in_lines*thickness
    
    # Create an overlay image (same size as the background)
    overlay = background.copy()

    # Define the top-left starting point of the rectangle
    x, y = rect_start
    
    # Number of subboxes along the height
    num_subboxes = height_in_lines
    print(f"num_subboxes = {num_subboxes}")
    img_height, img_width, _ = overlay.shape
    print(f"img_width = {img_width}, img_height = {img_height}")
    # Initialize the bounding box metadata
    # every object in the labels file has the following line: <class-index> <x1> <y1> <x2> <y2> ... <xn> <yn>.
    # it is not necessary that each object segment has the same number of x,y points
    # However, there should be minimum of 3 x,y points for each object
    # in our case, the segmenting polygon of a highlight will be defined using 8 coordinates
    
    bbox = {
        "class": 0,                                            # Assuming class 0 for YOLO
        "poly": [
            [(x+l1)/img_width, (y+thickness)/img_height],
            [(x+l1)/img_width, y/img_height],
            [(x+width)/img_width, y/img_height],
            [(x+width)/img_width, (y+height_in_pixels-thickness)/img_height],
            [(x+width-l2)/img_width, (y+height_in_pixels-thickness)/img_height],
            [(x+width-l2)/img_width, (y+height_in_pixels)/img_height],
            [x/img_width, (y+height_in_pixels)/img_height],
            [x/img_width, (y+thickness)/img_height]
        ]
        #"x_center": (x + width / 2) / background.shape[1],  # Normalize x center
        #"y_center": (y + height_in_pixels / 2) / background.shape[0],  # Normalize y center
        #"width": width / background.shape[1],                # Normalize width
        #"height": height_in_pixels / background.shape[0],              # Normalize height
    }
    

    # Iterate over each subbox to create the broken box pattern
    for i in range(num_subboxes):
        # Starting Y position of the current subbox
        subbox_start_y = y + i * thickness
        # Ending Y position of the current subbox (apply delta to reduce height)
        subbox_end_y = subbox_start_y + thickness - delta
            
        # Apply random width variation
        random_width_start = add_random_offset(x, max_offset)
        random_width_end = add_random_offset(x + width, max_offset)

        # Using half of random width offset as the random height offset
        random_height_offset = max_offset//2

        if(i == 0):
            random_width_start += l1
        elif(i == num_subboxes-1):
            random_width_end -= l2
        
        # Define the points of the subbox (irregular rectangle)
        pts = np.array([
            [add_random_offset(random_width_start, max_offset), add_random_offset(subbox_start_y, random_height_offset)],  # Top-left
            [add_random_offset(random_width_end, max_offset), add_random_offset(subbox_start_y, random_height_offset)],    # Top-right
            [add_random_offset(random_width_end, max_offset), add_random_offset(subbox_end_y, random_height_offset)],      # Bottom-right
            [add_random_offset(random_width_start, max_offset), add_random_offset(subbox_end_y, random_height_offset)]     # Bottom-left
        ], np.int32)
        
        # Draw the subbox on the overlay
        cv2.fillPoly(overlay, [pts], color)
    
    # Perform alpha blending (opacity) between the background and overlay
    result_img = cv2.addWeighted(overlay, opacity, background, 1 - opacity, 0)

    return result_img, bbox

In [6]:
training_custom_metadata = './background_images/train/metadata_train.json'

with open(training_custom_metadata, 'r') as f:
    metadata = json.load(f)   

In [7]:
import math
training_dataset_path = "./background_images/train"
training_bbox_data = './generated_dataset/labels/train/'
generated_train_dataset_path = "./generated_dataset/images/train/"
variations_per_image = 15

for image in metadata["images"]:
    image_filename = image["filename"]
    image_path = os.path.join(training_dataset_path, image_filename)
    # bg_img = image_preprocess(image_path, 1600)
    bg_img = cv2.imread(image_path)
    intermediate_img = bg_img

    # Save each variation with a unique filename
    filename, extension = os.path.splitext(image_filename)
    
    for var in range(variations_per_image):
        intermediate_img = bg_img

        # create a path string representing a fresh file for this image variation
        result_image_name = f"{filename}_variation{var}{extension}"
        result_metadata_name = f"{filename}_variation{var}.txt"
        training_bbox_data_file = f"{training_bbox_data}{result_metadata_name}"

        # flush previous contents if present
        with open(training_bbox_data_file,"w") as f:
            f.write("")
        
        with open(training_bbox_data_file, "a") as f:
            for box_num, box in enumerate(image["boxes"]):
                # 10% chance there is no highlight in this paragraph/box
                is_not_highlighted = random.randint(0, 9)
                if(is_not_highlighted<1):
                    continue
                
                # Get initial box parameters
                original_rect_start = box["rect_start"][:]
                width = box["width"]
                height = box["height"]
                thickness = box["thickness"]
                delta = box["line_spacing"]
                
                start_line = 0 # absolute line number from where highlight begins 
                max_lines = round(height // thickness, 0)
                lines_left_in_box = max_lines
                
                while lines_left_in_box>0:  
                    # 20% chance there is no highlight in the rest of the box
                    is_not_highlighted = random.randint(1, 10)
                    if(is_not_highlighted < 3):
                        lines_left_in_box = 0
                        continue
                        
                    # else, set random starting position within the remaining rectangle bounds
                    if(start_line == max_lines-1):
                        random_line_for_highlight_start = start_line
                    else:
                        random_line_for_highlight_start = random.randint(start_line, max_lines-1) # measured in number of lines, not pixels.
                    
                    lines_left_in_box = max_lines - random_line_for_highlight_start
                    variation_rect_start = original_rect_start[:]
                    variation_rect_start[1] += thickness*random_line_for_highlight_start
                    highlight_height =  random.randint(1, lines_left_in_box) # height of highlight-box in number of lines
                    
                    # new values for start_line and lines_left_in_box
                    start_line = random_line_for_highlight_start + highlight_height
                    lines_left_in_box = max_lines - start_line
    
                    # Ensure the rectangle is within the image bounds
                    #variation_rect_start[1] = min(variation_rect_start[1], resized_img_height - adjusted_height)
                    
                    l1, l2 = random.randint(0, 100), random.randint(0, 100)
                    max_offset = random.randint(0, 10)
                    color = random_rgb_color()
                    opacity = random.uniform(0.2, 0.6)
    
                    print(f"box = {box_num} \n rect_start = {variation_rect_start}\n width = {width} \n height = {highlight_height} \n opacity= {opacity}")
                    # Draw the adjusted rectangle
                    intermediate_img, bbox_metadata = draw_broken_box_with_subboxes(
                        intermediate_img, variation_rect_start, width, highlight_height, thickness, delta, l1, l2, max_offset, color, opacity
                    )

                    # create line to be appended to the image label file
                    line_to_be_appended = f"{bbox_metadata['class']} "
                    spaces=0
                    print(bbox_metadata)
                    for coord in bbox_metadata["poly"]:
                        line_to_be_appended = line_to_be_appended + f"{coord[0]} {coord[1]}"
                        if(spaces<8):
                            line_to_be_appended = line_to_be_appended + " "
                        spaces += 1
                    # append that line to the image path
                    f.write(f"{line_to_be_appended}\n")
                    
            result_image_filepath = os.path.join(generated_train_dataset_path, result_image_name)
            print(f"\n")

            (h, w) = intermediate_img.shape[:2]
            center = (w // 2, h // 2)
            degree_to_rotate = np.random.uniform(-2, 2)
            rotation_matrix = cv2.getRotationMatrix2D(center, degree_to_rotate, 1)
            rotated_image = cv2.warpAffine(intermediate_img, rotation_matrix, (w, h))
            
            cv2.imwrite(result_image_filepath, rotated_image)
            #json.dump(bbox_metadata, f)  # Append the JSON object (box) to the file
            f.write("\n")  # Write each box on a new line

box = 1 
 rect_start = [113, 345]
 width = 903 
 height = 1 
 opacity= 0.3687687278741082
num_subboxes = 1
img_width = 1200, img_height = 1600
{'class': 0, 'poly': [[0.1725, 0.253125], [0.1725, 0.215625], [0.8466666666666667, 0.215625], [0.8466666666666667, 0.215625], [0.8358333333333333, 0.215625], [0.8358333333333333, 0.253125], [0.09416666666666666, 0.253125], [0.09416666666666666, 0.253125]]}
box = 1 
 rect_start = [113, 405]
 width = 903 
 height = 1 
 opacity= 0.47925575799529074
num_subboxes = 1
img_width = 1200, img_height = 1600
{'class': 0, 'poly': [[0.1175, 0.290625], [0.1175, 0.253125], [0.8466666666666667, 0.253125], [0.8466666666666667, 0.253125], [0.7991666666666667, 0.253125], [0.7991666666666667, 0.290625], [0.09416666666666666, 0.290625], [0.09416666666666666, 0.290625]]}
box = 2 
 rect_start = [115, 644]
 width = 903 
 height = 1 
 opacity= 0.4208162525092908
num_subboxes = 1
img_width = 1200, img_height = 1600
{'class': 0, 'poly': [[0.17333333333333334, 0.44], [0.17

AttributeError: 'NoneType' object has no attribute 'copy'

In [8]:
validation_custom_metadata = './background_images/val/metadata_train.json'

with open(validation_custom_metadata, 'r') as f:
    metadata = json.load(f)   

In [9]:
import math
training_dataset_path = "./background_images/val"
training_bbox_data = './generated_dataset/labels/val/'
generated_train_dataset_path = "./generated_dataset/images/val/"
variations_per_image = 12

for image in metadata["images"]:
    image_filename = image["filename"]
    image_path = os.path.join(training_dataset_path, image_filename)
    # bg_img = image_preprocess(image_path, 1600)
    bg_img = cv2.imread(image_path)
    intermediate_img = bg_img

    # Save each variation with a unique filename
    filename, extension = os.path.splitext(image_filename)
    
    for var in range(variations_per_image):
        intermediate_img = bg_img

        # create a path string representing a fresh file for this image variation
        result_image_name = f"{filename}_variation{var}{extension}"
        result_metadata_name = f"{filename}_variation{var}.txt"
        training_bbox_data_file = f"{training_bbox_data}{result_metadata_name}"

        # flush previous contents if present
        with open(training_bbox_data_file,"w") as f:
            f.write("")
        
        with open(training_bbox_data_file, "a") as f:
            for box_num, box in enumerate(image["boxes"]):
                # 10% chance there is no highlight in this paragraph/box
                is_not_highlighted = random.randint(0, 9)
                if(is_not_highlighted<1):
                    continue
                
                # Get initial box parameters
                original_rect_start = box["rect_start"][:]
                width = box["width"]
                height = box["height"]
                thickness = box["thickness"]
                delta = box["line_spacing"]
                
                start_line = 0 # absolute line number from where highlight begins 
                max_lines = round(height // thickness, 0)
                lines_left_in_box = max_lines
                
                while lines_left_in_box>0:  
                    # 20% chance there is no highlight in the rest of the box
                    is_not_highlighted = random.randint(1, 10)
                    if(is_not_highlighted < 3):
                        lines_left_in_box = 0
                        continue
                        
                    # else, set random starting position within the remaining rectangle bounds
                    if(start_line == max_lines-1):
                        random_line_for_highlight_start = start_line
                    else:
                        random_line_for_highlight_start = random.randint(start_line, max_lines-1) # measured in number of lines, not pixels.
                    
                    lines_left_in_box = max_lines - random_line_for_highlight_start
                    variation_rect_start = original_rect_start[:]
                    variation_rect_start[1] += thickness*random_line_for_highlight_start
                    highlight_height =  random.randint(1, lines_left_in_box) # height of highlight-box in number of lines
                    
                    # new values for start_line and lines_left_in_box
                    start_line = random_line_for_highlight_start + highlight_height
                    lines_left_in_box = max_lines - start_line
    
                    # Ensure the rectangle is within the image bounds
                    #variation_rect_start[1] = min(variation_rect_start[1], resized_img_height - adjusted_height)
                    
                    l1, l2 = random.randint(0, 100), random.randint(0, 100)
                    max_offset = random.randint(0, 10)
                    color = random_rgb_color()
                    opacity = random.uniform(0.2, 0.6)
    
                    print(f"box = {box_num} \n rect_start = {variation_rect_start}\n width = {width} \n height = {highlight_height} \n opacity= {opacity}")
                    # Draw the adjusted rectangle
                    intermediate_img, bbox_metadata = draw_broken_box_with_subboxes(
                        intermediate_img, variation_rect_start, width, highlight_height, thickness, delta, l1, l2, max_offset, color, opacity
                    )

                    # create line to be appended to the image label file
                    line_to_be_appended = f"{bbox_metadata['class']} "
                    spaces=0
                    print(bbox_metadata)
                    for coord in bbox_metadata["poly"]:
                        line_to_be_appended = line_to_be_appended + f"{coord[0]} {coord[1]}"
                        if(spaces<8):
                            line_to_be_appended = line_to_be_appended + " "
                        spaces += 1
                    # append that line to the image path
                    f.write(f"{line_to_be_appended}\n")
                    
            result_image_filepath = os.path.join(generated_train_dataset_path, result_image_name)
            print(f"\n")

            (h, w) = intermediate_img.shape[:2]
            center = (w // 2, h // 2)
            degree_to_rotate = np.random.uniform(-2, 2)
            rotation_matrix = cv2.getRotationMatrix2D(center, degree_to_rotate, 1)
            rotated_image = cv2.warpAffine(intermediate_img, rotation_matrix, (w, h))
            
            cv2.imwrite(result_image_filepath, rotated_image)
            #json.dump(bbox_metadata, f)  # Append the JSON object (box) to the file
            f.write("\n")  # Write each box on a new line

box = 0 
 rect_start = [72, 196]
 width = 417 
 height = 2 
 opacity= 0.43140097456018206
num_subboxes = 2
img_width = 584, img_height = 895
{'class': 0, 'poly': [[0.24143835616438356, 0.23128491620111732], [0.24143835616438356, 0.21899441340782122], [0.8373287671232876, 0.21899441340782122], [0.8373287671232876, 0.23128491620111732], [0.7568493150684932, 0.23128491620111732], [0.7568493150684932, 0.2435754189944134], [0.1232876712328767, 0.2435754189944134], [0.1232876712328767, 0.23128491620111732]]}
box = 0 
 rect_start = [72, 240]
 width = 417 
 height = 1 
 opacity= 0.2017201865160085
num_subboxes = 1
img_width = 584, img_height = 895
{'class': 0, 'poly': [[0.21575342465753425, 0.28044692737430166], [0.21575342465753425, 0.2681564245810056], [0.8373287671232876, 0.2681564245810056], [0.8373287671232876, 0.2681564245810056], [0.7448630136986302, 0.2681564245810056], [0.7448630136986302, 0.28044692737430166], [0.1232876712328767, 0.28044692737430166], [0.1232876712328767, 0.28044692