In [10]:
import tensorflow as tf
import tensorflow_addons as tfa
import os
import cv2
import math
import xml.etree.ElementTree as ET
import random
random.seed(10)
import numpy as np
from tqdm import tqdm

In [11]:
# Dataset roots. The processing loop further down reads source images and
# annotations from ORI_BASE_DIR and writes its results into AUG_BASE_DIR.
AUG_BASE_DIR = r"F:\Minor Data Collection\Final Image Data\Monument Original 512\Augmented 512 v1"
ORI_BASE_DIR = r"F:\Minor Data Collection\Final Image Data\Monument Original 512\Original Images and Annotations Resized"

# Sub-folder names shared by both roots
IMG_PATH = "JPEGImages"
ANNO_PATH = "Annotations"

output_dir = "./JPEGimagesNew"
output_annotations_dir = "./AnnotationsNew"

# List every file currently present in the augmented image folder
img_files = os.listdir(os.path.join(AUG_BASE_DIR, IMG_PATH))

# Drop the Windows folder-metadata file when it is present. An explicit
# membership test replaces the former bare `except:`, which would also have
# hidden unrelated errors (including KeyboardInterrupt).
if 'desktop.ini' in img_files:
    img_files.remove('desktop.ini')

# Selects the randomly rotated ("RR") images
def filter_condition(file_name):
    """Return True when the second "_"-separated field of ``file_name`` is "RR".

    Names that contain no underscore at all yield False.
    """
    parts = file_name.split("_")
    return len(parts) > 1 and parts[1] == "RR"

# Keep only the file names accepted by filter_condition
img_files = [candidate for candidate in img_files if filter_condition(candidate)]

In [12]:
# Regenerate every selected image of the augmented set from its original:
# rotate the original image by a random angle, rewrite each bounding box with a
# heuristic approximation of the rotated box, and save image + annotation under
# the augmented file name inside AUG_BASE_DIR (the same folder img_files was
# listed from, so existing files are overwritten).
for img_name_primary in tqdm(img_files):
    # The text before the first "_" is taken as the original image's stem
    name_split = img_name_primary.split("_")
    image_name = name_split[0] + ".jpg"

    # Load the original image as a 3-channel tensor
    image_path = os.path.join(ORI_BASE_DIR, IMG_PATH, image_name)
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    # Draw the rotation in degrees from two np.arange ranges with step 0.2:
    # one starting at -16 and stopping before -8, one starting at 8 and
    # stopping before 16 (so small rotations around 0 are never picked)
    degree = random.choice(np.concatenate((np.arange(-16, -8, 0.2), np.arange(8, 16, 0.2))))
    # Degrees -> radians
    angle = degree * math.pi / 180
    # Positional arguments after the angle are presumably interpolation="bilinear"
    # and fill_mode="nearest" — confirm against the installed tensorflow_addons version
    image = tfa.image.rotate(image,angle,"bilinear","nearest")

    # Read the ORIGINAL image's annotation file
    annotation_path = os.path.join(ORI_BASE_DIR, ANNO_PATH, image_name.replace(".jpg",".xml"))
    tree = ET.parse(annotation_path)
    root = tree.getroot()

    # Get the size of the image as recorded in the annotation.
    # NOTE(review): width/height are parsed but not used below; the box math
    # hard-codes a 512x512 canvas (512 for flips/clamps, 256 for the centre).
    size = root.find("size")
    width = int(size.find("width").text)
    height = int(size.find("height").text)

    # Approximate the rotation for every bounding box
    for obj in root.iter("object"):
        bndbox = obj.find("bndbox")
        xmin = int(bndbox.find("xmin").text)
        ymin = int(bndbox.find("ymin").text)
        xmax = int(bndbox.find("xmax").text)
        ymax = int(bndbox.find("ymax").text)

        # Mirror y about the 512-pixel canvas height (presumably to work in a
        # y-up frame for the rotation formulas). From here on ymin/ymax hold the
        # mirrored values, not the values read from the XML.
        ymin = 512 - ymin
        ymax = 512 - ymax

        # Centre of rotation: the middle of the 512x512 canvas
        center_x = 256
        center_y = 256

        # Rotate the two diagonal corners (xmin, ymin) and (xmax, ymax) about the
        # centre with the standard 2-D rotation formulas. Only these two corners
        # are rotated, so the result is not the exact enclosing box of the
        # rotated rectangle; the adjustments further down compensate heuristically.
        xmin_new = round(center_x + (xmin - center_x) * math.cos(angle) - (ymin - center_y) * math.sin(angle))
        ymin_new = round(center_y + (xmin - center_x) * math.sin(angle) + (ymin - center_y) * math.cos(angle))
        xmax_new = round(center_x + (xmax - center_x) * math.cos(angle) - (ymax - center_y) * math.sin(angle))
        ymax_new = round(center_y + (xmax - center_x) * math.sin(angle) + (ymax - center_y) * math.cos(angle))

        # Undo the y mirroring on the rotated corners
        ymin_new = 512 - ymin_new
        ymax_new = 512 - ymax_new

        # Re-order so that min <= max on both axes
        xmin1 = min([xmin_new,xmax_new])
        xmax1 = max([xmin_new,xmax_new])
        ymin1 = min([ymin_new,ymax_new])
        ymax1 = max([ymin_new,ymax_new])


        # Heuristic adjustment of the x extent, depending on rotation direction
        if(degree < 0):
            # Negative angle: take the ORIGINAL (unrotated) xmin/xmax and widen the
            # box by |degree| pixels on each side; the rotated x values computed
            # above are not used in this branch.
            # NOTE(review): confirm that ignoring the rotated x values is intended.
            xminTemp = xmin + degree
            xmaxTemp = xmax - degree

        else:
            # Positive angle: shrink the rotated box's width around its centre by a
            # factor that falls linearly with the angle (0.84 at 8 degrees,
            # 0.68 at 16 degrees)
            width_reduce_factor = 0.8 - 0.02 * (degree-10)
            reduced_width = width_reduce_factor * (xmax1-xmin1)
            # center_x is reused here for the box centre; it is reset to 256 at
            # the top of the next object iteration
            center_x = (xmax1 + xmin1)/2
            # changing x
            xmin1 = round(center_x - reduced_width/2)
            xmax1 = round(center_x + reduced_width/2)
            xminTemp = min([xmin1,xmax1])
            xmaxTemp = max([xmin1,xmax1])

        # Adjusting y. ymin/ymax are the mirrored values from the top of this
        # iteration, so (ymax - ymin) is the negated original box height; the
        # min/max below restores the ordering. Net effect (up to rounding): a box
        # with the original height, centred vertically on the rotated box.
        reduced_height = (ymax-ymin)-(ymax1-ymin1)
        ymin1 = round(ymin1 - reduced_height/2)
        ymax1 = round(ymax1 + reduced_height/2)
        yminTemp = min([ymin1,ymax1])
        ymaxTemp = max([ymin1,ymax1])


        # Clamp the final values into [0, 512] and round them
        dim_arr = [ xminTemp , yminTemp, xmaxTemp, ymaxTemp ]

        for y in range(0,4):
            i = dim_arr[y]
            i = i if i >= 0 else 0
            i = i if i <= 512 else 512
            dim_arr[y] = round(i)

        xmin1 , ymin1, xmax1, ymax1 = dim_arr

        # Update the bounding box coordinates in the parsed annotation tree
        bndbox.find("xmin").text = str(xmin1)
        bndbox.find("ymin").text = str(ymin1)
        bndbox.find("xmax").text = str(xmax1)
        bndbox.find("ymax").text = str(ymax1)

    # Changing the file name stored in the XML to the augmented image's name
    filename = root.find('.//filename')
    filename.text = img_name_primary.split(".")[0] # text before the first ".", i.e. the name without ".jpg"

    # Save the augmented image
    output_image_path = os.path.join(AUG_BASE_DIR, IMG_PATH, img_name_primary)
    tf.io.write_file(output_image_path, tf.image.encode_jpeg(image).numpy())

    # Write the modified annotation next to it, under the augmented name
    output_annotation_path = os.path.join(AUG_BASE_DIR, ANNO_PATH, img_name_primary.replace(".jpg",".xml"))
    tree.write(output_annotation_path)
    

100%|██████████████████████████████████████████████████████████████████████████████| 1448/1448 [01:57<00:00, 12.34it/s]


### Handling Translation as Above

In [13]:
# Dataset roots. The processing loop further down reads source images and
# annotations from ORI_BASE_DIR and writes its results into AUG_BASE_DIR.
AUG_BASE_DIR = r"F:\Minor Data Collection\Final Image Data\Monument Original 512\Augmented 512 v1"
ORI_BASE_DIR = r"F:\Minor Data Collection\Final Image Data\Monument Original 512\Original Images and Annotations Resized"

# Sub-folder names shared by both roots
IMG_PATH = "JPEGImages"
ANNO_PATH = "Annotations"

# List every file currently present in the augmented image folder
img_files = os.listdir(os.path.join(AUG_BASE_DIR, IMG_PATH))

# Drop the Windows folder-metadata file when it is present. An explicit
# membership test replaces the former bare `except:`, which would also have
# hidden unrelated errors (including KeyboardInterrupt).
if 'desktop.ini' in img_files:
    img_files.remove('desktop.ini')

# Selects the translated ("TRAN") images
def filter_condition(file_name):
    """Return True when the second "_"-separated field of ``file_name`` is "TRAN".

    Names that contain no underscore at all yield False.
    """
    parts = file_name.split("_")
    return len(parts) > 1 and parts[1] == "TRAN"

# Keep only the file names accepted by filter_condition
img_files = [candidate for candidate in img_files if filter_condition(candidate)]

In [17]:
# Regenerate every selected image of the augmented set from its original:
# shift the original image by a random fixed offset, shift its bounding boxes by
# the same offset (dropping boxes that are mostly pushed off-canvas), and save
# image + annotation under the augmented file name inside AUG_BASE_DIR (the same
# folder img_files was listed from, so existing files are overwritten).
for img_name_primary in tqdm(img_files):
    # The text before the first "_" is taken as the original image's stem
    name_split = img_name_primary.split("_")
    image_name = name_split[0] + ".jpg"

    # Load the original image as a 3-channel tensor
    image_path = os.path.join(ORI_BASE_DIR, IMG_PATH, image_name)
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)

    # Pick one of eight fixed 40-pixel shifts (axis-aligned or diagonal) as [dx, dy]
    dx_dy_list = [[40,0],[0,40],[-40,0],[0,-40],[40,40],[-40,-40],[-40,40],[40,-40]]
    dx_dy = random.choice(dx_dy_list)
    # Perform translation augmentation; the positional arguments after the offset
    # are presumably interpolation="bilinear" and fill_mode="constant" — confirm
    # against the installed tensorflow_addons version
    image = tfa.image.translate(image, dx_dy,"bilinear","constant")

    # Read the ORIGINAL image's annotation file
    annotation_path = os.path.join(ORI_BASE_DIR, ANNO_PATH, image_name.replace(".jpg",".xml"))
    tree = ET.parse(annotation_path)
    root = tree.getroot()

    # Image size as recorded in the annotation. Not used below: the clamp works
    # on a fixed 512-pixel canvas. Kept so the names stay defined for later cells.
    size = root.find("size")
    width = int(size.find("width").text)
    height = int(size.find("height").text)

    # Translate the bounding boxes.
    # Iterate over a snapshot of the objects: removing a child while walking
    # root.iter() directly makes the iterator skip the element that follows the
    # removed one, which would leave that box with its untranslated coordinates.
    for obj in list(root.iter("object")):
        bndbox = obj.find("bndbox")
        xmin = int(bndbox.find("xmin").text)
        ymin = int(bndbox.find("ymin").text)
        xmax = int(bndbox.find("xmax").text)
        ymax = int(bndbox.find("ymax").text)

        original_width = xmax - xmin
        original_height = ymax - ymin

        # Shift by (dx, dy), then clamp every coordinate into [0, 512]
        shifted = (xmin + dx_dy[0], ymin + dx_dy[1], xmax + dx_dy[0], ymax + dx_dy[1])
        xmin1, ymin1, xmax1, ymax1 = (min(max(coord, 0), 512) for coord in shifted)

        final_width = xmax1 - xmin1
        final_height = ymax1 - ymin1

        # Keep a box only if it retains at least 50% of its original width AND
        # height after clamping. A zero-size original box has no meaningful
        # ratio (and used to raise ZeroDivisionError), so it is dropped as well.
        if original_width == 0 or original_height == 0:
            keep_box = False
        else:
            keep_box = (
                (final_width / original_width) >= 0.50
                and (final_height / original_height) >= 0.50
            )

        if not keep_box:
            root.remove(obj)
        else:
            # Update the bounding box coordinates in the parsed annotation tree
            bndbox.find("xmin").text = str(xmin1)
            bndbox.find("ymin").text = str(ymin1)
            bndbox.find("xmax").text = str(xmax1)
            bndbox.find("ymax").text = str(ymax1)

    # NOTE(review): the <filename> element is left as in the original annotation
    # here — confirm whether it should be renamed to the augmented image's name.

    # Save the augmented image
    output_image_path = os.path.join(AUG_BASE_DIR, IMG_PATH, img_name_primary)
    tf.io.write_file(output_image_path, tf.image.encode_jpeg(image).numpy())

    # Write the modified annotation next to it, under the augmented name
    output_annotation_path = os.path.join(AUG_BASE_DIR, ANNO_PATH, img_name_primary.replace(".jpg",".xml"))
    tree.write(output_annotation_path)
    

  1%|▍                                                                                | 7/1330 [00:00<02:18,  9.58it/s]



  1%|▌                                                                               | 10/1330 [00:01<02:02, 10.79it/s]



100%|██████████████████████████████████████████████████████████████████████████████| 1330/1330 [01:50<00:00, 12.00it/s]
