# Data Generation Script for Training Classifiers

## Imports

In [1]:
import os
import json
import random
import string
from PIL import Image
from joblib import Parallel, delayed
import os
from tqdm import tqdm


## Global Variables

In [2]:
in_dir = "/mnt/nis_lab_research/data/coco_files/neg/far_shah_b1-b5_b8_train_neg"
out_dir = "/mnt/nis_lab_research/data/class_data/neg/far_shah_b1-b5_b8_train_neg"
bg_color = "white"
padding = 0.05
border = 0
num_units = -1
backend = "threading"

## Preprocessing

In [3]:
with open(os.path.join(in_dir, "result.json")) as f:
    obj = json.load(f)

In [4]:
img_list = obj["images"]
cat_list = obj["categories"]
ann_list = obj["annotations"]

In [5]:
cat_list

[{'id': 0, 'name': 'Accept Button'},
 {'id': 1, 'name': 'Address Input Box'},
 {'id': 2, 'name': 'Advertisement'},
 {'id': 3, 'name': 'Alert Notification'},
 {'id': 4, 'name': 'Allow Button'},
 {'id': 5, 'name': 'Checkbox'},
 {'id': 6, 'name': 'Click Captcha'},
 {'id': 7, 'name': 'Close Button'},
 {'id': 8, 'name': 'Download Button'},
 {'id': 9, 'name': 'Email Input Box'},
 {'id': 10, 'name': 'General Button'},
 {'id': 11, 'name': 'General Input Box'},
 {'id': 12, 'name': 'Image Captcha'},
 {'id': 13, 'name': 'Login Button'},
 {'id': 14, 'name': 'Logo'},
 {'id': 15, 'name': 'Name Input Box'},
 {'id': 16, 'name': 'Password Input Box'},
 {'id': 17, 'name': 'Phone Input Box'},
 {'id': 18, 'name': 'Play Button'},
 {'id': 19, 'name': 'Popup'},
 {'id': 20, 'name': 'Search Button'},
 {'id': 21, 'name': 'Search Input Box'},
 {'id': 22, 'name': 'Submit Button'},
 {'id': 23, 'name': 'Text Captcha'},
 {'id': 24, 'name': 'Toggle Button'},
 {'id': 25, 'name': 'Update Button'},
 {'id': 26, 'name': '

In [6]:
cat_map = []
for cat in cat_list:
    cat_map.append(cat["name"])
# cat_map = sorted(cat_map)

In [7]:
cat_map

['Accept Button',
 'Address Input Box',
 'Advertisement',
 'Alert Notification',
 'Allow Button',
 'Checkbox',
 'Click Captcha',
 'Close Button',
 'Download Button',
 'Email Input Box',
 'General Button',
 'General Input Box',
 'Image Captcha',
 'Login Button',
 'Logo',
 'Name Input Box',
 'Password Input Box',
 'Phone Input Box',
 'Play Button',
 'Popup',
 'Search Button',
 'Search Input Box',
 'Submit Button',
 'Text Captcha',
 'Toggle Button',
 'Update Button',
 'Video',
 'Random']

## Functions

In [8]:
def crop_image(file_path, bounding_box, padding):
    
    with Image.open(file_path) as img:
        
        x_min, y_min, width, height = bounding_box

        # Calculate padding in pixels
        pad_width = int(width * padding)
        pad_height = int(height * padding)

        # Adjust the bounding box with padding
        x_min = max(x_min - pad_width, 0)
        y_min = max(y_min - pad_height, 0)
        x1 = min(x_min + width + 2 * pad_width, img.width)
        y1 = min(y_min + height + 2 * pad_height, img.height)
        
        cropped_img = img.crop((x_min, y_min, x1, y1))
        
        return cropped_img

In [9]:
def paste_to_bg(image, background_color, bg_width, bg_height):
    
    # Create a new image with the specified background color and dimensions
    background = Image.new('RGB', (bg_width, bg_height), background_color)

    # Calculate the position to paste the image so it's centered
    x = (bg_width - image.width) // 2
    y = (bg_height - image.height) // 2

    # Paste the image onto the background
    background.paste(image, (x, y), image if image.mode == 'RGBA' else None)

    return background

In [10]:
def resize_ar_lock(img, target_size):

    original_width, original_height = img.size
    target_width, target_height = target_size

    # Calculate scaling factor
    scaling_factor = min(target_width / original_width, target_height / original_height)

    # Calculate new dimensions
    new_width = max(int(original_width * scaling_factor), 1)
    new_height = max(int(original_height * scaling_factor), 1)

    # Resize the image
    resized_img = img.resize((new_width, new_height))

    return resized_img


In [11]:
def gen_rand_str(length):
    characters = string.ascii_letters + string.digits
    random_string = ''.join(random.choice(characters) for i in range(length))
    return random_string

In [12]:
if not os.path.exists(out_dir):
        os.makedirs(out_dir)
for cat in cat_list:
    os.makedirs(os.path.join(out_dir, cat["name"]), exist_ok=True)

## Main

In [13]:
def process_image(img, ann_list, in_dir, out_dir, cat_map, padding, bg_color, border):
    img_bn = os.path.basename(img["file_name"])[0:-4]
    img_fp = os.path.join(in_dir, "images", os.path.basename(img["file_name"]))
    img_id = img["id"]

    for j, ann in enumerate(ann_list):
        ann_img_id = ann["image_id"]
        cat_id = ann["category_id"]

        # if cat_id == 1:
        #     cat_id = 0

        if img_id == ann_img_id:
            try:
                elem_img = crop_image(img_fp, ann["bbox"], padding)
                e_w = elem_img.size[0]
                e_h = elem_img.size[1]

                if e_w < e_h:
                    elem_img = paste_to_bg(elem_img, bg_color, e_h + border, e_h + border)
                elif e_w > e_h:
                    elem_img = paste_to_bg(elem_img, bg_color, e_w + border, e_w + border)

                elem_img.save(os.path.join(out_dir, cat_map[cat_id], img_bn + "-" + str(j)) + ".png", "PNG")

            except Exception as e:
                print(img_fp)
                print(e)
            

In [14]:
with tqdm(total=len(img_list)) as pbar:
    Parallel(n_jobs=num_units, backend=backend)(
        delayed(lambda x: (process_image(x, ann_list, in_dir, out_dir, cat_map, padding, bg_color, border), pbar.update(1)))(img) for img in img_list
    )

  7%|▋         | 723/10643 [04:56<43:39,  3.79it/s]  

/mnt/nis_lab_research/data/coco_files/neg/far_shah_b1-b5_b8_train_neg/images/lwCSe6xAAuerGXwZ-microsoft_ss.png
tile cannot extend outside image


100%|██████████| 10643/10643 [59:19<00:00,  2.99it/s] 
