In [None]:
import os
from pathlib import Path
import shutil
import json
import pandas as pd
import albumentations as A
import random
from loguru import logger
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [None]:
GOAL = 500  # how many image to be augmented
REGION_ATTRIBUTE = "Defects"
IMG_PATH = r"train"
VIA_PATH = Path() / "train" / "train.json"  # path to your image annotation json

RANDOM_STATE = 0
random.seed(RANDOM_STATE)

BOX_COLOR = (255, 0, 0)  # Red
TEXT_COLOR = (255, 255, 255)  # White

sample_output = {
    "_via_settings": {
        "ui": {
            "annotation_editor_height": 25,
            "annotation_editor_fontsize": 0.8,
            "leftsidebar_width": 18,
            "image_grid": {
                "img_height": 80,
                "rshape_fill": "none",
                "rshape_fill_opacity": 0.3,
                "rshape_stroke": "yellow",
                "rshape_stroke_width": 2,
                "show_region_shape": True,
                "show_image_policy": "all",
            },
            "image": {
                "region_label": "class",
                "region_color": "class",
                "region_label_font": "10pxSans",
                "on_image_annotation_editor_placement": "NEAR_REGION",
            },
        },
        "core": {"buffer_size": 18, "filepath": {}, "default_filepath": ""},
        "project": {"name": "via_project_val.5.12"},
    },
    "_via_img_metadata": {},
    "_via_attributes": {
        "regions": {
            REGION_ATTRIBUTE: {
                "type": "radio",
                "description": "",
                "options": {},
                "default_options": {},
            }
        },
        "file": {},
    },
}

## Functions

In [None]:
def visualize_bbox(
    img: np.ndarray,
    bbox: tuple[float, float, float, float],
    class_name: str,
    color: tuple[int, int, int] = BOX_COLOR,
    thickness: int = 2,
) -> np.ndarray:
    """
    Visualizes a single bounding box on the given image.

    Args:
        - img (np.ndarray): The image on which to draw the bounding box. Should be a NumPy array representing an image (e.g., from OpenCV).
        - bbox (tuple[float, float, float, float]): A tuple representing the bounding box in the format (x_min, y_min, width, height), where:
            - x_min (float): The x-coordinate of the top-left corner of the bounding box.
            - y_min (float): The y-coordinate of the top-left corner of the bounding box.
            - width (float): The width of the bounding box.
            - height (float): The height of the bounding box.
        - class_name (str): The label or class name to display on the bounding box.
        - color (tuple[int, int, int], optional): The color of the bounding box, specified as a BGR tuple (Blue, Green, Red). Defaults to BOX_COLOR.
        - thickness (int, optional): The thickness of the bounding box border. Defaults to 2.

    Return:
        - img (np.ndarray): The image with the bounding box and class name drawn on it.
    """
    x_min, y_min, w, h = bbox
    x_min, x_max, y_min, y_max = int(x_min), int(x_min + w), int(y_min), int(y_min + h)

    cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color=color, thickness=thickness)

    ((text_width, text_height), _) = cv2.getTextSize(
        class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.35, 1
    )
    cv2.rectangle(
        img,
        (x_min, y_min - int(1.3 * text_height)),
        (x_min + text_width, y_min),
        BOX_COLOR,
        -1,
    )
    cv2.putText(
        img,
        text=class_name,
        org=(x_min, y_min - int(0.3 * text_height)),
        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=0.35,
        color=TEXT_COLOR,
        lineType=cv2.LINE_AA,
    )
    return img


def visualize(
    image: np.ndarray, bboxes: list[tuple[float, float, float, float]]
) -> None:
    """
    Visualizes multiple bounding boxes on the given image.

    Args:
        - image (np.ndarray): The image on which to draw the bounding boxes. Should be a NumPy array representing an image (e.g., from OpenCV).
        - bboxes (List[Tuple[float, float, float, float, str]]): A list of bounding boxes, where each bounding box is a tuple in the format
            (x_min, y_min, width, height, class_name).
            - x_min (float): The x-coordinate of the top-left corner of the bounding box.
            - y_min (float): The y-coordinate of the top-left corner of the bounding box.
            - width (float): The width of the bounding box.
            - height (float): The height of the bounding box.
            - class_name (str): The label or class name to display on the bounding box.

    Return:
        - None:

    Yield:
        - Displays the image with bounding boxes and class labels using Matplotlib.
    """
    img = image.copy()
    for bbox in bboxes:
        class_name = bbox[-1]
        img = visualize_bbox(img, bbox[:-1], class_name)
    plt.figure(figsize=(12, 12))
    plt.axis("off")
    plt.imshow(img)


def generate_annotation(
    filename: str,
    bboxes: list[tuple[float, float, float, float, str]],
    transformed_img_path: str,
) -> dict[str, dict]:
    """
    Generates an annotation dictionary for an image with bounding boxes and corresponding class labels.

    Args:
        -filename (str): The name of the image file (without path) for which the annotation is generated.
        -bboxes (list[tuple[float, float, float, float, str]]): A list of bounding boxes, where each bounding box is a tuple in the format
            (x_min, y_min, width, height, class_name).
            - x_min (float): The x-coordinate of the top-left corner of the bounding box.
            - y_min (float): The y-coordinate of the top-left corner of the bounding box.
            - width (float): The width of the bounding box.
            - height (float): The height of the bounding box.
            - class_name (str): The label or class name associated with the bounding box.
        -transformed_img_path (str): The file path to the transformed image, used to obtain the file size.

    Returns:
        -annot (dict[str, dict]): A dictionary containing the annotation information, structured for use in dataset annotations.
            The keys include filename, size, regions, and other file attributes.
    """
    filesize = os.stat(transformed_img_path).st_size

    regions = []
    for bbox in bboxes:
        region = {
            "shape_attributes": {
                "name": "rect",
                "x": int(bbox[0]),
                "y": int(bbox[1]),
                "width": int(bbox[2]),
                "height": int(bbox[3]),
            },
            "region_attributes": {"Defects": bbox[-1]},
        }
        regions.append(region)

    annot = {
        f"{filename}{filesize}": {
            "filename": filename,
            "size": filesize,
            "regions": regions,
            "file_attributes": {},
        }
    }

    return annot

def get_features(via_json: dict[str, any]) -> list[str]:
    """
    Extracts unique features from the regions in a VIA (VGG Image Annotator) JSON annotation file.

    Args:
        - via_json (dict[str, any]): A dictionary representing the VIA JSON annotations.
            Each entry corresponds to an image and contains a list of regions with attributes.

    Return:
        - feature (list[str]): A list of unique features (values from the specified `REGION_ATTRIBUTE`) found in the regions.
    """
    feature = []
    for key, value in via_json.items():
        for regions in value["regions"]:
            feat = regions["region_attributes"][REGION_ATTRIBUTE]
            if feat not in feature:
                feature.append(feat)

    return feature

In [1]:
with open(VIA_PATH, "r") as fs: 
    annot = json.load(fs)["_via_img_metadata"]

features = get_features(annot)

# update via output
sample_output["_via_attributes"]["regions"]["options"] = {feature: feature for feature in features}

image_files = {
    "filekey":  [],
    "filename": [], 
    "filesize": [], 
    "annot": []
}

for feature in features: 
    image_files.update({feature:[]})

for key, value in annot.items(): 
    print(value)

    features_count = {feature: 0 for feature in features}

    bbox = []

    for regions in value["regions"]: 
        feat = regions["region_attributes"][REGION_ATTRIBUTE]
        features_count[feat] += 1

        coor = regions["shape_attributes"]

        bbox.append([coor["x"], coor["y"], coor["width"], coor["height"], feat])

    # print(features_count)

    image_files["filekey"].append(key)
    image_files["filename"].append(value["filename"])
    image_files["filesize"].append(value["size"])
    image_files["annot"].append(json.dumps(bbox))
    for feature, cnt in features_count.items(): 
        image_files[feature].append(cnt)

image_files
# %%
image_files_df = pd.DataFrame(image_files)
# image_files_df = pd.read_csv("annotationsv2.csv")

# %%

final_annotation = []
for feature in features: 

    goal =  GOAL

    # print(feature)
    # print(goal)
    # print(image_files[feature])

    # create albumentations pipeline

    # get relavant file names
    feature_images_df = image_files_df[image_files_df[feature] != 0]
    feature_images_df.reset_index(drop=True, inplace=True)
    feature_count = feature_images_df[feature].sum()
    # print(len(feature_images_df))

    aug = goal - feature_count

    logger.info(f"Feature: {feature}, Goal: {goal}, Images to augment: {aug}")

    img_save_path = os.path.join(IMG_PATH, "augmentation", feature)

    # print(aug)
    if not os.path.exists(img_save_path): 
        os.makedirs(img_save_path)
        logger.info(f"Path for {feature} created. ")

    _via_img_metadata = {}

    while aug > 0: 

        try: 

            # randomly choose files to perform augmentation
            key = random.randint(0, len(feature_images_df)-1)
            filename = feature_images_df.loc[key, "filename"]
            annots = json.loads(feature_images_df.loc[key, "annot"])

            print(filename)
            print(annots)

            file_path = os.path.join(IMG_PATH, filename)

            if os.path.exists(file_path): 

                img = cv2.imread(file_path)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                img_height = img.shape[0]
                img_width = img.shape[1]

                defects = [annot[-1] for annot in annots]
                print(defects)

                bbox = [annot for annot in annots if feature in annot]
                
                for box in bbox: 
                    for i in range(3): 
                        if box[i] < 0: 
                            box[i] = 0

                print(bbox)

                # augmentation pipeline
                transform = A.Compose([
                    A.BBoxSafeRandomCrop(erosion_rate=0), 
                    A.RandomRotate90(p=0.5),   
                    A.VerticalFlip(p=0.5), 
                    A.HorizontalFlip(p=0.5), 
                    A.RandomBrightnessContrast(p=0.5), 
                    A.RandomGamma(p=0.5)
                ], bbox_params=A.BboxParams(format="coco"))

                transformed = transform(image=img, bboxes=bbox)

                # visualize(
                #     transformed["image"],
                #     transformed["bboxes"]
                # )
                transformed_img_file = f"{aug}_{feature}_{filename}"
                transformed_img_path = os.path.join(img_save_path, transformed_img_file)

                if cv2.imwrite(transformed_img_path, cv2.cvtColor(transformed["image"], cv2.COLOR_BGR2RGB)): 
                    print("after_cv", transformed["bboxes"])
                    
                    transformed_annotation = generate_annotation(transformed_img_file, transformed["bboxes"], transformed_img_path)
                    _via_img_metadata.update(transformed_annotation)
                    aug -= len(bbox)

            else: 
                pass

        except Exception as e: 
            raise e

    final_json = sample_output

    final_json["_via_img_metadata"] = _via_img_metadata

    final_json_path = os.path.join(img_save_path, f"{feature}.json")

    with open(final_json_path, "w") as f:
        json.dump(final_json, f)


# %% join all json and move all files into one new folder
import shutil

final_annotation = annot.copy()

if not os.path.exists("final"):
    os.mkdir("final")

final_folder = os.path.join("final", IMG_PATH)

if not os.path.exists(final_folder): 
    for i in range(len(IMG_PATH.split(os.sep))): 
        folder = os.path.join("final", *IMG_PATH.split(os.sep)[:i+1])
        try: 
            os.mkdir(folder)
        except Exception as e: 
            raise e


for top, dirs, files in os.walk(IMG_PATH): 
    for file in files: 
        if ".jpg" in file: 
            from_file = os.path.join(top, file)
            to_file = os.path.join(final_folder, file)
            shutil.copy(from_file, to_file)

for feature in features: 
    annot_file = os.path.join(IMG_PATH, "augmentation", feature, f"{feature}.json")

    with open(annot_file, "r") as f:
        aug_annot = json.load(f)["_via_img_metadata"]

    final_annotation.update(aug_annot)

final_json = sample_output

final_json["_via_img_metadata"] = final_annotation

final_json_path = os.path.join(final_folder, "final.json")

with open(final_json_path, "w") as f:
    json.dump(final_json, f)

[32m2024-08-01 17:05:57.051[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m177[0m - [1mFeature: SMPF, Goal: 500, Images to augment: 421[0m
[32m2024-08-01 17:05:57.053[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m184[0m - [1mPath for SMPF created. [0m


{'filename': '22190X800009_042159_part1.jpg', 'size': 168523, 'regions': [{'shape_attributes': {'name': 'rect', 'x': 614, 'y': 603, 'width': 44, 'height': 15}, 'region_attributes': {'Defects': 'SMPF'}}], 'file_attributes': {}}
{'filename': '22302D800584_142522_part3.jpg', 'size': 66303, 'regions': [{'shape_attributes': {'name': 'rect', 'x': 530, 'y': 151, 'width': 98, 'height': 66}, 'region_attributes': {'Defects': 'PCB Exposed Copper'}}], 'file_attributes': {}}
{'filename': '22376P807440_162604_Vision5_part3.jpg', 'size': 80951, 'regions': [{'shape_attributes': {'name': 'rect', 'x': 373, 'y': 260, 'width': 33, 'height': 39}, 'region_attributes': {'Defects': 'PCB contamination'}}], 'file_attributes': {}}
{'filename': '22483T808354_043712_Vision5_part1.jpg', 'size': 141642, 'regions': [{'shape_attributes': {'name': 'rect', 'x': 645, 'y': 373, 'width': 124, 'height': 155}, 'region_attributes': {'Defects': 'PCB scratches'}}, {'shape_attributes': {'name': 'rect', 'x': 410, 'y': 600, 'width

[32m2024-08-01 17:06:04.584[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m177[0m - [1mFeature: PCB Exposed Copper, Goal: 500, Images to augment: 399[0m
[32m2024-08-01 17:06:04.587[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m184[0m - [1mPath for PCB Exposed Copper created. [0m


23200L802621_225018_part2.jpg
[[512, 432, 19, 21, 'PCB Exposed Copper']]
['PCB Exposed Copper']
[[512, 432, 19, 21, 'PCB Exposed Copper']]
after_cv [(324.0, 332.0, 19.0, 21.0, 'PCB Exposed Copper')]
24170V800329_024342_part1.jpg
[[488, 355, 19, 29, 'PCB Exposed Copper']]
['PCB Exposed Copper']
[[488, 355, 19, 29, 'PCB Exposed Copper']]
after_cv [(195.0, 286.0, 19.0, 29.0, 'PCB Exposed Copper')]
24230X807329_013920_part3.jpg
[[670, 468, 24, 24, 'PCB Exposed Copper']]
['PCB Exposed Copper']
[[670, 468, 24, 24, 'PCB Exposed Copper']]
after_cv [(514.0, 282.0, 24.0, 24.0, 'PCB Exposed Copper')]
234222811569_202340_A5_part2.jpg
[[49, 484, 19, 18, 'PCB Exposed Copper']]
['PCB Exposed Copper']
[[49, 484, 19, 18, 'PCB Exposed Copper']]
after_cv [(528.0, 15.000000000000004, 19.0, 17.999999999999996, 'PCB Exposed Copper')]
24131E804525_150837_part1.jpg
[[204, 514, 28, 31, 'PCB Exposed Copper']]
['PCB Exposed Copper']
[[204, 514, 28, 31, 'PCB Exposed Copper']]
after_cv [(141.99999999999997, 133.0,

[32m2024-08-01 17:06:12.121[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m177[0m - [1mFeature: PCB contamination, Goal: 500, Images to augment: 445[0m
[32m2024-08-01 17:06:12.125[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m184[0m - [1mPath for PCB contamination created. [0m


['PCB Exposed Copper']
[[305, 27, 56, 26, 'PCB Exposed Copper']]
after_cv [(314.0, 13.0, 56.0, 26.0, 'PCB Exposed Copper')]
23353M803164_224555_part3.jpg
[[248, 381, 25, 19, 'PCB Exposed Copper']]
['PCB Exposed Copper']
[[248, 381, 25, 19, 'PCB Exposed Copper']]
after_cv [(76.0, 18.0, 25.000000000000043, 19.0, 'PCB Exposed Copper')]
233819801800_152542_part1.jpg
[[622, 266, 103, 113, 'PCB contamination']]
['PCB contamination']
[[622, 266, 103, 113, 'PCB contamination']]
after_cv [(32.00000000000003, 168.0, 102.99999999999994, 113.0, 'PCB contamination')]
23372H800994_235709_part1.jpg
[[110, 260, 647, 104, 'PCB contamination']]
['PCB contamination']
[[110, 260, 647, 104, 'PCB contamination']]
after_cv [(13.0, 22.0, 647.0, 104.0, 'PCB contamination')]
23372H801152_223717_part3.jpg
[[516, 319, 45, 151, 'PCB contamination']]
['PCB contamination']
[[516, 319, 45, 151, 'PCB contamination']]
after_cv [(194.0, 130.0, 44.99999999999997, 151.0, 'PCB contamination')]
23381E807295_125744_part3.jpg

[32m2024-08-01 17:06:20.592[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m177[0m - [1mFeature: PCB scratches, Goal: 500, Images to augment: 187[0m
[32m2024-08-01 17:06:20.596[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m184[0m - [1mPath for PCB scratches created. [0m


['PCB contamination']
[[605, 550, 53, 41, 'PCB contamination']]
after_cv [(115.0, 1.0, 53.00000000000003, 41.0, 'PCB contamination')]
23070D803595_090440_part2.jpg
[[571, 183, 34, 51, 'PCB contamination']]
['PCB contamination']
[[571, 183, 34, 51, 'PCB contamination']]
after_cv [(60.99999999999999, 100.99999999999999, 34.00000000000002, 51.000000000000014, 'PCB contamination')]
23141E803134_025440_Vision5_part1.jpg
[[605, 550, 53, 41, 'PCB contamination']]
['PCB contamination']
[[605, 550, 53, 41, 'PCB contamination']]
after_cv [(150.0, 3.000000000000007, 53.00000000000003, 40.99999999999998, 'PCB contamination')]
23372H800994_235709_part1.jpg
[[110, 260, 647, 104, 'PCB contamination']]
['PCB contamination']
[[110, 260, 647, 104, 'PCB contamination']]
after_cv [(3.0000000000000124, 82.0, 647.0, 104.0, 'PCB contamination')]
233432803423_223743_part3.jpg
[[399, 265, 212, 34, 'PCB scratches']]
['PCB scratches']
[[399, 265, 212, 34, 'PCB scratches']]
after_cv [(70.00000000000001, 129.0, 21

[32m2024-08-01 17:06:23.274[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m177[0m - [1mFeature: Pass, Goal: 500, Images to augment: 372[0m


after_cv [(345.99999999999994, 370.0, 185.00000000000006, 239.0, 'PCB scratches'), (30.00000000000002, 42.0, 655.0, 569.0, 'PCB scratches')]
23494G809608_024008_part3.jpg
[[475, 189, 69, 98, 'PCB scratches']]
['PCB scratches']
[[475, 189, 69, 98, 'PCB scratches']]
after_cv [(201.0, 191.0, 69.0, 98.0, 'PCB scratches')]
242310801017_222220_part1.jpg
[[246, 589, 124, 23, 'PCB scratches']]
['PCB scratches']
[[246, 589, 124, 23, 'PCB scratches']]
after_cv [(235.00000000000003, 49.0, 123.99999999999997, 23.0, 'PCB scratches')]
23355M805832_105819_part2.jpg
[[116, 40, 688, 560, 'PCB scratches'], [114, 41, 86, 271, 'PCB scratches'], [677, 316, 78, 255, 'PCB scratches']]
['PCB scratches', 'PCB scratches', 'PCB scratches']
[[116, 40, 688, 560, 'PCB scratches'], [114, 41, 86, 271, 'PCB scratches'], [677, 316, 78, 255, 'PCB scratches']]
after_cv [(3.000000000000036, 35.0, 688.0, 560.0, 'PCB scratches'), (607.0, 36.0, 86.0, 271.0, 'PCB scratches'), (52.00000000000004, 311.0, 77.99999999999993, 255.

[32m2024-08-01 17:06:23.277[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m184[0m - [1mPath for Pass created. [0m


23026S801929_111757_part1.jpg
[[653, 23, 91, 151, 'PCB scratches'], [388, 550, 26, 25, 'Pass'], [654, 22, 32, 152, 'PCB scratches']]
['PCB scratches', 'Pass', 'PCB scratches']
[[388, 550, 26, 25, 'Pass']]
after_cv [(50.0, 33.00000000000002, 26.0, 24.999999999999964, 'Pass')]
23302V802588_033836_part2.jpg
[[116, 163, 87, 71, 'Pass'], [397, 23, 25, 24, 'Pass'], [399, 498, 19, 26, 'Pass'], [346, 72, 43, 43, 'Pass']]
['Pass', 'Pass', 'Pass', 'Pass']
[[116, 163, 87, 71, 'Pass'], [397, 23, 25, 24, 'Pass'], [399, 498, 19, 26, 'Pass'], [346, 72, 43, 43, 'Pass']]
after_cv [(86.0, 161.0, 87.0, 71.0, 'Pass'), (367.0, 21.0, 24.999999999999943, 24.0, 'Pass'), (369.0, 495.99999999999994, 19.000000000000057, 26.000000000000057, 'Pass'), (316.0, 70.0, 43.0, 43.000000000000014, 'Pass')]
23120Q801593_082838_part2.jpg
[[99, 167, 81, 71, 'Pass'], [395, 23, 29, 26, 'Pass'], [405, 491, 27, 30, 'Pass'], [344, 77, 43, 44, 'Pass']]
['Pass', 'Pass', 'Pass', 'Pass']
[[99, 167, 81, 71, 'Pass'], [395, 23, 29, 26, 