In [1]:
import json
import cv2
import pandas as pd
from utils import extract_patch
from tqdm import tqdm
import shutil
import os
import numpy as np

# Step 1: Extract Coin Patches from the annotated images

The first step is to extract the data from the annotation file. The annotated images are in COCO format. The data is in the form of a dictionary with the following keys:
- `info`: Information about the dataset
- `licenses`: License information
- `images`: Image information
- `annotations`: Annotation information
- `categories`: Category information

We are interested in the `images` and `annotations` keys. The `images` key contains information about the images, such as the image ID, file name, and image size. The `annotations` key contains information about the annotations, such as the annotation ID, image ID, category ID, and bounding box coordinates.

For each annotation, we will extract the coin patch from the image using the bounding box coordinates.

The coin patches are then saved as separate images in a new directory. Alongside the images, we create a `labels.csv` file that contains, for each coin patch, the patch file name, the category name (label), the source image file name, and the bounding box coordinates.

Each extracted patch is 1400x1400 pixels, and the coin is centered in the patch.

We have seen that the largest coin fits into an 860x860 pixel square. To allow for rotation augmentation, we use a 1400x1400 pixel square to extract the coin patch.

The final augmented coin patch sizes saved are 900x900 pixels to allow for some margin.



In [2]:


def patches_from_coco(source,dest):
    """Extract one patch per COCO annotation and save them next to a labels.csv.

    Reads ``<source>/annotations.json``, calls ``extract_patch`` (with
    ``size=1400``) for every annotation and writes the result to
    ``<dest>/patches/<idx>.jpg``, where ``idx`` is the zero-padded position of
    the annotation in the file. A ``labels.csv`` with the columns ``name``,
    ``label``, ``image`` and ``bbox`` is written to the same directory.

    Args:
        source: directory containing ``annotations.json`` and the images it
            references through ``file_name``.
        dest: output directory; its ``patches`` sub-directory is deleted and
            recreated on every call.

    Raises:
        KeyError: if an annotation references an unknown image or category id.
    """
    patches_dir = os.path.join(dest, "patches")

    # Always start from an empty directory so patches from a previous run
    # cannot leak into the new labels.csv / dataset.
    if os.path.exists(patches_dir):
        shutil.rmtree(patches_dir)
    os.makedirs(patches_dir)

    # Load the data from the file; the context manager closes the handle.
    with open(os.path.join(source, "annotations.json")) as annotations_file:
        data = json.load(annotations_file)

    id_to_label = {e["id"]: e["name"] for e in data["categories"]}
    id_to_images = {e["id"]: e["file_name"] for e in data["images"]}

    # Collect the rows in a list and build the DataFrame once at the end,
    # instead of growing it row by row.
    records = []
    for i, annotation in enumerate(tqdm(data["annotations"])):
        bbox = annotation["bbox"]  # "bbox": [x,y,width,height]
        label = id_to_label[annotation["category_id"]]
        image = id_to_images[annotation["image_id"]]

        patch = extract_patch(os.path.join(source, image), bbox, size=1400)
        idx = str(i).zfill(3)
        cv2.imwrite(os.path.join(patches_dir, f"{idx}.jpg"), patch)
        records.append({"name": idx, "label": label, "image": image, "bbox": bbox})

    df_labels = pd.DataFrame(records, columns=["name", "label", "image", "bbox"])
    df_labels.to_csv(os.path.join(patches_dir, "labels.csv"), index=False)

# Extract the patches for every dataset split, announcing each one first.
for banner, split_source, split_dest in (
    ("Processing train", "data/train", "processed_data/train"),
    ("Processing val", "data/val", "processed_data/val"),
):
    print(banner)
    patches_from_coco(split_source, split_dest)


Processing train


100%|██████████| 330/330 [00:24<00:00, 13.65it/s]


Processing val


100%|██████████| 53/53 [00:02<00:00, 18.55it/s]


# Step 2 : Put the patches in the ImageFolder format for PyTorch

We now save the images in the ImageFolder format, so that we can directly use the `ImageFolder` dataset from PyTorch (torchvision) to load the data.

In [3]:
def patches_to_ImageFolder(src, dest):
    """Copy the patches listed in ``labels.csv`` into one sub-folder per label.

    Reads ``<src>/labels.csv`` and copies ``<src>/<name>.jpg`` to
    ``<dest>/<label>/<name>.jpg`` for every row, where ``name`` is zero-padded
    to three digits (pandas parses names such as ``007`` back as integers).

    Args:
        src: directory containing ``labels.csv`` and the ``.jpg`` patches.
            A trailing path separator is optional.
        dest: output directory; it is deleted and recreated on every call.
            A trailing path separator is optional.
    """
    df = pd.read_csv(os.path.join(src, "labels.csv"))

    # Empty the destination so classes/files from an earlier run do not linger.
    if os.path.exists(dest):
        shutil.rmtree(dest)
    os.makedirs(dest, exist_ok=True)

    for label in tqdm(df["label"].unique()):
        # str() keeps this working when pandas infers a numeric label column.
        class_dir = os.path.join(dest, str(label))
        os.makedirs(class_dir, exist_ok=True)
        for _, row in df[df["label"] == label].iterrows():
            filename = str(row["name"]).zfill(3) + ".jpg"
            shutil.copy(os.path.join(src, filename), os.path.join(class_dir, filename))
# Build the ImageFolder layout for every dataset split, announcing each one first.
for banner, patches_dir, folder_dir in (
    ("Processing train", "processed_data/train/patches/", "processed_data/train/folder_dataset/"),
    ("Processing val", "processed_data/val/patches/", "processed_data/val/folder_dataset/"),
):
    print(banner)
    patches_to_ImageFolder(patches_dir, folder_dir)



Processing train


100%|██████████| 23/23 [00:00<00:00, 69.61it/s]


Processing val


100%|██████████| 18/18 [00:00<00:00, 504.26it/s]


# Step 3: Perform data augmentation on the images

We have seen that the classes in the dataset are not balanced. We decided to perform data augmentation both to balance the dataset and to increase its size.

We use the following transformations:
- Random translation, to simulate the coin being off-center
- Random rotation, to train the model on different orientations of the coin
- Random brightness, to simulate different lighting conditions
- Random contrast, to simulate different lighting conditions
- Random blur, to simulate different focus conditions

The code for the data augmentation is in the `utils.py` file.


In [4]:
from utils import random_rotation, random_translation, random_brightness, random_contrast, random_blur, extend_patch, crop_around_center

def augment_patch(patch, angle_range, translation_range, brightness_range, contrast_range, blur_range):
    """Apply the random augmentation chain to one patch and crop it to 900.

    The patch is first passed to ``extend_patch`` with a target shape two
    pixels larger in each of its first two dimensions, then run through
    rotation, translation, brightness, contrast and blur (in that order).
    Afterwards one pixel is sliced off every border and the result is handed
    to ``crop_around_center(..., 900)``.

    Args:
        patch: image array; only ``patch.shape[:2]`` is inspected here.
        angle_range: forwarded to ``random_rotation``.
        translation_range: forwarded to ``random_translation``.
        brightness_range: forwarded to ``random_brightness``.
        contrast_range: forwarded to ``random_contrast``.
        blur_range: forwarded to ``random_blur``.

    Returns:
        Whatever ``crop_around_center`` returns for the augmented patch.
    """
    margin = 2
    rows, cols = patch.shape[0], patch.shape[1]
    augmented = extend_patch(patch, (rows + margin, cols + margin))

    # The transforms are applied strictly in this order.
    pipeline = (
        (random_rotation, angle_range),
        (random_translation, translation_range),
        (random_brightness, brightness_range),
        (random_contrast, contrast_range),
        (random_blur, blur_range),
    )
    for transform, value_range in pipeline:
        augmented = transform(augmented, value_range)

    # Drop the extra border again: margin//2 from the start, -margin//2 from the end.
    head = margin // 2
    tail = -margin // 2
    augmented = augmented[head:tail, head:tail]
    return crop_around_center(augmented, 900)
def perform_augmentation(source_folder,target_folder, target_number_per_class,augmentation_params):
    """Write ``target_number_per_class`` patches per class, augmenting as needed.

    For every class sub-folder of ``source_folder`` the ``.jpg`` patches are
    taken in sorted order. While originals remain, output ``i`` is original
    ``i`` passed through ``crop_around_center(..., 900)``. Every further output
    is a randomly chosen original passed through ``augment_patch``. Outputs are
    written as ``<target_folder>/<class>/<i zero-padded to 3>.jpg``.

    Args:
        source_folder: str, path to the folder containing the patches; each
            class should be in a separate folder. Entries that are not
            directories are ignored. A trailing path separator is optional.
        target_folder: str, path to the folder where the augmented patches will
            be saved; each class will be in a separate folder. A trailing path
            separator is optional.
        target_number_per_class: int, total number of patches per class after
            augmentation.
        augmentation_params: dict with the keys ``angle_range``,
            ``translation_range``, ``brightness_range``, ``contrast_range`` and
            ``blur_range``; the values are forwarded to ``augment_patch``.

    Raises:
        KeyError: if ``augmentation_params`` lacks one of the keys above.
        ValueError: if a class folder contains no ``.jpg`` patch although
            patches were requested.
        IOError: if ``cv2.imread`` cannot read a patch (it returns ``None``).
    """
    angle_range = augmentation_params["angle_range"]
    translation_range = augmentation_params["translation_range"]
    brightness_range = augmentation_params["brightness_range"]
    contrast_range = augmentation_params["contrast_range"]
    blur_range = augmentation_params["blur_range"]

    # Only sub-directories are classes; stray files (e.g. .DS_Store) would
    # otherwise crash os.listdir below. Sorting makes the run order stable.
    classes = sorted(
        entry for entry in os.listdir(source_folder)
        if os.path.isdir(os.path.join(source_folder, entry))
    )

    for class_name in tqdm(classes):
        class_source = os.path.join(source_folder, class_name)
        class_target = os.path.join(target_folder, class_name)
        os.makedirs(class_target, exist_ok=True)

        # os.listdir order is arbitrary; sort so the originals that get copied
        # (and their output numbering) are the same on every run.
        patches = sorted(p for p in os.listdir(class_source) if p.endswith(".jpg"))
        n = len(patches)
        if n == 0 and target_number_per_class > 0:
            raise ValueError(f"No .jpg patches found in {class_source}")

        for i in range(target_number_per_class):
            keep_original = i < n
            patch_name = patches[i] if keep_original else np.random.choice(patches)
            patch_file = os.path.join(class_source, patch_name)

            patch = cv2.imread(patch_file)
            if patch is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise IOError(f"Could not read image {patch_file}")

            if keep_original:
                patch = crop_around_center(patch, 900)
            else:
                patch = augment_patch(patch, angle_range, translation_range, brightness_range, contrast_range, blur_range)

            idx = str(i).zfill(3)
            cv2.imwrite(os.path.join(class_target, idx + ".jpg"), patch)




In [5]:
# Seed NumPy's global RNG so that the np.random.choice draws made by
# perform_augmentation are the same on every Restart & Run All.
# NOTE(review): the random_* helpers in utils.py may draw from a different RNG
# (e.g. the stdlib `random` module) -- confirm and seed that one as well.
SEED = 42
np.random.seed(SEED)

# Ranges forwarded unchanged to the random_* helpers in utils.py.
# NOTE(review): the units (degrees, pixels, intensity offset, gain, kernel
# size) are presumed from the names -- verify against utils.py.
augmentation_params = {
    "angle_range":(0,360),
    "translation_range":(-20,20),
    "brightness_range":(-20,20),
    "contrast_range":(0.8,1.2),
    "blur_range":(0,10)
}

perform_augmentation("processed_data/train/folder_dataset/", "processed_data/train/augmented_dataset/", target_number_per_class=300, augmentation_params=augmentation_params)

100%|██████████| 23/23 [06:06<00:00, 15.92s/it]
