## Installing the necessary libraries

1. Setup

1.1 Import libraries

In [None]:
import tensorflow as tf
import json
import numpy as np
from matplotlib import pyplot as plt
import cv2

2.1 Splitting the Dataset into Train, Test and Validation Sets

In [None]:
import os
import random
import shutil

# Cumulative split boundaries: the first 60% of the shuffled images go to
# train, the next 20% to test and the remaining 20% to validation.
TRAIN_END_FRACTION = 0.6
TEST_END_FRACTION = 0.8

# Fixed seed so that a fresh run of the notebook reproduces the same split.
SPLIT_SEED = 42

# Get the path to the data folder
data_dir = "data"

# Get the path to the images folder
images_dir = os.path.join(data_dir, "images")

# Get the list of all images in the images folder.
# os.listdir returns entries in arbitrary order and also lists
# sub-directories, so keep regular files only and sort them by name first.
images = sorted(
    name for name in os.listdir(images_dir)
    if os.path.isfile(os.path.join(images_dir, name))
)

# Shuffle with a seeded generator: consecutive files often come from the same
# capture session, and slicing them in name order would bias the partitions.
random.Random(SPLIT_SEED).shuffle(images)

# Split the images into train, test and validation sets
train_end = int(len(images) * TRAIN_END_FRACTION)
test_end = int(len(images) * TEST_END_FRACTION)
train_images = images[:train_end]
test_images = images[train_end:test_end]
validation_images = images[test_end:]

# Paths of the train, test and validation folders
train_dir = os.path.join(data_dir, "train")
test_dir = os.path.join(data_dir, "test")
validation_dir = os.path.join(data_dir, "validation")

# Create each partition folder (no error if it already exists) and move its
# share of the images into it.
for directory, partition_images in [
    (train_dir, train_images),
    (test_dir, test_images),
    (validation_dir, validation_images),
]:
    os.makedirs(directory, exist_ok=True)
    for image in partition_images:
        shutil.move(os.path.join(images_dir, image), os.path.join(directory, image))

2.2 Move the Matching Labels

In [3]:
import os
import shutil

# File extensions that are treated as images when looking for a label.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")

# Get the path to the data folder
data_dir = "data"

# NOTE(review): assumes the annotation files live in data/labels and are named
# "<image name without extension>.json" -- confirm against the labelling tool.
shared_labels_dir = os.path.join(data_dir, "labels")

# Get the list of all folders in the data folder
folders = sorted(os.listdir(data_dir))

# Loop over all folders
for folder in folders:

    # Path of the folder that currently holds the images
    images_dir = os.path.join(data_dir, folder)

    # Skip plain files and the shared labels folder itself
    if not os.path.isdir(images_dir) or images_dir == shared_labels_dir:
        continue

    # Places where the label of an image may be found: the shared labels
    # folder first, then a "labels" sub-folder of the current folder (the
    # location the previous version of this cell looked in).
    label_dirs = [shared_labels_dir, os.path.join(images_dir, "labels")]

    # Loop over all images
    for image in os.listdir(images_dir):

        # Match on the file name without extension: the label of
        # "abc.jpg" is "abc.json", not "abc.jpg.json".
        image_stem, image_ext = os.path.splitext(image)
        if image_ext.lower() not in IMAGE_EXTENSIONS:
            continue

        label_name = image_stem + ".json"

        for label_dir in label_dirs:
            label_path = os.path.join(label_dir, label_name)

            # Check if the label exists
            if os.path.exists(label_path):

                # Move the label into the same folder as the image
                shutil.move(label_path, os.path.join(images_dir, label_name))
                break


3. Apply Image Augmentation on Images and Labels using Albumentations

3.1 Sort Each Partition's Files into Images and Labels Folders

In [4]:
import os
import shutil

# Process every partition in one run instead of editing the path and
# re-running the cell by hand for each folder.
for partition in ["train", "test", "validation"]:
    partition_dir = os.path.join("data", partition)

    # Fail early with a clear message if the split has not been created yet
    if not os.path.isdir(partition_dir):
        raise FileNotFoundError(f"Partition folder not found: {partition_dir}")

    # Folder that will hold the images of this partition
    images_dir = os.path.join(partition_dir, "images")

    # Folder that will hold the labels of this partition
    labels_dir = os.path.join(partition_dir, "labels")

    # Create both folders; exist_ok makes the cell safe to re-run
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    # Loop through all the entries in the partition folder. The "images" and
    # "labels" sub-folders have no extension and are therefore left alone.
    for file in os.listdir(partition_dir):
        # Lower-case the extension so that ".JPG" / ".JSON" are sorted as well
        file_ext = os.path.splitext(file)[1].lower()

        # If the file extension is .jpg, move the file to the images folder
        if file_ext == ".jpg":
            shutil.move(os.path.join(partition_dir, file), os.path.join(images_dir, file))

        # If the file extension is .json, move the file to the labels folder
        elif file_ext == ".json":
            shutil.move(os.path.join(partition_dir, file), os.path.join(labels_dir, file))


3.1 Setup Albumentations Transform Pipeline

In [5]:
import albumentations as alb

In [6]:
# Transforms applied, in this order, to every image passed to the augmentor:
# a 450x450 random crop followed by random flips and colour adjustments.
augmentation_steps = [
    alb.RandomCrop(width=450, height=450),
    alb.HorizontalFlip(p=0.5),
    alb.RandomBrightnessContrast(p=0.2),
    alb.RandomGamma(p=0.2),
    alb.RGBShift(p=0.2),
    alb.VerticalFlip(p=0.5),
]

# Bounding boxes are supplied in the 'albumentations' format and their class
# names are passed through the 'class_labels' argument of the augmentor call.
bounding_box_settings = alb.BboxParams(
    format='albumentations',
    label_fields=['class_labels'],
)

augmentor = alb.Compose(augmentation_steps, bbox_params=bounding_box_settings)

3.2 Load a Test Image and Annotation with OpenCV and JSON

In [7]:
# Load one training image to try the augmentation pipeline on.
sample_image_path = os.path.join('data','train', 'images','2a08733595044c579f95d8cb8aaaf909.jpg')
img = cv2.imread(sample_image_path)

# cv2.imread does not raise when the file is missing or unreadable, it returns
# None; fail here instead of in a later cell with a confusing error.
if img is None:
    raise FileNotFoundError(f'Could not read image: {sample_image_path}')

In [8]:
# Parse the JSON annotation that belongs to the sample image.
sample_label_path = os.path.join('data', 'train', 'labels', '2a08733595044c579f95d8cb8aaaf909.json')
with open(sample_label_path, 'r') as label_file:
    label = json.load(label_file)

In [9]:
# Show the points stored for the first shape of the annotation
label['shapes'][0]['points']

[[213.13725490196074, 178.97058823529414],
 [392.5490196078431, 446.61764705882354]]

3.3 Extract Coordinates and Rescale to Match Image Resolution

In [10]:
# Flatten the first two points of the first shape into a single
# [point0[0], point0[1], point1[0], point1[1]] list.
first_shape_points = label['shapes'][0]['points']
coords = [first_shape_points[corner][axis] for corner in (0, 1) for axis in (0, 1)]

In [11]:
# Inspect the flattened coordinate list
coords

[213.13725490196074, 178.97058823529414, 392.5490196078431, 446.61764705882354]

In [None]:
# Rescale the box to the 0..1 range by dividing x values by the image width
# and y values by the image height. The size is read from the loaded image
# rather than hard-coded, and the values are recomputed from the raw label so
# that re-running this cell does not divide already-normalised coordinates again.
img_height, img_width = img.shape[:2]
raw_points = label['shapes'][0]['points']
coords = list(np.divide(
    [raw_points[0][0], raw_points[0][1], raw_points[1][0], raw_points[1][1]],
    [img_width, img_height, img_width, img_height],
))

In [None]:
# Inspect the coordinate list again after the rescaling cell above
coords

3.4 Apply Augmentations and View Results

In [12]:
# Run the augmentor on the sample image with its single 'face' box.
# The augmentor is configured with the 'albumentations' bbox format; the
# stored error output of this cell shows that pixel coordinates are rejected
# (values must lie in [0.0, 1.0]), so the rescaling cell has to run first.
augmented = augmentor(image=img, bboxes=[coords], class_labels=['face'])

ValueError: Expected x_min for bbox [213.13725490196074, 178.97058823529414, 392.5490196078431, 446.61764705882354, 'face'] to be in the range [0.0, 1.0], got 213.13725490196074.

In [13]:
# Values from index 2 onward of the first augmented box
# (presumably its second corner -- confirm against the bbox format)
augmented['bboxes'][0][2:]

NameError: name 'augmented' is not defined

In [None]:
# Show every bounding box returned by the augmentor
augmented['bboxes']

In [None]:
# cv2.imread yields BGR images while matplotlib expects RGB, so convert before
# displaying. cvtColor returns a new array, which also means the rectangle is
# drawn on a preview and not on augmented['image'] itself (re-running the cell
# no longer stacks rectangles on the augmented image).
preview = cv2.cvtColor(augmented['image'], cv2.COLOR_BGR2RGB)
preview_height, preview_width = preview.shape[:2]

# The random crop can leave the image without any box; only draw when one is present.
if len(augmented['bboxes']) > 0:
    x_min, y_min, x_max, y_max = augmented['bboxes'][0][:4]

    # Scale the normalised corners by the actual image size and convert to
    # plain Python ints, which every OpenCV version accepts as point tuples.
    top_left = (int(x_min * preview_width), int(y_min * preview_height))
    bottom_right = (int(x_max * preview_width), int(y_max * preview_height))

    # The preview is RGB, so (255,0,0) is drawn as red.
    cv2.rectangle(preview, top_left, bottom_right, (255,0,0), 2)

plt.imshow(preview)

5. Build and Run Augmentation Pipeline

5.1 Run Augmentation Pipeline

In [None]:
# Number of augmented copies generated for every source image.
AUGMENTATIONS_PER_IMAGE = 60

for partition in ['train','test','validation']:
    source_images_dir = os.path.join('data', partition, 'images')
    source_labels_dir = os.path.join('data', partition, 'labels')

    # Augmented images AND their annotations are written below aug_data so the
    # source dataset stays untouched. cv2.imwrite fails silently when the
    # target folder is missing, so create the folders up front.
    aug_images_dir = os.path.join('aug_data', partition, 'images')
    aug_labels_dir = os.path.join('aug_data', partition, 'labels')
    os.makedirs(aug_images_dir, exist_ok=True)
    os.makedirs(aug_labels_dir, exist_ok=True)

    for image in os.listdir(source_images_dir):
        # splitext keeps dots that are part of the file name ("a.b.jpg" -> "a.b")
        image_stem = os.path.splitext(image)[0]

        img = cv2.imread(os.path.join(source_images_dir, image))
        if img is None:
            # cv2.imread returns None for unreadable or non-image files
            print(f'Skipping unreadable image: {image}')
            continue
        img_height, img_width = img.shape[:2]

        # Tiny placeholder box used for images that have no annotation
        coords = [0,0,0.00001,0.00001]
        label_path = os.path.join(source_labels_dir, f'{image_stem}.json')
        has_label = os.path.exists(label_path)
        if has_label:
            with open(label_path, 'r') as f:
                label = json.load(f)

            # The two annotated corners may have been stored in any order;
            # take min/max so that x_min <= x_max and y_min <= y_max, then
            # normalise by the real image size instead of a fixed 640x480.
            points = label['shapes'][0]['points']
            xs = (points[0][0], points[1][0])
            ys = (points[0][1], points[1][1])
            coords = [min(xs) / img_width, min(ys) / img_height,
                      max(xs) / img_width, max(ys) / img_height]

        try:
            for x in range(AUGMENTATIONS_PER_IMAGE):
                augmented = augmentor(image=img, bboxes=[coords], class_labels=['face'])
                cv2.imwrite(os.path.join(aug_images_dir, f'{image_stem}.{x}.jpg'), augmented['image'])

                annotation = {}
                annotation['image'] = image

                if has_label and len(augmented['bboxes']) > 0:
                    annotation['bbox'] = augmented['bboxes'][0]
                    annotation['class'] = 1
                else:
                    # No annotation, or the box was cropped out of the image
                    annotation['bbox'] = [0,0,0,0]
                    annotation['class'] = 0

                with open(os.path.join(aug_labels_dir, f'{image_stem}.{x}.json'), 'w') as f:
                    json.dump(annotation, f)

        except Exception as e:
            # Best effort: report which image failed and continue with the next one
            print(f'Augmentation failed for {image}: {e}')