In [1]:
!pip install bbaug

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting bbaug
  Downloading bbaug-0.4.2-py3-none-any.whl (13 kB)
Installing collected packages: bbaug
Successfully installed bbaug-0.4.2


In [2]:
from google.colab import drive
drive.mount('/content/drive')

import random
import cv2
from matplotlib import pyplot as plt
import albumentations as A
import pandas as pd
import os
import json
import numpy as np
from PIL import Image as im
from copy import deepcopy
from bbaug.policies.policies import POLICY_TUPLE
from bbaug import policies

Mounted at /content/drive


In [3]:
# Build the four stock bbaug policy sets and print each one for reference.
aug_policy, aug_policy1, aug_policy2, aug_policy3 = (
    policies.policies_v0(),
    policies.policies_v1(),
    policies.policies_v2(),
    policies.policies_v3(),
)
for stock_policy in (aug_policy, aug_policy1, aug_policy2, aug_policy3):
    print(stock_policy)

# A container can also be built from a stock set:
# policy_container = policies.PolicyContainer(aug_policy)

# Earlier variant of the custom policy, kept for reference:
# custom_policy = [
#     [POLICY_TUPLE('Rotate', 1.0, 10), POLICY_TUPLE('Sharpness', 1.0, 1), POLICY_TUPLE('Brightness', 1.0, 7)],
#     [POLICY_TUPLE('Brightness', 1.0, 9)],
#     [POLICY_TUPLE('Sharpness', 1.0, 1)]
# ]

# Custom policy actually used: two single-step sub-policies, each given as
# (name, probability, magnitude) with probability 1.0.
brightness_step = [POLICY_TUPLE('Brightness', 1.0, 9)]
sharpness_step = [POLICY_TUPLE('Sharpness', 1.0, 1)]
custom_policy = [brightness_step, sharpness_step]

print(custom_policy)

# Container the augmentation code draws its policies from.
policy_container = policies.PolicyContainer(custom_policy)

[[policy(name='Translate_X', probability=0.6, magnitude=4), policy(name='Equalize', probability=0.8, magnitude=10)], [policy(name='Translate_Y_BBox', probability=0.2, magnitude=2), policy(name='Cutout', probability=0.8, magnitude=8)], [policy(name='Sharpness', probability=0.0, magnitude=8), policy(name='Shear_X', probability=0.4, magnitude=0)], [policy(name='Shear_Y', probability=1.0, magnitude=2), policy(name='Translate_Y_BBox', probability=0.6, magnitude=6)], [policy(name='Rotate', probability=0.6, magnitude=10), policy(name='Color', probability=1.0, magnitude=6)]]
[[policy(name='Translate_X', probability=0.6, magnitude=4), policy(name='Equalize', probability=0.8, magnitude=10)], [policy(name='Translate_Y_BBox', probability=0.2, magnitude=2), policy(name='Cutout', probability=0.8, magnitude=8)], [policy(name='Sharpness', probability=0.0, magnitude=8), policy(name='Shear_X', probability=0.4, magnitude=0)], [policy(name='Shear_Y', probability=1.0, magnitude=2), policy(name='Translate_Y

In [4]:
def get_images_path(dataset_type):
  """Return the Drive directory that holds the cropped images of one dataset split.

  dataset_type is the split folder name (the notebook uses 'train', 'validation', 'test').
  """
  dataset_root = "/content/drive/My Drive/thyroid_nodule_detection/Freehand to Bounding Box Conversion Fix/fixed_labels/cropped_dataset/dataset_after_augmented_2/"
  images_suffix = "/cropped_images"
  return dataset_root + dataset_type + images_suffix

def get_labels_path(dataset_type):
  """Return the path of the JSON label file belonging to one dataset split.

  dataset_type is the split folder name (the notebook uses 'train', 'validation', 'test').
  """
  dataset_root = "/content/drive/My Drive/thyroid_nodule_detection/Freehand to Bounding Box Conversion Fix/fixed_labels/cropped_dataset/dataset_after_augmented_2/"
  labels_suffix = "/cropped_labels.json"
  return dataset_root + dataset_type + labels_suffix

In [5]:
def process_df(labels_df, dataset_type):
    """Load every labelled case of a split into a DataFrame ready for augmentation.

    Parameters
    ----------
    labels_df : mapping or pandas.DataFrame
        Object whose 'cases' entry iterates over dicts with the keys 'case_id',
        'bboxes' (each box stored as [x, y, width, height]) and 'labels'.
    dataset_type : str
        Split name passed to get_images_path to locate the image folder.

    Returns
    -------
    pandas.DataFrame
        One row per case with the columns 'case_id', 'image' (the array returned
        by cv2.imread with IMREAD_COLOR), 'labels' and 'bboxes', where each box
        has been converted to [xmin, ymin, xmax, ymax].

    Raises
    ------
    FileNotFoundError
        If the image "<case_id>.jpg" cannot be read. cv2.imread signals this by
        returning None, which would otherwise only fail later with an obscure error.
    """
    images_dir = get_images_path(dataset_type)
    case_list = []
    for case_row in labels_df['cases']:
        image_id = case_row['case_id']

        image_path = os.path.join(images_dir, image_id + ".jpg")
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            raise FileNotFoundError(f"Could not read image for case '{image_id}': {image_path}")

        # [x, y, w, h] -> [xmin, ymin, xmax, ymax]
        bboxes = [
            [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]
            for bbox in case_row['bboxes']
        ]

        case_list.append({
            'case_id': image_id,
            'image': image,
            'labels': case_row['labels'],
            'bboxes': bboxes,
        })
    return pd.DataFrame(case_list)

In [6]:
def augment_data(dataset_df):
  """Augment every case of a dataset frame once with a policy drawn from policy_container.

  Parameters
  ----------
  dataset_df : pandas.DataFrame
      Frame with the columns 'case_id', 'image', 'bboxes' and 'labels'. Boxes are
      passed to the augmenter unchanged; process_df supplies them as
      [xmin, ymin, xmax, ymax].

  Returns
  -------
  tuple
      (images_aug, labels_aug). images_aug is the list of augmented images in row
      order. labels_aug is {"cases": [...]} with one dict per row holding the case
      id suffixed with "_aug", the augmented boxes as [xmin, ymin, xmax, ymax]
      clamped to the image bounds, and the row's original label list.

  Notes
  -----
  Uses the module-level policy_container. The policy is selected at random for
  each row, so results differ between runs.
  """
  def _clamp(value, upper):
    # Keep a coordinate inside [0, upper].
    return min(max(value, 0), upper)

  images_aug = []
  cases_aug = []
  for ind in dataset_df.index:
    # select a random policy from the policy set
    policy = policy_container.select_random_policy()

    image = dataset_df['image'][ind]
    bbox_array = dataset_df['bboxes'][ind]
    label_array = dataset_df['labels'][ind]
    img_aug, bbs_aug = policy_container.apply_augmentation(policy, image, bbox_array, label_array)
    images_aug.append(img_aug)

    h, w = image.shape[:2]
    # Columns 1..4 of each returned row are the box corners (column 0 is skipped,
    # presumably the label). Clamp both corners on both sides so a box shifted past
    # an edge collapses onto that edge instead of coming back inverted.
    augmented_bboxes = [
        [_clamp(bbox[1], w), _clamp(bbox[2], h), _clamp(bbox[3], w), _clamp(bbox[4], h)]
        for bbox in bbs_aug
    ]

    # NOTE(review): labels are copied from the input row; if the augmenter ever
    # returns fewer boxes than it was given, labels and boxes no longer line up.
    cases_aug.append({
        "case_id": dataset_df['case_id'][ind] + "_aug",
        "bboxes": augmented_bboxes,
        "labels": label_array
    })

  labels_aug = {
      "cases": cases_aug
  }

  return images_aug, labels_aug

In [7]:
def image_convert(image):
    """Turn a tensor-like image into an 8-bit array with the first axis moved last.

    The argument must provide .clone().cpu().numpy() (presumably a torch tensor
    in channel-first layout with values in [0, 1] — confirm with callers). The
    input object itself is not modified because the work is done on a clone.

    Returns a numpy uint8 array: axes reordered (1, 2, 0), values multiplied by 255
    and cast to uint8.
    """
    channel_first = image.clone().cpu().numpy()
    channel_last = np.transpose(channel_first, (1, 2, 0))
    scaled = channel_last * 255
    return scaled.astype(np.uint8)

def plot_img(image, bbox):
    """Display an image with its bounding boxes drawn as green rectangles.

    Parameters
    ----------
    image : numpy.ndarray
        Image to show. Three-channel images are treated as BGR, the channel order
        cv2.imread produces, and are converted to RGB for matplotlib.
    bbox : iterable
        Boxes as [xmin, ymin, xmax, ymax]; coordinates are truncated to int.

    The input image is left untouched: rectangles are drawn on a deep copy.
    Each box is also printed before it is drawn.
    """
    image_copy = deepcopy(image)

    for i in bbox:
        print(f'bbox in plot_img {i}')
        top_left = (int(i[0]), int(i[1]))
        bottom_right = (int(i[2]), int(i[3]))
        cv2.rectangle(image_copy, top_left, bottom_right, (0, 255, 0), thickness=2)

    # plt.imshow interprets three channels as RGB; without this swap the red and
    # blue channels of an OpenCV image would be exchanged on screen.
    if image_copy.ndim == 3 and image_copy.shape[2] == 3:
        image_copy = cv2.cvtColor(image_copy, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(10, 10))
    plt.imshow(image_copy)
    plt.show()

In [8]:
class NpEncoder(json.JSONEncoder):
    """JSON encoder that additionally understands NumPy integers, floats and arrays."""

    def default(self, obj):
        """Convert NumPy values to plain Python; defer everything else to the base encoder.

        Arrays become nested lists, NumPy integer scalars become int and NumPy
        floating scalars become float. Any other type is passed to
        json.JSONEncoder.default, which raises TypeError.
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        return super().default(obj)

def update_dataset_with_augmented_data(images, original_labels_df, new_labels_dict, dataset_type):
  """Save augmented images and write a label file covering augmented and original cases.

  Parameters
  ----------
  images : sequence
      Augmented images; images[i] belongs to new_labels_dict['cases'][i].
  original_labels_df : mapping or pandas.DataFrame
      Original labels; its 'cases' entry iterates over dicts with 'case_id',
      'bboxes' ([x, y, width, height]) and 'labels'.
  new_labels_dict : dict
      {"cases": [...]} as returned by augment_data, whose boxes are
      [xmin, ymin, xmax, ymax]. This dict is not modified.
  dataset_type : str
      Split name used to resolve the image directory and the label file.

  Raises
  ------
  IOError
      If cv2.imwrite reports that an augmented image could not be written.

  Side effects
  ------------
  Writes "<case_id>.jpg" for every augmented case into the split's image folder
  and overwrites the split's label file with the augmented cases followed by the
  original ones.
  """
  images_dir = get_images_path(dataset_type)
  augmented_cases = new_labels_dict.get('cases')

  for i in range(len(images)):
    image_path = os.path.join(images_dir, augmented_cases[i].get('case_id') + ".jpg")
    # cv2.imwrite returns False instead of raising when the write fails.
    if not cv2.imwrite(image_path, images[i]):
      raise IOError(f"Failed to write augmented image: {image_path}")

  # Augmented boxes arrive as corner coordinates while the label file stores
  # [x, y, width, height]; convert them so the file uses one format throughout.
  combined_cases = [
      {
          **case,
          "bboxes": [[b[0], b[1], b[2] - b[0], b[3] - b[1]] for b in case['bboxes']],
      }
      for case in augmented_cases
  ]

  for oc in original_labels_df['cases']:
    combined_cases.append({
        "case_id": oc['case_id'],
        "bboxes": oc['bboxes'],
        "labels": oc['labels']
    })

  # Build a fresh dict so the caller's new_labels_dict keeps only its augmented cases.
  combined_labels = {**new_labels_dict, "cases": combined_cases}

  with open(get_labels_path(dataset_type), "w") as labels_file:
    json.dump(combined_labels, labels_file, cls=NpEncoder)

In [11]:
# Augment the 'test' split: read its label file, load the images, augment every
# case once, then save the augmented images and rewrite the label file.
# NOTE(review): the label file read here is the same file
# update_dataset_with_augmented_data overwrites, so running this cell again would
# also augment the "_aug" cases produced by the previous run.
original_test_labels_df = pd.read_json(get_labels_path('test'))
test_dataset_df = process_df(original_test_labels_df, 'test')
test_images_aug, test_labels_aug = augment_data(test_dataset_df)
update_dataset_with_augmented_data(test_images_aug, original_test_labels_df, test_labels_aug, 'test')

In [12]:
# Augment the 'validation' split: read its label file, load the images, augment
# every case once, then save the augmented images and rewrite the label file.
# NOTE(review): the label file read here is the same file
# update_dataset_with_augmented_data overwrites, so running this cell again would
# also augment the "_aug" cases produced by the previous run.
original_validation_labels_df = pd.read_json(get_labels_path('validation'))
validation_dataset_df = process_df(original_validation_labels_df, 'validation')
validation_images_aug, validation_labels_aug = augment_data(validation_dataset_df)
update_dataset_with_augmented_data(validation_images_aug, original_validation_labels_df, validation_labels_aug, 'validation')

In [13]:
# Augment the 'train' split: read its label file, load the images, augment every
# case once, then save the augmented images and rewrite the label file.
# NOTE(review): the label file read here is the same file
# update_dataset_with_augmented_data overwrites, so running this cell again would
# also augment the "_aug" cases produced by the previous run.
original_train_labels_df = pd.read_json(get_labels_path('train'))
train_dataset_df = process_df(original_train_labels_df, 'train')
train_images_aug, train_labels_aug = augment_data(train_dataset_df)
update_dataset_with_augmented_data(train_images_aug, original_train_labels_df, train_labels_aug, 'train')

## **TEST AUGMENTATION - VISUALIZE ORIGINAL VS NEW**

In [None]:
# Re-run the augmentation on the loaded test cases and show each original image
# followed by its augmented counterpart, boxes drawn on both.
images_aug, labels_aug = augment_data(test_dataset_df)

# Preview at most 8 cases, fewer when the split is smaller.
num_previews = min(8, len(images_aug))

for i in range(num_previews):
  # process_df already stored these boxes as [xmin, ymin, xmax, ymax], and
  # augment_data returns the same corner format, which is what plot_img draws.
  # Adding the first two coordinates again would push every rectangle's far
  # corner away from the real box.
  original_bboxes = test_dataset_df.get('bboxes').get(i)
  augmented_bboxes = labels_aug.get('cases')[i].get('bboxes')

  plot_img(test_dataset_df.get('image').get(i), original_bboxes)
  plot_img(images_aug[i], augmented_bboxes)

Output hidden; open in https://colab.research.google.com to view.