## Get data ready to convert

In [1]:
# imports

import random
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os

In [2]:
# read a folder of images and turn them into an array

def read_images_to_array(folder_path):
    """Load every ``.jpg`` / ``.png`` file found directly in ``folder_path``.

    Directory entries are visited in sorted filename order. The extension
    test is case-sensitive, and entries for which ``cv2.imread`` returns
    ``None`` are skipped.

    Args:
        folder_path: Directory to scan (sub-directories are not descended).

    Returns:
        list: The loaded images, in sorted filename order.
    """
    loaded = []
    for name in sorted(os.listdir(folder_path)):
        # Only lowercase ".jpg" / ".png" suffixes qualify.
        if not name.endswith((".jpg", ".png")):
            continue

        decoded = cv2.imread(os.path.join(folder_path, name))
        if decoded is None:
            continue

        loaded.append(decoded)

    return loaded

def split_images(image_array):
    """Partition images by the colour of the probe pixel at row 25, column 100.

    An image whose probe-pixel channel values sum to ``255 * 3`` goes to
    the red-region list; every other image goes to the raw list. Relative
    order is preserved within each list.

    Args:
        image_array: Iterable of image arrays indexable at ``[25, 100]``.

    Returns:
        tuple: ``(red_region_images, raw_images)``.
    """
    white_total = 255 * 3
    red_region, raw = [], []

    for img in image_array:
        target = red_region if img[25, 100].sum() == white_total else raw
        target.append(img)

    return red_region, raw

def crop_raw_images(image_array):
    """Keep a filled circle of each image and paint everything else white.

    The circle is centred at ``(320, 240)`` (x, y) with radius 200. Pixels
    where the circle mask is zero are set to 255.

    Args:
        image_array: Sequence of image arrays.

    Returns:
        list: One masked copy per input image, in input order.
    """
    results = []

    for frame in image_array:
        # Mask has the same shape (including channels) as the frame.
        disc = np.zeros(frame.shape, dtype=np.uint8)
        disc = cv2.circle(disc, (320, 240), 200, (255, 255, 255), -1)

        kept = cv2.bitwise_and(frame, disc)
        kept[disc == 0] = 255

        results.append(kept)

    return results

def crop_masks(image_array):
    """Keep a filled circle of each image and paint everything else white.

    The circle is centred at ``(250, 257)`` (x, y) with radius 135. The
    single-channel mask is built from the image's own height and width, so
    its shape always matches the image; the previous explicit shape check
    could never fail and has been removed.

    Args:
        image_array: Iterable of image arrays.

    Returns:
        list: One masked copy per input image, in input order.
    """
    cropped_masks = []

    for image in image_array:
        mask = np.zeros(image.shape[:2], dtype=np.uint8)
        mask = cv2.circle(mask, (250, 257), 135, (255, 255, 255), -1)

        res = cv2.bitwise_and(image, image, mask=mask)
        # The 2-D mask selects whole pixels, so every channel outside the circle becomes 255.
        res[mask == 0] = 255

        cropped_masks.append(res)

    return cropped_masks

def create_binary_masks(image_array):
    """Threshold each image in HSV space and return the resulting masks.

    A pixel is selected when its HSV value lies between ``[0, 150, 115]``
    and ``[255, 255, 255]``. The hue bounds span 0-255, so in practice the
    selection depends on saturation (>= 150) and value (>= 115) rather
    than on a particular hue.

    Args:
        image_array: Iterable of images. 2-D arrays are treated as
            grayscale and expanded to BGR; 3-D arrays must have exactly
            three channels.

    Returns:
        list: One ``cv2.inRange`` mask per input image, in input order.

    Raises:
        ValueError: If a 3-D image does not have exactly three channels.
    """
    # Loop-invariant threshold bounds.
    lower_bound = np.array([0, 150, 115])
    upper_bound = np.array([255, 255, 255])

    binary_masks = []

    for image in image_array:
        if image.ndim == 2:
            image_color = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        elif image.shape[2] != 3:
            raise ValueError("Input image must have 3 channels (BGR format).")
        else:
            image_color = image

        hsv = cv2.cvtColor(image_color, cv2.COLOR_BGR2HSV)

        # Only the mask is needed; the masked colour image that used to be
        # computed here was never used.
        binary_masks.append(cv2.inRange(hsv, lower_bound, upper_bound))

    return binary_masks

def create_blank_masks(image_array):
    """Return one all-zero 256x256 ``uint8`` mask per entry of ``image_array``.

    Note: this name is defined again in a later cell of the notebook; when
    the cells run top to bottom, that later definition replaces this one.

    Args:
        image_array: Sized collection; only its length is used.

    Returns:
        list: ``len(image_array)`` independent zero-filled 2-D arrays.
    """
    # One mask per image. The previous ``len(image_array) - 1`` bound
    # produced one mask too few.
    return [np.zeros((256, 256), dtype=np.uint8) for _ in range(len(image_array))]

def zoom_at(image_array, zoom, coord=None):
    """Scale each image by ``zoom`` and cut a window out of the scaled result.

    The window has the scaled dimensions divided back by ``zoom`` and is
    centred on ``coord`` (given in unscaled ``(x, y)`` coordinates), or on
    the middle of the scaled image when ``coord`` is ``None``.

    Args:
        image_array: Iterable of 3-D image arrays.
        zoom: Scale factor passed to ``cv2.resize`` as ``fx`` and ``fy``.
        coord: Optional ``(x, y)`` centre in the unscaled image.

    Returns:
        list: One cropped, scaled image per input, in input order.
    """
    results = []

    for frame in image_array:
        # Scaled height and width; the scaled channel count is discarded.
        h, w, _ = (zoom * dim for dim in frame.shape)

        if coord is None:
            cx, cy = w/2, h/2
        else:
            cx, cy = [zoom*c for c in coord]

        # Half-extent of the window along each axis.
        half_h = h/zoom * .5
        half_w = w/zoom * .5
        top, bottom = int(round(cy - half_h)), int(round(cy + half_h))
        left, right = int(round(cx - half_w)), int(round(cx + half_w))

        scaled = cv2.resize(frame, (0, 0), fx=zoom, fy=zoom)
        results.append(scaled[top:bottom, left:right, :])

    return results

def double_crop(image_array):
    """Slice every image to rows ``0:515`` and columns ``70:570``.

    Args:
        image_array: Iterable of image arrays with at least two dimensions.

    Returns:
        list: The sliced arrays (NumPy views for ndarray inputs), in input order.
    """
    return [frame[0:515, 70:570] for frame in image_array]

def add_padding(image_array):
    """Add a 17-pixel constant white border above and below every image.

    No border is added on the left or right.

    Args:
        image_array: Iterable of image arrays.

    Returns:
        list: The padded images, in input order.
    """
    white = (255, 255, 255)
    return [
        cv2.copyMakeBorder(frame, 17, 17, 0, 0, cv2.BORDER_CONSTANT, value=white)
        for frame in image_array
    ]


In [3]:
# function to center-crop images to 256x256

def crop_images(image_array):
    """Center-crop every image to a 256x256 window.

    Fixes over the earlier version:
      * every image is processed (the loop used to stop one short and drop
        the last image);
      * the top-left corner is clamped to 0, so an image smaller than the
        box along some axis is returned whole along that axis instead of
        being sliced from a wrapped-around negative index.

    Args:
        image_array: Iterable of image arrays with at least two dimensions.

    Returns:
        list: One cropped array per input image, in input order.
    """
    # Bounding box dimensions
    box_width, box_height = 256, 256

    cropped_images = []

    for image in image_array:
        image_height, image_width = image.shape[:2]

        x_top_left = max((image_width - box_width) // 2, 0)
        y_top_left = max((image_height - box_height) // 2, 0)

        cropped_images.append(
            image[y_top_left:y_top_left + box_height,
                  x_top_left:x_top_left + box_width]
        )

    return cropped_images

In [4]:
# function to create blank masks 

def create_blank_masks(image_array):
    """Build one zero-filled 256x256 ``uint8`` mask for each input image.

    Args:
        image_array: Sized collection; only its length is used.

    Returns:
        list: ``len(image_array)`` independent all-zero 2-D arrays.
    """
    blank_shape = (256, 256)
    return [np.zeros(blank_shape, dtype=np.uint8) for _ in range(len(image_array))]

In [5]:
# Locations of the three image folders
folder_path_training = './data/training_images'
folder_path_val = './data/val_images'
folder_path_empty = './data/images_wo_tumor'

# Load each folder into a list of image arrays
train_images = read_images_to_array(folder_path_training)
val_images = read_images_to_array(folder_path_val)
ntumor_images = read_images_to_array(folder_path_empty)

# Separate the red-region (mask) images from the raw images
train_masks, train_raw = split_images(train_images)
val_masks, val_raw = split_images(val_images)

# Images without tumors get one blank mask each
ntumor_masks = create_blank_masks(ntumor_images)

# Mask pipeline: circular crop -> 1.4x zoom -> binary threshold
train_masks = create_binary_masks(zoom_at(crop_masks(train_masks), 1.4, coord=None))
val_masks = create_binary_masks(zoom_at(crop_masks(val_masks), 1.4, coord=None))

# Raw pipeline: circular crop -> rectangular crop -> white padding on top and bottom
train_raw = add_padding(double_crop(crop_raw_images(train_raw)))
val_raw = add_padding(double_crop(crop_raw_images(val_raw)))

## Now convert the binary masks to COCO JSON

### Only run this once

In [6]:
def save_images(image_array, folder_path, base_filename='image'):
    """Write each image to ``folder_path`` as ``<base_filename>_<n>.png``.

    Files are numbered from 1 in iteration order. The destination folder
    (including missing parents) is created when needed.

    Args:
        image_array: Iterable of images accepted by ``cv2.imwrite``.
        folder_path: Destination directory.
        base_filename: Filename prefix placed before ``_<n>.png``.

    Returns:
        None. Progress is reported on stdout.
    """
    # exist_ok=True replaces the check-then-create sequence, which could
    # fail if the folder appeared between the check and the creation.
    os.makedirs(folder_path, exist_ok=True)

    for idx, img in enumerate(image_array, start=1):
        file_path = os.path.join(folder_path, f"{base_filename}_{idx}.png")

        # cv2.imwrite signals failure through its return value; surface it
        # instead of dropping the file silently.
        if not cv2.imwrite(file_path, img):
            print(f"Warning: could not write {file_path}")

    print(f"Images have been saved to {folder_path}")

In [7]:
# Export the binary training masks into the "Tumor" category folder.
save_images(train_masks, './data/coco_json/masks/Tumor', base_filename='image')

Images have been saved to ./data/coco_json/masks/Tumor


In [8]:
# Export the processed raw training images using the same image_<n>.png naming.
# NOTE(review): pairing these with the masks by index assumes both lists are in matching order — confirm.
save_images(train_raw, './data/coco_json/images', base_filename='image')

Images have been saved to ./data/coco_json/images


In [9]:
# imports

import os
import json
import os
import cv2
import glob

# Maps each category name (also used as the mask sub-folder name) to its COCO category id.
category_ids = {
    "Tumor": 0
}

# Extension globbed for mask files, and extension used for the image file name recorded in the JSON.
MASK_EXT = 'png'
ORIGINAL_EXT = 'png'
# Module-level counters that the conversion functions in the following cells update via `global`.
image_id = 0
annotation_id = 0

In [10]:
def images_annotations_info(maskpath):
    """Collect COCO ``images`` and ``annotations`` entries from mask files.

    For every category in ``category_ids``, the files matching
    ``<maskpath>/<category>/*.<MASK_EXT>`` are read, thresholded, and each
    contour with a positive area becomes one annotation.

    Changes over the earlier version:
      * mask files are processed in sorted order, so the assigned ids do
        not depend on the order ``glob`` happens to return;
      * a mask that ``cv2.imread`` cannot read is skipped with a warning
        instead of failing on ``None.shape``;
      * the file stem is taken with ``os.path.splitext`` so names that
        contain extra dots are not truncated;
      * known file names are tracked in a dict rather than rescanning the
        ``images`` list for every mask;
      * contours are taken from position ``-2`` of the ``findContours``
        result, which is the contour list both when two and when three
        values are returned.

    Args:
        maskpath: Directory containing one sub-folder per category name.

    Returns:
        tuple: ``(images, annotations, annotation_id)`` where
        ``annotation_id`` is the global counter after processing.

    Side effects:
        Advances the module-level ``image_id`` and ``annotation_id``.
    """
    global image_id, annotation_id
    annotations = []
    images = []
    images_by_name = {}  # file_name -> image entry already emitted

    for category, category_id in category_ids.items():
        pattern = os.path.join(maskpath, category, f'*.{MASK_EXT}')

        for mask_image in sorted(glob.glob(pattern)):
            stem = os.path.splitext(os.path.basename(mask_image))[0]
            original_file_name = f'{stem}.{ORIGINAL_EXT}'

            mask_image_open = cv2.imread(mask_image)
            if mask_image_open is None:
                print(f"Warning: could not read mask {mask_image}; skipping")
                continue

            height, width = mask_image_open.shape[:2]

            image = images_by_name.get(original_file_name)
            if image is None:
                # Image ids start at 1; the counter holds the last id issued.
                image_id += 1
                image = {
                    "id": image_id,
                    "width": width,
                    "height": height,
                    "file_name": original_file_name,
                }
                images.append(image)
                images_by_name[original_file_name] = image

            gray = cv2.cvtColor(mask_image_open, cv2.COLOR_BGR2GRAY)
            _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

            for contour in contours:
                area = cv2.contourArea(contour)
                # Zero-area contours are not emitted.
                if not area > 0:
                    continue

                annotations.append({
                    "iscrowd": 0,
                    "id": annotation_id,
                    "image_id": image['id'],
                    "category_id": category_id,
                    "bbox": cv2.boundingRect(contour),
                    "area": area,
                    "segmentation": [contour.flatten().tolist()],
                })
                annotation_id += 1

    return images, annotations, annotation_id

In [11]:
def process_masks(mask_path, dest_json):
    """Build a COCO-style dictionary from a mask folder and write it as JSON.

    The module-level ``image_id`` and ``annotation_id`` counters are reset
    to 0 first, then ``images_annotations_info`` supplies the ``images``
    and ``annotations`` sections. The result is dumped with sorted keys
    and a 4-space indent, and a one-line summary is printed.

    Args:
        mask_path: Folder handed to ``images_annotations_info``.
        dest_json: Path of the JSON file to write.
    """
    global image_id, annotation_id
    image_id = annotation_id = 0

    # One category entry per configured name; the name doubles as supercategory.
    categories = []
    for name, cat_id in category_ids.items():
        categories.append({"id": cat_id, "name": name, "supercategory": name})

    images, annotations, annotation_cnt = images_annotations_info(mask_path)

    coco_format = {
        "info": {},
        "licenses": [],
        "images": images,
        "categories": categories,
        "annotations": annotations,
    }

    with open(dest_json, "w") as outfile:
        json.dump(coco_format, outfile, sort_keys=True, indent=4)

    print("Created %d annotations for images in folder: %s" % (annotation_cnt, mask_path))

In [12]:
# Convert the saved training masks into a COCO annotation file.
# The JSON is written inside the images folder.
train_mask_path = "./data/coco_json/masks"
train_json_path = "./data/coco_json/images/train.json"
process_masks(train_mask_path, train_json_path)

Created 347 annotations for images in folder: ./data/coco_json/masks
