In [None]:
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
import scipy.ndimage as nd
from pathlib import Path
import os
from itertools import chain
from math import pi, sqrt, cos, sin
import random
from detectron2.structures import BoxMode
import json

In [None]:
# Root directory the synthetic dataset is written to (relative to the notebook's
# working directory); it holds one sub-directory per split.
data_path = Path("../data/synthetic")
# Total number of images to generate across the train and test splits.
dataset_size = 5000

# The generator code draws from two independent random sources: NumPy's global RNG
# and the stdlib `random` module. Seed both so that Restart & Run All reproduces
# the exact same dataset.
random_seed = 42
np.random.seed(random_seed)
random.seed(random_seed)

In [None]:
# Make sure both split directories exist. This runs unconditionally (mkdir with
# exist_ok=True is a no-op for directories that are already there) so that a
# pre-existing but incomplete output tree is repaired instead of failing later
# when the images are saved.
for split in ("train", "test"):
    (data_path / split).mkdir(parents=True, exist_ok=True)

# Start from a clean slate: delete every file found in an immediate sub-directory
# of data_path. Nested directories are skipped because os.remove() cannot delete them.
for file in data_path.glob("*/*"):
    if file.is_file():
        os.remove(file)

In [None]:
def draw_brain(img, noise=25):
    """Draw a noisy grey disc (the "brain") onto ``img`` in place.

    The centre is drawn uniformly from [150, 400) on both axes and the radius
    from [70, 100). Neither range depends on ``img.shape``, so on images whose
    sides are shorter than about 500 px the disc may be cut off by the border.

    Args:
        img: image array to draw on; modified in place. Values are written in
            the 0..255 range, i.e. the array is expected to be uint8.
        noise: standard deviation of the Gaussian noise applied around the
            base grey level of 128.

    Returns:
        1-D integer array ``(x, y, r)``: centre coordinates and radius.
    """
    brain_center = np.random.randint(150, 400, 2)
    brain_size = np.random.randint(70, 100)

    # Rasterise the clean disc on a scratch canvas first. Plain Python ints are
    # passed because some OpenCV builds reject NumPy integer types for points.
    brain = np.zeros(img.shape, np.uint8)
    center = (int(brain_center[0]), int(brain_center[1]))
    cv.circle(brain, center, int(brain_size), (128, 128, 128), -1)

    # Take the footprint from the clean disc, before noise is applied, so that a
    # strongly negative noise sample cannot punch a hole into the shape.
    mask = brain > 0

    # Clip before casting: writing an out-of-range integer into a uint8 array
    # silently wraps around modulo 256 instead of saturating.
    noisy = np.clip(np.random.normal(brain, noise), 0, 255).astype(np.uint8)
    img[mask] = noisy[mask]

    return np.append(brain_center, brain_size) # (x,y,r)

def draw_tumor(img, brain, noise=50):
    """Draw a noisy green disc (the "tumor") inside the brain, in place.

    Args:
        img: image array to draw on; modified in place. Values are written in
            the 0..255 range, i.e. the array is expected to be uint8.
        brain: ``(x, y, r)`` of the brain disc the tumor must lie within.
        noise: standard deviation of the Gaussian noise applied around the
            base green level of 255.

    Returns:
        1-D integer array ``(x, y, r)``: tumor centre coordinates and radius.

    Raises:
        ValueError: propagated from ``np.random.randint`` when the brain is so
            small that no radius of at least 5 fits at the chosen centre.
    """
    # Pick a centre inside a disc of radius (r - 10) around the brain centre.
    # Taking the square root of the uniform sample makes the points uniform
    # over the disc's area instead of clustering near the middle.
    ang = random.uniform(0, 1) * 2 * pi
    hyp = sqrt(random.uniform(0, 1)) * (brain[2] - 10)
    tumor_center = [int(brain[0] + cos(ang) * hyp), int(brain[1] + sin(ang) * hyp)]

    # Distance from the tumor centre to the brain edge bounds the radius so the
    # tumor stays inside the brain; 16 is the hard upper limit (exclusive).
    max_dist = brain[2] - sqrt((brain[0] - tumor_center[0])**2 + (brain[1] - tumor_center[1])**2)
    tumor_size = np.random.randint(5, int(min(max_dist, 16)))

    # Rasterise the clean disc on a scratch canvas. Only the green component of
    # the colour is non-zero, so only that channel ends up in the mask below.
    tumor = np.zeros(img.shape, np.uint8)
    cv.circle(tumor, tuple(tumor_center), tumor_size, (0, 255, 0), -1)

    # Footprint is taken from the clean shape, before noise is applied.
    mask = tumor > 0

    # Clip before casting. The base level is already 255, so roughly half of the
    # noisy samples exceed the uint8 range; without clipping they wrap around
    # modulo 256 and show up as dark speckles instead of saturating.
    noisy = np.clip(np.random.normal(tumor, noise), 0, 255).astype(np.uint8)
    img[mask] = noisy[mask]

    return np.append(tumor_center, tumor_size) # (x,y,r)

def make_entry(dir, pt, positive, img_h=512, img_w=512, channels=3):
    """Generate one synthetic image, save it as ``.npy`` and return its record.

    Args:
        dir: directory (``pathlib.Path``) the image file is written to.
        pt: integer index of the sample; formatted into the id ``CmetNNNN``.
        positive: probability in [0, 1] that the image contains a tumor.
        img_h: nominal image height before the random size offset.
        img_w: nominal image width before the random size offset.
        channels: number of image channels.

    Returns:
        dict with ``file_name``, ``image_id``, ``height``, ``width`` and
        ``annotations``. ``annotations`` is empty for negative samples and holds
        a single XYWH box around the tumor for positive ones.
    """
    patient_id = f"Cmet{pt:04}"

    # The same offset in [-50, 50) is applied to both sides, so the difference
    # between height and width is preserved while the overall size varies.
    offset = np.random.randint(-50,50)
    img_h += offset
    img_w += offset

    img = np.zeros((img_h, img_w, channels),np.uint8)
    brain_info = draw_brain(img)

    annotations = []

    # A continuous uniform draw honours any probability exactly. The previous
    # integer comparison (randint(10) < 10*positive) rounded it to tenths, so
    # e.g. positive=0.05 produced tumors 10% of the time.
    if np.random.random() < positive:
        x, y, r = (int(v) for v in draw_tumor(img, brain_info))

        annotations.append(
            {
                # Circle (centre, radius) -> box (top-left x, top-left y, width, height).
                "bbox": [x - r, y - r, 2 * r, 2 * r],
                "bbox_mode": BoxMode.XYWH_ABS,
                "category_id": 0
            }
        )

    data = {
        "file_name": f"{patient_id}.npy",
        "image_id": patient_id,
        "height": img_h,
        "width": img_w,
        "annotations": annotations
    }

    # Spell out the extension so the path on disk visibly matches "file_name"
    # (np.save would otherwise append ".npy" on its own).
    np.save(dir / f"{patient_id}.npy", img)

    return data

def create_dataset(size, test_split=0.3, positive=0.1):
    """Generate ``size`` synthetic images and write one ``dataset.json`` per split.

    Each sample is assigned independently to the test split with probability
    ``test_split`` and to the train split otherwise, so the realised split sizes
    vary around the requested ratio.

    Args:
        size: total number of images to generate.
        test_split: probability that a sample goes to the test split; must be < 1.
        positive: probability that a sample contains a tumor.

    Raises:
        AssertionError: if ``test_split`` leaves no room for a training split.
    """
    train_split = 1.0 - test_split
    assert train_split > 0, "test_split must be smaller than 1"

    # Insertion order (train, then test) is also the order the files are written in.
    records = {"train": [], "test": []}

    for num in range(size):
        # A continuous uniform draw honours any ratio exactly. The previous
        # integer comparison (randint(100) < 100*test_split) rounded it to whole
        # percents and was exposed to float error, e.g. 0.07*100 evaluates to
        # 7.000000000000001 and therefore selected 8% of the samples.
        split = "test" if np.random.random() < test_split else "train"
        records[split].append(make_entry(data_path / split, num, positive))

    for split, split_records in records.items():
        with open(data_path / split / "dataset.json", "w") as json_file:
            json.dump(split_records, json_file, indent=4)

# Generate the images and the per-split dataset.json files on disk, using the
# default test ratio and tumor probability.
create_dataset(size=dataset_size)

In [None]:
def show_info(pt):
    """Load the array saved at ``pt`` and display it as an image.

    Args:
        pt: path of a ``.npy`` file, in any form ``np.load`` accepts.
    """
    pixels = np.load(pt)

    # Open a new figure on every call so successive previews do not draw over
    # one another.
    plt.figure()
    plt.imshow(pixels)

# Number of training images to preview; 0 (or a negative value) disables the preview.
limit = 0

# Match only the saved arrays: the train directory also contains dataset.json,
# which np.load() cannot read. Sorting makes the previewed subset deterministic
# instead of depending on the filesystem's listing order.
for pt in sorted(data_path.glob("train/*.npy"))[:max(limit, 0)]:
    show_info(str(pt))