# Create Sample Images
A collection of n (e.g. 1000) samples will be created for testing the crazy cost function. Each sample is a set of images: a sample image containing two cells, each approximated by a Gaussian; a label matrix; a penalty matrix; and a seed matrix for each cell.

The label matrix will approximate what is expected from annotations by a biologist. Watershed using the seed and fake data image will be used to create the label matrix. Therefore, the label matrix will "split" the labels when the two cells overlap as determined by watershed.

The penalty matrix is part of the crazy cost that penalizes segmentation solutions that merge (or split?) cells.

The placement of the cells will be chosen randomly, except the cells will not touch the border of the image.

In [None]:
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot

import numpy
import os
import pandas
import scipy.ndimage
import skimage.feature
import skimage.filters
import skimage.io
import skimage.morphology
import skimage.segmentation

# User variables
* n: the number of images to generate
* im_size: the number of pixels defining the length and width of the sample images
* cell_radius: the radius of the two sample cells

In [None]:
# Number of sample image sets to generate.
n = 1000
# Side length, in pixels, of each square sample image.
im_size = 64
# Radius, in pixels, of each of the two sample cells.
cell_radius = 10

# Create a table defining the sample set
A table will be created that contains the centroid X and Y of each cell and another column with a boolean that indicates whether or not the cells will overlap. One use of this table will be to ensure we have a meaningful number of cells with overlap for testing.

The table will also contain path information to the images. Each image will be contained within a directory dedicated to the type of image.

In [None]:
def merge_check(xy_array, radius=None):
    """Decide whether two equally sized circular cells overlap.

    Parameters
    ----------
    xy_array : array-like of 4 numbers
        Centroid coordinates ordered as
        ``[cell1_x, cell1_y, cell2_x, cell2_y]``.
    radius : number, optional
        Radius shared by both cells. When omitted, the notebook-level
        ``cell_radius`` is used.

    Returns
    -------
    tuple
        ``(is_merge, cen_dist)`` where ``cen_dist`` is the Euclidean
        distance between the two centroids and ``is_merge`` is ``True``
        when that distance is at most two radii.
    """
    if radius is None:
        radius = cell_radius
    # Work in floating point so lists/tuples are accepted and unsigned
    # integer input cannot wrap around in the subtraction below.
    xy_array = numpy.asarray(xy_array, dtype=float)
    # find the distance between the centroids of each cell
    cen_dist = numpy.linalg.norm(xy_array[:2] - xy_array[2:])
    return bool(cen_dist <= 2 * radius), cen_dist

In [None]:
# Width of the interval centroids are drawn from; shifting the draw by
# cell_radius keeps every centroid in [cell_radius, im_size - cell_radius).
rand_range = im_size - 2 * cell_radius
print(rand_range)

# One row per image; columns = ["cell1_x", "cell1_y", "cell2_x", "cell2_y"]
XY_rand = cell_radius + numpy.random.randint(0, rand_range, size=(n, 4))

In [None]:
# Histogram of every sampled centroid coordinate (x and y of both cells,
# pooled into one flat array).
matplotlib.pyplot.hist(XY_rand.flatten())
matplotlib.pyplot.show()

In [None]:
# Run merge_check on each row of centroids; column 0 of the result holds the
# overlap flag and column 1 the centroid distance.
merge_check_array = numpy.apply_along_axis(merge_check, axis=1, arr=XY_rand)
merged_count = numpy.sum(merge_check_array[:, 0])
print("The percentage of merged cells: {}%".format(merged_count/n*100))

In [None]:
cwd = os.getcwd()
# One output directory per image type, all under ../images relative to cwd.
for subdir in ("images_label", "images_penalty", "images_sample", "images_seed"):
    os.makedirs(os.path.join(cwd, "..", "images", subdir), exist_ok=True)

In [None]:
# File names for each image type, numbered 0 .. n-1 and zero-padded to at
# least three digits.
label_filenames = list(map("label_{0:0>3}.png".format, range(n)))
penalty_filenames = list(map("penalty_{0:0>3}.png".format, range(n)))
sample_filenames = list(map("sample_{0:0>3}.png".format, range(n)))
seed1_filenames = list(map("seed1_{0:0>3}.png".format, range(n)))
seed2_filenames = list(map("seed2_{0:0>3}.png".format, range(n)))

In [None]:
# Column data for the sample table: one entry per generated image set.
df_dict = {
    # numpy.apply_along_axis packs merge_check's (bool, float) pair into a
    # single float array, so convert the flag column back to real booleans.
    "bool_merge" : merge_check_array[:,0].astype(bool),
    "cell1_x" : XY_rand[:,0],
    "cell1_y" : XY_rand[:,1],
    "cell2_x" : XY_rand[:,2],
    "cell2_y" : XY_rand[:,3],
    "centroid_distance" : merge_check_array[:,1],
    "filename_label" : label_filenames,
    "filename_penalty" : penalty_filenames,
    "filename_sample" : sample_filenames,
    "filename_seed1" : seed1_filenames,
    "filename_seed2" : seed2_filenames,
          }

In [None]:
# Assemble the sample table and store it next to the image directories.
df = pandas.DataFrame(data=df_dict)
csv_path = os.path.join(cwd, "..", "images", "sample_data.csv")
df.to_csv(csv_path)

# Create the seed images

In [None]:
def make_seed_images(df_series):
    """Write the two single-pixel seed images for one row of the sample table.

    Each seed image is all zeros except for the pixel at the cell's centroid.

    Parameters
    ----------
    df_series : pandas.Series
        Row providing ``cell1_x``, ``cell1_y``, ``cell2_x``, ``cell2_y``,
        ``filename_seed1`` and ``filename_seed2``.

    Returns
    -------
    None
        The images are written to ``../images/images_seed``.
    """
    seed_dir = os.path.join(cwd, "..", "images", "images_seed")
    cells = (
        ("cell1_x", "cell1_y", "filename_seed1"),
        ("cell2_x", "cell2_y", "filename_seed2"),
    )
    for x_key, y_key, name_key in cells:
        # Save an explicit 8-bit 0/255 mask rather than a boolean array, so
        # the result does not depend on how the image writer converts bools.
        img = numpy.zeros((im_size, im_size), dtype="uint8")
        img[df_series[y_key], df_series[x_key]] = 255
        skimage.io.imsave(os.path.join(seed_dir, df_series[name_key]), img)
    return

In [None]:
# Write the seed images for every row of the table; the result of apply is
# assigned to _ and otherwise unused.
_ = df.apply(make_seed_images, axis=1)

In [None]:
# Take the row labelled 0 as a sample row; later top-level cells reuse
# df_series.
df_series = df.loc[0]
type(df_series)

In [None]:
# Preview both seed pixels of the sample row in a single boolean mask.
img = numpy.zeros((im_size, im_size), dtype="bool_")
for y_key, x_key in (("cell1_y", "cell1_x"), ("cell2_y", "cell2_x")):
    img[df_series[y_key], df_series[x_key]] = True
matplotlib.pyplot.imshow(img)

# Create the fake data

In [None]:
# Build the template ("stamp") for one cell: a single bright pixel in the
# centre of a (2*cell_radius+1)-wide square, blurred with a Gaussian.
guassian_stamp = numpy.zeros((2*cell_radius+1, 2*cell_radius+1), dtype="uint8")
guassian_stamp[cell_radius,cell_radius] = 255
guassian_stamp = skimage.filters.gaussian(guassian_stamp, sigma = cell_radius)
# Rescale so the peak is 127; two overlapping stamps then sum to at most 254.
guassian_stamp = guassian_stamp/numpy.max(guassian_stamp)*127
# astype returns a new array, so the result has to be assigned back;
# otherwise the stamp (and noise_floor below) silently stay floating point.
guassian_stamp = guassian_stamp.astype("uint8")
matplotlib.pyplot.imshow(guassian_stamp)
# Dimmest stamp pixel, kept as a plain int for use as a noise upper bound.
noise_floor = int(numpy.min(guassian_stamp))

In [None]:
def make_sample_images(df_series):
    """Render and save the noisy two-cell sample image for one table row.

    Both cells are drawn by adding the notebook-level ``guassian_stamp``
    centred on the row's centroids, so overlapping stamps add up. Uniform
    integer noise below ``noise_floor`` is added outside the stamp
    footprints, and a second noise layer below ``noise_floor/2`` is added
    over the whole image.

    Parameters
    ----------
    df_series : pandas.Series
        Row providing ``cell1_x``, ``cell1_y``, ``cell2_x``, ``cell2_y``
        and ``filename_sample``.

    Returns
    -------
    None
        The image is written to ``../images/images_sample`` as uint8.
    """
    # randint expects integer bounds, while noise_floor may be a float, so
    # truncate explicitly instead of relying on an implicit conversion.
    imgnoise = numpy.random.randint(int(noise_floor), size=(im_size, im_size))
    imgnoise2 = numpy.random.randint(int(noise_floor/2), size=(im_size, im_size))

    # Truncate the stamp to whole grey levels once, outside the loop.
    stamp = numpy.asarray(guassian_stamp).astype("uint8")

    # Accumulate in a wide integer type so that two overlapping cells plus
    # noise cannot wrap around before the final clip.
    img = numpy.zeros((im_size, im_size), dtype="int64")
    for x_key, y_key in (("cell1_x", "cell1_y"), ("cell2_x", "cell2_y")):
        rows = slice(df_series[y_key] - cell_radius,
                     df_series[y_key] + cell_radius + 1)
        cols = slice(df_series[x_key] - cell_radius,
                     df_series[x_key] + cell_radius + 1)
        img[rows, cols] += stamp
        # The first noise layer only covers the background.
        imgnoise[rows, cols] = 0

    img += imgnoise + imgnoise2
    # Two stacked peaks plus noise can exceed 255; clip and store as uint8
    # so the saved file is a plain 8-bit grey-scale image.
    img = numpy.clip(img, 0, 255).astype("uint8")

    filename = os.path.join(cwd, "..", "images", "images_sample", df_series["filename_sample"])
    skimage.io.imsave(filename, img)
    return

In [None]:
# Write the sample image for every row of the table; the result of apply is
# assigned to _ and otherwise unused.
_ = df.apply(make_sample_images, axis=1)

# Create the label matrices

In [None]:
# Worked example of the labelling pipeline on the sample row (df_series).
filename = os.path.join(cwd, "..", "images", "images_seed", df_series["filename_seed1"])
seed1_img = skimage.io.imread(filename)

filename = os.path.join(cwd, "..", "images", "images_seed", df_series["filename_seed2"])
seed2_img = skimage.io.imread(filename)

# Number the markers explicitly so that label 1 is always cell 1 and label 2
# is always cell 2, even when the two seed pixels are adjacent.
markers = numpy.zeros(seed1_img.shape, dtype="int32")
markers[seed1_img > 0] = 1
markers[seed2_img > 0] = 2

filename = os.path.join(cwd, "..", "images", "images_sample", df_series["filename_sample"])
sample_img = skimage.io.imread(filename)

# Foreground mask: pixels brighter than the Otsu threshold.
thresh = skimage.filters.threshold_otsu(sample_img)
binary = sample_img > thresh

# Flood from the seeds over the inverted intensities, restricted to the
# foreground mask.
labels = skimage.segmentation.watershed(numpy.invert(sample_img), markers, mask=binary)

# Show the labels computed above (make_label_image returns None, so its
# result cannot be displayed).
matplotlib.pyplot.imshow(labels)


In [None]:
def make_label_image(df_series):
    """Create and save the watershed label image for one table row.

    The row's two seed images are turned into markers (1 for cell 1, 2 for
    cell 2). The sample image is thresholded with Otsu's method to get a
    foreground mask, and a watershed on the inverted sample image assigns
    each foreground pixel to one of the two markers.

    Parameters
    ----------
    df_series : pandas.Series
        Row providing ``filename_seed1``, ``filename_seed2``,
        ``filename_sample`` and ``filename_label``.

    Returns
    -------
    None
        The label image is written to ``../images/images_label``.
    """
    filename = os.path.join(cwd, "..", "images", "images_seed", df_series["filename_seed1"])
    seed1_img = skimage.io.imread(filename)

    filename = os.path.join(cwd, "..", "images", "images_seed", df_series["filename_seed2"])
    seed2_img = skimage.io.imread(filename)

    # Number the markers explicitly so that label 1 is always cell 1 and
    # label 2 is always cell 2, even when the two seed pixels are adjacent.
    markers = numpy.zeros(seed1_img.shape, dtype="int32")
    markers[seed1_img > 0] = 1
    markers[seed2_img > 0] = 2

    filename = os.path.join(cwd, "..", "images", "images_sample", df_series["filename_sample"])
    sample_img = skimage.io.imread(filename)

    # Foreground mask: pixels brighter than the Otsu threshold.
    thresh = skimage.filters.threshold_otsu(sample_img)
    binary = sample_img > thresh

    labels = skimage.segmentation.watershed(numpy.invert(sample_img), markers, mask=binary)

    filename = os.path.join(cwd, "..", "images", "images_label", df_series["filename_label"])

    # Labels are only 0, 1 or 2, so store them as a plain 8-bit image.
    skimage.io.imsave(filename, labels.astype("uint8"))

    return

In [None]:
# Write the label image for every row of the table; the result of apply is
# assigned to _ and otherwise unused.
_ = df.apply(make_label_image, axis=1)

# Create penalty matrices

In [None]:
# Load the sample row's label image and split it into one boolean mask per
# cell label.
label_dir = os.path.join(cwd, "..", "images", "images_label")
filename = os.path.join(label_dir, df_series["filename_label"])
label_img = skimage.io.imread(filename)

cell1_bool_img, cell2_bool_img = (label_img == cell_id for cell_id in (1, 2))
matplotlib.pyplot.imshow(cell2_bool_img)

In [None]:
# Worked example of the penalty matrix on the sample row (df_series).
filename = os.path.join(cwd, "..", "images", "images_label", df_series["filename_label"])
label_img = skimage.io.imread(filename)

cell1_bool_img = label_img == 1
cell2_bool_img = label_img == 2

matplotlib.pyplot.imshow(label_img)

# Single-channel matrix, matching what make_penalty_image builds: +1 on
# cell 1, -1 on cell 2 and 0 elsewhere.
img = numpy.zeros((im_size,im_size), dtype="float")

img[cell1_bool_img] = 1
img[cell2_bool_img] = -1

# NOTE(review): the matrix holds negative floats; confirm that the -1
# entries survive being written to a .png file.
filename = os.path.join(cwd, "..", "images", "images_penalty", df_series["filename_penalty"])
skimage.io.imsave(filename, img)

In [None]:
def make_penalty_image(df_series):
    """Create and save the penalty matrix for one table row.

    The penalty matrix is +1 where the label image equals 1, -1 where it
    equals 2, and 0 everywhere else.

    Parameters
    ----------
    df_series : pandas.Series
        Row providing ``filename_label`` and ``filename_penalty``.

    Returns
    -------
    None
        The matrix is written to ``../images/images_penalty``.
    """
    # Label images live in images_label (where make_label_image writes
    # them), not in images_seed.
    filename = os.path.join(cwd, "..", "images", "images_label", df_series["filename_label"])
    label_img = skimage.io.imread(filename)

    img = numpy.zeros((im_size, im_size), dtype="float")
    img[label_img == 1] = 1
    img[label_img == 2] = -1

    # NOTE(review): the matrix holds negative floats; confirm that the -1
    # entries survive being written to a .png file.
    filename = os.path.join(cwd, "..", "images", "images_penalty", df_series["filename_penalty"])
    skimage.io.imsave(filename, img)
    return