In [1]:
import os
import glob
import napari
import numpy as np
from PIL import Image
from matplotlib import image
from matplotlib import pyplot as plt 
from tifffile import imread, imwrite
from sklearn.model_selection import train_test_split

In [None]:
folder = 'nuclie00'

# Gather the image and mask file paths. Sorting makes the order deterministic;
# later cells pair image k with mask k by position (this assumes the file names
# sort the same way in both folders -- TODO confirm).
training_imgs = sorted(glob.glob("mem/images/*.jpg"))
training_masks = sorted(glob.glob("mem/masks/*.tiff"))

# Stack every image, at its native size, into a single float32 array.
image_stack = []
for path in training_imgs:
    picture = Image.open(path)
    picture.load()
    image_stack.append(np.array(picture, dtype='float32'))

train_imgs = np.array(image_stack)
print(train_imgs.shape, train_imgs.dtype)

# Stack every mask, at its native size, into a single uint8 array.
mask_stack = []
for path in training_masks:
    label_img = Image.open(path)
    label_img.load()
    mask_stack.append(np.array(label_img, dtype='uint8'))

train_masks = np.array(mask_stack)
print(train_masks.shape, train_masks.dtype)

# Quick visual check: images in gray with the masks overlaid as a labels layer.
viewer = napari.view_image(train_imgs, name='nucl1', colormap='gray')
viewer.add_labels(train_masks, name='mask_final')

In [7]:
# Re-establish a deterministic ordering of both path lists before splitting.
training_imgs, training_masks = sorted(training_imgs), sorted(training_masks)

In [8]:
# Hold out 10% of the (image, mask) path pairs for testing; the fixed seed keeps
# the split reproducible.
# NOTE: this cell overwrites training_imgs / training_masks with the training
# subset, so running it twice without re-running the loading cell splits the
# already-split lists again.
split = train_test_split(
    training_imgs,
    training_masks,
    test_size=0.1,
    random_state=0,
)
training_imgs, testing_imgs, training_masks, testing_masks = split

In [None]:
# Report how many paths ended up on each side of the split.
for caption, paths in (
    ('Num training imgs:  ', training_imgs),
    ('Num training masks: ', training_masks),
    ('Num testing imgs:   ', testing_imgs),
    ('Num testing masks:  ', testing_masks),
):
    print(caption, len(paths))

In [4]:
def flip_labels(img):
    """
    Swap label values 1 and 2 in a mask, for masks whose nucleus and
    chromosome labels were stored the wrong way round.

    The array is modified in place and the same array object is returned.
    All other label values are left untouched.
    """
    # Both selections are computed before any write, so the two assignments
    # cannot interfere with each other.
    ones_mask = img == 1
    twos_mask = img == 2

    img[twos_mask] = 1
    img[ones_mask] = 2

    return img

In [10]:
def add_label(mask, img, limit=0):
    """
    Add an extra background class for pixels that lie outside of the cell.

    Every existing label in ``mask`` is shifted up by one, and every pixel
    whose intensity in ``img`` is less than or equal to ``limit`` is assigned
    the new class 0. Pixels above the threshold keep their (shifted) class, so
    background pixels inside the cell stay distinct from the new class.

    Parameters
    ----------
    mask : array of integer labels. It is not modified.
    img : array of pixel intensities, indexable against ``mask``
        (same shape is assumed -- the boolean selection below requires it).
    limit : threshold; pixels with ``img <= limit`` become class 0.

    Returns
    -------
    A new array with the dtype that ``mask + 1`` produces.
    """
    outside = (img <= limit)

    # Shift first and then zero the selected pixels. Writing -1 into the mask
    # before shifting only works for unsigned masks through integer
    # wraparound, and NumPy 2 rejects that assignment with an OverflowError.
    shifted = mask + 1
    shifted[outside] = 0

    return shifted

### Create multiple training sets of images in form of numpy arrays

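- train_imgs: every image is resized to 1700×1700 (nominally the original dimensions)
- train_imgs2: every image is resized to 1000×1000 (the "factor 2" set; note that this is about 1.7× smaller than 1700, not exactly 2×)
- train_imgs4: every image is resized to 500×500 (the "factor 4" set; about 3.4× smaller than 1700)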

In [None]:
# Build three stacks of the training images, one per target resolution.
stack_full, stack_half, stack_quarter = [], [], []

for path in training_imgs:
    # Every image is first brought to a common 1700x1700 grid; the two smaller
    # versions are derived from that resized image, not from the file itself.
    full = Image.open(path).resize((1700, 1700))
    half = full.resize((1000, 1000))
    quarter = full.resize((500, 500))

    for picture, stack in ((full, stack_full), (half, stack_half), (quarter, stack_quarter)):
        picture.load()
        stack.append(np.array(picture, dtype='float32'))

train_imgs = np.array(stack_full)
train_imgs2 = np.array(stack_half)
train_imgs4 = np.array(stack_quarter)

for stack in (train_imgs, train_imgs2, train_imgs4):
    print(stack.shape, stack.dtype)

In [None]:
viewer = napari.view_image(train_imgs, colormap='gray', name='nucl1')

# On a top-to-bottom run, train_masks at this point is still the unsplit,
# unresized stack from the first loading cell, which no longer lines up with
# the resized training images. Only overlay the masks when the stacks agree.
if train_masks.shape[:3] == train_imgs.shape[:3]:
    viewer.add_labels(train_masks, name='mask_final')
else:
    print('Skipping mask overlay: train_masks', train_masks.shape,
          'does not match train_imgs', train_imgs.shape)

### Create multiple testing sets of images in form of numpy arrays

As above, three sets are created with varying resolution

In [None]:
# Build three stacks of the held-out test images, one per target resolution.
stack_full, stack_half, stack_quarter = [], [], []

for path in testing_imgs:
    # Same scheme as for the training images: resize to 1700x1700 first, then
    # derive the 1000x1000 and 500x500 versions from that resized image.
    full = Image.open(path).resize((1700, 1700))
    half = full.resize((1000, 1000))
    quarter = full.resize((500, 500))

    for picture, stack in ((full, stack_full), (half, stack_half), (quarter, stack_quarter)):
        picture.load()
        stack.append(np.array(picture, dtype='float32'))

test_imgs = np.array(stack_full)
test_imgs2 = np.array(stack_half)
test_imgs4 = np.array(stack_quarter)

for stack in (test_imgs, test_imgs2, test_imgs4):
    print(stack.shape, stack.dtype)

In [None]:
# These are testing images with no masks (no ground truth).
# NOTE(review): `testing_noGT` (the list of file paths iterated below) is not
# defined in any cell visible in this notebook, so this cell raises a NameError
# on a fresh "Restart & Run All" -- define it before this cell.

test_noGT = []
test_noGT2 = []
test_noGT4 = []

for j in range(len(testing_noGT)):
    img = Image.open(testing_noGT[j])
    
    # Unlike the train/test cells, the full-size image is NOT resized to
    # 1700x1700 here; only the two reduced versions are resized.
    # NOTE(review): if the source images differ in size, stacking test_noGT
    # below will not give a regular array -- confirm this is intended.
    img2 = img.resize((1000,1000))
    img4 = img.resize((500,500))

    img.load()
    img2.load()
    img4.load()
    
    # Convert each version to a float32 array.
    img = np.array(img, dtype='float32')
    img2 = np.array(img2, dtype='float32')
    img4 = np.array(img4, dtype='float32')

    test_noGT.append(img)
    test_noGT2.append(img2)
    test_noGT4.append(img4)
    
# Turn the per-image lists into stacked arrays.
test_noGT = np.array(test_noGT)
test_noGT2 = np.array(test_noGT2)
test_noGT4 = np.array(test_noGT4)

print(test_noGT.shape, test_noGT.dtype)
print(test_noGT2.shape, test_noGT2.dtype)
print(test_noGT4.shape, test_noGT4.dtype)

### Creates multiple training sets of image masks 

Notes: 
- all masks have an additional class added for non-cell background (see the `add_label()` calls in the cell below)

In [None]:
# Intensity threshold: image pixels at or below this value are treated as
# non-cell background by add_label().
limit = 150

# Each mask is thresholded against the training image at the same index, so
# the two collections must line up one-to-one.
assert len(training_masks) == len(train_imgs), "training masks and images are not paired"

masks_full, masks_half, masks_quarter = [], [], []

for j, mask_path in enumerate(training_masks):
    mask = Image.open(mask_path)

    # Label images must be resized with nearest-neighbour sampling. The default
    # filter interpolates between neighbouring pixels and would invent label
    # values at class boundaries (e.g. a 1 between a 0 and a 2).
    mask = mask.resize((1700, 1700), resample=Image.NEAREST)
    mask2 = mask.resize((1000, 1000), resample=Image.NEAREST)
    mask4 = mask.resize((500, 500), resample=Image.NEAREST)

    mask.load()
    mask2.load()
    mask4.load()

    mask = np.array(mask, dtype='uint8')
    mask2 = np.array(mask2, dtype='uint8')
    mask4 = np.array(mask4, dtype='uint8')

    # NOTE: masks with swapped labels can be corrected here with flip_labels().
    # The correction was previously listed for indices [6, 24, 27, 30, 39] but
    # was disabled, and it stays disabled here.

    # Add the extra non-cell background class, using the image at the matching
    # resolution to decide which pixels lie outside the cell.
    mask = add_label(mask, train_imgs[j], limit=limit)
    mask2 = add_label(mask2, train_imgs2[j], limit=limit)
    mask4 = add_label(mask4, train_imgs4[j], limit=limit)

    masks_full.append(mask)
    masks_half.append(mask2)
    masks_quarter.append(mask4)

train_masks = np.array(masks_full)
train_masks2 = np.array(masks_half)
train_masks4 = np.array(masks_quarter)

print(train_masks.shape, train_masks.dtype)
print(train_masks2.shape, train_masks2.dtype)
print(train_masks4.shape, train_masks4.dtype)

In [None]:
# Inspect the training images (gray) with their masks overlaid as labels.
viewer = napari.view_image(train_imgs, name='nucl1', colormap='gray')
viewer.add_labels(train_masks, name='mask_final')

### Creates multiple testing sets of image masks

Notes:
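- masks 23 and 59 had inverted labels (this can be corrected with `flip_labels()`; note that the cell below does not currently call it)
- all masks have an additional class added for non-cell background (see the `add_label()` calls in the cell below)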

In [None]:
# Intensity threshold: image pixels at or below this value are treated as
# non-cell background by add_label().
limit = 150

# Each mask is thresholded against the test image at the same index, so the
# two collections must line up one-to-one.
assert len(testing_masks) == len(test_imgs), "testing masks and images are not paired"

masks_full, masks_half, masks_quarter = [], [], []

for j, mask_path in enumerate(testing_masks):
    mask = Image.open(mask_path)

    # Label images must be resized with nearest-neighbour sampling. The default
    # filter interpolates between neighbouring pixels and would invent label
    # values at class boundaries (e.g. a 1 between a 0 and a 2).
    mask = mask.resize((1700, 1700), resample=Image.NEAREST)
    mask2 = mask.resize((1000, 1000), resample=Image.NEAREST)
    mask4 = mask.resize((500, 500), resample=Image.NEAREST)

    mask.load()
    mask2.load()
    mask4.load()

    mask = np.array(mask, dtype='uint8')
    mask2 = np.array(mask2, dtype='uint8')
    mask4 = np.array(mask4, dtype='uint8')

    # Add the extra non-cell background class, using the image at the matching
    # resolution to decide which pixels lie outside the cell.
    mask = add_label(mask, test_imgs[j], limit=limit)
    mask2 = add_label(mask2, test_imgs2[j], limit=limit)
    mask4 = add_label(mask4, test_imgs4[j], limit=limit)

    masks_full.append(mask)
    masks_half.append(mask2)
    masks_quarter.append(mask4)

test_masks = np.array(masks_full)
test_masks2 = np.array(masks_half)
test_masks4 = np.array(masks_quarter)

print(test_masks.shape, test_masks.dtype)
print(test_masks2.shape, test_masks2.dtype)
print(test_masks4.shape, test_masks4.dtype)

### We verify the number of labels in the masks has increased to 4

In [None]:
# Distinct label values found in the first three training masks.
train_label_values = np.unique(train_masks[:3])
print('Num of unique train mask values: ', len(train_label_values))
print(train_label_values)

In [None]:
# Distinct label values found in the first thirteen test masks.
test_label_values = np.unique(test_masks[:13])
print('Num of unique test mask values: ', len(test_label_values))
print(test_label_values)

### This histogram justifies the background threshold value of limit=150

In [None]:
# Histogram of pixel intensities for the first training image.
for j in range(1):

    plt.figure(figsize=(12,5))
    plt.rcParams.update({'font.size': 14})
    plt.hist(train_imgs[j,:,:].flatten(), bins=20)
    plt.title('Histogram')
    # A histogram puts the measured value on the x-axis and the counts on the
    # y-axis; the two labels were previously swapped.
    plt.xlabel('Pixel intensity')
    plt.ylabel('Frequency')
    plt.show()
    

### Options for viewing data

In [41]:
# Inspect the test images (gray) with their masks overlaid as labels.
viewer = napari.view_image(test_imgs, name='nucl1', colormap='gray')
viewer.add_labels(test_masks, name='mask_final')

<Labels layer 'mask_final' at 0x7f0345ce1fa0>

In [None]:
# Static view of the second test mask (rows 6 onwards, all columns).
plt.rcParams.update({'font.size': 16})
fig = plt.figure(figsize=(10, 4))

mask_view = test_masks[1, 6:, :]
plt.imshow(mask_view)

fig.tight_layout()
plt.show()

### Finally, we save the data in a path consistent with the train_and_segment script

In [38]:
# Root folder for the processed stacks; one sub-folder per resolution.
output_root = 'training_processed_images_mem'

# (sub-folder, train images, train masks, test images, test masks)
datasets = (
    ('raw', train_imgs, train_masks, test_imgs, test_masks),
    ('factor2', train_imgs2, train_masks2, test_imgs2, test_masks2),
    ('factor4', train_imgs4, train_masks4, test_imgs4, test_masks4),
)

for subfolder, imgs_train, masks_train, imgs_test, masks_test in datasets:
    resultpath = output_root + '/' + subfolder

    # makedirs also creates the parent folder when it is missing (os.mkdir
    # fails on a nested path in that case), and exist_ok makes the cell safe
    # to re-run.
    os.makedirs(resultpath, exist_ok=True)

    imwrite(resultpath + '/train.tif', imgs_train)
    imwrite(resultpath + '/masks.tif', masks_train)
    imwrite(resultpath + '/test.tif', imgs_test)
    imwrite(resultpath + '/test_masks.tif', masks_test)