# Run prediction on a whole folder and create a stacked NumPy file for each image

In [6]:
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import random
#import requests
from io import BytesIO
from PIL import Image
import numpy as np
import os
import cv2
from matplotlib.image import imread


These are the relevant imports for the detection model.

In [7]:
from maskrcnn_benchmark.config import cfg
# default figure size (width, height) for every matplotlib figure in this notebook
pylab.rcParams['figure.figsize'] = 20, 12

# importing the prediction class
from predictor import NUCLEIdemo

# make sure that pytorch is installed correctly, check
# https://github.com/rusty1s/pytorch_geometric/issues/114
# for troubleshooting if CUDA errors occur

ImportError: libcudart.so.10.0: cannot open shared object file: No such file or directory

The NUCLEIdemo class takes the loaded configuration and performs the image prediction.

In [9]:
configuration_file = "../configs/nuclei_1gpu_nonorm_offline_res50.yaml"

# start from the options stored in the config file
cfg.merge_from_file(configuration_file)

# manual overrides, applied one after another in the order listed
_cfg_overrides = (
    # run the model on the CPU
    ("MODEL.DEVICE", "cpu"),
    # change dimensions of test images
    ('INPUT.MAX_SIZE_TEST', '2049'),
    # change number of classes (classes + 1 for background)
    ('MODEL.ROI_BOX_HEAD.NUM_CLASSES', '4'),
    # change normalization, here model was not normalized
    ('INPUT.PIXEL_MEAN', [0., 0., 0.]),
    # define model for prediction to use here
    ('MODEL.WEIGHT', '/home/maxsen/DEEPL/model_final.pth'),
    ('OUTPUT_DIR', '.'),
)
for _option, _setting in _cfg_overrides:
    cfg.merge_from_list([_option, _setting])

# show the configuration
print(cfg)

DATALOADER:
  ASPECT_RATIO_GROUPING: False
  NUM_WORKERS: 4
  SIZE_DIVISIBILITY: 32
DATASETS:
  TEST: ('coco_offline_augmented_test',)
  TRAIN: ('coco_offline_augmented_train', 'coco_offline_augmented_val')
INPUT:
  HEIGHT_IS_WIDTH: False
  MAX_SIZE_TEST: 2049
  MAX_SIZE_TRAIN: 1025
  MIN_SIZE_TEST: 800
  MIN_SIZE_TRAIN: (1024,)
  ONLINE_AUGMENT: False
  PIXEL_MEAN: [0.0, 0.0, 0.0]
  PIXEL_STD: [1.0, 1.0, 1.0]
  TO_BGR255: True
MODEL:
  BACKBONE:
    CONV_BODY: R-50-FPN
    FREEZE_CONV_BODY_AT: 2
    USE_GN: False
  CLS_AGNOSTIC_BBOX_REG: False
  DEVICE: cpu
  FBNET:
    ARCH: default
    ARCH_DEF: 
    BN_TYPE: bn
    DET_HEAD_BLOCKS: []
    DET_HEAD_LAST_SCALE: 1.0
    DET_HEAD_STRIDE: 0
    DW_CONV_SKIP_BN: True
    DW_CONV_SKIP_RELU: True
    KPTS_HEAD_BLOCKS: []
    KPTS_HEAD_LAST_SCALE: 0.0
    KPTS_HEAD_STRIDE: 0
    MASK_HEAD_BLOCKS: []
    MASK_HEAD_LAST_SCALE: 0.0
    MASK_HEAD_STRIDE: 0
    RPN_BN_TYPE: 
    RPN_HEAD_BLOCKS: 0
    SCALE_FACTOR: 1.0
    WIDTH_DIVISOR: 1
  FPN

## Multiple ways of loading and plotting images

For my purposes, load_cv2 worked best because it took all image formats into account.

In [6]:
# load image
def load(path):
    """Open the image at ``path`` with PIL and return it as a BGR numpy array.

    The image is first converted to RGB; the channel axis is then reordered
    to blue, green, red.
    """
    bgr_order = [2, 1, 0]
    rgb_pixels = np.array(Image.open(path).convert("RGB"))
    # index the last axis with the reversed channel list -> BGR
    return rgb_pixels[:, :, bgr_order]

def load_matplot(path):
    """Read the image at ``path`` with matplotlib's ``imread`` and return the array."""
    return imread(path)

def load_cv2(path):
    """Load an image with OpenCV and return it as an 8-bit, 3-channel BGR array.

    The file is read with ``cv2.IMREAD_ANYDEPTH``, expanded from gray to three
    channels, min-max normalized to the range 0..255 and cast to ``uint8``.

    Parameters
    ----------
    path : str
        Path of the image file.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with three channels.

    Raises
    ------
    IOError
        If OpenCV cannot read the file (missing, unreadable or unsupported).
    """
    raw = cv2.imread(path, cv2.IMREAD_ANYDEPTH)
    # cv2.imread does not raise on failure, it silently returns None;
    # fail here with a clear message instead of an obscure cvtColor error
    if raw is None:
        raise IOError("Could not read image: {}".format(path))

    img = cv2.cvtColor(raw, cv2.COLOR_GRAY2BGR)
    # stretch the intensities to 0..255 before the cast to 8 bit
    img = cv2.normalize(img, img, 0, 255, cv2.NORM_MINMAX)
    img = np.uint8(img)
    return img

def load_pil(path):
    """Load an integer-typed image with PIL and return it rescaled to ``uint8``.

    The pixel values are divided by the maximum of the image's integer dtype
    and multiplied by 255, then cast to ``uint8``.

    The former ``cv2.imshow`` call was removed: without a following
    ``cv2.waitKey`` the window is never drawn, and it fails on machines without
    a display. The image is returned instead (like the other loaders do), so
    it can be shown with ``imshow_single``.

    Parameters
    ----------
    path : str
        Path of the image file.

    Returns
    -------
    numpy.ndarray
        The rescaled image as ``uint8`` array.
    """
    image = np.array(Image.open(path))

    info = np.iinfo(image.dtype) # Get the information of the incoming image type
    print(info)
    data = image.astype(np.int32) / info.max # normalize the data to 0 - 1
    data = 255 * data # Now scale by 255
    return data.astype(np.uint8)
    

# show image alongside the result and save if necessary
def imshow(img, result, save_path=None):
    """Plot ``img`` and ``result`` side by side without axes.

    If ``save_path`` is given, the figure is written there before it is shown.
    The figure is shown in either case.
    """
    fig = plt.figure()
    for position, picture in enumerate((img, result), start=1):
        panel = fig.add_subplot(1, 2, position)
        panel.imshow(picture)
        # acts on the subplot that was just created
        plt.axis('off')

    if save_path:
        plt.savefig(save_path, bbox_inches = 'tight')
    plt.show()
        
def imshow_single(result, save_path=None):
    """Plot ``result`` alone without axes.

    With ``save_path`` the figure is saved and closed (not shown);
    without it the figure is shown.
    """
    canvas = plt.figure()
    canvas.add_subplot(1, 1, 1).imshow(result)
    plt.axis('off')

    if not save_path:
        plt.show()
        return

    plt.savefig(save_path, bbox_inches='tight')
    plt.close()

In [8]:
# the minimum image size and the confidence threshold can be tuned here
nuclei_detect = NUCLEIdemo(cfg, min_image_size=1024, confidence_threshold=0.3)

NameError: name 'NUCLEIdemo' is not defined

### Define the image paths and do the prediction on the whole folder

In [42]:
# make a stacked numpy file from the image and its prediction,
# where the first plane (along the last axis) is the image and every other plane is one predicted mask

def make_numpy(prediction, image, path):
    """Save the image and all predicted masks as one stacked numpy file.

    The saved array is stacked along the last axis: plane 0 is the first
    channel of ``image``; every further plane is one predicted mask in which
    all pixels greater than 0 are set to the label of that prediction.

    The masks are copied before the label is written. ``Tensor.numpy()``
    shares its memory with the tensor, so writing into the converted array
    directly would silently overwrite the masks stored in ``prediction``.

    Parameters
    ----------
    prediction : object
        Object with an ``extra_fields`` attribute that holds the entries
        ``'mask'`` (indexable collection of tensors, one per detection,
        with a leading axis of length 1) and ``'labels'`` (one label per
        detection).
    image : numpy.ndarray
        Image with three channels on the last axis.
    path : str
        Target file; ``np.save`` appends ``.npy`` if it is missing.
    """
    fields = vars(prediction)['extra_fields']
    list_masks = fields['mask']

    # plane 0: first channel of the input image
    masks_to_save = [np.squeeze(np.dsplit(image, 3)[0], axis=2)]

    for i, label in enumerate(fields['labels']):
        # detach/cpu make the conversion work for non-CPU or grad tensors too
        numpy_mask = list_masks[i].detach().cpu().numpy().transpose(1, 2, 0)
        # drop the singleton axis and copy, so the prediction stays untouched
        numpy_mask = np.squeeze(numpy_mask, axis=2).copy()

        label_value = label.detach().cpu() if hasattr(label, 'detach') else label
        numpy_mask[numpy_mask > 0] = np.asarray(label_value)

        masks_to_save.append(numpy_mask)

    # save the numpy array
    np.save(path, np.dstack(masks_to_save))
    

In [25]:
# predict for a folder of images

# input: folder with the images to predict on
img_path = '/data/proj/smFISH/Simone/test_intron/AMEXP20181106/AMEXP20181106_hyb1/test_run_20181123_AMEXP20181106_hyb1_filtered_png/test_run_20181123_AMEXP20181106_hyb1_DAPI_filtered_png/'

# output root: subfolder that collects all results
save_results = '/data/proj/smFISH/Students/Max_Senftleben/files/results/'

# output: images with their masks drawn in
save_independently = os.path.join(save_results, '20190329_test_run_20181123_AMEXP20181106_hyb1_DAPI_filtered_png/')

# output: predicted stacked numpy files
save_npy = os.path.join(save_results, '20190329_test_run_20181123_AMEXP20181106_hyb1_DAPI_filtered_npy/')

def save_pred_as_numpy():
    """Predict every image in ``img_path`` and save the results.

    For each file in the module-level folder ``img_path`` the image is loaded
    with ``load_cv2`` and passed to ``nuclei_detect``. Two files are written:

    * ``<name>_pred.png`` in ``save_independently`` - the image returned by
      the predictor (for visualization),
    * ``<name>_pred.npy`` in ``save_npy`` - the stacked array written by
      ``make_numpy``.

    ``<name>`` is the file name without its extension. The output folders are
    created if they do not exist, sub-directories of ``img_path`` are skipped,
    and the files are handled in sorted order.
    """
    for out_dir in (save_independently, save_npy):
        os.makedirs(out_dir, exist_ok=True)

    for one_image in sorted(os.listdir(img_path)):
        source = os.path.join(img_path, one_image)
        if not os.path.isfile(source):
            continue
        print("Image {} is handled.".format(one_image))
        image = load_cv2(source)

        # prediction is done
        result, prediction = nuclei_detect.run_on_opencv_image_original(image)

        # splitext instead of [:-4], so extensions of any length are removed correctly
        stem = os.path.splitext(one_image)[0]

        # png image is saved with masks (for visualization)
        Image.fromarray(result).save(os.path.join(save_independently, stem + '_pred.png'))
        # numpy files are saved
        make_numpy(prediction, image, os.path.join(save_npy, stem + '_pred.npy'))

        # optionally, the results can be shown
        #imshow(image, result)
        


In [26]:
# check predicted numpy files
# can also be used to check the chunks from below
# pick one saved file at random (a different file on every run) ...
random_img = random.choice(os.listdir(save_npy))
mask = np.load(save_npy+random_img)
# ... and split it along the last axis into one array per plane
mask_list = np.dsplit(mask, mask.shape[2])

# The loop below is disabled by wrapping it in a string literal.
# When enabled it prints and plots every plane together with its unique values.
'''
for i in mask_list:
    print(i)
    plt.imshow(np.squeeze(i, axis=2))
    plt.show()
    print(np.unique(i))
'''

In [193]:
# numpy arrays from above have the size of the original image
# here, the arrays can be sliced so that they can be used in training in the next step
# after this step, the chunks can further be used in the creation of the data set
def chunking_labeled_images(number_chunks_dimension, old_chunks, new_chunks):
    """Cut every stacked ``.npy`` file of a folder into a grid of chunks.

    Each array in ``old_chunks`` is split into ``number_chunks_dimension``
    equal parts along the height and along the width. The chunks are numbered
    row by row. A chunk is saved as ``<name>chunk<idx>.npy`` in ``new_chunks``
    only if at least one of its mask planes (planes 1..n of the last axis)
    contains a non-zero pixel. The saved chunk holds plane 0 (the image) plus
    the non-empty mask planes.

    Plane 0 is always kept and never counts as a labeled object. Otherwise
    chunks without any object would be saved because of their image content,
    and a completely dark image plane would be dropped, which shifts the
    first mask into the image position.

    Parameters
    ----------
    number_chunks_dimension : int
        Number of chunks per dimension; height and width of the arrays must
        be divisible by it (``np.split`` raises ``ValueError`` otherwise).
    old_chunks : str
        Folder with the stacked numpy files; other files are ignored.
    new_chunks : str
        Folder for the chunks; it is created if it does not exist.
    """
    os.makedirs(new_chunks, exist_ok=True)

    for file_name in sorted(os.listdir(old_chunks)):
        if not file_name.endswith('.npy'):
            continue
        stack = np.load(os.path.join(old_chunks, file_name))
        stem = os.path.splitext(file_name)[0]

        # split into rows first, then every row into columns (row-major order)
        tiles = [
            tile
            for row in np.split(stack, number_chunks_dimension, axis=0)
            for tile in np.split(row, number_chunks_dimension, axis=1)
        ]

        for idx, tile in enumerate(tiles):
            # indices of the mask planes that contain at least one labeled pixel;
            # +1 because the search starts behind the image plane
            occupied = np.flatnonzero((tile[:, :, 1:] != 0).any(axis=(0, 1))) + 1
            # Make sure that the image has labeled objects
            if not occupied.size:
                continue

            planes_to_keep = np.concatenate(([0], occupied))
            chunk_path = os.path.join(new_chunks, stem + 'chunk' + str(idx))
            np.save(chunk_path, tile[:, :, planes_to_keep])

In [194]:
# folder with the full-size stacked numpy files
old = '/data/proj/smFISH/Students/Max_Senftleben/files/results/20190329_test_run_20181123_AMEXP20181106_hyb1_DAPI_filtered_npy/'
# folder that receives the chunks
new = '/data/proj/smFISH/Students/Max_Senftleben/files/data/20190422_AMEX_transfer_nuclei/npy/'
# number of chunks per dimension
dim = 2

chunking_labeled_images(number_chunks_dimension=dim, old_chunks=old, new_chunks=new)
