In [1]:
import os
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import cv2
import re

In [2]:
def read_and_display_tiff_images(file_paths, imagenames, cmap='gray'):
    """
    Show several image files side by side in a single-row matplotlib figure.

    parameters:
    file_paths (list): paths of the image files to display.
    imagenames (list): subplot titles, indexed in parallel with file_paths.
    cmap (str): colormap forwarded to plt.imshow (default 'gray').

    A path that does not exist is reported with a printed message; its
    subplot slot is simply left empty because slots are assigned by position.
    """
    total = len(file_paths)
    plt.figure(figsize=(15, 5))

    for idx, path in enumerate(file_paths):
        # Guard clause: report missing files and move on to the next one.
        if not os.path.exists(path):
            print(f"file not found: {path}")
            continue

        picture = Image.open(path)
        plt.subplot(1, total, idx + 1)
        plt.imshow(picture, cmap=cmap)
        plt.title(imagenames[idx])
        plt.axis('off')

    plt.show()

# draw all mask function
def show_anns(anns, random_color=False):
    """
    Overlay every annotation mask on the current matplotlib axes.

    parameters:
    anns (list): dicts that each provide 'area' (used for ordering) and
        'segmentation' (a 2-D array whose values scale the overlay opacity).
    random_color (bool): when True every mask gets a random RGB colour drawn
        from np.random; otherwise a fixed colour is used for all masks.

    Nothing is drawn when anns is empty.
    """
    if len(anns) == 0:
        return

    # Largest masks are drawn first, so smaller ones are painted on top of them.
    ordered = sorted(anns, key=lambda ann: ann['area'], reverse=True)
    ax = plt.gca()
    ax.set_autoscale_on(False)

    for ann in ordered:
        mask = ann['segmentation']
        if random_color:
            rgb = np.random.random((1, 3)).tolist()[0]
        else:
            rgb = [110/255, 77/255, 109/255]

        # Solid-colour RGB image, broadcast over all pixels.
        overlay = np.ones((mask.shape[0], mask.shape[1], 3))
        overlay[:] = rgb
        # The mask scaled by 0.35 is stacked as the alpha channel.
        ax.imshow(np.dstack((overlay, mask*0.35)))

# draw box and label in XYWH format
def show_box2(box, ax, label, iflabel):
    """
    Draw a green, unfilled rectangle for an XYWH box on the given axes.

    parameters:
    box: sequence whose first four items are x, y, width, height.
    ax: matplotlib axes that receives the patch (and the optional text).
    label: text written at the box's (x, y) position.
    iflabel: the label is drawn only when this equals the string "y".
    """
    left, top, width, height = box[0], box[1], box[2], box[3]
    outline = plt.Rectangle(
        (left, top), width, height,
        edgecolor='green', facecolor=(0,0,0,0), lw=1,
    )
    ax.add_patch(outline)
    if iflabel != "y":
        return
    ax.text(left, top, label, color='red')

# convert box from xywh to xyxy
def convert_box_xywh_to_xyxy(box):
    """
    Convert a box from [x, y, width, height] to [x1, y1, x2, y2].

    Only the first four items of box are read; a new list is returned.
    """
    left, top, width, height = box[0], box[1], box[2], box[3]
    return [left, top, left + width, top + height]

def extract_number(file_name):
    """
    Return the integer that sits between the last '_' and a trailing '.tiff'.

    The name must end in '_<digits>.tiff'; any other name yields 0.
    """
    found = re.search(r'_(\d+)\.tiff$', file_name)
    if not found:
        return 0
    return int(found.group(1))

# read single tiff
def wedoonetif(img_path):
    """
    Read a single image file and return it as a boolean array.

    The file is loaded with cv2.imread, reduced to one grayscale channel, and
    converted so that every non-zero pixel is True and every zero pixel is False.

    parameters:
    img_path (str): path of the image to read.

    raises:
    OSError: when cv2.imread cannot load the file (it returns None then).
    """
    img = cv2.imread(img_path)
    if img is None:
        # Fail here with the offending path instead of inside cvtColor.
        raise OSError(f"could not read image: {img_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # astype converts values; assigning to .dtype would only reinterpret the
    # raw uint8 bytes and leave non-canonical booleans (e.g. byte value 255).
    return gray.astype(bool)

def convert_uint16_to_uint8(image_array):
    """
    Rescale an array so its maximum maps to 255 and cast the result to uint8.

    Values are multiplied by 255.0 / max and truncated by the uint8 cast.
    When the maximum is not greater than 0, an all-zero uint8 array of the
    same shape is returned instead.
    """
    peak = np.max(image_array)

    # Nothing to scale: return a blank uint8 array of matching shape.
    if not (peak > 0):
        return np.zeros(image_array.shape, dtype=np.uint8)

    scale = 255.0 / peak
    return (image_array * scale).astype(np.uint8)

def segment_image(image, segmentation_mask):
    """
    Cut the masked region out of an image and place it on a black RGB canvas.

    parameters:
    image: PIL image; its pixels are read through np.array and its .size
        determines the size of the returned canvas.
    segmentation_mask: array used as a boolean index into the pixel array
        (assumed to match the image's height and width - TODO confirm).

    returns:
    A new RGB PIL image of image.size, black wherever the mask is False.
    """
    pixels = np.array(image)
    if pixels.dtype == np.uint16:
        # 16-bit input is rescaled to 8-bit before building the PIL image.
        pixels = convert_uint16_to_uint8(pixels)

    # Copy only the masked pixels; everything else stays zero.
    masked_pixels = np.zeros_like(pixels)
    masked_pixels[segmentation_mask] = pixels[segmentation_mask]
    foreground = Image.fromarray(masked_pixels)

    # 8-bit paste mask: 255 inside the segmentation, 0 outside.
    alpha = np.zeros_like(segmentation_mask, dtype=np.uint8)
    alpha[segmentation_mask] = 255
    alpha_image = Image.fromarray(alpha, mode='L')

    # Paste the foreground through the mask onto an all-black canvas.
    canvas = Image.new("RGB", image.size, (0, 0, 0))
    canvas.paste(foreground, mask=alpha_image)
    return canvas

def list_directories(path):
    """
    Return the names of the immediate sub-directories of path.

    Names (not full paths) are returned in the order os.listdir reports them;
    entries that are not directories are left out.
    """
    return [
        entry
        for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    ]

In [3]:
def _bbox_records(objectcsv):
    """Turn each row of a per-object table into a dict with 'bbox' (XYWH), 'ObjectNumber' and 'area'."""
    records = []
    for _, row in objectcsv.iterrows():
        x = row['AreaShape_BoundingBoxMinimum_X']
        y = row['AreaShape_BoundingBoxMinimum_Y']
        w = row['AreaShape_BoundingBoxMaximum_X'] - x
        h = row['AreaShape_BoundingBoxMaximum_Y'] - y
        records.append({
            'bbox': [x, y, w, h],
            'ObjectNumber': int(row['ObjectNumber']),
            'area': int(row['AreaShape_Area']),
        })
    return records


def _read_label_image(label_path):
    """Read a label image unchanged, raising OSError when OpenCV cannot load it."""
    label_image = cv2.imread(label_path, cv2.IMREAD_UNCHANGED)
    if label_image is None:
        raise OSError(f"could not read label image: {label_path}")
    return label_image


def _attach_segmentations(records, label_image, what):
    """Give every record a boolean 'segmentation' mask taken from the label image."""
    # Distinct non-zero pixel values, sorted ascending; each is one object label.
    label_values = np.unique(label_image[label_image != 0])
    # Records and labels are paired by position.
    # NOTE(review): this assumes the table rows are ordered like the sorted
    # label values (e.g. by ObjectNumber) - confirm against the exporter.
    if len(label_values) != len(records):
        raise ValueError(
            f"{what}: {len(records)} table rows but {len(label_values)} labels in the label image"
        )
    for record, value in zip(records, label_values):
        record['segmentation'] = label_image == value


def _export_channel(source_path, maskall_image, masks, object_type, channel, exportpath, common):
    """Write the crop, mask crop and text description of every object for one imaging channel."""
    channel_dir = channel.lower()
    image_dir = os.path.join(exportpath, object_type, 'image', channel_dir)
    text_dir = os.path.join(exportpath, object_type, 'text', channel_dir)
    mask_dir = os.path.join(exportpath, object_type, 'mask', channel_dir)
    for directory in (image_dir, text_dir, mask_dir):
        os.makedirs(directory, exist_ok=True)

    # Binary mask of all objects, stored next to the per-object mask crops.
    maskall_image.save(os.path.join(mask_dir, 'maskall.png'))

    cellname1 = common['cellname1']
    cellname2 = common['cellname2']
    photograph = common['photograph']
    compound = common['compound']
    concentration = common['concentration']

    # The context manager closes the source file once all crops are written.
    with Image.open(source_path) as source_image:
        for i, mask in enumerate(masks):
            area = mask['area']
            exportdict = {
                'cellname1' : cellname1,
                'cellname2' : cellname2,
                'type' : object_type,
                'originalimagesize' : common['originalimagesize'],
                'photograph' : photograph,
                'compound' : compound,
                'concentration' : concentration,
                'bbox' : mask['bbox'],
                'ObjectNumber' : mask['ObjectNumber'],
                'area' : area,
                'channel' : channel,
                'description' : f'''This image depicts {cellname1} cells, which are a type of {cellname2}, captured using {photograph}, with the imaging channel set to {channel}, the cells were treated with {compound} compound at a concentration of {concentration} for 24 hours, the {object_type} area is {area} pixels.''',
                'title' : f'''{cellname1} Cells Treated with {compound}, {photograph} ({channel} Channel), {object_type} Area {area} px'''
            }
            crop_box = convert_box_xywh_to_xyxy(mask['bbox'])

            # Masked channel crop.
            crop = segment_image(source_image, mask['segmentation']).crop(crop_box)
            crop.save(os.path.join(image_dir, object_type+'_'+str(i)+'.png'))
            # Matching binary mask crop.
            mask_crop = segment_image(maskall_image, mask['segmentation']).crop(crop_box)
            mask_crop.save(os.path.join(mask_dir, object_type+'_mask_'+str(i)+'.png'))
            # One "key: value" line per metadata field.
            with open(os.path.join(text_dir, object_type+'_text_'+str(i)+'.txt'), 'w') as file:
                for key, value in exportdict.items():
                    file.write(f"{key}: {value}\n")


def execonesite(m,imagecsv,bigpath,thisplate,csvpath0):
    """
    Export per-object crops, masks and text metadata for one imaged site.

    For the site described by imagecsv, nuclei are cropped from the DAPI
    image and cells from both the tubulin and the actin image. Output goes to
    ./bbbc021/demo/<plate>/<well>/<site>/<type>/{image,text,mask}/<channel>/.

    parameters:
    m: position of the site row in the image table; unused, kept so existing
        callers keep working.
    imagecsv: one row of bbbc021_Image.csv, indexable by column name.
    bigpath (str): dataset root containing 'illum_corrected' and 'labels'.
    thisplate (str): plate directory name under those folders.
    csvpath0 (str): directory holding bbbc021_Nuclei.csv and bbbc021_Cells.csv;
        its last path component is used as the well name in the final message.

    raises:
    OSError: when a label image cannot be read.
    ValueError: when the number of table rows and the number of distinct
        labels in a label image differ.
    """
    site = imagecsv['Metadata_Site']
    Metadata_Well = imagecsv['Metadata_Well']
    Metadata_Site = 's' + str(site)

    imagepath0 = os.path.join(bigpath, 'illum_corrected', thisplate)
    labelpath0 = os.path.join(bigpath, 'labels', thisplate)
    path_dapi = os.path.join(imagepath0, imagecsv['FileName_CorrDAPI'])
    path_tubulin = os.path.join(imagepath0, imagecsv['FileName_CorrTub'])
    path_actin = os.path.join(imagepath0, imagecsv['FileName_CorrActin'])

    # Per-object tables, restricted to this site (selected by Metadata_Site,
    # not by ImageNumber).
    nucleicsv = pd.read_csv(os.path.join(csvpath0, 'bbbc021_Nuclei.csv'))
    cellcsv = pd.read_csv(os.path.join(csvpath0, 'bbbc021_Cells.csv'))
    masks_nuclei = _bbox_records(nucleicsv[nucleicsv['Metadata_Site'] == site])
    masks_cell = _bbox_records(cellcsv[cellcsv['Metadata_Site'] == site])

    # Label images: pixel value 0 is background, every other value one object.
    image_label_nuclei = _read_label_image(os.path.join(labelpath0, imagecsv['FileName_NucleiImage']))
    image_label_cell = _read_label_image(os.path.join(labelpath0, imagecsv['FileName_CellsImage']))
    _attach_segmentations(masks_nuclei, image_label_nuclei, 'nuclei')
    _attach_segmentations(masks_cell, image_label_cell, 'cell')

    # Binary "all objects" masks: non-zero labels become 255, background stays 0.
    imagemaskall = Image.fromarray((image_label_nuclei != 0).astype(np.uint8) * 255)
    imagemaskall2 = Image.fromarray((image_label_cell != 0).astype(np.uint8) * 255)

    # Metadata shared by every exported object of this site.
    common = {
        'cellname1': 'MCF-7',
        'cellname2': 'breast cancer cells',
        'photograph': 'fluorescent microscopy',
        'originalimagesize': str(imagecsv['Width_DAPI'])+'x'+str(imagecsv['Height_DAPI']),
        'compound': imagecsv['Metadata_Image_Metadata_Compound'],
        'concentration': imagecsv['Metadata_Image_Metadata_Concentration'],
    }

    exportpath = os.path.join('./bbbc021', 'demo', thisplate, Metadata_Well, Metadata_Site)
    os.makedirs(exportpath, exist_ok=True)

    # Nuclei from the DAPI channel; cells from the tubulin and actin channels.
    _export_channel(path_dapi, imagemaskall, masks_nuclei, 'nuclei', 'DAPI', exportpath, common)
    _export_channel(path_tubulin, imagemaskall2, masks_cell, 'cell', 'Tubulin', exportpath, common)
    _export_channel(path_actin, imagemaskall2, masks_cell, 'cell', 'Actin', exportpath, common)

    # The well name comes from csvpath0 so no notebook-level global is needed.
    thiswell = os.path.basename(os.path.normpath(csvpath0))
    print(f'''{thisplate}_{thiswell}_{Metadata_Site}: Done!''')

# Starting Process

In [4]:
# Dataset root; measurement tables are read from <bigpath>/measurements/<plate>/<well>/.
bigpath='../datasets/BBBC/BBBC021/demo/'
measurementspath=os.path.join(bigpath,'measurements')
platenames=list_directories(measurementspath)
# wellnames[k] lists the well folders of platenames[k]. Only directories are
# kept, so stray files (e.g. .DS_Store) are not mistaken for wells.
wellnames=[list_directories(os.path.join(measurementspath,plate)) for plate in platenames]

In [5]:
# Walk every plate, every well of that plate, and every site row of the
# well's image table, exporting one site at a time.
for i, thisplate in enumerate(platenames):
    for thiswell in wellnames[i]:
        csvpath0 = os.path.join(measurementspath, thisplate, thiswell)
        imagecsv = pd.read_csv(os.path.join(csvpath0, 'bbbc021_Image.csv'))
        for m in range(len(imagecsv)):
            siterow = imagecsv.iloc[m]
            execonesite(m, siterow, bigpath, thisplate, csvpath0)

Week10_40111_B02_s1: Done!
