In [1]:
# standard python libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import sys

In [2]:
# Used to find the area of platelets
import scipy.spatial as ss

In [3]:
# cellpose library for cell-like object segmentation
from cellpose import io
from cellpose import models
from cellpose.io import imread
from cellpose import plot, utils

In [4]:
# Control display of images
from IPython import display

In [5]:
from datetime import date

today = date.today()
today = today.strftime("%y%m%d")

In [6]:
today

'221105'

### Cell segmentation

In [7]:
"""
Convert a RGB images into a greyscale images. We assume that the images follow linear RGB 
format. 
"""

def rgb2gray(rgb):
    
    rgb[0, :, :] = rgb[0, :, :] * 0.2989
    rgb[1, :, :] = rgb[1, :, :] * 0.5870
    rgb[2, :, :] = rgb[2, :, :] * 0.1140
    
    return np.sum(rgb, axis = 0)

In [8]:
"""
Inputs: 
- file: The location of file. Should contain .tif data

Outputs: Saves a file to local PC containing the segmentation data
"""

def cell_segmentation(file):
    global file_counter
    
    file_pathway = r'C:\Users\Gursharan\Documents\SPR 2022\Engineering Capstone\capstone\segmentation'
    
    # model_type='cyto' or 'nuclei' or 'cyto2'
    model = models.Cellpose(model_type='cyto')  # cyto: cellpose data vs cyto2: cellpose + user data

    # list of files
    # PUT PATH TO YOUR FILES HERE!


    images = imread(file) 
    images = images[299, 3, :, :]


    # define CHANNELS to run segementation on
    # grayscale=0, R=1, G=2, B=3
    # channels = [cytoplasm, nucleus]
    # if NUCLEUS channel does not exist, set the second channel to 0
    channels = [[0,0]]
    # IF ALL YOUR IMAGES ARE THE SAME TYPE, you can give a list with 2 elements
    # channels = [0,0] # IF YOU HAVE GRAYSCALE
    # channels = [2,3] # IF YOU HAVE G=cytoplasm and B=nucleus
    # channels = [2,1] # IF YOU HAVE G=cytoplasm and R=nucleus

    # if diameter is set to None, the size of the cells is estimated on a per image basis
    # you can set the average cell `diameter` in pixels yourself (recommended)
    # diameter can be a list or a single number for all images

    masks, flows, styles, diams = model.eval(images, diameter=40, channels=channels)
    io.masks_flows_to_seg(images, masks, flows, diams, f'{file_pathway}\\{today}_model_{file_counter}_.npy', channels)
    
    file_counter = file_counter+1
    

### Annotate the Image 

In [9]:
"""
Descripton: Annotate the image

Classification Scheme:

1: Platelet inactivated
2: Fileopodia of platelet visible
3: Platelet fully enlarged

Inputs: file containing segmentation data (.npy file extension)
Outputs: Annotation of the file

"""

def annotate(file):

    # load the file with segmentation results
    dat = np.load(file, allow_pickle=True).item()

    # plot image with outlines overlaid in red
    outlines = utils.outlines_list(dat['masks'])

    # Store classification of platelets in target
    annotation = []

    # Plot each cell with segmentation boundary shown in red. 
    # The user inputs a classification for each boundary
    
    for o in outlines:
        plt.figure()
        plt.imshow(dat['img'])
        plt.plot(o[:,0], o[:,1], color='r')
        plt.show()

        classification = int(input())
        annotation.append(classification)

        display.clear_output()
        
    return annotation

In [10]:
"""
Description: Get the area for each cell

Input: file pathway or file name
Output: List of areas

"""
def get_area(file):
    
    # get outlines from file
    dat = np.load(file, allow_pickle=True).item()
    outlines = utils.outlines_list(dat['masks'])
    
    # For each boundary compute the area
    area = []
    for points in outlines:
        hull = ss.ConvexHull(points)
        area.append(hull.area)
    
    return area

In [11]:
def get_all_filepaths(directory):
  
    # initializing empty file paths list
    file_paths = []
  
    # crawling through directory and subdirectories
    for root, directories, files in os.walk(directory):
        for filename in files:
            # join the two strings in order to form the full filepath.
            filepath = os.path.join(root, filename)    
            file_paths.append(filepath)
            
    # returning all file paths
    return file_paths

In [12]:
def file_processing_pipeline(file_pathways):
    
    df_list = [] # Store each of df generate from annotating the images here 
    seg_file_pathway = r'C:\Users\Gursharan\Documents\SPR 2022\Engineering Capstone\capstone\segmentation\\'
    
    
    data_file_names = [] # We will store file names  here
    
    # Get data file_names
    for filepath in file_pathways:
        data_file_names.append(os.path.basename(filepath))
    
    
    # Create segmentation files
    for filepath in file_pathways:
        cell_segmentation(filepath)
    
        
    # os.path.basename(your_path)

    segmentation_files = get_all_filepaths(seg_file_pathway)

    # We will use i to loop through data_file_names, s_files loops through segmentation_filepaths
    for i, s_file in enumerate(segmentation_files):
        target = annotate(s_file)
        area = get_area(s_file)

        df = pd.DataFrame({'area':area, 'target':target, 'file_names':data_file_names[i]})
        df_list.append(df)
        
    
    final_df = pd.concat(df_list, axis=0, ignore_index=True)
    
    return final_df

In [13]:
list_of_files = get_all_filepaths(r'C:\Users\Gursharan\Documents\SPR 2022\Engineering Capstone\capstone\data files')


file_counter = 0 # use in cell_segmentation function

df = file_processing_pipeline(list_of_files)

In [18]:
df.to_csv('221023_platelet.csv', index=False)

In [4]:
df = pd.read_csv('221023_platelet.csv')

In [5]:
df.head()

Unnamed: 0,area,target
0,88.960006,2
1,106.750445,2
2,142.830257,2
3,115.143003,2
4,127.840473,3
