# Use Annotation Masks to Analyze Image Content
The first part of this notebook allows users to apply segmentation masks to microscope images, extract per-cell information from the raw images, and save that information to a CSV file.

After that, users can visualize their data with the plots below.

For a given image, the number of cells will be recorded (each cell represented by a row in the CSV file). Information recorded about each cell will include: 
- filename of the image the cell was in
- size of cell (number of pixels it occupies)
- minimum intensity value within cell
- maximum intensity value within cell
- average intensity value of cell
- standard deviation of intensity values of cell

but more information can be added if needed.

"Cell 0" is the background of each image and should be excluded from plots. (This is also why cell 0 is likely to be much larger than the other cells.)

In [None]:
#imports
import os

In [None]:
# base_dir: directory that holds the image folders named below. The analysis
# output is also saved here, inside a "CSV" subfolder.
base_dir = "/gnv_home/data/analysis_test/set1"
# raw_folders: subfolders of base_dir with the raw images; each entry is
# processed as its own channel.
raw_folders = ['raw']
# Alternative multi-channel setting:
#raw_folders = ["phase", "FITC", "mCherry", "Cy5"]
# annotation_folder: subfolder of base_dir with the annotation masks.
annotation_folder ="annotated"
identifier = "example" # the csv file is named <identifier>_analysis.csv

In [None]:
from imageio import imread
from deepcell_toolbox.utils.io_utils import get_img_names
import numpy as np
import pandas as pd

def get_all_info_folders(base_dir, annotation_folder, raw_folders, split = True):
    """Collect annotation and per-channel intensity info for every image.

    Args:
        base_dir: directory containing the annotation folder and raw folders.
        annotation_folder: name of the subfolder holding the annotation masks.
        raw_folders: names of the subfolders holding raw images; each one is
            treated as a separate channel.
        split: meant to say whether channels are stored in separate images or
            combined in one raw image. It is accepted but not read here.

    Returns:
        A dict keyed by image index. Each value has an "annotations" entry
        (from get_annotation_info) and a "channels" entry mapping each raw
        folder name to the result of get_channel_info.

    The annotation folder decides how many images are processed, so every raw
    folder is expected to list at least as many images.
    """
    mask_dir = os.path.join(base_dir, annotation_folder)
    image_count = len(get_img_names(mask_dir))

    collected = {}
    for img_num in range(image_count):
        # the mask is read first: its cell locations drive every channel lookup
        mask_info = get_annotation_info(mask_dir, img_num)

        per_channel = {}
        for channel in raw_folders:
            per_channel[channel] = get_channel_info(
                os.path.join(base_dir, channel), img_num, mask_info["cells"]
            )

        collected[img_num] = {"annotations": mask_info, "channels": per_channel}

    return collected
        
        
def make_flat_cell_info(full_dict):
    """Flatten the nested per-image dictionary into one flat record per cell.

    Args:
        full_dict: dict keyed by image index, as built by
            get_all_info_folders. Each value has an "annotations" entry and a
            "channels" entry.

    Returns:
        A dict mapping a cell id to a flat dict holding the annotation file
        name, the cell size and, for each channel, the channel file name plus
        the avg / stdev / min / max intensity. Pixel locations and raw
        intensity arrays are left out.

    The cell id is built from the zero-padded image index and the zero-padded
    cell number, so keys stay distinct across images.
    """
    flat = {}

    for img_num, img_entry in full_dict.items():
        mask_info = img_entry['annotations']
        channels = img_entry["channels"]

        for cell_num, cell_entry in mask_info["cells"].items():
            row = {
                "annotation_name": mask_info["file_name"],
                "size": cell_entry['size'],
            }

            for channel, channel_info in channels.items():
                cell_stats = channel_info["cells"][cell_num]
                row.update({
                    channel + "_name": channel_info["file_name"],
                    channel + "_avg": cell_stats["avg"],
                    channel + "_stdev": cell_stats["stdev"],
                    channel + "_min": cell_stats["min"],
                    channel + "_max": cell_stats["max"],
                })

            cell_id = "img_" + str(img_num).zfill(3) + "_cell_num_" + str(cell_num).zfill(3)
            flat[cell_id] = row

    return flat
        

def find_cell_locations(annotation_path):
    """Locate the pixels that belong to each label in an annotation mask.

    Args:
        annotation_path: path of the annotation image to read.

    Returns:
        A dict keyed by every distinct value found in the mask. Each value is
        a dict with "location" (the index arrays returned by np.nonzero for
        the pixels equal to that value) and "size" (how many pixels matched).
    """
    mask = imread(annotation_path)

    found = {}
    for label in np.unique(mask):
        pixel_idx = np.nonzero(mask == label)
        found[label] = {"location": pixel_idx, "size": len(pixel_idx[0])}

    return found

def get_annotation_info(annotation_dir, img_num):
    """Describe the annotation mask at position img_num in annotation_dir.

    Args:
        annotation_dir: directory holding the annotation images.
        img_num: index into the list returned by get_img_names.

    Returns:
        A dict with "file_name" (name of the selected mask), "cells" (result
        of find_cell_locations for that mask) and "num_cells" (number of
        entries in "cells").
    """
    file_name = get_img_names(annotation_dir)[img_num]
    cells = find_cell_locations(os.path.join(annotation_dir, file_name))

    return {
        "file_name": file_name,
        "cells": cells,
        "num_cells": len(cells),
    }

def apply_cell_locations(raw_image_path, cell_locations):
    """Measure raw-image intensities at the pixel locations of each cell.

    Args:
        raw_image_path: path of the raw image to read.
        cell_locations: dict keyed by cell label; each value must have a
            "location" entry usable as an index into the raw image.

    Returns:
        A dict with the same keys as cell_locations. Each value holds
        "intensities" (the selected pixel values) together with their "min",
        "max", "avg" and "stdev".
    """
    raw_pixels = imread(raw_image_path)

    stats = {}
    for label, entry in cell_locations.items():
        values = raw_pixels[entry["location"]]
        stats[label] = {
            "intensities": values,
            "min": np.min(values),
            "max": np.max(values),
            "avg": np.average(values),
            "stdev": np.std(values),
        }

    return stats

def get_channel_info(channel_dir, img_num, cell_locations):
    """Gather per-cell intensity info for one image of one channel.

    Args:
        channel_dir: directory holding the raw images of the channel.
        img_num: index into the list returned by get_img_names.
        cell_locations: per-cell pixel locations, passed on to
            apply_cell_locations.

    Returns:
        A dict with "file_name" (name of the selected raw image) and "cells"
        (result of apply_cell_locations for that image).
    """
    file_name = get_img_names(channel_dir)[img_num]
    per_cell = apply_cell_locations(os.path.join(channel_dir, file_name), cell_locations)

    return {"file_name": file_name, "cells": per_cell}

def make_save_df(flat_dict, base_dir, identifier):
    """Turn the flat per-cell dictionary into a DataFrame and save it as CSV.

    Args:
        flat_dict: dict mapping a cell id to a flat dict of values; each cell
            id becomes one row and the inner keys become the columns.
        base_dir: directory in which the "CSV" output subfolder is created.
        identifier: prefix of the output file, which is named
            <identifier>_analysis.csv.

    Returns:
        None. The file is written to <base_dir>/CSV/<identifier>_analysis.csv,
        with the cell ids stored as the index column.
    """
    csv_dir = os.path.join(base_dir, "CSV")
    # exist_ok avoids the check-then-create race of a separate isdir() test
    os.makedirs(csv_dir, exist_ok=True)
    csv_path = os.path.join(csv_dir, identifier + "_analysis.csv")

    df = pd.DataFrame.from_dict(flat_dict, orient="index")
    df.to_csv(csv_path, index=True)


In [None]:
# Read every annotation mask under base_dir/annotation_folder and measure the
# matching image in each raw folder. The result is a nested dict keyed by
# image index.

full_dict = get_all_info_folders(base_dir, annotation_folder, raw_folders)

In [None]:
# Flatten the nested dictionary to one record per cell. The pixel locations
# and raw intensity arrays are dropped; only the summary values are kept.
flat_dict = make_flat_cell_info(full_dict)

# Optional: display it as a dataframe before saving
#pd.DataFrame.from_dict(flat_dict, "index")

In [None]:
#turn the flat dict into a dataframe and save it to csv file
#(the helper defined in this notebook is make_save_df; make_save_csv does not exist)
make_save_df(flat_dict, base_dir, identifier)