# Use Annotation Masks to Analyze Image Content
The first part of this notebook allows users to apply segmentation masks to microscope images, extract information about the raw image, and save information about the raw image to a CSV file.

After that, users can visualize their data with the plots below.

For a given image, every cell is recorded as one row in the CSV file, so the number of rows for that image is its cell count. Information recorded about each cell will include: 
- filename of the image the cell was in
- size of cell (number of pixels it occupies)
- minimum intensity value within cell
- maximum intensity value within cell
- average intensity value of cell
- standard deviation of intensity values of cell

but more information can be added if needed.

"Cell 0" is the background of each image and should be excluded from plots. (This is also why cell 0 is likely to be much larger than the other cells.)

In [33]:
#imports
import os

import plotly

import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [34]:
# base_dir holds the image folders of interest; analysis output is saved in its "CSV" subfolder.
base_dir = "/gnv_home/data/analysis_test/set1"
# raw_folders lists the subfolders of base_dir that contain raw images (one folder per channel).
raw_folders = ['raw']
# Multi-channel example: raw_folders = ["phase", "FITC", "mCherry", "Cy5"]
# annotation_folder is the subfolder of base_dir that contains the annotation masks.
annotation_folder ="annotated"
identifier = "example" # becomes the prefix of the CSV filename

In [107]:
from imageio import imread
from deepcell_toolbox.utils.io_utils import get_img_names
import numpy as np
import pandas as pd

def get_all_info_folders(base_dir, annotation_folder, raw_folders, split = True):
    '''
    Collect annotation and raw-channel information for every annotated image.

    base_dir: directory that contains the annotation folder and the raw folders
    annotation_folder: name of the subfolder holding the annotation masks
    raw_folders: names of the subfolders holding raw images, one per channel
    split: meant to say whether channels are saved separately or combined in
        one raw image; it is accepted but not used by this function

    Returns a dictionary keyed by image number (the position of the image in
    the list returned by get_img_names). Each value has two entries:
        "annotations": the result of get_annotation_info for that image
        "channels": {channel name: result of get_channel_info for that image}

    Images are paired across folders by position, so the annotation folder and
    every raw folder should contain the same number of images.
    '''
    mask_dir = os.path.join(base_dir, annotation_folder)
    num_images = len(get_img_names(mask_dir))

    all_info = {}
    for img_num in range(num_images):
        # cell locations come from the annotation mask ...
        mask_info = get_annotation_info(mask_dir, img_num)
        cells = mask_info["cells"]

        # ... and are then applied to the matching image in every raw channel
        per_channel = {
            channel: get_channel_info(os.path.join(base_dir, channel), img_num, cells)
            for channel in raw_folders
        }

        all_info[img_num] = {"annotations": mask_info, "channels": per_channel}

    return all_info
        
        
def make_flat_cell_info(full_dict):
    '''
    Flatten the nested per-image dictionary into one flat record per cell.

    full_dict: dictionary keyed by image number, where each value holds an
        "annotations" entry (with "file_name" and "cells") and a "channels"
        entry (per channel: "file_name" and per-cell statistics)

    Returns a dictionary keyed by a cell id built from the zero-padded image
    number and cell number (for example "img_000_cell_num_004"), so that keys
    stay distinct across images. Each record contains the annotation file
    name, the cell size and, for every channel, the raw file name plus the
    avg / stdev / min / max intensity. Pixel locations and raw intensity
    arrays are not copied into the records.
    '''
    flat_records = {}

    for img_num, img_info in full_dict.items():
        annotation = img_info['annotations']

        for cell_num, cell in annotation["cells"].items():
            cell_id = "img_{}_cell_num_{}".format(
                str(img_num).zfill(3), str(cell_num).zfill(3)
            )

            record = {
                "annotation_name": annotation["file_name"],
                "size": cell['size'],
            }

            # one group of columns per channel, each prefixed with the channel name
            for channel, channel_info in img_info["channels"].items():
                stats = channel_info["cells"][cell_num]
                record[channel + "_name"] = channel_info["file_name"]
                for stat in ("avg", "stdev", "min", "max"):
                    record[channel + "_" + stat] = stats[stat]

            flat_records[cell_id] = record

    return flat_records
        

def find_cell_locations(annotation_path):
    '''
    Read an annotation mask and locate every labelled region in it.

    annotation_path: path of the annotation image to read

    Returns a dictionary keyed by each distinct value found in the mask
    (the notebook treats value 0 as the background). Each entry holds:
        "location": the index arrays returned by np.nonzero for that value
        "size": the number of pixels carrying that value
    '''
    mask = imread(annotation_path)

    locations = {}
    for label in np.unique(mask):
        pixels = np.nonzero(mask == label)
        locations[label] = {"location": pixels, "size": len(pixels[0])}

    return locations

def get_annotation_info(annotation_dir, img_num):
    '''
    Describe one annotation image.

    annotation_dir: folder containing the annotation images
    img_num: position of the image in the list returned by get_img_names

    Returns a dictionary with the annotation "file_name", the "cells" found
    by find_cell_locations, and "num_cells" (the number of entries in "cells").
    '''
    annotation_name = get_img_names(annotation_dir)[img_num]
    cells = find_cell_locations(os.path.join(annotation_dir, annotation_name))

    return {
        "file_name": annotation_name,
        "cells": cells,
        "num_cells": len(cells),
    }

def apply_cell_locations(raw_image_path, cell_locations):
    '''
    Measure the intensity of every cell in one raw image.

    raw_image_path: path of the raw image to read
    cell_locations: dictionary keyed by cell, each entry holding a "location"
        index usable on the raw image (as produced by find_cell_locations)

    Returns a dictionary keyed by cell. Each entry holds the selected
    "intensities" and their "min", "max", "avg" and "stdev".
    '''
    # NOTE(review): assumes the raw image can be indexed with the mask's pixel
    # locations (i.e. matching height and width) - confirm for your data
    raw_img = imread(raw_image_path)

    stats_by_cell = {}
    for cell, info in cell_locations.items():
        pixel_values = raw_img[info["location"]]
        stats_by_cell[cell] = {
            "intensities": pixel_values,
            "min": np.min(pixel_values),
            "max": np.max(pixel_values),
            "avg": np.average(pixel_values),
            "stdev": np.std(pixel_values),
        }

    return stats_by_cell

def get_channel_info(channel_dir, img_num, cell_locations):
    '''
    Describe one raw image of one channel.

    channel_dir: folder containing the raw images of the channel
    img_num: position of the image in the list returned by get_img_names
    cell_locations: cell locations taken from the matching annotation

    Returns a dictionary with the raw "file_name" and the per-cell intensity
    information ("cells") computed by apply_cell_locations.
    '''
    raw_name = get_img_names(channel_dir)[img_num]
    raw_path = os.path.join(channel_dir, raw_name)

    return {
        "file_name": raw_name,
        "cells": apply_cell_locations(raw_path, cell_locations),
    }

def make_save_df(flat_dict, base_dir, identifier, include_background=True):
    '''
    Turn the flat per-cell dictionary into a DataFrame and save it as a CSV file.

    flat_dict: dictionary of per-cell records keyed by cell id
    base_dir: directory in which the "CSV" output folder is created
    identifier: prefix of the CSV file name
    include_background: when True (the default) every record is saved; when
        False the records are first filtered with no_background_dict

    The file is written to <base_dir>/CSV/ and is named
    "<identifier>_analysis_includes_background_info.csv" or
    "<identifier>_analysis_no_background_info.csv" depending on
    include_background. The cell ids are saved as the first CSV column.

    Returns the DataFrame that was saved (one row per cell).
    '''
    csv_dir = os.path.join(base_dir, "CSV")
    # exist_ok makes this safe to call whether or not the folder already exists
    os.makedirs(csv_dir, exist_ok=True)

    if include_background:
        csv_name = identifier + "_analysis_includes_background_info.csv"
    else:
        flat_dict = no_background_dict(flat_dict)
        csv_name = identifier + "_analysis_no_background_info.csv"

    csv_path = os.path.join(csv_dir, csv_name)
    df = pd.DataFrame.from_dict(flat_dict, orient="index")
    df.to_csv(csv_path, index=True)

    return df

def no_background_dict(flat_dict):
    '''
    Return a new dictionary without the background records.

    flat_dict: dictionary of per-cell records keyed by cell id

    Every key containing "num_000" (the id suffix given to cell 0, the
    background) is left out; the remaining records are kept as they are.
    '''
    return {
        cell_id: record
        for cell_id, record in flat_dict.items()
        if "num_000" not in cell_id
    }

## Extract data from images and save csv

In [36]:
# Extract cell locations from the annotation masks and apply them to the corresponding raw images.
# The result is a nested dictionary keyed by image number.

full_dict = get_all_info_folders(base_dir, annotation_folder, raw_folders)

In [28]:
# Display the nested dictionary; the output is long because it holds the pixel coordinates of every cell.
full_dict

{0: {'annotations': {'cells': {0: {'location': (array([  0,   0,   0, ..., 243, 243, 243]),
      array([  0,   1,   2, ..., 241, 242, 243])),
     'size': 56829},
    1: {'location': (array([ 81,  81,  81,  81,  81,  82,  82,  82,  82,  82,  82,  82,  82,
              82,  82,  82,  83,  83,  83,  83,  83,  83,  83,  83,  83,  83,
              83,  83,  83,  83,  83,  83,  83,  84,  84,  84,  84,  84,  84,
              84,  84,  84,  84,  84,  84,  84,  84,  84,  84,  84,  84,  85,
              85,  85,  85,  85,  85,  85,  85,  85,  85,  85,  85,  85,  85,
              85,  85,  85,  85,  85,  85,  86,  86,  86,  86,  86,  86,  86,
              86,  86,  86,  86,  86,  86,  86,  86,  86,  86,  86,  86,  86,
              87,  87,  87,  87,  87,  87,  87,  87,  87,  87,  87,  87,  87,
              87,  87,  87,  87,  87,  87,  87,  87,  88,  88,  88,  88,  88,
              88,  88,  88,  88,  88,  88,  88,  88,  88,  88,  88,  88,  88,
              88,  88,  88,  89,  89,  89

In [37]:
# Flatten the nested dictionary into one record per cell; the pixel locations and intensities are left out.
flat_dict = make_flat_cell_info(full_dict)

# Optional: display it as a dataframe before saving
#pd.DataFrame.from_dict(flat_dict, "index")

In [29]:
# Display the flat per-cell records (the background, cell 0, is still included here).
flat_dict

{'img_000_cell_num_000': {'annotation_name': 'MouseBrain_s7_nuc_x_00_y_00_frame_000.png',
  'raw_avg': 229.0717063471115,
  'raw_max': 451,
  'raw_min': 0,
  'raw_name': 'MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif',
  'raw_stdev': 103.56822918072596,
  'size': 56829},
 'img_000_cell_num_001': {'annotation_name': 'MouseBrain_s7_nuc_x_00_y_00_frame_000.png',
  'raw_avg': 368.8857142857143,
  'raw_max': 559,
  'raw_min': 247,
  'raw_name': 'MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif',
  'raw_stdev': 76.11330973205243,
  'size': 490},
 'img_000_cell_num_004': {'annotation_name': 'MouseBrain_s7_nuc_x_00_y_00_frame_000.png',
  'raw_avg': 399.9340659340659,
  'raw_max': 580,
  'raw_min': 257,
  'raw_name': 'MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif',
  'raw_stdev': 76.09510179484494,
  'size': 546},
 'img_000_cell_num_007': {'annotation_name': 'MouseBrain_s7_nuc_x_00_y_00_frame_000.png',
  'raw_avg': 499.7913950456323,
  'raw_max': 1435,
  'raw_min': 277,
  'raw_name': 'MouseBrain_s

In [None]:
# NOTE(review): this repeats the no_background_dict definition from the helper
# cell earlier in the notebook; whichever cell ran last is the one in effect.
def no_background_dict(flat_dict):
    '''
    Return a new dictionary without the background records.

    Every key containing "num_000" (the id suffix given to cell 0, the
    background) is left out; the remaining records are kept as they are.
    '''
    return {
        cell_id: record
        for cell_id, record in flat_dict.items()
        if "num_000" not in cell_id
    }

# Background-free copy of flat_dict; used below to build no_bkgd_df.
test_dict = no_background_dict(flat_dict)

In [98]:
# Display the records that remain once the background (cell 0) entries are removed.
test_dict

{'img_000_cell_num_001': {'annotation_name': 'MouseBrain_s7_nuc_x_00_y_00_frame_000.png',
  'raw_avg': 368.8857142857143,
  'raw_max': 559,
  'raw_min': 247,
  'raw_name': 'MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif',
  'raw_stdev': 76.11330973205243,
  'size': 490},
 'img_000_cell_num_004': {'annotation_name': 'MouseBrain_s7_nuc_x_00_y_00_frame_000.png',
  'raw_avg': 399.9340659340659,
  'raw_max': 580,
  'raw_min': 257,
  'raw_name': 'MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif',
  'raw_stdev': 76.09510179484494,
  'size': 546},
 'img_000_cell_num_007': {'annotation_name': 'MouseBrain_s7_nuc_x_00_y_00_frame_000.png',
  'raw_avg': 499.7913950456323,
  'raw_max': 1435,
  'raw_min': 277,
  'raw_name': 'MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif',
  'raw_stdev': 218.6357923082046,
  'size': 767},
 'img_000_cell_num_008': {'annotation_name': 'MouseBrain_s7_nuc_x_00_y_00_frame_000.png',
  'raw_avg': 416.57251908396944,
  'raw_max': 647,
  'raw_min': 267,
  'raw_name': 'MouseBrain_s

In [38]:
# One row per cell id: df keeps the background rows, no_bkgd_df leaves them out.
df = pd.DataFrame.from_dict(flat_dict, orient="index")
no_bkgd_df = pd.DataFrame.from_dict(test_dict, orient="index")

In [None]:
# Turn the flat dict into a dataframe and save it to a CSV file.
# The helper defined above is make_save_df (there is no make_save_csv in this notebook).
# include_background=True keeps the "cell 0" rows in the saved file.
make_save_df(flat_dict, base_dir, identifier, include_background=True)

## Visualize data

### From dictionary

In [59]:
# Display the dataframe built from flat_dict (one row per cell, background rows included).
df

Unnamed: 0,size,raw_avg,raw_max,raw_min,raw_name,raw_stdev,annotation_name
img_000_cell_num_000,56829,229.071706,451,0,MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif,103.568229,MouseBrain_s7_nuc_x_00_y_00_frame_000.png
img_000_cell_num_001,490,368.885714,559,247,MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif,76.113310,MouseBrain_s7_nuc_x_00_y_00_frame_000.png
img_000_cell_num_004,546,399.934066,580,257,MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif,76.095102,MouseBrain_s7_nuc_x_00_y_00_frame_000.png
img_000_cell_num_007,767,499.791395,1435,277,MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif,218.635792,MouseBrain_s7_nuc_x_00_y_00_frame_000.png
img_000_cell_num_008,655,416.572519,647,267,MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif,92.213777,MouseBrain_s7_nuc_x_00_y_00_frame_000.png
img_000_cell_num_011,249,393.357430,552,287,MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif,61.407690,MouseBrain_s7_nuc_x_00_y_00_frame_000.png
img_001_cell_num_000,56170,229.918960,634,0,MouseBrain_s7_nuc_raw_x_00_y_00_frame_001.tif,104.644415,MouseBrain_s7_nuc_x_00_y_00_frame_001.png
img_001_cell_num_001,518,412.756757,752,237,MouseBrain_s7_nuc_raw_x_00_y_00_frame_001.tif,115.856333,MouseBrain_s7_nuc_x_00_y_00_frame_001.png
img_001_cell_num_004,622,442.575563,723,254,MouseBrain_s7_nuc_raw_x_00_y_00_frame_001.tif,123.342502,MouseBrain_s7_nuc_x_00_y_00_frame_001.png
img_001_cell_num_007,650,651.972308,2227,292,MouseBrain_s7_nuc_raw_x_00_y_00_frame_001.tif,367.238362,MouseBrain_s7_nuc_x_00_y_00_frame_001.png


### From CSV

In [94]:
# Load a previously saved analysis CSV.
# NOTE(review): this file name does not follow the "<identifier>_analysis_..._info.csv"
# pattern that make_save_df writes - confirm this is the file you intend to plot.
csv_path = "/gnv_home/data/analysis_test/set1/CSV/analysis_notebook_test_analysis.csv"
# Read without index_col, so the saved cell ids come back as an ordinary "Unnamed: 0" column.
csv_data = pd.read_csv(csv_path)
csv_data

Unnamed: 0.1,Unnamed: 0,raw_avg,raw_name,annotated_min,raw_min,raw_stdev,annotated_avg,annotation_name,annotated_stdev,raw_max,annotated_name,annotated_max,size
0,img_000_cell_num_000,229.071706,MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif,0,0,103.568229,0.0,MouseBrain_s7_nuc_x_00_y_00_frame_000.png,0.0,451,MouseBrain_s7_nuc_x_00_y_00_frame_000.png,0,56829
1,img_000_cell_num_001,368.885714,MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif,1,247,76.113310,1.0,MouseBrain_s7_nuc_x_00_y_00_frame_000.png,0.0,559,MouseBrain_s7_nuc_x_00_y_00_frame_000.png,1,490
2,img_000_cell_num_004,399.934066,MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif,4,257,76.095102,4.0,MouseBrain_s7_nuc_x_00_y_00_frame_000.png,0.0,580,MouseBrain_s7_nuc_x_00_y_00_frame_000.png,4,546
3,img_000_cell_num_007,499.791395,MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif,7,277,218.635792,7.0,MouseBrain_s7_nuc_x_00_y_00_frame_000.png,0.0,1435,MouseBrain_s7_nuc_x_00_y_00_frame_000.png,7,767
4,img_000_cell_num_008,416.572519,MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif,8,267,92.213777,8.0,MouseBrain_s7_nuc_x_00_y_00_frame_000.png,0.0,647,MouseBrain_s7_nuc_x_00_y_00_frame_000.png,8,655
5,img_000_cell_num_011,393.357430,MouseBrain_s7_nuc_raw_x_00_y_00_frame_000.tif,11,287,61.407690,11.0,MouseBrain_s7_nuc_x_00_y_00_frame_000.png,0.0,552,MouseBrain_s7_nuc_x_00_y_00_frame_000.png,11,249
6,img_001_cell_num_000,229.918960,MouseBrain_s7_nuc_raw_x_00_y_00_frame_001.tif,0,0,104.644415,0.0,MouseBrain_s7_nuc_x_00_y_00_frame_001.png,0.0,634,MouseBrain_s7_nuc_x_00_y_00_frame_001.png,0,56170
7,img_001_cell_num_001,412.756757,MouseBrain_s7_nuc_raw_x_00_y_00_frame_001.tif,1,237,115.856333,1.0,MouseBrain_s7_nuc_x_00_y_00_frame_001.png,0.0,752,MouseBrain_s7_nuc_x_00_y_00_frame_001.png,1,518
8,img_001_cell_num_004,442.575563,MouseBrain_s7_nuc_raw_x_00_y_00_frame_001.tif,4,254,123.342502,4.0,MouseBrain_s7_nuc_x_00_y_00_frame_001.png,0.0,723,MouseBrain_s7_nuc_x_00_y_00_frame_001.png,4,622
9,img_001_cell_num_007,651.972308,MouseBrain_s7_nuc_raw_x_00_y_00_frame_001.tif,7,292,367.238362,7.0,MouseBrain_s7_nuc_x_00_y_00_frame_001.png,0.0,2227,MouseBrain_s7_nuc_x_00_y_00_frame_001.png,7,650


In [104]:
# "Cell 0" is the image background and should not be plotted, so drop its rows first.
# The cell ids are in the "Unnamed: 0" column when the CSV is read without index_col;
# fall back to the index in case the CSV was read with index_col=0.
cell_ids = csv_data["Unnamed: 0"] if "Unnamed: 0" in csv_data.columns else csv_data.index.to_series()
is_background = cell_ids.astype(str).str.contains("num_000", regex=False).to_numpy()
cells_only = csv_data[~is_background]

# average intensity against cell size
brightness_size_scatter = {'x' : cells_only['size'], 'y' : cells_only['raw_avg'], 'mode' : "markers"}

# ratio of average to maximum intensity against cell size
ratio_scatter = {'x' : cells_only['size'], 'y' : cells_only['raw_avg']/cells_only['raw_max'], 'mode' : 'markers'}

# distribution of cell sizes (bin width 50 pixels)
size_hist = {'type': 'histogram', 'x' : cells_only['size'], 'xbins': {'size' :50}}

# distribution of average intensities (bin width 10)
intensity_hist = {'type': 'histogram', 'x' : cells_only['raw_avg'], 'xbins': {'size' :10}}

In [102]:
import numpy as np
# Histogram of cell sizes (bin width 100) built from the in-memory dataframe df rather than the CSV.
trace = {'type': 'histogram', 'x': df['size'], 'xbins': {'size': 100}}

In [105]:
# Render the average-intensity histogram inline; put other traces in the list to plot them instead.
plotly.offline.iplot([
    intensity_hist
])