# Part 4:  Batching, Plotting, Saving

# Setup

In [1]:
import numpy as np
import pandas as pd
import napari
import tifffile
import skimage as ski
import scipy.ndimage as ndi
import glob
import cellpose.models as models
import matplotlib.pyplot as plt
import cv2
import dask
import cellpose.models as models
import sutils
import plotly.graph_objs as go
import plotly.express as px

import glob

In [2]:
viewer = napari.Viewer()

In [3]:
models = models.Cellpose(gpu=True, model_type='cyto')

# Batch processing

Frequently you are going to want to process several images, not just a single, and then look at the results.  First we must get a list of all of the files we want to process.

## Getting the list of files

In [4]:
python_notebooks = glob.glob('*.ipynb')
python_notebooks

['0-Download-Images.ipynb',
 '1-Intro.ipynb',
 '2-Segmentation.ipynb',
 '3-HomeworkIdea.ipynb',
 '4-Batching-Plotting.ipynb',
 'EC-Watershed.ipynb']

glob.glob() takes a string (something in '...') and much like in linux, lets you use * to denote anything, and returns a list of all file names that match that string.  In this case it was simple:  it looked for anything that ended with .ipynb in the current directory.

In [5]:
file_names = glob.glob('files/*/*projection.tif')
file_names

['files\\48hpa\\Plate000_Well2_Object4.tif_projection.tif',
 'files\\48hpa\\Plate000_Well2_Object6.tif_projection.tif',
 'files\\48hpa\\Plate000_Well2_Object9.tif_projection.tif',
 'files\\intact\\Plate000_Well1_Object3.tif_projection.tif',
 'files\\intact\\Plate000_Well1_Object4.tif_projection.tif',
 'files\\intact\\Plate000_Well1_Object6.tif_projection.tif']

 In this case, it is looking in the files folder for anything that is a folder ('files/*/') and then looking to see if anything in those sub-folders contains a file whose name ends with 'projection.tif'

## Making a processing pipeline

We can get one of the files from the list just like in a numpy array:  by using [].  We'll just get the irst one.

In [6]:
img = ski.io.imread(file_names[4])
img.shape

(3811, 5553, 3)

In [7]:
viewer.layers.clear()
viewer.add_image(img, channel_axis=2)

[<Image layer 'Image' at 0x18a2837a2f0>,
 <Image layer 'Image [1]' at 0x18a282b9600>,
 <Image layer 'Image [2]' at 0x18a286d7df0>]

Here we have spots in channel 0 and a whole worm in channel 2.  We want to find the area of the worm in channel 2 and then find the number of spots in channel 0.

### Processing DAPI

We will just use the same pipeline we did in lesson 2

In [8]:
DAPI = img[:, :, 2]

smoothed = ndi.gaussian_filter(DAPI, 30)
viewer.add_image(smoothed)

thresholded = smoothed > 150
viewer.add_image(thresholded)

labeled_img, object_count = ndi.label(thresholded)
labeled_img = sutils.remove_objects(labeled_img, area_min=1000, area_max=100000000)
viewer.add_labels(labeled_img)

<Labels layer 'labeled_img' at 0x18aa643e740>

### Processing H3p

And a similar pipeline for the spots to lesson 2 (I had to scale the image down by 1000.0 as gaussian_laplace gets buggy with high intensities)

In [9]:
h3p = img[:,:,0]

LoG = -1000.0 * ndi.gaussian_laplace(h3p/1000.0, sigma=5)
viewer.add_image(LoG, colormap='gray', blending='additive')

peaks = ski.feature.peak_local_max(LoG, min_distance=3, threshold_abs=1.5)
viewer.add_points(peaks, size=3)

peak_img = np.zeros_like(LoG)
peak_img[peaks[:,0], peaks[:,1]] = 1
viewer.add_image(peak_img, colormap='gray', blending='additive')


<Image layer 'peak_img' at 0x18a34ead180>

Note the switch to **threshold_abs** instead of **threshold_rel**.  This is because we are going to process several images and some of them might have very bright junk that will skew the peak threshold very high and give inconsistent results between images.

### Combining the two

In [10]:
results = pd.DataFrame(ski.measure.regionprops_table(labeled_img, intensity_image=peak_img, properties=['label', 'area', 'mean_intensity']))
results['counts'] = results['mean_intensity'] * results['area']
results

Unnamed: 0,label,area,mean_intensity,counts
0,1,8269367.0,2.2e-05,186.0


## Looping over all files

It's easy to loop over all of the items in our list of file_names.  Anything we want to run everytime the loop runs has to be indented so python knows what to include in the loop.

In [11]:
for fname in file_names:
    print(fname)
print('Happy Birthday!')

files\48hpa\Plate000_Well2_Object4.tif_projection.tif
files\48hpa\Plate000_Well2_Object6.tif_projection.tif
files\48hpa\Plate000_Well2_Object9.tif_projection.tif
files\intact\Plate000_Well1_Object3.tif_projection.tif
files\intact\Plate000_Well1_Object4.tif_projection.tif
files\intact\Plate000_Well1_Object6.tif_projection.tif
Happy Birthday!


Now we combine the two pipelines we made above and puts it into the file loop.

In [12]:
all_results = []

for fname in file_names:
    print(fname)
    img = ski.io.imread(fname)

    DAPI = img[:, :, 2]
    smoothed = ndi.gaussian_filter(DAPI, 30)
    thresholded = smoothed > 150
    labeled_img, object_count = ndi.label(thresholded)
    labeled_img = sutils.remove_objects(labeled_img, area_min=1000, area_max=100000000)

    h3p = img[:,:,0]
    LoG = -1000.0 * ndi.gaussian_laplace(h3p/1000.0, sigma=5)
    peaks = ski.feature.peak_local_max(LoG, min_distance=3, threshold_abs=1.5)
    peak_img = np.zeros_like(LoG)
    peak_img[peaks[:,0], peaks[:,1]] = 1

    results = pd.DataFrame(ski.measure.regionprops_table(labeled_img, intensity_image=peak_img, properties=['label', 'area', 'mean_intensity']))
    results['counts'] = results['mean_intensity'] * results['area']
    
    results['file'] = fname

    all_results.append(results)

    

files\48hpa\Plate000_Well2_Object4.tif_projection.tif
files\48hpa\Plate000_Well2_Object6.tif_projection.tif
files\48hpa\Plate000_Well2_Object9.tif_projection.tif
files\intact\Plate000_Well1_Object3.tif_projection.tif
files\intact\Plate000_Well1_Object4.tif_projection.tif
files\intact\Plate000_Well1_Object6.tif_projection.tif


Using pandas we will combine the 6 data tables that got put into "all_results" into a single data table.

In [13]:
df = pd.concat(all_results)
df

Unnamed: 0,label,area,mean_intensity,counts,file
0,1,1594637.0,0.000193,307.0,files\48hpa\Plate000_Well2_Object4.tif_project...
0,1,1306072.0,0.000149,194.0,files\48hpa\Plate000_Well2_Object6.tif_project...
0,1,973282.0,0.000186,181.0,files\48hpa\Plate000_Well2_Object9.tif_project...
0,1,5675248.0,2.1e-05,122.0,files\intact\Plate000_Well1_Object3.tif_projec...
0,1,8269367.0,2.2e-05,186.0,files\intact\Plate000_Well1_Object4.tif_projec...
0,1,5191331.0,2.2e-05,116.0,files\intact\Plate000_Well1_Object6.tif_projec...


# Visualizing Results

## Getting our h3p spot density

For this dataset, the value of interest is the number of spots per micron^2.  Our areas are in terms of pixels though, so to correct for that we need a conversion factor.

In [14]:
df['scaled_area']=df['area']*0.00078*0.00078

Next we calculate the Density and store it back in the table with a new column.

In [15]:
df['Density'] = df['counts'] / df['scaled_area']
df

Unnamed: 0,label,area,mean_intensity,counts,file,scaled_area,Density
0,1,1594637.0,0.000193,307.0,files\48hpa\Plate000_Well2_Object4.tif_project...,0.970177,316.437055
0,1,1306072.0,0.000149,194.0,files\48hpa\Plate000_Well2_Object6.tif_project...,0.794614,244.143635
0,1,973282.0,0.000186,181.0,files\48hpa\Plate000_Well2_Object9.tif_project...,0.592145,305.668494
0,1,5675248.0,2.1e-05,122.0,files\intact\Plate000_Well1_Object3.tif_projec...,3.452821,35.333429
0,1,8269367.0,2.2e-05,186.0,files\intact\Plate000_Well1_Object4.tif_projec...,5.031083,36.970172
0,1,5191331.0,2.2e-05,116.0,files\intact\Plate000_Well1_Object6.tif_projec...,3.158406,36.72739


## Plotting with plotly

We have used plotly a bit before, remember all plotly graphs take a table as the first input (our table's name is 'df'), and then x and y arguments for which of the columns of your table you want on those axes.

In [16]:
px.bar(df, x='file', y='Density', width=400)

But usually we will want to aggregate our data in some way.  For instance here we have 6 images, but 3 are from intact animals and 3 are from 48 hours post amputation animals.  We want to create a column to indicate which is which.  Fortunately we can use the folders they came from.  In pandas (kind of like R for Python), we can break up the 'file' column of our table using split.  The '.str' are to tell pandas that we want to treat these things as strings.

In [17]:
df['Timepoint'] = df['file'].str.split('\\').str[1]

Now we can plot the data as replicates.

In [18]:
px.box(df, x='Timepoint', y='Density', points='all', width=600)

It might be useful to track down a particularly aberrant example, so it would be nice to be able to mouse over a point and see which image it came from.  We can do this by adding a hover_data argument to our plotly graph.

In [19]:
px.box(df, x='Timepoint', y='Density', points='all',hover_data=['file'], width=600, title='Density of H3P+ cells')

There are a variety of different plots we can use from plotly, and if you want to learn more about pandas/plotly we will be offering a course on that shortly after this one.

In [20]:
px.violin(df, x='Timepoint', y='scaled_area', points='all',hover_data=['file'], width=600, title='Worm Area in um^2')

# Saving intermediate files

Frequently you will want to save intermediate files so you can look at them later. 

Let's process the first image in our list, but we'll save the filename we are working on as fname just like we did in the loop.

In [21]:
fname = file_names[0]

This is just our processing code from before run on this single 'fname'

In [22]:
img = ski.io.imread(fname)

DAPI = img[:, :, 2]
smoothed = ndi.gaussian_filter(DAPI, 30)
thresholded = smoothed > 150
labeled_img, object_count = ndi.label(thresholded)
labeled_img = sutils.remove_objects(labeled_img, area_min=1000, area_max=100000000)

h3p = img[:,:,0]
LoG = -1000.0 * ndi.gaussian_laplace(h3p/1000.0, sigma=5)
peaks = ski.feature.peak_local_max(LoG, min_distance=3, threshold_abs=1.5)
peak_img = np.zeros_like(LoG)
peak_img[peaks[:,0], peaks[:,1]] = 1

results = pd.DataFrame(ski.measure.regionprops_table(labeled_img, intensity_image=peak_img, properties=['label', 'area', 'mean_intensity']))
results['counts'] = results['mean_intensity'] * results['area']

results['file'] = fname

all_results.append(results)

For this processing pipeline we probably want to export our DAPI image, the label_image it found, our h3p image, the LoG image we used for peak finding, and then the peak_img for where it found the peaks.  To put these all together we can use np.array() which will combine the images along a new dimension.

In [23]:
print(DAPI.shape)
print(labeled_img.shape)
print(h3p.shape)
print(LoG.shape)
print(peak_img.shape)

combined_img = np.array([DAPI, labeled_img, h3p, LoG, peak_img])
print(combined_img.shape)

(2044, 2048)
(2044, 2048)
(2044, 2048)
(2044, 2048)
(2044, 2048)
(5, 2044, 2048)


If we want the image to be readable by ImageJ (which is convenient for dragging and dropping), then we need to convert it do single point floating.

In [24]:
combined_img = combined_img.astype(np.single)

We also need a name to save the file as, we are going to be lazy and just add an "f" to the end, making it *.tiff (meaning that our glob.glob() will not find it if we run things again).

In [25]:
output_filename = fname + 'f'
output_filename

'files\\48hpa\\Plate000_Well2_Object4.tif_projection.tiff'

Then we use ski.io.imsave() to save the file, we provide the imagej=True argument to make it readable by ImageJ, and the metadata={'axes':'CYX'} so that ImageJ knows what order the axes are in.

In [26]:
ski.io.imsave(output_filename, combined_img,  imagej=True, metadata={'axes': 'CYX'})

Now let's put it back into our loop and run it on all of the files.

In [27]:
all_results = []

for fname in file_names:
    print(fname)
    img = ski.io.imread(fname)

    DAPI = img[:, :, 2]
    smoothed = ndi.gaussian_filter(DAPI, 30)
    thresholded = smoothed > 150
    labeled_img, object_count = ndi.label(thresholded)
    labeled_img = sutils.remove_objects(labeled_img, area_min=1000, area_max=100000000)

    h3p = img[:,:,0]
    LoG = -1000.0 * ndi.gaussian_laplace(h3p/1000.0, sigma=5)
    peaks = ski.feature.peak_local_max(LoG, min_distance=3, threshold_abs=1.5)
    peak_img = np.zeros_like(LoG)
    peak_img[peaks[:,0], peaks[:,1]] = 1

    results = pd.DataFrame(ski.measure.regionprops_table(labeled_img, intensity_image=peak_img, properties=['label', 'area', 'mean_intensity']))
    results['counts'] = results['mean_intensity'] * results['area']
    
    results['file'] = fname
    all_results.append(results)

    combined_img = np.array([DAPI, labeled_img, h3p, LoG, peak_img])
    combined_img = combined_img.astype(np.single)
    output_filename = fname + 'f'
    ski.io.imsave(output_filename, combined_img,  imagej=True, metadata={'axes': 'CYX'})

    

files\48hpa\Plate000_Well2_Object4.tif_projection.tif
files\48hpa\Plate000_Well2_Object6.tif_projection.tif
files\48hpa\Plate000_Well2_Object9.tif_projection.tif
files\intact\Plate000_Well1_Object3.tif_projection.tif
files\intact\Plate000_Well1_Object4.tif_projection.tif
files\intact\Plate000_Well1_Object6.tif_projection.tif


# Interactive Plots

Sometimes it is nice to be able to easily interact with our data.

This is relatively easy to do with plotly and napari.  If df['file] contains the path to our image we can write the function to open up the image when we click on the data point in the plot.

In [32]:


f=go.FigureWidget(
    px.violin(df, x='Timepoint', y='scaled_area', points='all',hover_data=['file'], width=600, title='Worm Area in um^2')
    )

def click_fn(trace, points, state):
    
    if (len(points.point_inds)>0):
        fname = f.data[points.trace_index]['customdata'][points.point_inds[-1]][0]
        print(fname)
        img = ski.io.imread(fname)
        viewer.layers.clear()
        viewer.add_image(img, channel_axis=-1)
 
for a in f.data:
    a.on_click(click_fn)

f

FigureWidget({
    'data': [{'alignmentgroup': 'True',
              'box': {'visible': False},
              'customdata': array([['files\\48hpa\\Plate000_Well2_Object4.tif_projection.tif'],
                                   ['files\\48hpa\\Plate000_Well2_Object6.tif_projection.tif'],
                                   ['files\\48hpa\\Plate000_Well2_Object9.tif_projection.tif'],
                                   ['files\\intact\\Plate000_Well1_Object3.tif_projection.tif'],
                                   ['files\\intact\\Plate000_Well1_Object4.tif_projection.tif'],
                                   ['files\\intact\\Plate000_Well1_Object6.tif_projection.tif']],
                                  dtype=object),
              'hovertemplate': 'Timepoint=%{x}<br>scaled_area=%{y}<br>file=%{customdata[0]}<extra></extra>',
              'legendgroup': '',
              'marker': {'color': '#636efa'},
              'name': '',
              'offsetgroup': '',
              'orientation':

files\intact\Plate000_Well1_Object6.tif_projection.tif
files\48hpa\Plate000_Well2_Object9.tif_projection.tif


Or we can have it open up the processed image that we just created, since all we did was add an 'f' to the end of the file.

In [None]:


f=go.FigureWidget(
    px.violin(df, x='Timepoint', y='scaled_area', points='all',hover_data=['file'], width=600, title='Worm Area in um^2')
    )

def click_fn(trace, points, state):
    
    if (len(points.point_inds)>0):
        fname = f.data[points.trace_index]['customdata'][points.point_inds[-1]][0]
        fname = fname + 'f'
        print(fname)
        img = ski.io.imread(fname)
        viewer.layers.clear()
        viewer.add_image(img, channel_axis=0)
 
for a in f.data:
    a.on_click(click_fn)

f

FigureWidget({
    'data': [{'alignmentgroup': 'True',
              'box': {'visible': False},
              'customdata': array([['files\\48hpa\\Plate000_Well2_Object4.tif_projection.tif'],
                                   ['files\\48hpa\\Plate000_Well2_Object6.tif_projection.tif'],
                                   ['files\\48hpa\\Plate000_Well2_Object9.tif_projection.tif'],
                                   ['files\\intact\\Plate000_Well1_Object3.tif_projection.tif'],
                                   ['files\\intact\\Plate000_Well1_Object4.tif_projection.tif'],
                                   ['files\\intact\\Plate000_Well1_Object6.tif_projection.tif']],
                                  dtype=object),
              'hovertemplate': 'Timepoint=%{x}<br>scaled_area=%{y}<br>file=%{customdata[0]}<extra></extra>',
              'legendgroup': '',
              'marker': {'color': '#636efa'},
              'name': '',
              'offsetgroup': '',
              'orientation':

files\intact\Plate000_Well1_Object6.tif_projection.tiff
