# Homework 4

# Setup

In [1]:
import numpy as np
import pandas as pd
import napari
import tifffile
import skimage as ski
import scipy.ndimage as ndi
import glob
import cellpose.models as models
import matplotlib.pyplot as plt
import cv2
import dask
import cellpose.models as models
import sutils
import plotly.graph_objs as go
import plotly.express as px
from scipy.stats import ttest_ind

import glob

In [2]:
# Open the napari viewer that the later cells add image, points and label layers to.
viewer = napari.Viewer()

In [3]:
# Build the Cellpose model ('cyto' weights, GPU requested).
# NOTE(review): the identical construction is repeated at the start of Part 2.
model = models.Cellpose(gpu=True, model_type='cyto')

# Homework

## Part 1:  Find in situ spots in image

Choose one of the files in files/batch_homework.  Use our peak finding tricks to find what you think are real peaks in the first channel of the image.  

In [4]:
# Pick one chamber1 image to tune the spot finder on.
fname = 'files/batch_homework/chamber1KLF5in488SOX8in647-1.tif'
img = ski.io.imread(fname)
# Show the array shape as the cell output; the last axis is used as the channel axis below.
img.shape

(3072, 3072, 3)

In [5]:
# Empty the viewer, then add the image split along its last axis (one layer per channel).
viewer.layers.clear()
viewer.add_image(img, channel_axis=2)

[<Image layer 'Image' at 0x230b75b1d50>,
 <Image layer 'Image [1]' at 0x230b75b2ec0>,
 <Image layer 'Image [2]' at 0x230b763ec20>]

In [6]:
# First channel of the image: the one searched for in situ spots.
h3p = img[..., 0]

# Negated Laplacian-of-Gaussian so that bright blobs become positive peaks.
# The image is divided by 1000 before filtering and the result scaled back up afterwards.
LoG = -1000.0 * ndi.gaussian_laplace(h3p / 1000.0, sigma=5)

# threshold_abs had to be above 5.5 for this data; that value only holds
# while a similar sigma is used for the LoG filter.
peaks = ski.feature.peak_local_max(
    LoG,
    min_distance=3,
    threshold_abs=5.5,
    num_peaks=1_000_000,
)

# Overlay the filtered image and the detected peaks for visual inspection.
# Variable names are kept as-is: the 'peaks' layer name in the output suggests
# napari derives layer names from them.
viewer.add_image(
    LoG,
    colormap='gray',
    blending='additive',
    contrast_limits=[0, 1.5],
)
viewer.add_points(peaks, size=5)

<Points layer 'peaks' at 0x230c167c820>

If you chose an image from "chamber1", try your settings on an image from "chamber2". If the settings don't work well, adjust them until they work well for both.

In [7]:
# Load a chamber2 image to check the same spot-finding settings on it.
# Note: this rebinds `img`, so the chamber1 image is no longer held in that name.
fnameB = 'files/batch_homework/chamber2KLF5in488SOX8in647-1.tif'
img = ski.io.imread(fnameB)
# Show the array shape as the cell output.
img.shape

(3072, 3072, 3)

In [8]:
# Empty the viewer, then add the current image split along its last axis (one layer per channel).
viewer.layers.clear()
viewer.add_image(img, channel_axis=2)

[<Image layer 'Image' at 0x230c17dffd0>,
 <Image layer 'Image [1]' at 0x230ad076b60>,
 <Image layer 'Image [2]' at 0x2311c58eaa0>]

In [9]:
# First channel of the image: the one searched for in situ spots.
h3p = img[..., 0]

# Negated Laplacian-of-Gaussian so that bright blobs become positive peaks.
LoG = -1000.0 * ndi.gaussian_laplace(h3p / 1000.0, sigma=5)

# threshold_abs had to be above 5.5 for this data; that value only holds
# while a similar sigma is used for the LoG filter.
peaks = ski.feature.peak_local_max(
    LoG,
    min_distance=3,
    threshold_abs=5.5,
    num_peaks=1_000_000,
)

# Indicator image: 1 at every detected peak coordinate, 0 elsewhere.
# It is used later as the intensity image for counting spots per cell.
peak_img = np.zeros_like(LoG)
peak_img[tuple(peaks.T)] = 1

# Overlay the filtered image and the detected peaks for visual inspection.
viewer.add_image(
    LoG,
    colormap='gray',
    blending='additive',
    contrast_limits=[0, 1.5],
)
viewer.add_points(peaks, size=5)

<Points layer 'peaks' at 0x230c1559900>

## Part 2:  Segment cells using cellpose

Use cellpose to segment the cells in one of the images (should be able to use last week's homework to do so).

In [10]:
# Same Cellpose construction as in the Setup section; running it rebinds `model` to a fresh instance.
model = models.Cellpose(gpu=True, model_type='cyto')

In [12]:
# Run Cellpose on the full multi-channel image (channels live on the last axis).
cells, flows, styles, diams = model.eval(
    img,
    diameter=300,
    channels=[2, 3],
    channel_axis=2,
    cellprob_threshold=-2,
    flow_threshold=1.0,
)

# Post-process the label image with the course helpers in `sutils`.
# NOTE(review): presumably remove_objects keeps objects within the given size
# bounds and shrink_labels shrinks every label by `shrinkage` - confirm in sutils.
filtered_cells = sutils.remove_objects(cells, 50_000, 10_000_000_000)
shrunk_cells = sutils.shrink_labels(filtered_cells, shrinkage=40)

viewer.add_labels(shrunk_cells, name='Shrunk cells')

<Labels layer 'Shrunk cells' at 0x230ad4f5fc0>

## Part 3:  Create a table of spots per cell in an image

Use regionprops to calculate the number of spots per cell, and then pd.DataFrame to make a dataframe out of it.

In [13]:
# One row per segmented cell. peak_img is 1 at each detected spot and 0 elsewhere,
# so mean_intensity is the spot density (spots per pixel) inside the cell and
# mean_intensity * area recovers the number of spots.
results = pd.DataFrame(
    ski.measure.regionprops_table(
        shrunk_cells,
        intensity_image=peak_img,
        properties=['label', 'area', 'mean_intensity'],
    )
)
# The product passes through a floating-point division, so it can land a hair off
# the true integer (e.g. 0.9999999999999999); snap it to the nearest whole count.
results['counts'] = (results['mean_intensity'] * results['area']).round()
results.head()

Unnamed: 0,label,area,mean_intensity,counts
0,1,57769.0,0.00026,15.0
1,2,173090.0,2.3e-05,4.0
2,4,64750.0,3.1e-05,2.0
3,6,107958.0,0.000389,42.0
4,21,115401.0,8.7e-05,10.0


## Part 4:  Process all 8 images using glob

Use glob and a for loop to apply your in situ spot finder, cell segmenter, and table maker to all images, and put together a master dataframe at the end that contains all the values.

MAKE SURE you are reasonably happy with your values above, as this will take a while to run.

In [14]:
# Collect one per-cell table per image; they are concatenated in the next cell.
all_results = []

# Restrict the glob to the .tif images (a bare '*' would also hand any stray file in
# the folder to imread) and sort so the processing order is the same on every run.
for f in sorted(glob.glob('files/batch_homework/*.tif')):
    img = ski.io.imread(f)

    # Spot finding, with the settings tuned on the single images above.
    h3p = img[:,:,0]  # first channel: the one searched for spots
    LoG = -1000.0 * ndi.gaussian_laplace(h3p/1000.0, sigma=5)
    # threshold_abs=5.5 was tuned together with sigma=5; re-tune it if sigma changes.
    peaks = ski.feature.peak_local_max(LoG, min_distance=3, threshold_abs=5.5, num_peaks=1000000)
    # Indicator image: 1 at every detected peak, 0 elsewhere.
    peak_img = np.zeros_like(LoG)
    peak_img[peaks[:,0], peaks[:,1]] = 1

    # Cell segmentation followed by the sutils size filter and label shrinking.
    cells, flows, styles, diams = model.eval(img, diameter=300, channels=[2,3], channel_axis=2, cellprob_threshold=-2, flow_threshold=1.0)
    filtered_cells = sutils.remove_objects(cells, 50000, 10000000000)
    shrunk_cells = sutils.shrink_labels(filtered_cells, shrinkage=40)

    # Per-cell table: mean of the indicator image is the spot density (spots per pixel).
    results = pd.DataFrame(ski.measure.regionprops_table(shrunk_cells, intensity_image=peak_img, properties=['label', 'area', 'mean_intensity']))
    # mean * area can come out fractionally off the true integer count because of
    # floating-point division, so round it back to a whole number of spots.
    results['counts'] = (results['mean_intensity'] * results['area']).round()

    # Remember which image every row came from.
    results['file'] = f
    all_results.append(results)

In [15]:
# Stack the per-image tables into one frame. ignore_index=True gives every cell a unique
# row label; without it each image's rows restart at 0, so label-based lookups such as
# df.loc[0] would return one row per image.
df = pd.concat(all_results, ignore_index=True)

In [17]:
# Display the combined per-cell table (pandas truncates the middle rows in the output).
df

Unnamed: 0,label,area,mean_intensity,counts,file
0,1,133282.0,0.000128,17.0,files/batch_homework\chamber1KLF5in488SOX8in64...
1,3,134508.0,0.000074,10.0,files/batch_homework\chamber1KLF5in488SOX8in64...
2,4,193714.0,0.000129,25.0,files/batch_homework\chamber1KLF5in488SOX8in64...
3,5,182534.0,0.000077,14.0,files/batch_homework\chamber1KLF5in488SOX8in64...
4,7,123346.0,0.000073,9.0,files/batch_homework\chamber1KLF5in488SOX8in64...
...,...,...,...,...,...
34,80,141012.0,0.000057,8.0,files/batch_homework\chamber2KLF5in488SOX8in64...
35,81,53999.0,0.000204,11.0,files/batch_homework\chamber2KLF5in488SOX8in64...
36,83,127399.0,0.000495,63.0,files/batch_homework\chamber2KLF5in488SOX8in64...
37,84,99566.0,0.000131,13.0,files/batch_homework\chamber2KLF5in488SOX8in64...


## Part 5:  Plot results

First create two new columns based on df['file']:  one that has just the filename without the path (e.g., we just want chamber1KLF5in488SOX8in647-1.tif).  The second one should contain just the chamber number.  You will want to make use of df['file'].str.split('???').str[???] for both of these.

In [24]:
# File name without its directory. The stored paths can contain '\' (Windows, as in the
# table above), '/' (macOS/Linux) or a mix of both, so split on either separator;
# splitting on '\\' alone would leave the full path in place on non-Windows machines.
df['short_file'] = df['file'].str.split(r'[\\/]', regex=True).str[-1]
# Chamber id: the single character that follows the word 'chamber' in the file name.
# It stays a string ('1' / '2').
df['chamber'] = df['short_file'].str.split('chamber').str[1].str[0]

Make violin plots of the spots/area for all 8 files using the shortened name you created above. Make sure you use points='all' to visualize the individual cells.

In [25]:
# Per-cell spot density (mean_intensity = spots per pixel) for each image,
# with every individual cell drawn as a point next to its violin.
px.violin(
    df,
    x='short_file',
    y='mean_intensity',
    points='all',
    width=1200,
)

## Part 6:  Aggregating

There is pretty clearly a difference between the two, but it is not being captured here.  The likely problem is that transfection sucks and should never be used for quantitative experiments:  only some of the cells appear to be different in some way.  One solution is to assume that only 20% of cells are transfected, and then take the most dense 20% as an upper bound.

.agg() takes names like 'mean' and 'median', but will also take whatever function we give it.  I have defined a function below that returns the 80th percentile; use it in .agg() to calculate the 80th percentile of each file's spot density.

In [27]:
## DO NOT DELETE IN STUDENT VERSION ###

def ptile(x):
    """Return the 80th percentile of the values in ``x``.

    Written as a one-argument function so it can be passed directly to
    ``.agg()`` as a custom aggregation.
    """
    return np.percentile(x, q=80)

In [28]:
# 80th-percentile spot density per image; grouping on chamber as well keeps that
# column in the result for the comparison below.
agged = (
    df.groupby(['chamber', 'short_file'])['mean_intensity']
    .agg(ptile)
    .reset_index()
)
agged

Unnamed: 0,chamber,short_file,mean_intensity
0,1,chamber1KLF5in488SOX8in647-1.tif,0.000108
1,1,chamber1KLF5in488SOX8in647-2.tif,9.2e-05
2,1,chamber1KLF5in488SOX8in647-3.tif,8.2e-05
3,1,chamber1KLF5in488SOX8in647-4.tif,0.000107
4,2,chamber2KLF5in488SOX8in647-1.tif,0.00025
5,2,chamber2KLF5in488SOX8in647-2.tif,0.000203
6,2,chamber2KLF5in488SOX8in647-3.tif,0.000332
7,2,chamber2KLF5in488SOX8in647-4.tif,0.000289


Now plot the results of the agged version in a box or violin plot.

In [29]:
# One point per image (its 80th-percentile spot density), grouped by chamber.
px.box(
    agged,
    x='chamber',
    y='mean_intensity',
    points='all',
    width=600,
)

## Part 7: Statistics

Calculate a p-value for our experiment, assuming each image is an independent experiment.

In [30]:
# Per-image 80th-percentile densities for each chamber ('chamber' holds strings, hence '1' / '2').
group1 = agged.loc[agged['chamber'] == '1', 'mean_intensity']
group2 = agged.loc[agged['chamber'] == '2', 'mean_intensity']

# Two-sample t-test with scipy's default settings, treating each image as one independent measurement.
ttest_ind(group1, group2)


TtestResult(statistic=-6.100448558151106, pvalue=0.0008839080892721419, df=6.0)