In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
from scipy import stats
from skimage.external import tifffile
from skimage.external.tifffile import imread

In [2]:
# Root folder of the experiment; every other path in this notebook is built from it.
bigDir = r"Z:\CookLab\Liu\20190816_organizedData_MCM_loading\20200207_new data"

In [3]:
# Path of the per-cell measurement table. The backslash is doubled so that it is a
# literal path separator rather than the start of the invalid escape sequence '\c'.
file=f'{bigDir}\\cellinfo_200203_v6.csv'

In [4]:
# Read the per-cell measurement table into a DataFrame.
data = pd.read_csv(filepath_or_buffer=file)

# data visualizations 

In [5]:
# Remove the two leftover index columns from the CSV.
# errors='ignore' keeps this cell idempotent: because `data` is rebound to the result,
# re-running the cell would otherwise raise KeyError once the columns are gone.
data=data.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'], errors='ignore')

In [6]:
# Preview the first five rows of the table.
data.head(n=5)

Unnamed: 0,file,original cell name,movie,last frame movie position x,last frame movie position y,ab1 (MCM),ab1 ch,ab2 (heterochromatin),ab2 ch,DAPI ch,...,outer_DAPI_total,nuc_DAPI_het,inner_DAPI_het,outer_DAPI_het,nuc_DAPI_eu,inner_DAPI_eu,outer_DAPI_eu,day,physical_age,mol_age_mod
0,20200203_cell_00.ciz,20200203-06-13-63X 16bit,20200203 simcmxy06,647.518,153.039,MCM3,1.0,HP1beta,2.0,3.0,...,1781812000.0,942249500.0,553948216.0,388301238.0,1841376000.0,447865672.0,1393510000.0,d20200203,2.333333,0.225
1,20200203_cell_01.ciz,20200203-06-25-63X 16bit,20200203 simcmxy06,1490.165,332.591,MCM3,1.0,HP1beta,2.0,3.0,...,1768828000.0,733928600.0,333304109.0,400624497.0,1888008000.0,519804886.0,1368203000.0,d20200203,1.333333,0.166
2,20200203_cell_02.ciz,20200203-06-27-63X 16bit,20200203 simcmxy06,1560.535,380.034,MCM3,1.0,HP1beta,2.0,3.0,...,1844853000.0,744758800.0,334218140.0,410540672.0,1937367000.0,503054477.0,1434312000.0,d20200203,1.333333,0.166
3,20200203_cell_03.ciz,20200203-06-29-63X 16bit,20200203 simcmxy06,930.763,391.252,MCM3,1.0,HP1beta,2.0,3.0,...,2026255000.0,1034361000.0,571961971.0,462399349.0,1946238000.0,382382236.0,1563856000.0,d20200203,6.833333,0.684
4,20200203_cell_04.ciz,20200203-06-51-63X 16bit,20200203 simcmxy06,1653.035,635.701,MCM3,1.0,HP1beta,2.0,3.0,...,2058831000.0,1055251000.0,627925856.0,427324886.0,2059953000.0,428447160.0,1631506000.0,d20200203,4.166667,0.376


In [7]:
# Independent copy of the rows belonging to the siLuc group.
Luc = data[data['group'] == 'siLuc'].copy()

In [8]:
# Independent copy of the rows belonging to the siMCM3 group.
MCM3 = data[data['group'] == 'siMCM3'].copy()

# violin plot of 'group vs nuc_mcm_total'

In [9]:
%matplotlib notebook
sn.violinplot(x='group',y='nuc_mcm_total',data=data)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1cb16c03240>

In [10]:
%matplotlib notebook
sn.scatterplot(x='group',y='nuc_mcm_total', data=data)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1cb16cb5ba8>

In [11]:
# normalizes the mcm signal to the volume 
%matplotlib notebook
sn.violinplot(x='group',y=data.nuc_mcm_total/data.nuc_vol_total,data=data)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1cb17259e48>

## mean of nuc_mcm_total for two different groups

In [12]:
# Mean nuc_mcm_total over the siLuc cells.
Luc['nuc_mcm_total'].mean()

1505824099.162162

In [13]:
# Mean nuc_mcm_total over the siMCM3 cells.
MCM3['nuc_mcm_total'].mean()

1206158732.0277777

In [14]:
# True when the siLuc mean of nuc_mcm_total is larger than the siMCM3 mean.
MCM3['nuc_mcm_total'].mean() < Luc['nuc_mcm_total'].mean()

True

## concern on the result from the violin plot

__5/18__
Liu has clarified that the channels for this experiment were recorded differently in the csv files. I updated the signals and looked into the mcm signals for both groups. The violin plot shows the expected result.


# finding the top 5 cells that have the highest mcm signal in the nucleus

Among the earlyG1 cells whose molecular ages we changed, we found a concerning pattern: these earlyG1 cells have mcm signals that are too high across the entire nucleus. We decided to take a closer look at these cells. 

<br>

First, I looked at the top 5 cells with the highest mcm signals. 

In [15]:
# nuc_mcm_total of the earlyG1 siLuc cells, largest first; show the first five.
(
    Luc.loc[Luc['category'] == 'earlyG1', 'nuc_mcm_total']
    .sort_values(ascending=False)
    .head()
)

27    1.539085e+09
28    1.529605e+09
54    1.489365e+09
0     1.424996e+09
66    1.354548e+09
Name: nuc_mcm_total, dtype: float64

I tried another sort that shows the top 8 cells - these cells all have an mcm signal higher than 1e9. 

In [16]:
# nuc_mcm_total of the earlyG1 siLuc cells, largest first; show the first eight.
(
    Luc.loc[Luc['category'] == 'earlyG1', 'nuc_mcm_total']
    .sort_values(ascending=False)
    .head(8)
)

27    1.539085e+09
28    1.529605e+09
54    1.489365e+09
0     1.424996e+09
66    1.354548e+09
50    1.315056e+09
35    1.280643e+09
67    1.267028e+09
Name: nuc_mcm_total, dtype: float64

But since the top 5 cells have mcm signals higher than 1.1e9, I will look into what is happening. 

## *images for these 5 cells*

In [17]:
# earlyG1 siLuc cells whose nuc_mcm_total exceeds 1.1e9, sorted brightest first; keep five.
# The cutoff is written as the float literal 1.1e9: in Python `^` is bitwise XOR, so the
# previous `1.1*(10^9)` evaluated to 1.1*3 = 3.3 and the filter excluded nothing.
top5 = (
    Luc.loc[(Luc.category == 'earlyG1') & (Luc.nuc_mcm_total > 1.1e9), :]
    .sort_values(by='nuc_mcm_total', ascending=False)
    .head()
)

## #1. 

In [34]:
# Load the image stack and the nucleus mask for the first row of `top5`
# (the highest nuc_mcm_total, since `top5` is sorted in descending order).
# The image file has the cell's name with the .ciz extension swapped for .tif; the
# leading dot keeps the swap from touching a "ciz" elsewhere in the name.
file1=top5.iloc[0]['file'].replace('.ciz','.tif')
# Every backslash is doubled so none is read as an invalid escape sequence ('\d', '\s').
fileDir=f'{bigDir}\\data_tiff\\{file1}'
img1=imread(fileDir)
print(img1.shape) # channel 1 = mcm // channel 2 = hp1beta // channel 3 = dapi

# The mask file is named after the image file with a `_nucleus` suffix.
maskfile=file1.replace('.tif','_nucleus.tif')
maskDir=f'{bigDir}\\segmentation_nucleus_Otsu\\{maskfile}'
mask1=imread(maskDir)
print(mask1.shape)
print(maskDir)

(22, 3, 380, 380)
(22, 380, 380)
Z:\CookLab\Liu\20190816_organizedData_MCM_loading\20200207_new data\segmentation_nucleus_Otsu\20200203_cell_27_nucleus.tif


In [35]:
%matplotlib notebook
plt.imshow(img1[15,0,:,:])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x1cb18ac8be0>

In [36]:
%matplotlib notebook
plt.imshow(mask1[15])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x1cb18dd0780>

## #2. 

In [37]:
# Load the image stack and the nucleus mask for the second row of `top5`
# (second-highest nuc_mcm_total, since `top5` is sorted in descending order).
# The image file has the cell's name with the .ciz extension swapped for .tif; the
# leading dot keeps the swap from touching a "ciz" elsewhere in the name.
file1=top5.iloc[1]['file'].replace('.ciz','.tif')
# Every backslash is doubled so none is read as an invalid escape sequence ('\d', '\s').
fileDir=f'{bigDir}\\data_tiff\\{file1}'
img1=imread(fileDir)
print(img1.shape) # channel 1 = mcm // channel 2 = hp1beta // channel 3 = dapi

# The mask file is named after the image file with a `_nucleus` suffix.
maskfile=file1.replace('.tif','_nucleus.tif')
maskDir=f'{bigDir}\\segmentation_nucleus_Otsu\\{maskfile}'
mask1=imread(maskDir)
print(mask1.shape)
print(maskDir)

(22, 3, 380, 380)
(22, 380, 380)
Z:\CookLab\Liu\20190816_organizedData_MCM_loading\20200207_new data\segmentation_nucleus_Otsu\20200203_cell_28_nucleus.tif


In [38]:
%matplotlib notebook
plt.imshow(img1[15,0,:,:])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x1cb18e0e358>

In [39]:
%matplotlib notebook
plt.imshow(mask1[15])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x1cb193a3c88>

## #3. 

In [40]:
# Load the image stack and the nucleus mask for the third row of `top5`
# (third-highest nuc_mcm_total, since `top5` is sorted in descending order).
# The image file has the cell's name with the .ciz extension swapped for .tif; the
# leading dot keeps the swap from touching a "ciz" elsewhere in the name.
file1=top5.iloc[2]['file'].replace('.ciz','.tif')
# Every backslash is doubled so none is read as an invalid escape sequence ('\d', '\s').
fileDir=f'{bigDir}\\data_tiff\\{file1}'
img1=imread(fileDir)
print(img1.shape) # channel 1 = mcm // channel 2 = hp1beta // channel 3 = dapi

# The mask file is named after the image file with a `_nucleus` suffix.
maskfile=file1.replace('.tif','_nucleus.tif')
maskDir=f'{bigDir}\\segmentation_nucleus_Otsu\\{maskfile}'
mask1=imread(maskDir)
print(mask1.shape)
print(maskDir)

(23, 3, 380, 380)
(23, 380, 380)
Z:\CookLab\Liu\20190816_organizedData_MCM_loading\20200207_new data\segmentation_nucleus_Otsu\20200203_cell_54_nucleus.tif


In [41]:
%matplotlib notebook
plt.imshow(img1[15,0,:,:])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x1cb1968f780>

In [42]:
%matplotlib notebook
plt.imshow(mask1[15])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x1cb19974f60>

## #4. 

In [43]:
# Load the image stack and the nucleus mask for the fourth row of `top5`
# (fourth-highest nuc_mcm_total, since `top5` is sorted in descending order).
# The image file has the cell's name with the .ciz extension swapped for .tif; the
# leading dot keeps the swap from touching a "ciz" elsewhere in the name.
file1=top5.iloc[3]['file'].replace('.ciz','.tif')
# Every backslash is doubled so none is read as an invalid escape sequence ('\d', '\s').
fileDir=f'{bigDir}\\data_tiff\\{file1}'
img1=imread(fileDir)
print(img1.shape) # channel 1 = mcm // channel 2 = hp1beta // channel 3 = dapi

# The mask file is named after the image file with a `_nucleus` suffix.
maskfile=file1.replace('.tif','_nucleus.tif')
maskDir=f'{bigDir}\\segmentation_nucleus_Otsu\\{maskfile}'
mask1=imread(maskDir)
print(mask1.shape)
print(maskDir)

(22, 3, 380, 380)
(22, 380, 380)
Z:\CookLab\Liu\20190816_organizedData_MCM_loading\20200207_new data\segmentation_nucleus_Otsu\20200203_cell_00_nucleus.tif


In [44]:
%matplotlib notebook
plt.imshow(img1[15,0,:,:])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x1cb19368e48>

In [45]:
%matplotlib notebook
plt.imshow(mask1[15])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x1cb19f1f5c0>

## #5. 

In [46]:
# Load the image stack and the nucleus mask for the fifth row of `top5`
# (fifth-highest nuc_mcm_total, since `top5` is sorted in descending order).
# The image file has the cell's name with the .ciz extension swapped for .tif; the
# leading dot keeps the swap from touching a "ciz" elsewhere in the name.
file1=top5.iloc[4]['file'].replace('.ciz','.tif')
# Every backslash is doubled so none is read as an invalid escape sequence ('\d', '\s').
fileDir=f'{bigDir}\\data_tiff\\{file1}'
img1=imread(fileDir)
print(img1.shape) # channel 1 = mcm // channel 2 = hp1beta // channel 3 = dapi

# The mask file is named after the image file with a `_nucleus` suffix.
maskfile=file1.replace('.tif','_nucleus.tif')
maskDir=f'{bigDir}\\segmentation_nucleus_Otsu\\{maskfile}'
mask1=imread(maskDir)
print(mask1.shape)
print(maskDir)

(24, 3, 380, 380)
(24, 380, 380)
Z:\CookLab\Liu\20190816_organizedData_MCM_loading\20200207_new data\segmentation_nucleus_Otsu\20200203_cell_66_nucleus.tif


In [47]:
%matplotlib notebook
plt.imshow(img1[15,0,:,:])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x1cb196bacc0>

In [48]:
%matplotlib notebook
plt.imshow(mask1[15])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x1cb1a4f1748>

## thoughts

From the actual cell images and nucleus masks, these cells (with high mcm signals) do not seem to have any segmentation problems. The masks do not include any concerning background - although they could raise a concern about covering too much area. However, from my perspective, segmentation does not seem to be the primary cause of these cells' high mcm signals.