In [None]:
"""

We want to detect staining spillover: a cell that truly expresses the stain
is marked around its whole perimeter, whereas a neighboring cell that only
receives spillover is marked in just one area.

"""

%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
import h5py

%matplotlib inline
from matplotlib import pyplot as plt
import seaborn as sns

import cv2

In [None]:
# Shell escape: list the HDF5 files in the dataset directory whose names contain "reg".
!ls /storage/codex/datasets_v1/*reg*.hdf5

In [None]:
# Open the HDF5 file read-only and print its top-level keys.
# The handle is not closed here; the cells below keep reading from `h5f`.
h5f = h5py.File('/storage/codex/datasets_v1/210113_Breast_Cassette11_reg1.hdf5', 'r')
print(h5f.keys())

In [None]:
# List the entries under 'cells'; the cells below index it by channel name (e.g. 'CD3e').
print(h5f['cells'].keys())

In [None]:
# List the entries under 'meta'; the cells below read 'nuclear_masks' and 'Cell_IDs' from it.
print(h5f['meta'].keys())

In [None]:
# Shape of the nuclear-mask stack; its first axis is indexed per cell further down.
print(h5f['meta']['nuclear_masks'].shape)

In [None]:
# 3x3 structuring element shared by every dilation/erosion in this notebook.
kernel = np.ones((3,3))

def staining_border_nonzero(h5f, channel, i, kernel=kernel,
                            dilate_iterations=2, erode_iterations=1):
    """Return the fraction of a cell's nuclear-border ring with non-zero stain.

    The ring is the set of pixels covered by the dilated nuclear mask but not
    by the eroded one, i.e. a band straddling the nuclear outline.

    Parameters
    ----------
    h5f : mapping
        Open HDF5 file (or any nested mapping) providing
        ``h5f['cells'][channel]`` and ``h5f['meta']['nuclear_masks']``, both
        indexed by cell along their first axis.
    channel : str
        Name of the stain channel under ``h5f['cells']``.
    i : int
        Index of the cell along the first axis.
    kernel : array-like
        Structuring element handed to ``cv2.dilate`` / ``cv2.erode``.
    dilate_iterations : int
        Number of dilation passes that define the outer edge of the ring.
    erode_iterations : int
        Number of erosion passes that define the inner edge of the ring.

    Returns
    -------
    float
        Share of ring pixels whose stain value is > 0, or ``0.0`` when the
        ring is empty (for example an all-zero mask).
    """
    x = h5f['cells'][channel][i, ...]
    m = h5f['meta']['nuclear_masks'][i, ...].astype(np.uint8)

    # `iterations` has to be passed by keyword: the third positional parameter
    # of cv2.dilate / cv2.erode is `dst`, so a bare positional count is never
    # used as the number of passes.
    md = cv2.dilate(m, kernel, iterations=dilate_iterations)
    me = cv2.erode(m, kernel, iterations=erode_iterations)

    # Dilation never falls below erosion, so `md > me` selects exactly the
    # pixels where the two differ, without relying on unsigned subtraction.
    border_signal = x[md > me]
    if border_signal.size == 0:
        # No ring pixels at all: report "no border staining" instead of 0/0.
        return 0.0
    return np.sum(border_signal > 0) / len(border_signal)


In [None]:
# Cell identifiers are stored as byte strings; decode each one to str.
cell_ids = np.array(
    [raw_id.decode('utf-8') for raw_id in h5f['meta']['Cell_IDs'][:]]
)

In [None]:
# Number of cells, taken from the dataset's length so the ID array is not
# read into memory just to be counted.
ncells = len(h5f['meta']['Cell_IDs'])
# For every cell: fraction of its nuclear-border ring with non-zero CD3e signal.
border_nz = np.array([staining_border_nonzero(h5f, 'CD3e', i) for i in range(ncells)])

In [None]:
# Load the full 'CD3e' dataset from the 'cell_intensity' group into memory.
# Presumably the mean CD3e intensity per cell (going by the variable name) — TODO confirm.
cell_means = h5f['cell_intensity']['CD3e'][:]

In [None]:
inds = np.arange(ncells)

# Thresholded copy of the border fractions: anything below 0.2 is treated as
# having no border staining. `border_nz` itself is left unmodified.
bs = np.where(border_nz < 0.2, 0, border_nz)

# Cell indices ordered by ascending and by descending thresholded fraction.
inds_srt = inds.take(np.argsort(bs))
inds_srt_r = inds.take(np.argsort(-bs))

# How many cells are still above zero after thresholding.
print(np.count_nonzero(bs > 0))

In [None]:
# Number of cells whose thresholded fraction is exactly zero; in the ascending
# order `inds_srt` this is the position right after the run of zeros.
offs=(bs==0).sum()
# Cell output: the thresholded fraction of the cell at that position.
bs[inds_srt[offs]]

In [None]:
# 5x5 gallery of CD3e crops with the nuclear-border ring painted at the crop's
# maximum intensity, for 25 consecutive cells in ascending order of `bs`.
fig, axs = plt.subplots(5, 5, figsize=(8, 8), dpi=90)
axs = axs.ravel()

# Skip all zeroed-out cells, then move a further 6000 positions up the
# ascending order (other offsets show other parts of the distribution).
offs = (bs == 0).sum() + 6000
print(offs)

for j, ax in enumerate(axs):
    if j + offs >= len(inds_srt):
        # Fewer cells than the offset asks for: blank the panel instead of
        # failing with an IndexError.
        ax.axis('off')
        continue

    ji = inds_srt[j + offs]
    print(bs[ji])
    m = h5f['meta']['nuclear_masks'][ji, ...].astype(np.uint8)

    # `iterations` has to be passed by keyword: the third positional parameter
    # of cv2.dilate / cv2.erode is `dst`, not the number of passes.
    w = cv2.dilate(m, kernel, iterations=2)
    e = cv2.erode(m, kernel, iterations=1)

    img = h5f['cells']['CD3e'][ji, ...]
    # Highlight the ring (dilated minus eroded mask) on the in-memory crop.
    img[w > e] = img.max()

    ax.matshow(img)
    # Ticks are cleared after matshow so they are not re-created by it.
    ax.set_xticks([])
    ax.set_yticks([])

In [None]:
# Thresholded border fractions in descending order, restricted to cells whose
# `cell_means` entry is above zero.
bz = bs[inds_srt_r][cell_means[inds_srt_r] > 0]

plt.scatter(np.arange(len(bz)), bz, s=1)
plt.xlabel('sorted order')
# The plotted values are fractions in [0, 1], not percentages.
plt.ylabel('fraction of nucleus ring >0');