# selecting light-sheet fluorescence microscopy pixels for analysis

## loading data

In [None]:
% matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import glob, os, psutil, time
import seaborn

data_path = '../fits/lsfm/grid_quick/'
y_vars = np.load(data_path + 'y_vars.npy')
y_means = np.load(data_path + 'y_means.npy') # using variance only for now

## pixel set selection

In [None]:
# Select three nested pixel sets by thresholding the per-pixel variance.
# A higher threshold keeps fewer pixels, so 'small' uses the largest
# threshold and 'large' the smallest.  The index arrays refer to positions
# in the flattened variance array.
y_vars_flat = y_vars.reshape(-1)
log_y_vars = np.log10(y_vars_flat)

# Shared log10-spaced histogram bins so that all panels are comparable.
bins = np.linspace(np.log10(y_vars.min()), np.log10(y_vars.max()), 50)

idx_large = np.where(y_vars_flat > 8 * 1e-13)[0]
idx_medium = np.where(y_vars_flat > 1e-12)[0]
idx_small = np.where(y_vars_flat > 1e-11)[0]

plt.figure(figsize=(16,5))

# One panel per pixel set: histogram of all variances with the selected
# subset drawn on top.  Successive plt.hist calls draw into the same axes
# by default; plt.hold was removed in Matplotlib 3.0 and is not needed.
for panel, (set_name, idx_set) in enumerate((('large', idx_large),
                                             ('medium', idx_medium),
                                             ('small', idx_small))):
    plt.subplot(1,3,panel+1)
    plt.hist(log_y_vars, bins=bins)
    plt.hist(log_y_vars[idx_set], bins=bins)
    plt.xlabel('log_{10} variance')
    plt.ylabel('counts')
    plt.legend(('all', 'selected'))
    plt.title(set_name + ' pixel set')
plt.show()

print('large idx set size ',  len(idx_large))
print('medium idx set size ', len(idx_medium))
print('small idx set size ',  len(idx_small))



## locating selected pixels within the image stack

In [None]:
# Map the flat index sets back into the image stack and show, plane by
# plane, where the selected pixels lie.  The stack dimensions are hard-coded;
# the first axis is the one iterated over as image planes below.
nx, ny, nz = 41, 1024, 2048
p_full = nx * ny * nz

# Boolean occupancy masks: True where a pixel belongs to the index set.
idx_occ = np.zeros(p_full, dtype=bool)
idx_occ[idx_large] = True
occ_large = idx_occ.reshape(nx,ny,nz).copy()

idx_occ = np.zeros(p_full, dtype=bool)
idx_occ[idx_medium] = True
occ_medium = idx_occ.reshape(nx,ny,nz).copy()

idx_occ = np.zeros(p_full, dtype=bool)
idx_occ[idx_small] = True
occ_small = idx_occ.reshape(nx,ny,nz).copy()

# Number of selected pixels per plane (normalised to a distribution below).
rel_dens_large = np.zeros((nx))
rel_dens_medium = np.zeros((nx))
rel_dens_small = np.zeros((nx))

pixel_sets = (
    ('large', occ_large, rel_dens_large),
    ('medium', occ_medium, rel_dens_medium),
    ('small', occ_small, rel_dens_small),
)

for i in range(nx):
    plt.figure(figsize=(16, 4))

    # Panel 1: mean image of the plane for orientation.
    plt.subplot(1,4,1)
    plt.imshow(y_means[i,:,:], interpolation='None')
    # grid(False), not grid('off'): a non-empty string is truthy and would
    # switch the grid on in current Matplotlib.
    plt.grid(False)
    plt.title('z = ' + str(i))

    # Panels 2-4: occupancy mask of each pixel set in this plane.
    for panel, (set_name, occ, rel_dens) in enumerate(pixel_sets):
        plt.subplot(1,4,panel+2)
        plt.imshow(occ[i,:,:], interpolation='None')
        plt.grid(False)
        rel_dens[i] = occ[i,:,:].sum()  # writes into the rel_dens_* arrays in place
        if i == 0:
            # The first row doubles as a column header naming the set.
            plt.title(set_name + ' idx set, # = ' + str(rel_dens[i]))
        else:
            plt.title('z = ' + str(i) + ', # = ' + str(rel_dens[i]))
    plt.show()

# Total counts per set, then normalise counts to a distribution over planes.
print(rel_dens_large.sum())
print(rel_dens_medium.sum())
print(rel_dens_small.sum())
rel_dens_large /= rel_dens_large.sum()
rel_dens_medium /= rel_dens_medium.sum()
rel_dens_small /= rel_dens_small.sum()

# Successive plt.plot calls share the current axes by default; plt.hold was
# removed in Matplotlib 3.0 and is not needed.
planes = np.arange(nx)+1

plt.figure(figsize=(16, 8))
plt.subplot(1,2,1)
plt.plot(planes, rel_dens_large, 'b')
plt.plot(planes, rel_dens_medium, 'r')
plt.plot(planes, rel_dens_small, 'g')
plt.title('distr. of cells accross image planes')
plt.xlabel('z plane')
plt.ylabel('distr. # cells')

plt.subplot(1,2,2)
plt.plot(planes, np.cumsum(rel_dens_large), 'b')
plt.plot(planes, np.cumsum(rel_dens_medium), 'r')
plt.plot(planes, np.cumsum(rel_dens_small), 'g')
plt.plot((0, nx), [0.5, 0.5], 'k--')   # 50% level of the cumulative distribution
plt.plot((21, 21), [0, 1], 'r--')      # marks plane 21 (see title)
plt.xlabel('z plane')
plt.ylabel('cum. distr. # cells')
plt.title('cum. distr. (plane z=21 is natural overlap)')
plt.show()


In [None]:
# Persist the three flat index sets next to the summary statistics;
# np.save appends the '.npy' extension to each file name.
for _file_stem, _idx_set in (('idx_large', idx_large),
                             ('idx_medium', idx_medium),
                             ('idx_small', idx_small)):
    np.save(data_path + _file_stem, _idx_set)


# extracting selected cells
- the original data is stored as one image stack (frame) per time point
- for data analysis, we want to keep this 'time-major' layout (row-major for T x p matrices)
- warning: computing variances and means over time (axis=0) can be horribly slow in this layout (column-major would be better for that)

In [None]:
% matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import glob, os, psutil, time
import seaborn
from ssidid import progprint_xrange
import bz2

idx_str = 'medium'

data_path = '/media/marcel/636f7b46-1fd1-4600-b69e-86d2ed82002c/stitching/hankel/zebrafish/'
idx_curr = np.load(data_path + 'idx_' + idx_str + '.npy')

p, T = len(idx_curr),1200
print('(p,T)', (p,T))

In [None]:
# Build the T x p data matrix of the selected pixels, one row per time point.
# With mmap=True the matrix lives in a memory-mapped file on disk; otherwise
# it is assembled in RAM and written with np.save at the end.
mmap = True

# np.float64 / np.nan replace the aliases np.float (removed in NumPy 1.24)
# and np.NaN (removed in NumPy 2.0); the resulting dtype and values are the same.
if mmap:
    y = np.memmap(data_path+'y_' + idx_str, dtype=np.float64, mode='w+', shape=(T,p))
else:
    y = np.nan * np.ones((T,p), dtype=np.float64)

for i in progprint_xrange(T, perline=20):
    filename = '/media/marcel/636f7b46-1fd1-4600-b69e-86d2ed82002c/stitching/data/dOMR0/TM' + ("%05d" % i) + '_CM0_CHN00.stack.bz2'
    # Context manager guarantees the compressed file is closed after reading.
    with bz2.BZ2File(filename,compresslevel=1) as dfile:
        # The decompressed payload is interpreted as a flat float16 array.
        frame = np.frombuffer(dfile.read(), dtype=np.float16)
    y[i,:] = frame[idx_curr]
    if mmap:
        del y # releases RAM, forces flush to disk
        y = np.memmap(data_path+'y_' + idx_str, dtype=np.float64, mode='r+', shape=(T,p))

if mmap:
    del y # releases RAM, forces flush to disk
    # Re-open read-only so later cells cannot modify the extracted data.
    y = np.memmap(data_path+'y_' + idx_str, dtype=np.float64, mode='r', shape=(T,p))
else:
    np.save(data_path + 'y_' + idx_str, y)

In [None]:
# Re-open the extracted T x p matrix read-only and compute each pixel's
# variance and mean over time (axis 0).
# np.float64 replaces the alias np.float, which was removed in NumPy 1.24.
y = np.memmap(data_path+'y_' + idx_str, dtype=np.float64, mode='r', shape=(T,p))
yvar, ymean  = np.var(y,axis=0), np.mean(y,axis=0)

In [None]:
# Write a z-scored copy of the data matrix (per pixel: subtract the temporal
# mean, divide by the temporal standard deviation) to a second memory-mapped
# file, one time point (row) at a time.
# np.float64 replaces the alias np.float, which was removed in NumPy 1.24.
yz = np.memmap(data_path + 'y_' + idx_str + '_zscore', dtype=np.float64, mode='w+', shape=(T,p))

# Loop-invariant: compute the per-pixel standard deviation once.
ystd = np.sqrt(yvar)

for i in progprint_xrange(T, perline=20):
    yz[i,:] = (y[i,:] - ymean) / ystd
    # 'mmap' is the flag set in the extraction cell.
    if mmap:
        del y # releases RAM, forces flush to disk
        del yz
        y = np.memmap(data_path+'y_' + idx_str, dtype=np.float64, mode='r', shape=(T,p))
        yz = np.memmap(data_path + 'y_' + idx_str + '_zscore', dtype=np.float64, mode='r+', shape=(T,p))
# Drop both maps so the z-scored data is flushed and the files are released.
del y
del yz