In [29]:
%matplotlib inline
import shutil, os, sys, glob
import numpy as np
from matplotlib import pyplot as plt
import ipyvolume as ipv
#
sys.path.append('/home/fpoitevi/notebooks/cryo_home/slaclab/cryoEM-notebooks/src/')
import cryoemio
import imutils
import dataviz
import mrcutils

# some useful paths and parameters

In [2]:
# --- directories -----------------------------------------------------------
input_dir = '/gpfs/slac/cryo/fs1/g/ML/cryoEM-notebooks/data/20181005-rib-TEM4/LocalRes/bin2_of_bin6mb_B_fscw'
body_dir = '/gpfs/slac/cryo/fs1/g/ML/cryoEM-notebooks/data/20181005-rib-TEM4/bodymaker/bin2_of_bin6mb_B_fscw'

# --- files -----------------------------------------------------------------
# Every output file name starts with body_dir/keyword; output_mask is that bare
# prefix, to which later cells append their own suffixes.
keyword = 'bodies_bin2_of_bin6_mbB_'
input_mrc = '{}/relion_locres_filtered.mrc'.format(input_dir)
output_mask = '{}/{}'.format(body_dir, keyword)
output_mrc = output_mask + 'diff.mrc'
output_seg = output_mask + 'seg.mrc'

# --- parameters ------------------------------------------------------------
# std_lo / std_hi: multiples of the map's standard deviation.
std_lo, std_hi = 1, 3
sigma_lopass = 4
presegger_sdLevel = 3
# NOTE(review): the segger_* values are not read by any cell shown in this
# notebook section -- presumably kept for an external segmentation step; confirm.
segger_nsteps = 4
segger_stepsize = 2
segger_minregionsize = 100
segger_mincontactsize = 5
# Blur applied when the watershed segmentation is split into per-body masks.
mask_blur = 2.0

# load map and retrieve standard deviation

In [3]:
# Read the input map through mrcutils.mrc2data and take the standard deviation
# of its values. Later cells express their density thresholds as multiples of
# input_std, so this cell must run before any of them.
data = mrcutils.mrc2data(input_mrc)
input_std = np.std(data)
print(input_std)

0.024938045


# visualize effect of thresholding

In [5]:
# Select the part of the input map above std_lo standard deviations
# (std_lo is 1 in the parameters cell).
data_thresh = mrcutils.mrc_select(
    input_mrc,
    mode='above_value',
    value=std_lo*input_std,
)

In [13]:
# Volume-render the thresholded map with two iso-levels placed at std_lo and
# std_hi standard deviations (1 and 3 in the parameters cell); the colour range
# is pinned to the data's own min/max.
ipv.quickvolshow(
    data_thresh,
    level=[std_lo*input_std, std_hi*input_std],
    opacity=0.03,
    level_width=input_std,
    data_min=np.min(data_thresh),
    data_max=np.max(data_thresh),
)

VBox(children=(VBox(children=(HBox(children=(Label(value='levels:'), FloatSlider(value=0.02493804506957531, ma…

# visualize effect of binarizing

In [16]:
# Binarize the thresholded map (no blur) and render the resulting mask.
# The original cell computed `mask` but then displayed `data_thresh` again, so
# the effect of binarizing was never actually shown; the 0..1 levels below only
# make sense for the mask, not for the raw density values.
mask = mrcutils.data2mask(data_thresh, sigma_blur=0, threshold=0.1)
ipv.quickvolshow(mask, level=[0, 1], opacity=0.03)

VBox(children=(VBox(children=(HBox(children=(Label(value='levels:'), FloatSlider(value=0.0, max=1.0, step=0.00…

# define bodies: segment differential thresholding map

The idea is to
- blur mask_dry a lot
- blur mask_fat a little
- define the main body as the intersection of both
- define the other bodies as what remains of mask_fat after removing the intersection
- then segment them

In other words:

$B_0 = M_{fat} \cap M_{dry}$

$\bigcup_{i=1}^{K} B_{i} = M_{fat} \setminus B_0$

In [17]:
# "dry" mask: start from a high density cut (5 standard deviations), then blur a
# lot (sigma_blur=15) before re-thresholding at 0.01.
data_dry = mrcutils.mrc_select(input_mrc, mode='above_value', value=5*input_std)
mask_dry = mrcutils.data2mask(data_dry, sigma_blur=15, threshold=0.01)
# "fat" mask: start from a low density cut (1 standard deviation) and blur only
# a little (sigma_blur=1) before re-thresholding at 0.1.
data_fat = mrcutils.mrc_select(input_mrc, mode='above_value', value=1*input_std)
mask_fat = mrcutils.data2mask(data_fat, sigma_blur=1, threshold=0.1)

In [18]:
# Main body B_0: element-wise minimum of the two masks, which for 0/1 masks is
# their intersection (assumes data2mask returns binary masks -- TODO confirm).
body0 = np.minimum(mask_dry,mask_fat)
# Everything left of the fat mask once the main body is removed; this is the
# volume that gets segmented into the other bodies further down.
bodyK = mask_fat - body0

In [19]:
# Visual check of the main body (body0), rendered at levels 0 and 1.
ipv.quickvolshow(body0, level=[0, 1], opacity=0.03)

VBox(children=(VBox(children=(HBox(children=(Label(value='levels:'), FloatSlider(value=0.0, max=1.0, step=0.00…

In [20]:
# Visual check of the remainder (bodyK), rendered at levels 0 and 1.
ipv.quickvolshow(bodyK, level=[0, 1], opacity=0.03)

VBox(children=(VBox(children=(HBox(children=(Label(value='levels:'), FloatSlider(value=0.0, max=1.0, step=0.00…

In [21]:
# Write the main body and the remainder to disk, using the input map as the
# MRC template for both files.
for body_suffix, body_volume in (('body_0.mrc', body0), ('body_K.mrc', bodyK)):
    mrcutils.data2mrc(output_mask+body_suffix, body_volume, mrc_template=input_mrc)

In [22]:
import skimage.morphology as morphology
from skimage.feature import peak_local_max
from scipy import ndimage
try:
    # Current scikit-image exposes watershed from skimage.segmentation ...
    from skimage.segmentation import watershed
except ImportError:
    # ... older releases only had it under skimage.morphology.
    from skimage.morphology import watershed

# Side length (in voxels) of the cubic neighbourhood in which a distance-map
# peak must be the maximum to become a watershed seed.
fp=60

# Distance of every foreground voxel of bodyK to the nearest background voxel.
distance = ndimage.distance_transform_edt(bodyK)

# One seed per local maximum of the distance map, restricted to bodyK.
# peak_local_max is called in its coordinate-returning form and the boolean seed
# volume is rebuilt by hand: the `indices=False` shortcut used previously no
# longer exists in recent scikit-image. Labels are passed as integers because
# recent releases reject float label arrays (assumes bodyK is a 0/1 mask, so the
# cast does not change which voxels are foreground -- TODO confirm).
peak_coords = peak_local_max(
    distance,
    footprint=np.ones((fp,fp,fp)),
    labels=(bodyK > 0).astype(int),
)
local_maxi = np.zeros(distance.shape, dtype=bool)
local_maxi[tuple(peak_coords.T)] = True

# Number the seeds, then flood the inverted distance map from them, staying
# inside bodyK.
markers = morphology.label(local_maxi)
labels_ws = watershed(-distance, markers, mask=bodyK)

In [27]:
# Render the watershed label volume; the upper level is set to the largest
# label value present.
ipv.quickvolshow(labels_ws, level=[0, float(np.max(labels_ws))], opacity=0.03)

VBox(children=(VBox(children=(HBox(children=(Label(value='levels:'), FloatSlider(value=0.0, max=1.0, step=0.00…

In [25]:
# Save the watershed labels as one segmentation map ...
seg_mrc = output_mask+'body_K_watershed_seg.mrc'
mrcutils.data2mrc(seg_mrc, labels_ws, mrc_template=input_mrc)
# ... then break it down into one mask file per segment
# (<prefix>body_K_watershed<N>.mrc), sorted by volume and blurred by mask_blur.
mrcutils.seg2mask(
    seg_mrc,
    output_mask+'body_K_watershed',
    sigma_blur=mask_blur,
    sort='volume',
    verbose=True,
)

  return array(a, dtype, copy=False, order=order)


/gpfs/slac/cryo/fs1/g/ML/cryoEM-notebooks/data/20181005-rib-TEM4/bodymaker/bin2_of_bin6mb_B_fscw/bodies_bin2_of_bin6_mbB_body_K_watershed1.mrc > volume = 925960.0
/gpfs/slac/cryo/fs1/g/ML/cryoEM-notebooks/data/20181005-rib-TEM4/bodymaker/bin2_of_bin6mb_B_fscw/bodies_bin2_of_bin6_mbB_body_K_watershed2.mrc > volume = 728127.0
/gpfs/slac/cryo/fs1/g/ML/cryoEM-notebooks/data/20181005-rib-TEM4/bodymaker/bin2_of_bin6mb_B_fscw/bodies_bin2_of_bin6_mbB_body_K_watershed3.mrc > volume = 383030.0
/gpfs/slac/cryo/fs1/g/ML/cryoEM-notebooks/data/20181005-rib-TEM4/bodymaker/bin2_of_bin6mb_B_fscw/bodies_bin2_of_bin6_mbB_body_K_watershed4.mrc > volume = 221936.0
/gpfs/slac/cryo/fs1/g/ML/cryoEM-notebooks/data/20181005-rib-TEM4/bodymaker/bin2_of_bin6mb_B_fscw/bodies_bin2_of_bin6_mbB_body_K_watershed5.mrc > volume = 206512.0
/gpfs/slac/cryo/fs1/g/ML/cryoEM-notebooks/data/20181005-rib-TEM4/bodymaker/bin2_of_bin6mb_B_fscw/bodies_bin2_of_bin6_mbB_body_K_watershed6.mrc > volume = 175584.0
/gpfs/slac/cryo/fs1/g/

At that point, a visual inspection (in Chimera) leads to the following decisions:
- bodies 0+5+8+10+11+13+14+15+16 will be merged into body 0
- body 1 is fine
- bodies 2+3+4 will be merged into body 2
- bodies 6+7 will be merged into body 3
- body 9 will be renamed body 4
- body 12 will be renamed body 5

In [34]:
def _merge_bodies(seed_mrc, watershed_ids, final_mrc):
    """Fold several watershed body masks into a seed map and binarize the result.

    Parameters
    ----------
    seed_mrc : str
        Path of the map the merge starts from.
    watershed_ids : iterable of int
        Numbers N of the '<output_mask>body_K_watershedN.mrc' files to fold in,
        one at a time, in the order given.
    final_mrc : str
        Path of the merged mask to write.

    Each watershed body is combined with the running result through
    mrcutils.mrc_algebra using its default operation (presumably an addition --
    TODO confirm), and the final map is turned into a mask with
    mrcutils.mrc2mask (no blur, threshold 0.1).

    Intermediate files live in a private scratch directory created next to
    final_mrc and removed on exit, even if a step fails. This replaces the fixed
    'tmp.mrc' / 'tmp2.mrc' names in the current working directory, which could
    collide with another run and were left behind on error.
    """
    import tempfile  # local import: only this helper needs it

    scratch_parent = os.path.dirname(final_mrc) or '.'
    with tempfile.TemporaryDirectory(dir=scratch_parent) as scratch_dir:
        running_mrc = os.path.join(scratch_dir, 'running.mrc')
        combined_mrc = os.path.join(scratch_dir, 'combined.mrc')
        shutil.copyfile(seed_mrc, running_mrc)
        for body_id in watershed_ids:
            mrcutils.mrc_algebra(running_mrc, output_mask+'body_K_watershed'+str(body_id)+'.mrc', combined_mrc)
            # The combined map becomes the running result for the next body.
            os.replace(combined_mrc, running_mrc)
        mrcutils.mrc2mask(running_mrc, final_mrc, sigma_blur=0., threshold=0.1)


# final body 0: initial body 0 + watershed bodies 5, 8, 10, 11, 13, 14, 15, 16
_merge_bodies(output_mask+'body_0.mrc', [5,8,10,11,13,14,15,16], output_mask+'body_0_final.mrc')
# final body 1: watershed body 1, unchanged
shutil.copyfile(output_mask+'body_K_watershed1.mrc', output_mask+'body_1_final.mrc')
# final body 2: watershed bodies 2 + 3 + 4
_merge_bodies(output_mask+'body_K_watershed2.mrc', [3,4], output_mask+'body_2_final.mrc')
# final body 3: watershed bodies 6 + 7
_merge_bodies(output_mask+'body_K_watershed6.mrc', [7], output_mask+'body_3_final.mrc')
# final body 4: watershed body 9, renamed
shutil.copyfile(output_mask+'body_K_watershed9.mrc', output_mask+'body_4_final.mrc')
# final body 5: watershed body 12, renamed
shutil.copyfile(output_mask+'body_K_watershed12.mrc', output_mask+'body_5_final.mrc')

'/gpfs/slac/cryo/fs1/g/ML/cryoEM-notebooks/data/20181005-rib-TEM4/bodymaker/bin2_of_bin6mb_B_fscw/bodies_bin2_of_bin6_mbB_body_5_final.mrc'