# Pre-analysis progress checker notebook

Check how many experiments & positions have 

- alignment 
- segmentation (and if complete) 
- tracks

In [4]:
import os
import re
import glob
import pandas as pd
from natsort import natsorted
pd.set_option('display.max_rows', 200) 
# def sort(list_):
#     list_ = sorted(list_, 
#     key = lambda x: [int(y) for y in re.findall(r'\d+', x)])
#     return list_
    

In [5]:
root_dir = '/home/nathan/data/kraken/ras/'

In [6]:
expt_list = sorted([expt for expt in os.listdir(root_dir) 
                    if 'ND' in expt and os.path.isdir(os.path.join(root_dir, expt))], 
                    key = lambda x: [int(y) for y in re.findall(r'\d+', x)])
print(expt_list)

['ND0013', 'ND0014', 'ND0016', 'ND0017', 'ND0018', 'ND0019', 'ND0020', 'ND0021', 'ND0022', 'ND0023', 'ND0024', 'ND0025']


In [7]:
progress = []
for expt in expt_list:
    ### sorted position list
    pos_list = sorted([pos for pos in os.listdir(os.path.join(root_dir, expt)) 
                       if 'Pos' in pos 
                       and os.path.isdir(os.path.join(root_dir, expt, pos))],
                       key = lambda x: [int(y) for y in re.findall(r'\d+', x)])
    for pos in pos_list:
        ### does any alignment tensors exist?
        directory = os.path.join(root_dir, expt, pos)
        directory_files = os.listdir(directory)
        tensor_file = len([fn for fn in directory_files if 'transform_tensor' in fn and '.npy' in fn])

        ## do masks exist
        image_dir = os.path.join(directory, f'{pos}_images')
        n_masks = len(glob.glob(f'{image_dir}/*channel099*.tif'))
        n_imgs = len(glob.glob(f'{image_dir}/*channel001*.tif'))
        
        if n_imgs == 0:
            image_files = 0
        else:
            image_files = int((n_masks/n_imgs)*100)

        ## do objects exist
        obj_files = len([fn for fn in directory_files if 'obj' in fn and '.h5' in fn])
        
        ## do tracks exist 
        track_files = len([fn for fn in directory_files if 'tracks' in fn and '.h5' in fn])
        
        ## append data
        progress.append((expt, pos, tensor_file, image_files, obj_files, track_files, n_imgs))
### turn into df
progress_df = pd.DataFrame(progress, columns=['Experiment', 'Position', 'Transformations', 'Masks (% complete)', 'Object files','Tracks', 'Frames'])

In [8]:
progress_df

Unnamed: 0,Experiment,Position,Transformations,Masks (% complete),Object files,Tracks,Frames
0,ND0013,Pos0,0,0,0,0,2785
1,ND0013,Pos1,0,0,0,0,2777
2,ND0013,Pos2,0,0,0,0,2775
3,ND0013,Pos3,0,0,0,0,2788
4,ND0013,Pos4,0,0,0,0,2780
5,ND0013,Pos5,0,0,0,0,2788
6,ND0013,Pos6,0,0,0,0,2787
7,ND0013,Pos7,1,100,3,1,2797
8,ND0013,Pos8,1,100,0,0,2796
9,ND0013,Pos9,1,0,0,0,2791


In [9]:
progress_df.loc[progress_df['Masks (% complete)'] == 100.0]

Unnamed: 0,Experiment,Position,Transformations,Masks (% complete),Object files,Tracks,Frames
7,ND0013,Pos7,1,100,3,1,2797
8,ND0013,Pos8,1,100,0,0,2796
19,ND0014,Pos5,1,100,3,1,1425
20,ND0014,Pos6,1,100,3,1,1432
21,ND0014,Pos7,1,100,3,1,1428
22,ND0014,Pos8,1,100,3,1,1427
23,ND0014,Pos9,1,100,3,1,1429
35,ND0016,Pos7,1,100,3,1,3457
45,ND0017,Pos3,1,100,3,1,1316
46,ND0017,Pos4,1,100,0,0,1329


In [10]:
import numpy as np

In [11]:
np.sum(progress_df.loc[progress_df['Masks (% complete)'] != 0.0]['Frames'])

53319

In [56]:
progress_df.loc[progress_df['Masks (% complete)'] != 0.0]

Unnamed: 0,Experiment,Position,Transformations,Masks (% complete),Object files,Tracks,Frames
7,ND0013,Pos7,1,100,3,1,2797
8,ND0013,Pos8,1,100,0,0,2796
19,ND0014,Pos5,1,100,3,1,1425
20,ND0014,Pos6,1,100,3,1,1432
21,ND0014,Pos7,1,100,3,1,1428
22,ND0014,Pos8,1,100,3,1,1427
23,ND0014,Pos9,1,100,3,1,1429
35,ND0016,Pos7,1,100,3,1,3457
51,ND0017,Pos9,1,100,3,1,1333
52,ND0017,Pos10,1,55,3,1,1326


# checking masks ch98

In [12]:
len(glob.glob('/home/nathan/data/kraken/ras/ND****/Pos*/Pos*_images/*channel098*'))

31423

In [13]:
files = glob.glob('/home/nathan/data/kraken/ras/ND****/Pos*/Pos*_images/*channel098*')

In [14]:
files[0]

'/home/nathan/data/kraken/ras/ND0013/Pos8/Pos8_images/img_channel098_position008_time000000158_z000.tif'

In [57]:
progress = []
for expt in expt_list:
    ### sorted position list
    pos_list = sorted([pos for pos in os.listdir(os.path.join(root_dir, expt)) 
                       if 'Pos' in pos 
                       and os.path.isdir(os.path.join(root_dir, expt, pos))],
                       key = lambda x: [int(y) for y in re.findall(r'\d+', x)])
    for pos in pos_list:
        ### does any alignment tensors exist?
        directory = os.path.join(root_dir, expt, pos)
        directory_files = os.listdir(directory)
        tensor_file = len([fn for fn in directory_files if 'transform_tensor' in fn and '.npy' in fn])

        ## do masks exist
        image_dir = os.path.join(directory, f'{pos}_images')
        n_masks = len(glob.glob(f'{image_dir}/*channel098*.tif'))
        n_imgs = len(glob.glob(f'{image_dir}/*channel001*.tif'))
        
        if n_imgs == 0:
            image_files = 0
        else:
            image_files = int((n_masks/n_imgs)*100)

        ## do objects exist
        obj_files = len([fn for fn in directory_files if 'obj' in fn and '.h5' in fn])
        
        ## do tracks exist 
        track_files = len([fn for fn in directory_files if 'tracks' in fn and '.h5' in fn])
        
        ## append data
        progress.append((expt, pos, tensor_file, image_files, obj_files, track_files, n_imgs))
### turn into df
progress_df = pd.DataFrame(progress, columns=['Experiment', 'Position', 'Transformations', 'Masks (% complete)', 'Object files','Tracks', 'Frames'])

In [58]:
progress_df.loc[progress_df['Masks (% complete)'] != 0.0]

Unnamed: 0,Experiment,Position,Transformations,Masks (% complete),Object files,Tracks,Frames
7,ND0013,Pos7,1,100,3,1,2797
8,ND0013,Pos8,1,100,0,0,2796
19,ND0014,Pos5,1,100,3,1,1425
20,ND0014,Pos6,1,100,3,1,1432
21,ND0014,Pos7,1,100,3,1,1428
22,ND0014,Pos8,1,100,3,1,1427
23,ND0014,Pos9,1,100,3,1,1429
35,ND0016,Pos7,1,100,3,1,3457
51,ND0017,Pos9,1,100,3,1,1333
52,ND0017,Pos10,1,100,3,1,1326


In [59]:
np.sum(progress_df['Frames'])

164025

# number of ch98

In [20]:
np.sum(progress_df.loc[progress_df['Masks (% complete)'] != 0.0]['Frames'])

31422

In [21]:
from skimage.io import imread

In [22]:
from tqdm.auto import tqdm

In [34]:
unprocessed_files = []
for fn in tqdm(files):
    img = imread(fn)
    if 3 not in img:
        unprocessed_files.append(fn)

  0%|          | 0/31423 [00:00<?, ?it/s]

# number that need processing, negligible difference, will process all

In [36]:
len(unprocessed_files)

31384