# Pre-analysis progress checker notebook

Check how many experiments & positions have 

- alignment 
- segmentation (and if complete) 
- tracks

In [1]:
import os
import re
import glob
import pandas as pd
import numpy as np
from natsort import natsorted
pd.set_option('display.max_rows', 200) 
# def sort(list_):
#     list_ = sorted(list_, 
#     key = lambda x: [int(y) for y in re.findall(r'\d+', x)])
#     return list_
    

In [2]:
root_dir = '/home/nathan/data/kraken/ras/'

In [3]:
expt_list = sorted([expt for expt in os.listdir(root_dir) 
                    if 'ND' in expt and os.path.isdir(os.path.join(root_dir, expt))], 
                    key = lambda x: [int(y) for y in re.findall(r'\d+', x)])
print(expt_list)

['ND0013', 'ND0014', 'ND0016', 'ND0017', 'ND0018', 'ND0019', 'ND0020', 'ND0021', 'ND0022', 'ND0023', 'ND0024', 'ND0025']


In [4]:
progress_df = pd.DataFrame()
progress = []
for expt in expt_list:
    if expt == 'ND0013' or expt == 'ND0014' or expt == 'ND0023': ###nd23 is v short
        continue
    ### sorted position list
    pos_list = sorted([pos for pos in os.listdir(os.path.join(root_dir, expt)) 
                       if 'Pos' in pos 
                       and os.path.isdir(os.path.join(root_dir, expt, pos))],
                       key = lambda x: [int(y) for y in re.findall(r'\d+', x)])
    for pos in pos_list:
        ### does any alignment tensors exist?
        directory = os.path.join(root_dir, expt, pos)
        directory_files = os.listdir(directory)
        tensor_file = len([fn for fn in directory_files if 'transform_tensor' in fn and '.npy' in fn])

        ## do masks exist
        image_dir = os.path.join(directory, f'{pos}_images')
        n_masks = len(glob.glob(f'{image_dir}/*channel099*.tif'))
        n_imgs = len(glob.glob(f'{image_dir}/*channel001*.tif'))
        
        if n_imgs == 0:
            image_files = 0
        else:
            image_files = int((n_masks/n_imgs)*100)

        ## do objects exist
        obj_files = len([fn for fn in directory_files if 'obj' in fn and '.h5' in fn])
        ## do tracks exist 
        track_files = len([fn for fn in directory_files if 'tracks' in fn and '.h5' in fn])
        
        ## append data
        progress.append((expt, pos, tensor_file, image_files, obj_files, track_files, n_imgs))
### turn into df
progress_df = pd.DataFrame(progress, columns=['Experiment', 'Position', 'Transformations', 'Masks (% complete)', 'Object files','Tracks', 'Frames'])

In [5]:
progress_df

Unnamed: 0,Experiment,Position,Transformations,Masks (% complete),Object files,Tracks,Frames
0,ND0016,Pos0,0,0,0,0,3470
1,ND0016,Pos1,0,0,0,0,3462
2,ND0016,Pos2,0,0,0,0,3464
3,ND0016,Pos3,0,0,0,0,3458
4,ND0016,Pos4,0,0,0,0,3454
5,ND0016,Pos5,0,0,0,0,3456
6,ND0016,Pos6,0,0,0,0,3461
7,ND0016,Pos7,1,100,3,1,3457
8,ND0016,Pos8,1,100,3,1,3459
9,ND0016,Pos9,0,0,0,0,3454


#### and for the server 

In [6]:
root_dir = '/run/user/1000/gvfs/smb-share:server=lowe-sn00.biochem.ucl.ac.uk,share=lowegrp/Data/Nathan/kraken/ras/preprocessed/'

In [7]:
expt_list = sorted([expt for expt in os.listdir(root_dir) 
                    if 'ND' in expt and os.path.isdir(os.path.join(root_dir, expt))], 
                    key = lambda x: [int(y) for y in re.findall(r'\d+', x)])
print(expt_list)

['ND0010_test', 'ND0010', 'ND0011', 'ND0012', 'ND0013', 'ND0014', 'ND0016', 'ND0017', 'ND0018', 'ND0019', 'ND0020', 'ND0021', 'ND0022', 'ND0023', 'ND0024', 'ND0025']


In [8]:
server_progress_df = pd.DataFrame()
progress = []
for expt in expt_list:
    if 'ND0010' in expt or 'ND0011' in expt or 'ND0012' in expt or 'ND0024' in expt or 'ND0025' in expt:
        continue
    ### sorted position list
    pos_list = sorted([pos for pos in os.listdir(os.path.join(root_dir, expt)) 
                       if 'Pos' in pos 
                       and os.path.isdir(os.path.join(root_dir, expt, pos))],
                       key = lambda x: [int(y) for y in re.findall(r'\d+', x)])
    for pos in pos_list:
        ### does any alignment tensors exist?
        directory = os.path.join(root_dir, expt, pos)
        directory_files = os.listdir(directory)
        tensor_file = len([fn for fn in directory_files if 'transform_tensor' in fn and '.npy' in fn])

        ## do masks exist
        image_dir = os.path.join(directory, f'{pos}_images')
        n_masks = len(glob.glob(f'{image_dir}/*channel099*.tif'))
        n_imgs = len(glob.glob(f'{image_dir}/*channel001*.tif'))
        
        if n_imgs == 0:
            image_files = 0
        else:
            image_files = int((n_masks/n_imgs)*100)

        ## do objects exist
        obj_files = len([fn for fn in directory_files if 'obj' in fn and '.h5' in fn])
        
        ## do tracks exist 
        track_files = len([fn for fn in directory_files if 'tracks' in fn and '.h5' in fn])
        
        ## append data
        progress.append((expt, pos, tensor_file, image_files, obj_files, track_files, n_imgs))
### turn into df
server_progress_df = pd.DataFrame(progress, columns=['Experiment', 'Position', 'Transformations', 'Masks (% complete)', 'Object files','Tracks', 'Frames'])

In [9]:
server_progress_df

Unnamed: 0,Experiment,Position,Transformations,Masks (% complete),Object files,Tracks,Frames
0,ND0013,Pos0,0,0,0,0,2785
1,ND0013,Pos1,0,0,0,0,2777
2,ND0013,Pos2,0,0,0,0,2775
3,ND0013,Pos3,1,100,0,0,2788
4,ND0013,Pos4,1,100,0,0,2780
5,ND0013,Pos5,1,100,0,0,2788
6,ND0013,Pos6,1,26,0,0,2794
7,ND0013,Pos7,1,100,0,0,2797
8,ND0013,Pos8,1,100,0,0,2796
9,ND0013,Pos9,1,100,0,0,2791


#### now append together but give the server progress priority as there are experiments complete on the server that are not complete locally

In [10]:
progress_df.update(server_progress_df, overwrite=True)

In [11]:
progress_df

Unnamed: 0,Experiment,Position,Transformations,Masks (% complete),Object files,Tracks,Frames
0,ND0013,Pos0,0,0,0,0,2785
1,ND0013,Pos1,0,0,0,0,2777
2,ND0013,Pos2,0,0,0,0,2775
3,ND0013,Pos3,1,100,0,0,2788
4,ND0013,Pos4,1,100,0,0,2780
5,ND0013,Pos5,1,100,0,0,2788
6,ND0013,Pos6,1,26,0,0,2794
7,ND0013,Pos7,1,100,0,0,2797
8,ND0013,Pos8,1,100,0,0,2796
9,ND0013,Pos9,1,100,0,0,2791


In [145]:
len(progress_df.loc[progress_df['Masks (% complete)'] == 100.0])

47

In [146]:
progress_df.loc[progress_df['Masks (% complete)'] == 100.0]

Unnamed: 0,Experiment,Position,Transformations,Masks (% complete),Object files,Tracks,Frames
2,ND0013,Pos3,1.0,100.0,0.0,0.0,2788.0
3,ND0013,Pos4,1.0,100.0,0.0,0.0,2780.0
4,ND0013,Pos5,1.0,100.0,0.0,0.0,2788.0
6,ND0013,Pos7,1.0,100.0,0.0,0.0,2797.0
7,ND0013,Pos8,1.0,100.0,0.0,0.0,2796.0
8,ND0013,Pos9,1.0,100.0,0.0,0.0,2791.0
9,ND0013,Pos10,1.0,100.0,0.0,0.0,2791.0
13,ND0014,Pos0,1.0,100.0,0.0,0.0,1428.0
14,ND0014,Pos1,1.0,100.0,0.0,0.0,1420.0
15,ND0014,Pos2,1.0,100.0,0.0,0.0,1432.0


In [147]:
progress_df.loc[progress_df['Masks (% complete)'] != 100.0]

Unnamed: 0,Experiment,Position,Transformations,Masks (% complete),Object files,Tracks,Frames
0,ND0013,Pos0,0.0,0.0,0.0,0.0,2785.0
1,ND0013,Pos2,0.0,0.0,0.0,0.0,2775.0
5,ND0013,Pos6,1.0,0.0,0.0,0.0,535.0
10,ND0013,Pos11,0.0,0.0,0.0,0.0,1567.0
11,ND0013,Pos12,0.0,0.0,0.0,0.0,2791.0
12,ND0013,Pos13,0.0,0.0,0.0,0.0,2813.0
18,ND0011,Pos3,1.0,0.0,2.0,0.0,1638.0


In [148]:
np.sum(progress_df.loc[progress_df['Masks (% complete)'] != 0.0]['Frames'])

78874.0

In [149]:
progress_df.loc[progress_df['Masks (% complete)'] != 0.0]

Unnamed: 0,Experiment,Position,Transformations,Masks (% complete),Object files,Tracks,Frames
2,ND0013,Pos3,1.0,100.0,0.0,0.0,2788.0
3,ND0013,Pos4,1.0,100.0,0.0,0.0,2780.0
4,ND0013,Pos5,1.0,100.0,0.0,0.0,2788.0
6,ND0013,Pos7,1.0,100.0,0.0,0.0,2797.0
7,ND0013,Pos8,1.0,100.0,0.0,0.0,2796.0
8,ND0013,Pos9,1.0,100.0,0.0,0.0,2791.0
9,ND0013,Pos10,1.0,100.0,0.0,0.0,2791.0
13,ND0014,Pos0,1.0,100.0,0.0,0.0,1428.0
14,ND0014,Pos1,1.0,100.0,0.0,0.0,1420.0
15,ND0014,Pos2,1.0,100.0,0.0,0.0,1432.0


# Estimate of the total amount of analysis

In [12]:
expt_info = pd.read_csv('/home/nathan/data/kraken/ras/experiment_info_final.csv', header = 1)

In [13]:
expt_info = expt_info.rename(columns = {'EXP n˚':'Experiments', 'POSITION':'Positions', 'CELL TYPE':'Condition', 'Useable (in radial analysis)':'Valid'})

In [14]:
expt_info

Unnamed: 0,Experiments,Positions,Condition,Well,EXPT NOTES,POS NOTES,Valid,BF CHANNEL,GFP CHANNEL,RFP CHANNEL,...,Focus?,ALIGNED?,SEGMENTED?,Localised?,TRACKED?,segmentation notes,SEG Model,TRACK MODEL,BLISTERING?,COMPETITION?
0,08.11.2021,stopped due to focus issue,,,,,,,,,...,,FALSE,FALSE,False,False,,,,,
1,ND0000,Pos0,MDCK Rasv12 -,,stopped due to focus issue,uninduced,False,,Ras,mutant(ras)-h2b,...,,FALSE,FALSE,False,False,,,,,
2,ND0000,Pos1,50:50 wt:ras+,,stopped due to focus issue,induced,False,,Ras + wt-h2b,mutant(ras)-h2b,...,,FALSE,FALSE,False,False,,,,,
3,ND0000,Pos2,MDCK Rasv12 +,,stopped due to focus issue,induced,False,,Ras,mutant(ras)-h2b,...,,FALSE,FALSE,False,False,,,,,
4,ND0000,Pos3,50:50 wt:ras+,,stopped due to focus issue,induced,False,,Ras + wt-h2b,mutant(ras)-h2b,...,,FALSE,FALSE,False,False,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
363,ND0025,Pos9,97.5:2.5 wt:ras+,6.0,,induced 3x seed dens,True,,Ras + wt-h2b,mutant(ras)-h2b,...,,,,,,,,,,
364,ND0025,Pos10,97.5:2.5 wt:ras+,6.0,,induced 3x seed dens,True,,Ras + wt-h2b,mutant(ras)-h2b,...,,,,,,,,,,
365,ND0025,Pos11,97.5:2.5 wt:ras+,6.0,,induced 3x seed dens,True,,Ras + wt-h2b,mutant(ras)-h2b,...,,,,,,,,,,
366,ND0025,Pos12,97.5:2.5 wt:ras+,6.0,,induced 3x seed dens,True,,Ras + wt-h2b,mutant(ras)-h2b,...,,,,,,,,,,


In [15]:
comp_expts = expt_info.loc[(expt_info['Valid'] == True)]

In [16]:
comp_expts

Unnamed: 0,Experiments,Positions,Condition,Well,EXPT NOTES,POS NOTES,Valid,BF CHANNEL,GFP CHANNEL,RFP CHANNEL,...,Focus?,ALIGNED?,SEGMENTED?,Localised?,TRACKED?,segmentation notes,SEG Model,TRACK MODEL,BLISTERING?,COMPETITION?
176,ND0013,Pos3,90:10 wt:ras+,,,induced,True,,Ras + wt-h2b,mutant(ras)-h2b,...,,bad,TRUE,False,,irfp signal results in bad segmentation after ...,,,,extr 350f
177,ND0013,Pos4,90:10 wt:ras+,,,induced,True,,Ras + wt-h2b,mutant(ras)-h2b,...,,FALSE,irfp dips,False,,irfp signal results in bad segmentation after ...,,,,
178,ND0013,Pos5,90:10 wt:ras+,,,induced,True,,Ras + wt-h2b,mutant(ras)-h2b,...,,TRUE,TRUE,False,,irfp signal results in bad segmentation after ...,,,,
179,ND0013,Pos6,90:10 wt:ras+,,,induced,True,,Ras + wt-h2b,mutant(ras)-h2b,...,,TRUE,TRUE,False,,irfp signal results in bad segmentation after ...,,,,
180,ND0013,Pos7,90:10 wt:ras+,,,induced,True,,Ras + wt-h2b,mutant(ras)-h2b,...,,TRUE,TRUE,False,,irfp signal results in bad segmentation after ...,,,,
181,ND0013,Pos8,90:10 wt:ras+,,,induced,True,,Ras + wt-h2b,mutant(ras)-h2b,...,,TRUE,TRUE,False,,irfp signal results in bad segmentation after ...,,,,
182,ND0013,Pos9,90:10 wt:ras+,,,induced,True,,Ras + wt-h2b,mutant(ras)-h2b,...,,TRUE,TRUE,False,,irfp signal results in bad segmentation after ...,,,,
183,ND0013,Pos10,90:10 wt:ras+,,,induced,True,,Ras + wt-h2b,mutant(ras)-h2b,...,,TRUE,TRUE,False,,irfp signal results in bad segmentation after ...,,,,
188,ND0014,Pos0,90:10 wt:ras+,,,induced,True,,Ras + wt-h2b,mutant(ras)-h2b,...,,FALSE,FALSE,False,,,,,,
189,ND0014,Pos1,90:10 wt:ras+,,,induced 2x seed dens,True,,Ras + wt-h2b,mutant(ras)-h2b,...,,FALSE,FALSE,False,,,,,,


In [41]:
N_comp_expts = len(comp_expts.loc[(expt_info['Valid'] == True)])
N_comp_expts

106

In [42]:
### exp number of masks
total_expected = np.sum(comp_expts['FRAMES n˚'].astype(int))
### for every experiment, 1x alignment, 3x obj files and 1x track i.e. 5 extra files
total_expected += N_comp_expts *5

In [43]:
total_expected

149112

In [28]:
so_far = np.sum(progress_df['Transformations'] 
                +((progress_df['Masks (% complete)']/100)*progress_df['Frames'])
                + progress_df['Object files']
                + progress_df['Tracks']) 

In [29]:
so_far

87615.08

In [39]:
### percentage complete
so_far/total_expected*100

58.7579001019368

# What experiments need to be done??

In [17]:
progress_df

Unnamed: 0,Experiment,Position,Transformations,Masks (% complete),Object files,Tracks,Frames
0,ND0013,Pos0,0,0,0,0,2785
1,ND0013,Pos1,0,0,0,0,2777
2,ND0013,Pos2,0,0,0,0,2775
3,ND0013,Pos3,1,100,0,0,2788
4,ND0013,Pos4,1,100,0,0,2780
5,ND0013,Pos5,1,100,0,0,2788
6,ND0013,Pos6,1,26,0,0,2794
7,ND0013,Pos7,1,100,0,0,2797
8,ND0013,Pos8,1,100,0,0,2796
9,ND0013,Pos9,1,100,0,0,2791


In [151]:
progress_df['Valid'] = False

In [18]:
progress_df['Valid'] = False
progress_df['Condition'] = ''
drop_indices = []
### combine the valid column with the progress df
for i, row in expt_info.iterrows():
    if row['Valid'] == True:
        expt = row['Experiments']
        pos = row['Positions']
        condition = row['Condition'] 
        progress_df.at[progress_df.loc[(progress_df['Experiment'] == expt) & (progress_df['Position'] == pos)].index, 'Valid'] = True
        progress_df.at[progress_df.loc[(progress_df['Experiment'] == expt) & (progress_df['Position'] == pos)].index, 'Condition'] = condition
    if row['Valid'] == False:
        expt = row['Experiments']
        pos = row['Positions']
        
        try:
            drop_indices.append(progress_df.loc[(progress_df['Experiment'] == expt) & (progress_df['Position'] == pos)].index[0])
            #progress_df.loc[(progress_df['Experiment'] == expt) & (progress_df['Position'] == pos)].index[0])
        except:
            continue
progress_df = progress_df.drop(index = drop_indices)

In [19]:
progress_df

Unnamed: 0,Experiment,Position,Transformations,Masks (% complete),Object files,Tracks,Frames,Valid,Condition
3,ND0013,Pos3,1,100,0,0,2788,True,90:10 wt:ras+
4,ND0013,Pos4,1,100,0,0,2780,True,90:10 wt:ras+
5,ND0013,Pos5,1,100,0,0,2788,True,90:10 wt:ras+
6,ND0013,Pos6,1,26,0,0,2794,True,90:10 wt:ras+
7,ND0013,Pos7,1,100,0,0,2797,True,90:10 wt:ras+
8,ND0013,Pos8,1,100,0,0,2796,True,90:10 wt:ras+
9,ND0013,Pos9,1,100,0,0,2791,True,90:10 wt:ras+
10,ND0013,Pos10,1,100,0,0,2791,True,90:10 wt:ras+
14,ND0014,Pos0,1,100,0,0,1428,True,90:10 wt:ras+
15,ND0014,Pos1,1,100,0,0,1420,True,90:10 wt:ras+


In [154]:
progress_df.loc[progress_df['Masks (% complete)'] != 100.0]

Unnamed: 0,Experiment,Position,Transformations,Masks (% complete),Object files,Tracks,Frames,Valid,Condition
5,ND0013,Pos6,1.0,0.0,0.0,0.0,535.0,True,90:10 wt:ras+


In [136]:
progress_df.loc[progress_df['Masks (% complete)'] != 100.0]

Unnamed: 0,Experiment,Position,Transformations,Masks (% complete),Object files,Tracks,Frames,Valid,Condition
5,ND0013,Pos6,1.0,0.0,0.0,0.0,535.0,True,90:10 wt:ras+
31,ND0018,Pos3,0.0,0.0,0.0,0.0,395.0,True,99:1 wt:ras+
32,ND0018,Pos4,0.0,0.0,0.0,0.0,398.0,True,99:1 wt:ras+
33,ND0018,Pos5,0.0,0.0,0.0,0.0,393.0,True,99:1 wt:ras+
34,ND0018,Pos6,0.0,0.0,0.0,0.0,399.0,True,99:1 wt:ras+
35,ND0018,Pos7,0.0,0.0,0.0,0.0,400.0,True,99:1 wt:ras+
45,ND0019,Pos3,0.0,0.0,0.0,0.0,733.0,True,99:1 wt:ras+
46,ND0019,Pos4,0.0,0.0,0.0,0.0,736.0,True,99:1 wt:ras+
47,ND0019,Pos5,0.0,0.0,0.0,0.0,738.0,True,99:1 wt:ras+
48,ND0019,Pos6,0.0,0.0,0.0,0.0,734.0,True,99:1 wt:ras+


In [None]:
progress_df

# checking masks ch98

In [35]:
len(glob.glob('/home/nathan/data/kraken/ras/ND****/Pos*/Pos*_images/*channel097*'))

0

In [13]:
files = glob.glob('/home/nathan/data/kraken/ras/ND****/Pos*/Pos*_images/*channel098*')

In [14]:
files[0]

'/home/nathan/data/kraken/ras/ND0013/Pos8/Pos8_images/img_channel098_position008_time000000158_z000.tif'

In [57]:
progress = []
for expt in expt_list:
    ### sorted position list
    pos_list = sorted([pos for pos in os.listdir(os.path.join(root_dir, expt)) 
                       if 'Pos' in pos 
                       and os.path.isdir(os.path.join(root_dir, expt, pos))],
                       key = lambda x: [int(y) for y in re.findall(r'\d+', x)])
    for pos in pos_list:
        ### does any alignment tensors exist?
        directory = os.path.join(root_dir, expt, pos)
        directory_files = os.listdir(directory)
        tensor_file = len([fn for fn in directory_files if 'transform_tensor' in fn and '.npy' in fn])

        ## do masks exist
        image_dir = os.path.join(directory, f'{pos}_images')
        n_masks = len(glob.glob(f'{image_dir}/*channel098*.tif'))
        n_imgs = len(glob.glob(f'{image_dir}/*channel001*.tif'))
        
        if n_imgs == 0:
            image_files = 0
        else:
            image_files = int((n_masks/n_imgs)*100)

        ## do objects exist
        obj_files = len([fn for fn in directory_files if 'obj' in fn and '.h5' in fn])
        
        ## do tracks exist 
        track_files = len([fn for fn in directory_files if 'tracks' in fn and '.h5' in fn])
        
        ## append data
        progress.append((expt, pos, tensor_file, image_files, obj_files, track_files, n_imgs))
### turn into df
progress_df = pd.DataFrame(progress, columns=['Experiment', 'Position', 'Transformations', 'Masks (% complete)', 'Object files','Tracks', 'Frames'])

In [58]:
progress_df.loc[progress_df['Masks (% complete)'] != 0.0]

Unnamed: 0,Experiment,Position,Transformations,Masks (% complete),Object files,Tracks,Frames
7,ND0013,Pos7,1,100,3,1,2797
8,ND0013,Pos8,1,100,0,0,2796
19,ND0014,Pos5,1,100,3,1,1425
20,ND0014,Pos6,1,100,3,1,1432
21,ND0014,Pos7,1,100,3,1,1428
22,ND0014,Pos8,1,100,3,1,1427
23,ND0014,Pos9,1,100,3,1,1429
35,ND0016,Pos7,1,100,3,1,3457
51,ND0017,Pos9,1,100,3,1,1333
52,ND0017,Pos10,1,100,3,1,1326


In [49]:
np.sum(progress_df['Frames'])

197969

# number of ch98

In [20]:
np.sum(progress_df.loc[progress_df['Masks (% complete)'] != 0.0]['Frames'])

31422

In [21]:
from skimage.io import imread

In [22]:
from tqdm.auto import tqdm

In [34]:
unprocessed_files = []
for fn in tqdm(files):
    img = imread(fn)
    if 3 not in img:
        unprocessed_files.append(fn)

  0%|          | 0/31423 [00:00<?, ?it/s]

# number that need processing, negligible difference, will process all

In [36]:
len(unprocessed_files)

31384