__Preambule__

In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
%%capture
from tqdm.notebook import tqdm
tqdm().pandas()

import numpy as np
np.corrcoef(*np.arange(10).reshape(2,-1))
import bioformats as bf
bf.javabridge.start_vm(class_path=bf.JARS, max_heap_size="20G")
## importing stuff

import json
from collections import OrderedDict
from copy import deepcopy
from sys import exc_info
import os
import pickle
from time import sleep
from sys import path as syspath
syspath.append(os.path.expanduser("~/srdjan_functs/"))

%load_ext autoreload
%autoreload 2
import pandas as pd
from islets.Recording import Recording, parse_leica
from islets.utils import get_filterSizes
from islets.general_functions import td_nanfloor, td2str
from copy import deepcopy

from jupyter_plotly_dash import JupyterDash
from dash.dependencies import Input, Output, State
import dash_core_components as dcc
import dash_html_components as html

In [3]:
#%% other imports
def cellTransform(cellvalue, id=None, verbose=False):
    import numpy as np
    
    if issubclass(type(cellvalue),(bool,np.bool_)):
        if verbose:
            print ("creating a checklist")
        out = dcc.Checklist(id=id,
                             options=[{"label":"✔" if cellvalue else "✘","value":None}],
                             value=[None]*(1-int(cellvalue)),
                             labelStyle={
                                 "color":"green" if cellvalue else "red",
#                                  "padding":"3px"
                             }
                            )
#     elif issubclass(type(cellvalue),(dict,)):
#         out = dcc.Checklist(id=id,
#                              options=[{"label":(str(k).replace(" ","")+("✔" if cellvalue[k] else "✘")), "value":k} for k in cellvalue],
#                              value = [k for k in cellvalue if not cellvalue[k]],
# #                              style={"height":"10px",},
#                              labelStyle={
#                                  "width":"100px",
#                                  "display":"block"
#                                         }
#                             )
    elif issubclass(type(cellvalue),(str,int,float,np.int_)):
        out = cellvalue
    else:
        out = str(cellvalue)
    return html.Div(out,style={
                                 "font-family":"monospace",
                                 "font-size":"14px",
                             },)
#     if issubclass(type(value),str):

def mystyle(col):
    return {"border":'thin lightgrey solid',
            "text-align": "left" if "pickle" in col else None,
            "width": {
                "Series Durations":"120px",
                "Duration":"100px",
                "pickles":"150px",
                "exp":"150px",
                "series":"150px",
                "images":"150px",
                "line scan":"100px",
                     }.get(col),
           }

def generate_table(dataframe, max_rows=100):
    rows = []
    for i in range(min(len(dataframe), max_rows)):
        row = []
        for col in dataframe.columns:
            value = dataframe.iloc[i][col]
#             row.append(html.Td(value if issubclass(type(value),str) else dcc.Checklist(options=[{"label":"item"}]),
# #                                style=style
#                               ))
            row.append(html.Td(cellTransform(value, id=f"table-{col}-{i}"),
                               style=mystyle(col),
                              ))
        rows.append(html.Tr(row,
                            style={"border":'thin lightgrey solid'},
#                             border='thin lightgrey solid' 
                           ))

    return html.Table(
        # Header
        [html.Tr([html.Th(col, style=mystyle(col)) for col in dataframe.columns],
                            style={"border":'thick lightgrey solid'},)] +
        # Body
        rows,
        style={
            "width": "1300px",
            "text-align":"right",
            "table-layout": "fixed"
        }
    ) 


def import_data(recordings,forceMetadataParse=False):
    status = []
    ilifs = 0
    for pathToRecording in tqdm(recordings):
        print ("#"*20, pathToRecording)
        try:
            rec = Recording(pathToRecording)
            if forceMetadataParse:
                rec.parse_metadata()
                rec.save_metadata()
        except:
            continue
        recType = "Nikon" if pathToRecording.endswith(".nd2") else "Leica"

        if recType=="Leica":
            sers = parse_leica(rec, index=True)
        else:
            sers = [(0,"all")]

        analysisFolder = os.path.join(rec.folder, rec.Experiment+"_analysis")
    #     if not os.path.isdir(analysisFolder):
    #         os.makedirs(analysisFolder)
#         rec.tag_linescans()
#         print (sers)
        for indices, ser in sers:
            md = pd.Series(dtype=object)
            md["path"] = pathToRecording
            md["exp"] = os.path.split(pathToRecording)[-1]
            md["series"] = ser
            index = indices[0]
            ser0 = "-".join(ser.split("-")[:-1]) if "-" in ser else ser
            assert rec.metadata.loc[index,"Name"]==ser0
#             singleLineScan = rec.metadata.loc[index,"SizeY"]>10
            try:
                rec.import_series(ser, onlyMeta=True,
#                                   isLineScan=rec.metadata.loc[index,"line scan"]=="single"
                                 )
            except:
                print (f"could not import {ser}")
                status += [md]
                continue
            for k,v in rec.Series[ser]["metadata"].items():
                md[k] = v
                
            saveDir = os.path.join(analysisFolder, ser)
            for k in ["bit depth", "Start time", "End time","Name"]: # , "individual Series"
                try:    del md[k]
                except: passs
            md["Series Durations"] = ["%s [%s]"%(r["Name"].replace("Series","S"), td2str(td_nanfloor(r["Duration"]))) for _,r in md["individual Series"].iterrows()]
            md["Series Durations"] = "\n".join(md["Series Durations"])
#             md["line scan"] = md["SizeY"]*md["pxSize"]<6 # treat also this as a line scan
            if md["line scan"] != "none":
                imagesDone = {}
                for name in md["individual Series"].Name:
                    imageName = os.path.join(saveDir, rec.Experiment+"_"+name+".png")
                    imagesDone[name] = os.path.isfile(imageName)
                md["images"] = imagesDone
                md["images done"] = all(imagesDone.values())
            else:
                movieFilename = os.path.join(saveDir, rec.Experiment+"_"+ser+".mp4")
                md["movie"] = os.path.isfile(movieFilename)
                if md["movie"]:
                    md["movie size [MB]"] = os.path.getsize(movieFilename)/10**6
                pxSize = float(md.pxSize)
                if pxSize<.7: pxSize*=2
                fs = get_filterSizes(pxSize)
                pklsDone = {}
                imgsDone = {}
                for fsize in fs:
                    pickleFile = os.path.join(saveDir, ".".join(map(str,fsize))+"_rois.pkl")
                    imageFile = os.path.join(saveDir, ".image_"+".".join(map(str,fsize))+".png")
                    pickleThere = os.path.isfile(pickleFile)
                    imageThere = os.path.isfile(imageFile)
                    pklsDone[fsize] = pickleThere
                    imgsDone[fsize] = imageThere
                md["pickles"] = [pk if len(pk)>1 else pk for pk in pklsDone.keys()]
                md["(re)do pickles"] = all(pklsDone.values())
            del md["individual Series"]
            status += [dict(md.items())]
        ilifs +=1
    #     if ilifs>3:
    #         break
    try:
        status = pd.DataFrame(status)
    except:
        from sys import exc_info
        print (str(exc_info()))
    return status

Enter Path to the folder you wish to process:

In [4]:
mainFolder = "/data/Sandra/2021/2021_02_16/"
# mainFolder = "/data/Marjan/MB2020_lifs_2_2/Arginine-photon-counting/"
# mainFolder = "/data/Sandra/2019/2019_09_03/"


recordings = []
for cur,ds,fs in os.walk(mainFolder):
    #### if you wish to restrict to only certain folders: ####
#     if "2020_11_05" not in cur: continue
    for f in fs:
        if not (f.endswith(".lif")):# or f.endswith(".nd2")):
            continue
        path = os.path.join(cur,f)
        recordings += [path]
recordings = sorted(recordings)

In [5]:
status_orig = import_data(recordings, forceMetadataParse=False)

  0%|          | 0/1 [00:00<?, ?it/s]

#################### /data/Sandra/2021/2021_02_16/Experiment90a.lif
Recording /data/Sandra/2021/2021_02_16/Experiment90a.lif not yet preprocessed. Preprocessing takes a few seconds and will speed up the usage later... Finished.


In [6]:
# for _,row in status_orig.iterrows():
#     print (f"""/data/useful_notebooks/process_single.py --rec='{row.path}' --ser='{row.series}' --verbose""")

In [7]:
# recast for presentation
status = deepcopy(status_orig)
status.pxSize = status.pxSize.apply("{:.3f}".format).astype("str")
for c in status.columns:
    if "Size" in c and "px" not in c:
        status[c.replace("Size","")] = status[c]
        del status[c]

status["location"] = status.path.apply(lambda xi: os.path.split(xi)[0])
status["Freq"] = status["Frequency"].apply("{:.1f}".format).astype("str")
del status["Frequency"]
status["Duration"] = status["Duration"].apply(str).apply(lambda xi: xi.split()[-1].split(".")[0] )

firstCols = ["exp", "series","movie","Freq","Duration", "movie size [MB]", ]#,"images",]
status = status[[c for c in firstCols if c in status]+[c for c in status.columns if c not in firstCols]]
lastCols = ["path"]
status = status[[c for c in status.columns if c not in lastCols]+lastCols]

if "movie size [MB]" in status:
    status["movie size [MB]"] = status["movie size [MB]"].round(1)
for col in [
    "pxUnit",
    "frame_range",
    "gap",
    "pickles"
           ]:
    if col in status:
        del status[col]
# if "pickles" in status:
#     status.pickles = status_orig.pickles.apply(lambda xi: str(xi)[1:-1].replace(" ","") if "[" in str(xi) else "")

# status = status[~status[list("XYZT")].isna().all(1)]
# status = status[status.Z==1]
# status = status[~status[["movie","(re)do pickles"]].all(1)]
# status = status[~status.exp.apply(lambda xi: xi.endswith("nd2"))]
# status = status[status_orig.Duration>pd.Timedelta("30s")]
# status["line_scan"] = status["line scan"].astype(int)

status = status.sort_values(["exp","Y"])
status.index = range(len(status))

pd.set_option('display.max_rows', 500)

ix = status[status["line scan"]=="none"].query("T<10").index

status = status.drop(index=ix)

status.index = range(len(status))

In [8]:
# to hide
nshow = len(status)
htmltable = generate_table(status.iloc[:nshow,:-2], max_rows=nshow)
ixOrder = OrderedDict([(f"table-{col}-{i}",{"col":col,"index":i}) \
                       for i in range(nshow) for col in ["movie","(re)do pickles","images done"] \
                           if (col in status.columns) and (isinstance(status.loc[i,col],(bool,np.bool_)))
                      ])

app = JupyterDash(__name__, width=1000)

app.layout = html.Div([
    html.Div("Please check which of the following series you wish processed into movies/pickles. When done, please click on the button and follow instructions."),
    html.Br(),
    html.Button(id="save",children=["Prepare script"],n_clicks=0),
    html.Div(id="output"),
#     html.Button(id="check-all",children=["Check all"],n_clicks=0),
    html.Div(htmltable,style={"width":"1000px"}),
    html.Pre(id="marked",children="------------",
             style={
                "width": "700px",
                "height": "300px",
                'overflowX': 'scroll',
                'overflowY': 'scroll',
                "display": "block" if "srdjan" in os.getcwd() else "none",
            }),
],style={"font-family":"Arial"})

inputs = [Input(k,"value") for k in ixOrder]

@app.callback(
    Output("marked","children"),
    inputs
)
def see(*manyinputs):
    output = ""
    try:
        out = deepcopy(ixOrder)
        for k,v in zip(out, manyinputs):
            out[k]["value"] = v
        out = list(out.values())
        for el in out:
            el["value"] = bool(len(el["value"]))
            i = status.index[el["index"]]
            el["rec"] = status.loc[i,"path"]
            el["ser"] = status.loc[i,"series"]
            el["line scan"] = status.loc[i,"line scan"]
        out = pd.DataFrame(out)
        checklist_parse = out.query("col=='images done'").copy()
        if len(checklist_parse):
            del checklist_parse["col"], checklist_parse["index"], checklist_parse["value"]
        else: 
            checklist_parse = pd.DataFrame()
        out = out.query("col!='images done'")

        for (rec,ser),ddf in out.groupby(["rec","ser"]):
            el = {"rec":rec,"ser":ser}
            el["movie"] = ddf.query("col=='movie'")["value"].iloc[0]
            el["pickles"] = ddf.query("col=='(re)do pickles'")["value"].iloc[0]
            el["line scan"] = "none"
            checklist_parse = checklist_parse.append(el,ignore_index=True)
        ################################### good begin
        allrecs = checklist_parse.rec.unique()
#         output += "\n"+checklist_parse.__repr__()

        for rec in np.unique(allrecs):
            outFile = "/.".join(os.path.split(rec))+".out"
            output += f"rm {outFile}\n"
        for _,row in checklist_parse.iterrows():
            if row["line scan"]=="none":
                if not row[["movie","pickles"]].any():
                    continue
            outFile = "/.".join(os.path.split(row.rec))+".out"
            line = f'echo "" >> {outFile}\n'
            line += f'echo "" >> {outFile}\n'
            line += f'''echo "{'#'*50}" >> {outFile}\n'''
            line += f'''echo "###### processing of {row.ser} started at: $(date)" >> {outFile}\n'''
            line += f'''{{ time /data/useful_notebooks/process_single.py --recording="{row.rec}" --series="{row.ser}" --verbose'''

            if row["line scan"]!="none":
                line += " --line-scan="+row["line scan"]
            else:
                if not row.movie:
                    line += " --leave-movie"
                if not row.pickles:
                    line += " --leave-pickles"
            line += f" >> {outFile} ; }} 2>> {outFile}"
            output += line+"\n"
        ################################### good end
    except:
        output += str(exc_info())
    return output

# @app.callback(
#     [Output(k,"value") for k in ixOrder],
#     [Input("check-all","n_clicks")]
# )
# def checkall(nc):
#     if nc>0:
#         if nc%2:
#             return tuple([[None] for k in  ixOrder])
#         else:
#             return tuple([[] for k in  ixOrder])

@app.callback(
    Output("output","children"),
    [Input("save","n_clicks")],
    [State("marked","children")]
)
def prepare_script(n_clicks, text):
    if n_clicks>0:
        with open(os.path.expanduser("~/processing_script.sh"),"w") as f:
            f.write(text)
        return dcc.Markdown("""`processing_script.sh` prepared in your home folder.

Just open new terminal and run: `bash processing_script.sh` 

You can then close the window and wait until the processing is finished. 

_(For now, there is no reallyt good way how to follow progress, sorry. You can try the cell below...)_
""")

app._repr_html_() 
link2app = "https://ctn.physiologie.meduniwien.ac.at"+app.get_app_root_url()
HTML(f'open the following link in a different tab (do not close this tab!): <a href="{link2app}">{link2app}</a>')
# app

#### Hacky way to check progress

In [10]:
status_orig

Unnamed: 0,path,exp,series,SizeT,SizeX,SizeY,SizeZ,pxSize,pxUnit,Frequency,Duration,line scan,gap,frame_range,Series Durations,movie,movie size [MB],pickles,(re)do pickles
0,/data/Sandra/2020/2020_11_23/Experiment77a.lif,Experiment77a.lif,Series003-24,175907,512,512,1,0.91,µm,19.8,0 days 00:06:44.417999268,none,,"(0, 175907)",S003 [6:44]\nS004 [6:44]\nS005 [6:44]\nS006 [6...,True,48.370018,"[(7,), (8,), (7, 8), (7, 10)]",True
1,/data/Sandra/2020/2020_11_23/Experiment77b.lif,Experiment77b.lif,Series009-30,173397,512,512,1,0.91,µm,19.8,0 days 00:06:44.812999725,none,,"(0, 173397)",S009 [6:44]\nS010 [6:44]\nS011 [6:44]\nS012 [6...,True,62.340215,"[(7,), (8,), (7, 8), (7, 10)]",True
2,/data/Sandra/2020/2020_11_23/Experiment77c.lif,Experiment77c.lif,Series001-26,207793,512,512,1,0.91,µm,19.8,0 days 00:06:44.829999924,none,,"(0, 207793)",S001 [6:44]\nS002 [6:44]\nS003 [6:44]\nS004 [6...,True,97.559255,"[(7,), (8,), (7, 8), (7, 10)]",False
3,/data/Sandra/2020/2020_11_23/Experiment77d.lif,Experiment77d.lif,Series001-25,196696,512,512,1,0.828,µm,19.8,0 days 00:06:44.784999847,none,,"(0, 196696)",S001 [6:44]\nS002 [6:44]\nS003 [6:44]\nS004 [6...,True,54.898017,"[(7,), (8,), (7, 8), (7, 10)]",False
4,/data/Sandra/2020/2020_11_23/Experiment77e.lif,Experiment77e.lif,Series001-15,115422,512,512,1,0.91,µm,19.8,0 days 00:06:44.851999283,none,,"(0, 115422)",S001 [6:44]\nS002 [6:44]\nS003 [6:44]\nS004 [6...,True,28.562012,"[(7,), (8,), (7, 8), (7, 10)]",False
5,/data/Sandra/2020/2020_11_23/Experiment77f.lif,Experiment77f.lif,Series001-15,118803,512,512,1,0.91,µm,19.8,0 days 00:06:44.877000809,none,,"(0, 118803)",S001 [6:44]\nS002 [6:44]\nS003 [6:44]\nS004 [6...,True,49.787494,"[(7,), (8,), (7, 8), (7, 10)]",False
6,/data/Sandra/2020/2020_11_23/Experiment77g.lif,Experiment77g.lif,Series001-14,111969,512,512,1,0.91,µm,19.8,0 days 00:06:44.883001328,none,,"(0, 111969)",S001 [6:44]\nS002 [6:44]\nS003 [6:44]\nS004 [6...,True,41.733467,"[(7,), (8,), (7, 8), (7, 10)]",False


In [9]:
for exp,df in status.groupby("exp"):
    printExp = True
    for i,row in df.iterrows():
#         assert (status.loc[i].iloc[:2] == status_orig.loc[i,["exp","series"]]).all()
        try:
            images = [f".image_{'%i.%i'%fs if isinstance(fs,tuple) else fs}.png" for fs in status_orig.loc[i,"pickles"] ]
        except:
            continue
        serDir = f"{row.path}_analysis/{row.series}"
        if os.path.isdir(serDir):
            imDone = sum([el in images for el in os.listdir(serDir)])
        else:
            imDone = 0
        outPutFile = row.path.replace(row.exp, "."+row.exp+".out")
        percDone = 100/len(images)*imDone
    #     if percDone>=100: continue
        print ("%20s %13s  %3i%%"%(row.exp if printExp else "", row.series, percDone))#, outPutFile, os.path.isfile(outPutFile))
    #     if os.path.isfile(outPutFile):
    #         output = open(outPutFile).read()
    #         last = output#-1]
    #         last = "\n".join(["\t"+l for l in last.splitlines()])
    #         last += "\n"
    #         print(last)

    #     break
        printExp = False
    print ("-"*50)

--------------------------------------------------
--------------------------------------------------
--------------------------------------------------
--------------------------------------------------
--------------------------------------------------
--------------------------------------------------
--------------------------------------------------
