In [None]:
import os, sys, traceback
from datetime import datetime

from time import time as timer

# Wall-clock start of the notebook run; the last cell reports the elapsed time from it.
nb_start_t = timer()

# Location of the UFMTrack library checkout (machine-specific - adjust when running elsewhere).
lib_root = r'g:\IVFCA\UFMTrack'
# Re-running this cell must not stack duplicate entries on sys.path.
if lib_root not in sys.path:
    sys.path.append(lib_root) # see the GitHub referenced in the paper

In [None]:
# Enable IPython's autoreload extension in mode 2, so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from GUFMTrack import *
from UFMAna import *
from config_manager import ConfigManager as cfg

from time import time as timer


import shutil

from IPython.display import display
from ipywidgets import Layout, HBox, VBox, Text, IntText, Button, Output, HTML, SelectMultiple

from dataclasses import dataclass, asdict
import json
import traceback
import datetime

import pathlib as pl
import pandas as pd

# Processing constants

In [None]:
def prep_cfg(accumulation_end=30, start_proc_to_accumulation_end_tf=7):
    """Create a ConfigManager prepared for processing.

    The accumulation end is shifted by ``accumulation_end - start_proc_to_accumulation_end_tf``
    and the shifted value is stored in ``T_ACCUMULATION_END``. Both classifier paths of the
    config are prefixed with the notebook-level ``lib_root``.

    Parameters:
        accumulation_end: accumulation end before shifting.
        start_proc_to_accumulation_end_tf: distance between the start of processing and the
            accumulation end; used to compute the shift.

    Returns:
        The configured ConfigManager instance.
    """
    dt_offset = accumulation_end - start_proc_to_accumulation_end_tf
    shifted_accumulation_end = accumulation_end - dt_offset

    print(f'__ACCUMULATION_END={shifted_accumulation_end}')

    cfgm = cfg()
    cfgm.T_ACCUMULATION_END = shifted_accumulation_end

    # the classifier locations stored in the config are made relative to the library root
    for classifier_attr in ('CLASSIFIER_DET', 'CLASSIFIER_NAC'):
        setattr(cfgm, classifier_attr, lib_root + '\\' + getattr(cfgm, classifier_attr))

    print(f'T_END_TO_COMPLETE = {cfgm.T_END_TO_COMPLETE}, T_ACCUMULATION_COMPLETE = {cfgm.T_ACCUMULATION_COMPLETE}')

    return cfgm

In [None]:
# Processing configuration shared by the cells below.
cfgm = prep_cfg(
    accumulation_end=30,
    start_proc_to_accumulation_end_tf=7,
)

In [None]:
@dataclass
class DataAnalysisConfig:
    """Filesystem locations used by the analysis GUI and the dataset sync.

    Defaults are resolved to absolute paths (relative to the working directory)
    at the time the class is defined.
    """
    # folder holding the segmented datasets (one sub-folder per dataset id)
    seg_ds_path: str = os.path.abspath(os.path.join('..', '..', 'datasets_seg'))
    # text file with "<id> - <name>" lines describing the datasets
    ds_inf_path: str = os.path.abspath(os.path.join('..', '..', 'datasets_seg', 'info.txt'))
    # folder the analysis copies of the datasets are written to
    ana_ds_path: str = os.path.abspath(os.path.join('..', '..', 'datasets_ana'))
    # csv table listing the datasets known to the analysis
    ana_db_path: str = os.path.abspath(os.path.join('..', '..', 'datasets_ana', 'ana_db.csv'))


# Helpers for reading / writing the analysis config as a JSON file
_cfg_filename = 'ana_cfg.json'


def load_analysis_cfg(cfg_path=None):
    """Load the analysis config from a JSON file.

    Parameters:
        cfg_path: path of the JSON file; when empty/None, ``ana_cfg.json`` in the
            current working directory is used.

    Returns:
        A DataAnalysisConfig built from the stored fields, or one with default
        values when the file does not exist.
    """
    if not cfg_path:
        cfg_path = os.path.join(os.path.abspath(os.path.curdir), _cfg_filename)

    if not os.path.exists(cfg_path):
        return DataAnalysisConfig()

    with open(cfg_path, 'rt') as f:
        stored_fields = json.load(f)
    return DataAnalysisConfig(**stored_fields)


def save_analysis_cfg(cfg, cfg_path=None):
    """Write the analysis config to a JSON file.

    Parameters:
        cfg: dataclass instance to store (converted with ``asdict``).
        cfg_path: target file; when empty/None, ``ana_cfg.json`` in the current
            working directory is used.
    """
    if not cfg_path:
        cfg_path = os.path.join(os.path.abspath(os.path.curdir), _cfg_filename)

    with open(cfg_path, 'wt') as f:
        # indent=4 keeps the file easy to read and edit by hand
        json.dump(asdict(cfg), f, indent=4)

In [None]:
def read_info_file(ds_inf_file_path):
    """Parse the datasets info file into a ``{dataset id (int): dataset name (str)}`` dict.

    Every non-blank line is split at its first '-': the part before it is the integer
    dataset id, the remainder (which may itself contain '-') is the name. Both parts are
    stripped of surrounding whitespace. A missing file yields an empty dict.

    Raises:
        ValueError: when the text before the first '-' of a line is not an integer.
    """
    try:
        with open(ds_inf_file_path, 'rt') as f:
            raw_lines = f.readlines()
    except FileNotFoundError:
        raw_lines = []

    ds_inf = {}
    for entry in (raw.strip() for raw in raw_lines):
        if entry:
            id_part, _, name_part = entry.partition('-')
            ds_inf[int(id_part.strip())] = name_part.strip()
    return ds_inf

In [None]:
def get_ana_db(da_cfg: DataAnalysisConfig):
    """Read the analysis DB csv referenced by ``da_cfg.ana_db_path``.

    Returns:
        The table as a DataFrame, or None when the file does not exist.
    """
    db_file = pl.Path(da_cfg.ana_db_path)
    return pd.read_csv(db_file) if db_file.exists() else None

In [None]:
def sync_analysis_datasets(da_cfg: DataAnalysisConfig):
    """Copy new segmented datasets to the analysis folder and register them in the analysis DB.

    For every ``id - name`` entry of the datasets info file whose cells file
    (``<seg_ds_path>/<id:03d>/segmentation/cells/tr_cells_tmp.dat``) exists:
      * the cells file is copied to ``<ana_ds_path>/<id:03d>/tr_cells_tmp.dat`` unless it is
        already there;
      * a ``ds_id`` / ``ds_name`` record is added to the DB table unless the id is already listed.
    The DB csv is (re)written at the end in any case.

    Parameters:
        da_cfg: paths of the segmentation/analysis folders, the info file and the DB csv.

    Returns:
        True when at least one record was added to the DB, False otherwise.
    """
    # 1. load datasets info
    ds_inf = read_info_file(da_cfg.ds_inf_path)

    # 2. read csv db (start from an empty table when there is no db file yet)
    ana_db_df = get_ana_db(da_cfg)
    if ana_db_df is None:
        ana_db_df = pd.DataFrame(columns=['ds_id', 'ds_name', 'experimentor', 'date', 'details'])

    seg_dir = pl.Path(da_cfg.seg_ds_path)
    ana_dir = pl.Path(da_cfg.ana_ds_path)

    # ids already present in the db; new records are collected and added in one go below
    # (DataFrame.append was removed in pandas 2.0, and row-by-row growth is quadratic anyway)
    known_ds_ids = set(ana_db_df['ds_id'].tolist())
    new_records = []

    print('ds_inf:')
    for ds_id, ds_name in ds_inf.items():
        print(f'\t {ds_id}: "{ds_name}"', end='')

        # 3. locate the source and the target cells files of the dataset
        ds_dir_name = f'{ds_id:03d}'
        cells_file = seg_dir / ds_dir_name / 'segmentation' / 'cells' / 'tr_cells_tmp.dat'
        tgt_cells_file = ana_dir / ds_dir_name / 'tr_cells_tmp.dat'

        if not cells_file.exists():
            # dataset not segmented (yet) - nothing to copy or register
            print()
            continue

        if not tgt_cells_file.exists():
            tgt_cells_file.parent.mkdir(parents=True, exist_ok=True)

            # 4. copy the cells file to the analysis folder
            shutil.copy(cells_file, tgt_cells_file)
            print(' <- new file copied ', end='')

        # 5. register the dataset in the db
        if ds_id not in known_ds_ids:
            new_records.append({'ds_id': ds_id, 'ds_name': ds_name})
            known_ds_ids.add(ds_id)
            print(' <- db record added', end='')

        print()

    db_updated = bool(new_records)
    if db_updated:
        new_rows_df = pd.DataFrame(new_records)
        if ana_db_df.empty:
            # keep the column layout of the empty table; the remaining columns stay blank
            all_columns = ana_db_df.columns.union(new_rows_df.columns, sort=False)
            ana_db_df = new_rows_df.reindex(columns=all_columns)
        else:
            ana_db_df = pd.concat([ana_db_df, new_rows_df], ignore_index=True)

    # 6. save the db (the folder may not exist yet when nothing has been copied so far)
    ana_db_path = pl.Path(da_cfg.ana_db_path)
    ana_db_path.parent.mkdir(parents=True, exist_ok=True)
    ana_db_df.to_csv(ana_db_path, index=False)

    # 7. report if the db was updated
    if db_updated:
        print(f'\nUpdated db: {ana_db_path}'
              f'\nPlease fill the missing information in the db file manually before continuing')
    return db_updated


In [None]:
# Config confirmed through the GUI; stays None until the "Configure and sync" button
# handler in analysis_gui() has completed successfully.
_da_cfg: 'Optional[DataAnalysisConfig]' = None


def save_with_widgets():
    """
    Display an HTML snippet that invokes the notebook's "widgets:save-with-widgets" action,
    so the widget state gets stored with the notebook. Hack reused from
    https://stackoverflow.com/questions/59123005/how-to-save-state-of-ipython-widgets-in-jupyter-notebook-using-python-code

    NOTE(review): relies on the `Jupyter.menubar` JavaScript object - presumably only
    available in the classic Notebook frontend; confirm for JupyterLab.
    """
    display(HTML('<script>Jupyter.menubar.actions._actions["widgets:save-with-widgets"].handler()</script>'))


def get_style():
    """Return an HTML widget carrying the CSS rule that widens widget labels to at least 200px."""
    label_css = '''
    <style>
        .widget-label { min-width: 200px !important; }
    </style>'''
    return HTML(label_css)


def analysis_gui():
    """Show the form for configuring the data locations and syncing the analysis datasets.

    The form is prefilled from the stored config (see load_analysis_cfg). Pressing
    "Configure and sync" stores the entered paths, runs sync_analysis_datasets() and,
    when that succeeded, publishes the config in the notebook-level ``_da_cfg``.
    Progress and errors are written to the output area below the button.
    """
    # 0. Load config
    da_cfg = load_analysis_cfg()

    def _path_field(value, description):
        # all path inputs share the same look
        return Text(value=value, description=description, disabled=False,
                    layout=Layout(width='600px'))

    # 1. Directory path input string "Segmentation datasets path"
    seg_ds_path = _path_field(da_cfg.seg_ds_path, 'Segmentation datasets path:')

    # 2. File path with datasets IDs list - human-readable text file with
    #    dataset id - name pairs like " 85 - Untreated_2024.06.18"
    ds_inf_path = _path_field(da_cfg.ds_inf_path, 'Datasets info path:')

    # 3. Directory path input string "Analysis datasets path"
    ana_ds_path = _path_field(da_cfg.ana_ds_path, 'Analysis datasets path:')

    # 4. File path "Analysis DB path"
    ana_db_path = _path_field(da_cfg.ana_db_path, 'Analysis DB path:')

    # 5. Button "Process"
    process_btn = Button(description='Configure and sync', layout=Layout(width='600px'))

    display_styles = get_style()

    # 6. Output - text box with the progress
    out = Output()

    def on_process_click(b):
        global _da_cfg
        with out:
            try:
                print('Configuring...')
                # take over the values from the form and persist them
                da_cfg.seg_ds_path = seg_ds_path.value
                da_cfg.ds_inf_path = ds_inf_path.value
                da_cfg.ana_ds_path = ana_ds_path.value
                da_cfg.ana_db_path = ana_db_path.value

                save_analysis_cfg(da_cfg)

                # the info file is read inside sync_analysis_datasets(); no need to parse it here as well
                sync_analysis_datasets(da_cfg)

                save_with_widgets()

                # publish the config only after everything above went through
                _da_cfg = da_cfg

            except Exception as e:
                # keep the GUI alive and show the full traceback in the output area
                print('Error:', e)
                trace_str = traceback.format_exc()
                print(trace_str, flush=True)

    process_btn.on_click(on_process_click)

    controls_b = VBox([
        display_styles,
        seg_ds_path,
        ds_inf_path,
        ana_ds_path,
        ana_db_path,
        process_btn,
        out
    ])

    display(controls_b)



In [None]:
# Show the configuration form; `_da_cfg` is only set once "Configure and sync" has been pressed.
analysis_gui()

In [None]:
# Analysis GUI:
#
# Load the datasets info from the db file
# add study (name, user, date, details, n conditions)
# datasets panel - populate from the db, filter text field for the dataset name and other fields
# add conditions to the study - adds panels for each condition with title and add/remove buttons - adds selected from all
# add datasets to the conditions (from the db)
# save the study_info db in the studies folder as csv
# update the study record in the studies db csv
# reprocess check box

# 1. make a function creating a panel with all datasets given the ana_db_df
def get_ds_panel(ana_db_df: pd.DataFrame):
    """Build the dataset browser: a filter text box above a multi-select list of all datasets.

    Each dataset is shown as ``[<ds_id>] <ds_name>: <experimentor>, "<date>" (<details>)``.
    Typing into the filter keeps only the entries containing the text (case-insensitive).

    Parameters:
        ana_db_df: analysis DB table with the columns ds_id, ds_name, experimentor, date, details.

    Returns:
        VBox with the children [style, filter text box, datasets list].
    """
    # one label per dataset id
    labels_by_id = {}
    db_rows = zip(ana_db_df['ds_id'].values,
                  ana_db_df['ds_name'].values,
                  ana_db_df['experimentor'].values,
                  ana_db_df['date'].values,
                  ana_db_df['details'].values)
    for ds_id, ds_name, ds_exp, ds_date, ds_det in db_rows:
        labels_by_id[ds_id] = f'[{ds_id}] {ds_name}: {ds_exp}, "{ds_date}" ({ds_det})'

    filter_text = Text(value='', description='Filter:', disabled=False, layout=Layout(width='600px'))
    datasets_list = SelectMultiple(options=labels_by_id.values(), layout=Layout(width='600px', height='430px'))
    style = get_style()

    def filter_datasets_list(change):
        # re-populate the list on every change of the filter text
        needle = filter_text.value
        if needle:
            datasets_list.options = [label for label in labels_by_id.values()
                                     if needle.lower() in label.lower()]
        else:
            datasets_list.options = labels_by_id.values()

    filter_text.observe(filter_datasets_list, names='value')

    return VBox([style, filter_text, datasets_list])

def get_ds_panel_selection(panel):
    """Return ``{dataset id (str): label}`` for the entries currently selected in the panel.

    The list widget is expected at ``panel.children[2]``; the id is the text between the
    first character of a label and its first ']'.
    """
    chosen_labels = panel.children[2].value
    return {label.partition(']')[0][1:]: label for label in chosen_labels}

def get_cond_panel_list(panel):
    """Return ``{dataset id (str): label}`` for all entries listed in the panel.

    The list widget is expected at ``panel.children[2]``; the id is the text between the
    first character of a label and its first ']'.
    """
    listed_labels = panel.children[2].options
    return {label.partition(']')[0][1:]: label for label in listed_labels}

def get_condition_panel(condition_id, ds_panel, study_datasets, status_bar):
    """Create the panel for one study condition.

    Parameters:
        condition_id: number used for the default condition name ("Condition <id>").
        ds_panel: dataset browser panel the "Add" button takes its selection from.
        study_datasets: container whose children after the first one are the condition
            panels of the study; used to check the conditions for shared datasets.
        status_bar: HTML widget receiving 'OK' or the description of detected overlaps.

    Returns:
        VBox with the children [style, condition name, datasets list, buttons row].
        Other functions address the name and the list by these positions.
    """
    condition_name = Text(value=f'Condition {condition_id}', description='Condition name:', disabled=False, layout=Layout(width='600px'))
    cond_ds_list = SelectMultiple(options=[], layout=Layout(width='600px', height='400px'))
    add_btn = Button(description='Add', layout=Layout(width='300px'))
    rem_btn = Button(description='Remove', layout=Layout(width='300px'))
    style = get_style()

    button_panel = HBox([add_btn, rem_btn])

    cond_panel = VBox([style, condition_name, cond_ds_list, button_panel])

    def find_overlaps():
        # 1. (condition name, dataset ids) for every condition panel of the study;
        #    a list instead of a dict, so conditions sharing a name are not merged
        cond_ds = [(panel_i.children[1].value, set(get_cond_panel_list(panel_i)))
                   for panel_i in study_datasets.children[1:]]

        # 2. compare every pair of conditions once
        overlaps = []
        for i, (name_i, ids_i) in enumerate(cond_ds):
            for name_j, ids_j in cond_ds[i + 1:]:
                common = ids_i & ids_j
                if common:
                    # sorted - sets have no stable order
                    overlaps.append((name_i, name_j, sorted(common)))

        # 3. report to the status bar
        if overlaps:
            overlap_info = ''.join(f'{c1} - {c2}: datasets ids {", ".join(ds_ids)}; '
                                   for c1, c2, ds_ids in overlaps)
            status_bar.value = f'<font color="red">Overlaps between conditions detected:</font>{overlap_info}'
        else:
            status_bar.value = 'OK'

    # on add button click, add the datasets selected in the ds_panel to the cond_ds_list
    def on_add_btn_click(b):
        selected_ds = get_ds_panel_selection(ds_panel)
        options = list(cond_ds_list.options) + list(selected_ds.values())
        # drop duplicates while keeping the order in which the datasets were added
        cond_ds_list.options = list(dict.fromkeys(options))

        find_overlaps()

    add_btn.on_click(on_add_btn_click)

    # on remove button click, remove the selected datasets from the cond_ds_list
    def on_rem_btn_click(b):
        selected_options = cond_ds_list.value
        cond_ds_list.options = [opt for opt in cond_ds_list.options if opt not in selected_options]
        find_overlaps()

    rem_btn.on_click(on_rem_btn_click)

    return cond_panel

def get_study_ui(ana_db_df):
    """Build the study definition form.

    Layout (top to bottom):
      * study info: style, study name, user, date (prefilled with today), details,
        number of conditions;
      * datasets row: the dataset browser followed by one panel per condition;
      * footer: status bar and the "Process" button.

    Parameters:
        ana_db_df: analysis DB table used to populate the dataset browser.

    Returns:
        VBox with the children [study info, spacer, datasets row, spacer, footer];
        get_study_info() reads the form back by these positions.
    """
    study_name = Text(value='', description='Study name:', disabled=False, layout=Layout(width='600px'))
    user = Text(value='', description='User:', disabled=False, layout=Layout(width='600px'))
    date = Text(value=datetime.datetime.now().strftime('%Y.%m.%d'),
                description='Date:', disabled=False, layout=Layout(width='600px'))
    details = Text(value='', description='Details:', disabled=False, layout=Layout(width='600px'))
    n_conditions = IntText(value=1, description='Number of conditions:', disabled=False, layout=Layout(width='600px'))

    style = get_style()

    study_info = VBox([style, study_name, user, date, details, n_conditions])

    # spacer with horizontal line
    # NOTE(review): 'vp' is not a CSS length unit ('vw' or '%' was probably meant) - confirm and fix
    spacer_vert = HTML(value='<hr>', description='', layout=Layout(width='100vp', height='20px'))

    ds_panel = get_ds_panel(ana_db_df)

    study_datasets = HBox([ds_panel])

    # vbox with the status bar label for warnings etc. and the Process button
    status_bar = HTML(value='OK', description='Status:', layout=Layout(width='100vp'))
    proc_btn = Button(description='Process', layout=Layout(width='100px'))
    footer = VBox([style, status_bar, proc_btn])

    study_ui = VBox([study_info, spacer_vert, study_datasets, spacer_vert, footer])

    # upon change of the n_conditions, add or remove the condition panels of `study_datasets`
    def on_n_conditions_change(change):
        # a negative number would slice away the dataset browser itself (children[0])
        n_conds = max(n_conditions.value, 0)
        n_panels = len(study_datasets.children) - 1  # -1 for the ds_panel
        if n_conds > n_panels:
            new_panels = tuple(get_condition_panel(i, ds_panel, study_datasets, status_bar)
                               for i in range(n_panels, n_conds))
            study_datasets.children = study_datasets.children + new_panels
        else:
            study_datasets.children = study_datasets.children[:n_conds + 1]

    n_conditions.observe(on_n_conditions_change, names='value')

    # init the condition panels - according to initial n_conditions value
    on_n_conditions_change(None)

    # handle the process button click
    def on_proc_btn_click(b):
        study_info_dict = get_study_info(study_ui)
        print(study_info_dict)
        # set same to status bar
        status_bar.value = f'Study info: {study_info_dict}'

    proc_btn.on_click(on_proc_btn_click)

    return study_ui

def get_study_info(study_ui):
    """Read the study form back into a plain dict.

    Expects the layout produced by get_study_ui(): ``children[0]`` is the study info box
    (style, name, user, date, details, number of conditions) and ``children[2]`` the
    datasets row (dataset browser followed by the condition panels).

    Returns:
        dict with the keys study_name, user, date, details, n_conditions and conditions,
        the latter being a list of ``(condition name, [dataset ids])`` tuples.
    """
    info_box = study_ui.children[0]
    datasets_box = study_ui.children[2]

    # the first child of the datasets row is the dataset browser, the rest are conditions
    conditions = [
        (panel.children[1].value, list(get_cond_panel_list(panel).keys()))
        for panel in datasets_box.children[1:]
    ]

    # index 0 of the info box is the style widget
    return {
        'study_name': info_box.children[1].value,
        'user': info_box.children[2].value,
        'date': info_box.children[3].value,
        'details': info_box.children[4].value,
        'n_conditions': info_box.children[5].value,
        'conditions': conditions
    }


In [None]:
# `_da_cfg` is only assigned by the "Configure and sync" button handler, which does not run
# during "Run All"; fall back to the stored config instead of failing on None.
ana_db_df = get_ana_db(_da_cfg if _da_cfg is not None else load_analysis_cfg())

In [None]:
# Build the study definition form from the analysis DB table and render it.
study_ui = get_study_ui(ana_db_df)
display(study_ui)

# Datasets info

In [None]:
datasets_ids = [472, 473, 474, 475, 476, 477, 478, 479, ]

# `start_proc_to_accumulation_end_tf` is only a parameter of prep_cfg(), not a notebook-level
# name, so a fresh kernel raised NameError on the next statement. Use an existing notebook-level
# value when there is one, else the value passed to prep_cfg() (7).
start_proc_to_accumulation_end_tf = globals().get('start_proc_to_accumulation_end_tf', 7)

# accumulation-complete time per dataset (same value for all of them)
datasets_acc_complete_t = [start_proc_to_accumulation_end_tf]*len(datasets_ids)

# ids of the datasets to leave out of the processing
excl_ds = []

datasets_acc_complete_t = [t for ds_idx, t in zip(datasets_ids, datasets_acc_complete_t) if ds_idx not in excl_ds]

datasets_ids = [ds_idx for ds_idx in datasets_ids if ds_idx not in excl_ds]


assert len(datasets_ids) == len(datasets_acc_complete_t)

# human readable description of ds type
# ToDo: selection interface
condition_id_to_condition_name = {0: 'BMEC'}

condition_id_to_ds_id = {0:datasets_ids}

condition_id_to_ds_id = {k:[vi for vi in v if vi not in excl_ds] for k, v in condition_id_to_ds_id.items()}

# reverse lookup: dataset id -> condition id
ds_id_to_condition_id = {ds_id:cond_id for cond_id, ds_ids in condition_id_to_ds_id.items() for ds_id in ds_ids}

datasets_dir = r'' # path to the datasets
plot_dir=os.path.join(datasets_dir, 'ds_plot')

In [None]:
# ds_info = gen_ds_info(txt_filename=r'q:\deep\BBB_Home\jpnb\BBB_data_proc\info')
# save_pckl(ds_info, os.path.join(datasets_dir, 'ds_info.pckl'))

In [None]:
# Load the datasets info from 'ds_info.pckl' in `datasets_dir`.
# `load_pckl` is not defined in this notebook - presumably provided by the star imports
# (GUFMTrack / UFMAna); TODO confirm.
# NOTE(review): if this is a pickle file, only load files from a trusted source.
ds_info = load_pckl(os.path.join(datasets_dir, 'ds_info.pckl'))

# Resolving crossings & Track analysis

In [None]:
# Show the ids of the datasets that are going to be processed.
datasets_ids

In [None]:
%%time
# process with proper accumulation end time set:
failed_ds = []
n_att = 1
for acc_complete_t, ds_id in zip(datasets_acc_complete_t, datasets_ids):
    cfgm.T_ACCUMULATION_END = acc_complete_t - cfgm.T_END_TO_COMPLETE
    
    condition_id_to_ds_id_l = {0: [ds_id]}
    condition_id_to_condition_name_l = {0: 'T'}

    ds_id_to_condition_id_l = {ds_id:0}
    
    for att in range(n_att):
        try:
            tas_single = process_datasets([ds_id], 
                                     condition_id_to_condition_name_l, condition_id_to_ds_id_l, ds_id_to_condition_id_l,
                                     datasets_dir, plot_dir,
                                     use_ds_priors=False,
                                     skip_processed_xing=False, skip_processed_tr_ana=False,
                                     no_transm_mode=False # option for Binding assys: True - no transmigration in this case possible.
                                    )
            break
        except Exception as e:
            print(f'\nRaised exception in {ds_id}\n', e)
            print(traceback.format_exception(None, # <- type(e) by docs, but ignored 
                                             e, e.__traceback__),
                  file=sys.stderr, flush=True)
        print('\n')
    else:
        failed_ds.append(ds_id)


In [None]:
# Show the ids of the datasets for which every processing attempt raised an exception.
failed_ds

In [None]:
# read processed and fill the tas_all structure
# Second pass over all datasets that did not fail above, skipping the already processed
# steps (skip_processed_* flags), to fill the tas_all structure.
filtered_condition_id_to_ds_id = {
    cond_id: [ds_id for ds_id in cond_ds_ids if ds_id not in failed_ds]
    for cond_id, cond_ds_ids in condition_id_to_ds_id.items()
}
tas_all = process_datasets(
    [ds_id for ds_id in datasets_ids if ds_id not in failed_ds],
    condition_id_to_condition_name,
    filtered_condition_id_to_ds_id,
    ds_id_to_condition_id,
    datasets_dir,
    plot_dir,
    use_ds_priors=False,
    skip_processed_xing=True,
    skip_processed_tr_ana=True,
    no_transm_mode=False,  # option for binding assays: True - no transmigration possible in this case.
)

# Analyse all (prod)

### Selection of dataset into groups

In [None]:
# Dataset ids per condition id of the G3 grouping.
condition_id_to_ds_id_g3 = dict(enumerate([
    [472, 475, 478, 474],
    [473, 476, 477, 479],
]))

# Comparison groups: group id -> condition id and display name of the group.
comparisson_groups_g3 = {
    cond_id: {'condition_id': cond_id, 'name': group_name}
    for cond_id, group_name in enumerate(('AAA', 'BBB'))
}


### Analyze G3 (merged)

Here we create a data structure where, for each of the measured conditions, we have n+1 dicts (one for each condition + joined), a dict for param_id (str) + human-readable param name + container object with fetchers, store/load to pickle, save all csvs, save plots.

+ aggregator pipeline class: point to track dirs, list ids, condition map -> run each step, xint-tra, analysis, save plots, gui for map/dir gen, cache tas, save csv; common work_dir - contains existing recoded conditions + tags (date etc.) to differentiate. The gui suggests one of the existing param names, or allows creating a new one.

, for migration regimes - just id ref+ map of HR values
study - produces folder with links to data (ims+tracks+pta) + plots + csv


In [None]:
# Folder for the G3 group analysis, placed inside `datasets_dir`.
study_dir_g3 = os.path.join(datasets_dir, 'groups_g3_analytics')

In [None]:
# Run the group analysis for the G3 grouping on the processed datasets.
# `analyze_group` is not defined in this notebook - presumably provided by the star imports;
# NOTE(review): `study_dir_g3` looks like its output folder and show=False like a switch
# for inline display - confirm against its definition.
analyze_group(tas_all, comparisson_groups_g3, condition_id_to_ds_id_g3, study_dir_g3, show=False)

# end

In [None]:
nb_end_t = timer()
# Guard against a missing start timestamp so that this final cell never fails with a
# NameError: `nb_start_t` has to be recorded with timer() at the start of the run.
_nb_start_t = globals().get('nb_start_t')
if _nb_start_t is None:
    print('notebook run time: n/a (nb_start_t was not recorded at the start of the run)')
else:
    print(f'notebook run time: {(nb_end_t - _nb_start_t):.2f} sec')