 This notebook takes pre-processing outputs that require access to the raw dataset and:
1. gives them sensible names
2. exports them to a single location

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import os
import shutil
import toml
import glob
from tqdm.auto import tqdm

In [3]:
# Base directory (under the user's home) that most source and export paths below are joined onto.
root_dir = os.path.expanduser("~/shared_folder/active_lab_members/markowitz_jeffrey/active_projects")

In [4]:
# Analysis output directory, read from the [dirs] table of config.toml (resolved relative to the working directory).
qd_analysis_dir = toml.load("config.toml")["dirs"]["analysis"]
# NOTE(review): keypoint_training_dir is not referenced by any other cell visible in this notebook.
keypoint_training_dir = "keypoints_basler_nir_plexiglass_arena/sleap_training_round2/"

In [5]:
# Single destination folder; every section below exports into a sub-directory of it.
export_dir = os.path.join(root_dir, "quantum_dots/_for_paper/ulutas_et_al_data/")

from the analysis dir we need
1. histology from round1 and round2
2. fluorescence decay/intensity data
3. spatial autocorrelation data
4. manually labeled keypoint data
5. autolabeled keypoint data
6. other evaluations... 

### histology round 1

In [6]:
# Round-1 histology: collect the .nd2 images, leaving out files tagged "BF"
# (presumably brightfield acquisitions -- confirm).
histology_data_dir = os.path.join(root_dir, "quantum_dots/histology/")
histology_data_files = sorted(glob.glob(os.path.join(histology_data_dir, "*.nd2")))
# Test the file name only: matching against the full path would silently drop
# every file if any parent directory ever contained "BF".
histology_data_files = [
    _file for _file in histology_data_files if "BF" not in os.path.basename(_file)
]

In [7]:
# Export sub-directory for the round-1 histology images.
use_export_dir = os.path.join(export_dir, "histology_epifluorescence")

In [8]:
# Create the export sub-directory (and any missing parents); a no-op if it already exists.
os.makedirs(use_export_dir, exist_ok=True)

In [9]:
# now copy the data files over to the new directory

In [10]:
# Copy each selected round-1 histology image under its original file name;
# anything already present in the export folder is left untouched.
for _src in tqdm(histology_data_files):
    _dst = os.path.join(use_export_dir, os.path.basename(_src))
    if os.path.exists(_dst):
        continue
    shutil.copyfile(_src, _dst)

  0%|          | 0/7 [00:00<?, ?it/s]

In [11]:
# Number of round-1 histology files selected for export.
len(histology_data_files)

7

In [12]:
# Number of .nd2 files now in the export folder; compare with the source count above.
len(glob.glob(os.path.join(use_export_dir,"*.nd2")))

7

### histology round 2 -- wga & dapi

In [13]:
# Round-2 WGA/DAPI histology is spread over two acquisition folders.
histology_round2_wga_data_dirs = [
    os.path.join(root_dir, "quantum_dots/sciadv_rebuttal/histology/Quantification_QDR_20um_cell_and_vasc/"),
    os.path.join(root_dir, "quantum_dots/sciadv_rebuttal/histology/QDR_cell_vasc_+4h_WGA_DAPI_20X_zeynep/"),
]
# Gather the "*20X.czi" files: sorted within each folder, folders kept in the order listed above.
histology_round2_wga_data_files = [
    _path
    for _folder in histology_round2_wga_data_dirs
    for _path in sorted(glob.glob(os.path.join(_folder, "*20X.czi")))
]

In [14]:
# Export sub-directory for the round-2 WGA/DAPI histology images.
# NOTE(review): "wpa" in the folder name looks like a typo for "wga" (see the section
# title and the source variable names) -- confirm before renaming, since already-exported
# data and downstream readers may rely on the current name.
use_export_dir = os.path.join(export_dir, "histology_round2_wpa-and-dapi")
os.makedirs(use_export_dir, exist_ok=True)

In [15]:
# Copy the round-2 WGA/DAPI images under their original file names,
# skipping any that already exist in the export folder.
for _src in tqdm(histology_round2_wga_data_files):
    _dst = os.path.join(use_export_dir, os.path.basename(_src))
    if os.path.exists(_dst):
        continue
    shutil.copyfile(_src, _dst)

  0%|          | 0/17 [00:00<?, ?it/s]

In [16]:
# Number of round-2 WGA/DAPI source files selected for export.
len(histology_round2_wga_data_files)

17

In [17]:
# Number of "*20X.czi" files now in the export folder; compare with the source count above.
len(glob.glob(os.path.join(use_export_dir, "*20X.czi")))

17

### histology round 2 -- collagen & dapi

In [18]:
# Round-2 collagen/DAPI histology: all "*63X.czi" files in the acquisition folder, sorted by path.
histology_round2_coll_data_dir = os.path.join(root_dir, "quantum_dots/sciadv_rebuttal/histology/QDR_QDcoll_collagen_488_+1d_DAPI_63X/")
histology_round2_coll_data_files = sorted(glob.glob(os.path.join(histology_round2_coll_data_dir, "*63X.czi")))

In [19]:
# Export sub-directory for the round-2 collagen/DAPI images (created if missing).
use_export_dir = os.path.join(export_dir, "histology_round2_collagen-and-dapi")
os.makedirs(use_export_dir, exist_ok=True)

In [20]:
# Copy the round-2 collagen/DAPI images under their original file names,
# skipping any that already exist in the export folder.
for _src in tqdm(histology_round2_coll_data_files):
    _dst = os.path.join(use_export_dir, os.path.basename(_src))
    if os.path.exists(_dst):
        continue
    shutil.copyfile(_src, _dst)

  0%|          | 0/23 [00:00<?, ?it/s]

In [21]:
# Number of round-2 collagen/DAPI source files selected for export.
len(histology_round2_coll_data_files)

23

In [22]:
# Number of .czi files now in the export folder.
# NOTE(review): this pattern ("*.czi") is broader than the "*63X.czi" used to select the sources,
# so stray .czi files in the folder would inflate this count.
len(glob.glob(os.path.join(use_export_dir, "*.czi")))

23

### bead images

In [23]:
# Bead images: every .nd2 file in the beads_images folder, sorted by path.
bead_image_data_dir = os.path.join(root_dir, "quantum_dots/beads_images/")
bead_image_data_files = sorted(glob.glob(os.path.join(bead_image_data_dir, "*.nd2")))

In [24]:
# Export sub-directory for the bead images (created if missing).
use_export_dir = os.path.join(export_dir, "bead_tests")
os.makedirs(use_export_dir, exist_ok=True)

In [25]:
# Copy the bead images under their original file names,
# skipping any that already exist in the export folder.
for _src in tqdm(bead_image_data_files):
    _dst = os.path.join(use_export_dir, os.path.basename(_src))
    if os.path.exists(_dst):
        continue
    shutil.copyfile(_src, _dst)

  0%|          | 0/22 [00:00<?, ?it/s]

In [26]:
# Number of bead image files selected for export.
len(bead_image_data_files)

22

In [27]:
# Number of .nd2 files now in the export folder; compare with the source count above.
len(glob.glob(os.path.join(use_export_dir, "*.nd2")))

22

### fluorescence intensity data

In [28]:
# Source dataframe in the analysis directory: fluorescence_intensity_over_time.parquet.
intensity_parquet_file = os.path.join(qd_analysis_dir, "fluorescence_intensity_over_time.parquet")

In [29]:
# Export sub-directory shared by all exported dataframes (created if missing).
use_export_dir = os.path.join(export_dir, "dataframes")
os.makedirs(use_export_dir, exist_ok=True)

In [30]:
# Copy the intensity dataframe under its original file name; an existing export is overwritten.
# The cell's displayed value is the destination path returned by copyfile.
shutil.copyfile(
    intensity_parquet_file,
    os.path.join(use_export_dir, os.path.basename(intensity_parquet_file)),
)

'/storage/home/hcoda1/4/jmarkowitz30/shared_folder/active_lab_members/markowitz_jeffrey/active_projects/quantum_dots/_for_paper/ulutas_et_al_data/dataframes/fluorescence_intensity_over_time.parquet'

### spatial autocorrelation data

In [31]:
# Source dataframe in the analysis directory: fluorescence_autocorrelation.parquet.
autocorrelation_parquet_file = os.path.join(qd_analysis_dir, "fluorescence_autocorrelation.parquet")

In [32]:
# Export sub-directory shared by all exported dataframes (created if missing).
use_export_dir = os.path.join(export_dir, "dataframes")
os.makedirs(use_export_dir, exist_ok=True)

In [33]:
# Copy the autocorrelation dataframe under its original file name; an existing export is overwritten.
# The cell's displayed value is the destination path returned by copyfile.
shutil.copyfile(
    autocorrelation_parquet_file,
    os.path.join(use_export_dir, os.path.basename(autocorrelation_parquet_file)),
)

'/storage/home/hcoda1/4/jmarkowitz30/shared_folder/active_lab_members/markowitz_jeffrey/active_projects/quantum_dots/_for_paper/ulutas_et_al_data/dataframes/fluorescence_autocorrelation.parquet'

### Autolabeled datasets

In [34]:
# The two autolabeled .slp datasets (standard keypoints first, then knee joints),
# given relative to root_dir.
autolabel_keypoint_files = [
    os.path.join(root_dir, _relative_path)
    for _relative_path in (
        "keypoints_basler_nir_plexiglass_arena/sleap_training_round2/_labels_qd/kpoint_training_dataset_qd_alignment_round2_manual_labeling_nofluo_version-v1-embed-True-sleap-version.slp",
        "keypoints_basler_nir_plexiglass_arena/sleap_training_round2/_labels_qd/kpoint_training_dataset_qd_alignment_kneejoints_reflect_only_round2_manual_labeling_nofluo_version-v1-embed-True-sleap-version.slp",
    )
]

In [35]:
# Export sub-directory for the autolabeled keypoint datasets (created if missing).
use_export_dir = os.path.join(export_dir, "autolabeled_keypoints")
os.makedirs(use_export_dir, exist_ok=True)

In [36]:
# Export each autolabeled dataset as a renamed trio: the .slp labels, the .toml metadata
# stored next to it, and the matching .parquet. Existing exports are overwritten.
for _slp_path in tqdm(autolabel_keypoint_files):
    _export_stem = "autolabel_kneejoints" if "kneejoints" in _slp_path else "autolabel_standard-keypoints"
    # The .toml shares the .slp file name; the .parquet name lacks the
    # "-embed-True-sleap-version" tag that the .slp carries.
    _sources = (
        (_slp_path, ".slp"),
        (_slp_path.replace(".slp", ".toml"), ".toml"),
        (_slp_path.replace("-embed-True-sleap-version.slp", ".parquet"), ".parquet"),
    )
    for _src, _ext in _sources:
        shutil.copyfile(_src, os.path.join(use_export_dir, _export_stem + _ext))

  0%|          | 0/2 [00:00<?, ?it/s]

### manually labeled datasets

In [69]:
# The two manually labeled .slp datasets (standard keypoints first, then knee joints).
# Built from root_dir, like the autolabeled list, rather than from a user-specific
# absolute path, so the notebook is not tied to one account's home directory.
manual_label_keypoints_files = [
    os.path.join(root_dir, "keypoints_basler_nir_plexiglass_arena/sleap_training_round2/_labels/basler-nir-plexiglass-arena-keypoints-fused-round2_weights-None_bpass-None_fluo-aligned.slp"),
    os.path.join(root_dir, "keypoints_basler_nir_plexiglass_arena/sleap_training_round2/_labels/basler-nir-plexiglass-arena-keypoints-fused-kneejoints_weights-None_bpass-None_fluo-aligned.slp"),
]

In [70]:
# Export sub-directory for the manually labeled keypoint datasets (created if missing).
use_export_dir = os.path.join(export_dir, "manually_labeled_keypoints")
os.makedirs(use_export_dir, exist_ok=True)

In [72]:
# Export each manually labeled dataset as a renamed pair: the .slp labels and the
# .toml metadata stored next to it. Existing exports are overwritten.
for _slp_path in tqdm(manual_label_keypoints_files):
    _tag = "kneejoints" if "kneejoints" in _slp_path else "standard-keypoints"
    _export_stem = os.path.join(use_export_dir, f"manual_label_{_tag}")
    shutil.copyfile(_slp_path, _export_stem + ".slp")
    shutil.copyfile(_slp_path.replace(".slp", ".toml"), _export_stem + ".toml")

  0%|          | 0/2 [00:00<?, ?it/s]

### Autolabeled data metrics

In [37]:
# Source dataframe in the analysis directory: sleap_metrics_qd_training.parquet.
sleap_metrics_autolabel_training_parquet_file = os.path.join(qd_analysis_dir, "sleap_metrics_qd_training.parquet")

In [38]:
# Export sub-directory shared by all exported dataframes (created if missing).
use_export_dir = os.path.join(export_dir, "dataframes")
os.makedirs(use_export_dir, exist_ok=True)

In [39]:
# Copy the metrics dataframe under its original file name; an existing export is overwritten.
# The cell's displayed value is the destination path returned by copyfile.
shutil.copyfile(
    sleap_metrics_autolabel_training_parquet_file,
    os.path.join(use_export_dir, os.path.basename(sleap_metrics_autolabel_training_parquet_file)),
)

'/storage/home/hcoda1/4/jmarkowitz30/shared_folder/active_lab_members/markowitz_jeffrey/active_projects/quantum_dots/_for_paper/ulutas_et_al_data/dataframes/sleap_metrics_qd_training.parquet'

### Manually labeled data results

In [40]:
# Folders (relative to root_dir) that each hold an aggregated_results.parquet for one
# manually-labeled-data experiment; the keys become part of the exported file names.
# root_dir is intentionally NOT reassigned here: overwriting the notebook-wide value with a
# hard-coded absolute path would silently change what every later cell resolves against.
proc_dirs = {
    "parameter_sweep": "keypoints_basler_nir_plexiglass_arena/sleap_training_round2/keypoints_from_manual_data_parameter_sweep/",
    "different_modalities": "keypoints_basler_nir_plexiglass_arena/sleap_training_round2/keypoints_from_manual_data_different_modalities/",
    "subsample": "keypoints_basler_nir_plexiglass_arena/sleap_training_round2/keypoints_from_manual_data_subsample/",
}

In [41]:
# Export sub-directory shared by all exported dataframes (created if missing).
use_export_dir = os.path.join(export_dir, "dataframes")
os.makedirs(use_export_dir, exist_ok=True)

In [42]:
# File-name prefix for the exported manually-labeled-data result dataframes.
prefix = "manually_labeled_data_"

In [43]:
# Export each experiment's aggregated_results.parquet as "<prefix><experiment>.parquet",
# skipping experiments whose export already exists.
for _experiment, _relative_dir in tqdm(proc_dirs.items()):
    _dst = os.path.join(use_export_dir, f"{prefix}{_experiment}.parquet")
    if os.path.exists(_dst):
        continue
    _src = os.path.join(root_dir, _relative_dir, "aggregated_results.parquet")
    shutil.copyfile(_src, _dst)

  0%|          | 0/3 [00:00<?, ?it/s]

### kneejoint cadaver

In [64]:
# Knee-joint cadaver images: every .tiff file in the kneejoint_cadaver folder, sorted by path.
kneejoint_image_data_dir = os.path.join(root_dir, "quantum_dots/kneejoint_cadaver/")
kneejoint_image_data_files = sorted(glob.glob(os.path.join(kneejoint_image_data_dir, "*.tiff")))

In [65]:
# Export sub-directory for the knee-joint cadaver images (created if missing).
use_export_dir = os.path.join(export_dir, "kneejoint_cadaver_images")
os.makedirs(use_export_dir, exist_ok=True)

In [67]:
# Copy the knee-joint cadaver images under their original file names,
# skipping any that already exist in the export folder.
for _src in tqdm(kneejoint_image_data_files):
    _dst = os.path.join(use_export_dir, os.path.basename(_src))
    if os.path.exists(_dst):
        continue
    shutil.copyfile(_src, _dst)

  0%|          | 0/12 [00:00<?, ?it/s]

### interlabeler datasets

In [44]:
# Pickle files in the analysis directory matching "interlabeler*samples*.pkl", sorted by path.
interlabeler_data_files = sorted(glob.glob(os.path.join(qd_analysis_dir, "interlabeler*samples*.pkl")))

In [45]:
# Export sub-directory for the interlabeler files (created if missing).
use_export_dir = os.path.join(export_dir, "interlabeler_error")
os.makedirs(use_export_dir, exist_ok=True)

In [46]:
# Copy the interlabeler pickle files under their original file names,
# skipping any that already exist in the export folder.
for _src in tqdm(interlabeler_data_files):
    _dst = os.path.join(use_export_dir, os.path.basename(_src))
    if os.path.exists(_dst):
        continue
    shutil.copyfile(_src, _dst)

  0%|          | 0/4 [00:00<?, ?it/s]

### accuracy v. distance/angle from camera

In [47]:
# Source dataframe in the analysis directory: accuracy_v_distance_from_camera.parquet.
accuracy_v_position_parquet = os.path.join(qd_analysis_dir, "accuracy_v_distance_from_camera.parquet")

In [48]:
# Export sub-directory shared by all exported dataframes (created if missing).
use_export_dir = os.path.join(export_dir, "dataframes")
os.makedirs(use_export_dir, exist_ok=True)

In [49]:
# Copy the accuracy dataframe under its original file name; an existing export is overwritten.
# The cell's displayed value is the destination path returned by copyfile.
shutil.copyfile(
    accuracy_v_position_parquet, os.path.join(use_export_dir, os.path.basename(accuracy_v_position_parquet))
)

'/storage/home/hcoda1/4/jmarkowitz30/shared_folder/active_lab_members/markowitz_jeffrey/active_projects/quantum_dots/_for_paper/ulutas_et_al_data/dataframes/accuracy_v_distance_from_camera.parquet'

### qdot 800 spectrum from chroma viewer

In [50]:
# Spectrum files kept under quantum_dots/misc/qdot_spectra. Resolved from root_dir instead of a
# user-specific absolute path so the notebook is not tied to one account's home directory.
qdot_800_csv = os.path.join(root_dir, "quantum_dots/misc/qdot_spectra/Qdot 800.csv")
led_excitation_xlsx = os.path.join(root_dir, "quantum_dots/misc/qdot_spectra/Raw Spectral Distribution Data.xlsx")

In [51]:
# Export sub-directory for miscellaneous files (created if missing).
use_export_dir = os.path.join(export_dir, "misc")
os.makedirs(use_export_dir, exist_ok=True)

In [52]:
# Export the Qdot 800 CSV under a new, space-free name; an existing export is overwritten.
shutil.copyfile(
    qdot_800_csv, os.path.join(use_export_dir, "qdot_800_excitation_chroma_viewer.csv")
)

'/storage/home/hcoda1/4/jmarkowitz30/shared_folder/active_lab_members/markowitz_jeffrey/active_projects/quantum_dots/_for_paper/ulutas_et_al_data/misc/qdot_800_excitation_chroma_viewer.csv'

In [53]:
# Export the LED spectral-distribution spreadsheet under a new name; an existing export is overwritten.
shutil.copyfile(
    led_excitation_xlsx, os.path.join(use_export_dir, "advanced_illumination_led_data.xlsx")
)

'/storage/home/hcoda1/4/jmarkowitz30/shared_folder/active_lab_members/markowitz_jeffrey/active_projects/quantum_dots/_for_paper/ulutas_et_al_data/misc/advanced_illumination_led_data.xlsx'

### keypoint models...

In [87]:
# manual
model_dirs = {
    "manually_labeled_data_kpoint_models_standard": "/storage/coda1/p-jmarkowitz30/0/shared/active_lab_members/markowitz_jeffrey/active_projects/keypoints_basler_nir_plexiglass_arena/sleap_training_round2/keypoints_from_manual_data_final_model/models",
    "manually_labeled_data_kpoint_models_kneejoints": "/storage/coda1/p-jmarkowitz30/0/shared/active_lab_members/markowitz_jeffrey/active_projects/keypoints_basler_nir_plexiglass_arena/sleap_training_round2/keypoints_from_manual_data_final_model_kneejoints/models",
    "autolabeled_data_kpoint_models_standard": "/storage/coda1/p-jmarkowitz30/0/shared/active_lab_members/markowitz_jeffrey/active_projects/keypoints_basler_nir_plexiglass_arena/sleap_training_round2/keypoints_from_qds_all_cameras_v3/models",
}
# there will be model repeats, go ahead and copy all of them to the right place...
# also go ahead and keep everything in there for posterity...

In [88]:
# Export sub-directory for the keypoint models (created if missing).
use_export_dir = os.path.join(export_dir, "keypoint_models")
os.makedirs(use_export_dir, exist_ok=True)

In [98]:
import pandas as pd

In [110]:
# For every entry in model_dirs, export the single training run with the best validation loss.
for _new_dir, _model_dir in tqdm(model_dirs.items()):
    # Candidate runs are the sub-directories of the models folder (sorted for a stable order).
    use_models = [
        os.path.join(_model_dir, _name)
        for _name in sorted(os.listdir(_model_dir))
        if os.path.isdir(os.path.join(_model_dir, _name))
    ]

    # For the "from_qds" (autolabeled) models, only consider runs tagged "subsample-None".
    if "from_qds" in _model_dir:
        use_models = [_dir for _dir in use_models if "subsample-None" in _dir]

    # Score each run by its best (minimum) val_loss across epochs. The last logged epoch is
    # not necessarily the best one, so ranking on the final row could pick the wrong run.
    # NOTE(review): this assumes the weights saved in each run correspond to its
    # lowest-val_loss epoch -- confirm against the training configuration.
    validation_loss = {}
    for _model in use_models:
        training_log = pd.read_csv(os.path.join(_model, "training_log.csv"))
        best_val_loss = training_log["val_loss"].min()
        # a run whose val_loss column is empty or all-NaN cannot be ranked
        if pd.notna(best_val_loss):
            validation_loss[_model] = best_val_loss

    if not validation_loss:
        raise ValueError(f"no training run with a usable val_loss found in {_model_dir}")

    best_model = min(validation_loss, key=validation_loss.get)
    new_dir = os.path.join(use_export_dir, _new_dir)
    # copy the whole run directory; files from an earlier export are overwritten in place
    shutil.copytree(best_model, new_dir, dirs_exist_ok=True)

  0%|          | 0/3 [00:00<?, ?it/s]