#### Pipeline for labeling ROIs with ROInet
Note that this has to be run in a ROICaT environment that also has vrAnalysis installed, not the typical ROICaT environment!
(The two are incompatible at the moment due to numpy, but installing `roicat[all]`, then installing vrAnalysis with `--no-deps`, and then manually installing just the dependencies required for database importing works fine.)

#### Progress and ToDo List:
- **<<<< DONE >>>>** Create training and testing set that spans all the mice and different imaging planes
- **<<<< DONE >>>>** Process training / testing sets to generate the roinet latents, umap embeddings, images, and umap model (umap model for training data only)
- **<<<< DONE >>>>** Do a bunch of labeling on both sets!!!
- **<<<< DONE >>>>** Generate and save an sklearn model for the training data and print reports on the testing data
- **<<<< DONE >>>>** Run all data through model and save results.

In [None]:
%reload_ext autoreload
%autoreload 2

from roicat_support import get_classifier_files
from roicat_support.classifier import (
    choose_sessions, 
    define_classification_set, 
    load_classification_set, 
    prepare_suite2p_paths, 
    roi_should_be_ignored, 
    generate_latents_and_embeddings, 
    load_latents_and_embeddings, 
    read_labels,
    labels_to_df,
    save_labels,
    labels_df_to_dict,
    save_classifier,
    load_classifier,
    detect_local_concavities,
    run_integrated_labeler,
    update_labels,
    execute_label_updates,
    visualize_counts,
    visualize_examples,
    train_classifier,
    evaluate_classifier,
    visualize_predictions,
    process_sessions,
    classify_and_save,
)

# Fetch the classifier file mapping (its "train_labels" / "test_labels" entries
# are used further down) and print every entry for reference.
files = get_classifier_files()
for name, location in files.items():
    print(name, location)

In [2]:
# Pick which labeled set this session works on: training (True) or testing (False)
use_training_data = True
# When working on the testing set, re-embed its latents with the UMAP model
# that was saved alongside the training set
use_train_model_for_embeddings = True

# Pull the saved roinet / umap outputs needed for labeling
data = load_latents_and_embeddings(use_training_data)
latents, embeddings, images, model = (
    data[key] for key in ("latents", "embeddings", "images", "model_umap")
)
label_path = files["train_labels" if use_training_data else "test_labels"]

if use_train_model_for_embeddings and not use_training_data:
    # Swap the loaded embeddings for the current latents projected through
    # the training set's UMAP model
    train_model = load_latents_and_embeddings(True)["model_umap"]
    embeddings = train_model.transform(latents)

In [None]:
# Flip to True to start the integrated labeler on the loaded embeddings and
# images. label_path is handed to the labeler (presumably where labels are
# stored -- confirm in roicat_support); overwrite is explicitly disabled.
do_labeling = False
if do_labeling:
    labeler = run_integrated_labeler(
        embeddings,
        images,
        label_path,
        overwrite=False,
    )

In [None]:
# Flags for the label-revision pass:
#   run_label_update               -> gates the whole cell
#   show_updates / execute_updates -> forwarded unchanged to execute_label_updates
run_label_update = False
show_updates = True
execute_updates = False
if run_label_update:
    # update_labels yields the labels to change and the labels to clear,
    # which are then passed on together with the label file path
    labels_to_change, labels_to_clear = update_labels(embeddings, images, label_path)
    execute_label_updates(
        label_path,
        labels_to_change,
        labels_to_clear,
        show_updates=show_updates,
        execute_updates=execute_updates,
    )

In [8]:
# Set show_counts to True to call visualize_counts on the label file selected
# above (label_path); left False so the cell is a no-op by default.
show_counts = False
if show_counts:
    visualize_counts(label_path)

In [None]:
# Visualize some examples: shuffled, and capped via max_images_per_label=10.
# Enabled by default, so this cell runs whenever the notebook is executed.
show_examples = True
if show_examples:
    visualize_examples(
        images,
        label_path,
        max_images_per_label=10,
        shuffle=True,
    )

In [None]:
# Train a logistic regression model on the training data.
# Set train_new_classifier to True to (re)train; while it is False the call is
# skipped. train_classifier takes no arguments here, so it presumably locates
# the training data and output location itself -- confirm in roicat_support.
train_new_classifier = False
if train_new_classifier:
    train_classifier()

In [None]:
# Check whether the model from the training labels does well on the test labels.
# All three evaluation options are switched on; the cell itself is skipped
# unless show_evaluation_on_test_data is set to True.
show_evaluation_on_test_data = False
if show_evaluation_on_test_data:
    evaluate_classifier(
        convert_to_goodvsbad=True,
        show_confusion_matrix=True,
        checkout_bad_to_good=True,
    )

In [None]:
# Set show_predictions to True to load the saved classifier and visualize its
# predictions for the currently loaded latents / embeddings.
show_predictions = False
if show_predictions:
    classifier = load_classifier()
    # NOTE(review): this rebinds the notebook-global `model`, which the
    # data-loading cell sets to the UMAP model (data["model_umap"]). After this
    # cell runs, `model` refers to the classifier's model instead.
    model, id_to_label = classifier["model"], classifier["id_to_label"]
    visualize_predictions(model, latents, embeddings, id_to_label)

In [13]:
# Set process_data to True to call process_sessions(); while it is False the
# call is skipped. Presumably this is the "run all data through model" step
# from the ToDo list -- confirm against roicat_support.classifier.
process_data = False
if process_data:
    process_sessions()