### Process Tiff Stack

In [None]:
import os
import sys
import time

from IPython.display import clear_output

from pvd_io import *
from pvd_metrics import data_summary
from pvd_par import PVD

### Get data paths

In [None]:
# Locate the tiff stacks under the data directory.
# scan_directories returns three sequences; the batch cell indexes them in
# parallel (datasets[ii], sessions[ii], files[ii]).
data_dir = 'pvd_data/'
min_file_size = 1e8  # 100 MB (presumably a lower bound in bytes; confirm in pvd_io.scan_directories)
datasets, sessions, files = scan_directories(data_dir, min_file_size)
print("Located", len(files), "tiff stacks")

### Batch Process Data

In [None]:
# Execute the pipeline on every located tiff stack.
# To process only a subset, slice the list in the loop header,
# e.g. enumerate(files[:1]).

# Root folders are the same for every stack, so set them once.
data_path = 'pvd_data'
results_path = 'pvd_analysis'

for ii, file in enumerate(files):
    clear_output(wait=True)

    # Results for this stack go to <results_path>/<dataset>/<session>/
    dataset = datasets[ii]
    session = sessions[ii]
    output_path = f"{results_path}/{dataset}/{session}/"

    # Route stdout through OutputCapture so everything printed while
    # processing this stack can be written to a log file afterwards.
    output_capture = OutputCapture()
    sys.stdout = output_capture

    try:
        # ii is zero-based; report a one-based position so the last stack
        # reads N/N rather than (N-1)/N.
        print(f"Processing tiff stack {ii + 1}/{len(files)}")
        print(f"{dataset} {session}")
        pvd = PVD(data_path, dataset, session, file)
        pvd.run_pipeline()

        print("Saving data to drive...")
        pvd.save_results(output_path, save_tiff=False, save_numpy=False, save_plotly=False, save_labeled_tiff=False)

    finally:
        # Restore stdout first so any traceback reaches the notebook.
        sys.stdout = output_capture.original_stdout

        # Write the captured output even when the pipeline raised: the log is
        # most useful for the stack that failed. The folder may not exist yet
        # if the failure happened before save_results ran.
        os.makedirs(output_path, exist_ok=True)
        with open(f"{output_path}output.txt", 'w') as f:
            f.write(str(output_capture.get_output()))

    # Clear memory before loading the next stack
    del pvd

    time.sleep(1)

# Create summary file recording MIP similarity between timepoints and segments matched
data_summary(files, datasets, sessions)

### Single Stack

In [None]:
# Process one hand-picked stack (the zero-match file).
data_path = 'pvd_data'
results_path = 'pvd_analysis'
dataset = 'DataSet00'
session = 'exp240104_00_01_'
file = 'exp240104_00_01.tif'
# NOTE(review): tiff_stack_path is not used below, and because `session`
# already ends in '_' this yields a folder name with a double underscore.
# Confirm against the on-disk layout before relying on it.
tiff_stack_path = f"{data_path}/{dataset}/{session}_/{file}"
output_path = f"{results_path}/{dataset}/{session}/"

# Route stdout through OutputCapture so everything printed during the run
# can be written to a log file afterwards.
output_capture = OutputCapture()
sys.stdout = output_capture

try:
    pvd = PVD(data_path, dataset, session, file)
    pvd.run_pipeline()

    print("Saving data to drive...")
    pvd.save_results(output_path, save_tiff=False, save_numpy=False, save_plotly=True, save_labeled_tiff=False)

finally:
    # Restore stdout first so any traceback reaches the notebook.
    sys.stdout = output_capture.original_stdout

    # Write the captured output even when the pipeline raised, so the
    # diagnostics of a failed run are kept. The folder may not exist yet if
    # the failure happened before save_results ran.
    os.makedirs(output_path, exist_ok=True)
    with open(f"{output_path}pipeline_log.txt", 'w') as f:
        f.write(str(output_capture.get_output()))

time.sleep(1)

In [None]:
# # Set path to original test file
# data_path = 'pvd_data'
# results_path = 'pvd_analysis'
# dataset = 'DataSet01'
# session = 'exp240202_01_E_'
# file = 'exp240202_01_E.tif'
# tiff_stack_path = f"{data_path}/{dataset}/{session}_/{file}"
# output_path = f"{results_path}/{dataset}/{session}/"

# # Process stack
# pvd = PVD(data_path, dataset, session, file)
# pvd.run_pipeline()

# # Save relevant numpy arrays
# print(f"Saving data to drive...")
# pvd.save_results(output_path, save_tiff=False, save_numpy=False, save_plotly=False, save_labeled_tiff=False)

# time.sleep(1)

### Matched Segment Summary

In [None]:
# # add a line to extract the quality score <-- add this to pvd_metrics.py
# import pandas as pd

# # Create a file with a tally of matched segments per tiff stack

# def data_summary(files, datasets, sessions, summary_file = "data_summary.txt"):
#     # Summary Report
#     for ii, file in enumerate(files):
#         results_path = 'pvd_analysis'
#         dataset = datasets[ii]
#         session = sessions[ii]
#         output_path = f"{results_path}/{dataset}/{session}/"

#         segment_csv = pd.read_csv(f"{output_path}segment_change.csv")
#         quality_csv = pd.read_csv(f"{output_path}mip_cosine_similarity.csv")
#         csv_length = segment_csv.shape[1]-3  # Subtract 3 for index and core segment
#         quality_score = quality_csv.loc[5,'t1']  # Get avg. cosine similarity score from csv

#         with open(summary_file, 'a') as file:
#             file.write(f"{dataset} - {session} -- Segments: {csv_length}  Quality: {float(quality_score):.2f}\n")