### Process Tiff Stack

In [1]:
import os
import time
from pvd_par import PVD
from IPython.display import clear_output

### Get data paths

In [2]:
# Function to get files and paths
def scan_directories(data_directory, min_size_bytes):
    """Find the large ``.tif`` stacks stored below ``data_directory``.

    The tree is walked top-down. Any directory whose path contains
    "exclude", "movement" or "missing" (case-insensitive) is skipped together
    with everything beneath it. Within the remaining directories, files whose
    name ends in ``.tif`` and whose size is at least ``min_size_bytes`` are
    recorded.

    Directories and files are visited in sorted order so that repeated scans
    of the same tree return the entries in the same order.

    Args:
        data_directory: Root directory to scan.
        min_size_bytes: Files smaller than this many bytes are ignored.

    Returns:
        A tuple ``(dataset_list, session_list, file_list)`` of parallel lists.
        For each accepted file, ``session`` is the name of the directory that
        holds it, ``dataset`` is the name of that directory's parent, and
        ``file`` is the bare file name.
    """
    excluded_keywords = ("exclude", "movement", "missing")

    def is_excluded(path_text):
        # Case-insensitive substring match against every exclusion keyword.
        lowered = path_text.lower()
        return any(keyword in lowered for keyword in excluded_keywords)

    dataset_list = []
    session_list = []
    file_list = []

    for root, dirs, files in os.walk(data_directory):
        if is_excluded(root):
            # Everything below an excluded path is excluded as well, so stop
            # os.walk from descending any further.
            dirs[:] = []
            continue

        # Prune excluded subdirectories in place (os.walk honours in-place
        # edits of `dirs` when walking top-down) and fix the visiting order.
        dirs[:] = sorted(name for name in dirs if not is_excluded(name))

        # The dataset/session names depend only on the directory, so derive
        # them once per directory instead of once per file.
        path_parts = os.path.normpath(root).split(os.sep)
        if len(path_parts) < 3:
            continue  # Too shallow to provide both a dataset and a session
        dataset = path_parts[-2]
        session = path_parts[-1]

        for file in sorted(files):
            if not file.endswith('.tif'):
                continue

            # Skip this file if it's too small
            if os.path.getsize(os.path.join(root, file)) < min_size_bytes:
                continue

            dataset_list.append(dataset)
            session_list.append(session)
            file_list.append(file)

    return dataset_list, session_list, file_list

In [3]:
# Scan the data drive for every tiff stack that meets the minimum size
data_dir = 'S:/pvd_data/'
min_file_size = 1e8  # 100 MB
datasets, sessions, files = scan_directories(
    data_directory=data_dir,
    min_size_bytes=min_file_size,
)
print(f"Located {len(files)} tiff stacks")

Located 53 tiff stacks


### Batch Process Data

In [4]:
# # Execute pipeline
# for ii, file in enumerate(files):  # To process only a subset, slice the list, e.g. enumerate(files[:1])
#     clear_output(wait=True)

#     # Set path
#     data_path = 'pvd_data'
#     results_path = 'pvd_analysis'
#     dataset = datasets[ii]
#     session = sessions[ii]
#     output_path = f"{results_path}/{dataset}/{session}/"

#     # Process stack
#     print(f"Processing tiff stack {ii}/{len(files)}")
#     print(f"{datasets[ii]} {sessions[ii]}")
#     pvd = PVD(data_path, dataset, session, file)
#     pvd.run_pipeline()

#     # Save relevant numpy arrays
#     print(f"Saving data to drive...")
#     pvd.save_results(output_path, save_tiff=False, save_numpy=False, save_plotly=True, save_labeled_tiff=False)

#     del pvd

#     time.sleep(1)

### Single Stack

In [5]:
# # Set path to zero-match file
# data_path = 'pvd_data'
# results_path = 'pvd_analysis'
# dataset = 'DataSet00'
# session = 'exp240104_00_01_'
# file = 'exp240104_00_01.tif'
# tiff_stack_path = f"{data_path}/{dataset}/{session}_/{file}"
# output_path = f"{results_path}/{dataset}/{session}/"

# # Process stack
# pvd = PVD(data_path, dataset, session, file)
# pvd.run_pipeline()

# # Save relevant numpy arrays
# print(f"Saving data to drive...")
# pvd.save_results(output_path, save_tiff=False, save_numpy=False, save_plotly=True, save_labeled_tiff=False)

# time.sleep(1)

In [6]:
# Set path to original test file
data_path = 'pvd_data'
results_path = 'pvd_analysis'
dataset = 'DataSet01'
session = 'exp240202_01_E_'
file = 'exp240202_01_E.tif'
# NOTE(review): tiff_stack_path is never read in this cell (PVD is given the
# path components directly), and because `session` already ends in "_" the
# value contains a doubled underscore -- confirm whether it can be removed.
tiff_stack_path = f"{data_path}/{dataset}/{session}_/{file}"
output_path = f"{results_path}/{dataset}/{session}/"

# Process stack
pvd = PVD(data_path, dataset, session, file)
pvd.run_pipeline()

# Save results (only save_plotly is enabled; the tiff, numpy and labeled-tiff
# outputs are switched off)
print("Saving data to drive...")
pvd.save_results(output_path, save_tiff=False, save_numpy=False, save_plotly=True, save_labeled_tiff=False)

time.sleep(1)

Starting pipeline.
Data loaded. Shape: (4, 188, 2044, 2042): 49.12 seconds
Data cropped. Shape: (4, 188, 2000, 2000): 6.67 seconds
Preprocessing complete: 239.66 seconds
Data skeletonized: 328.02 seconds
Number of tips per timepoint: [117, 112, 117, 122]
Number of knots per timepoint: [342, 345, 335, 340]
Outer segments found. Number of outer segments per timepoint: [34, 32, 32, 35]: 5.70 seconds
Matched 15 segments across all timepoints.
Segments matched. Number of matched segments per timepoint: [15, 15, 15, 15]: 7.81 seconds
Unmatched segments grouped: 7.77 seconds
All timepoints labeled successfully
Number of unique labels: 16
Processed data labeled: 48.12 seconds
Volume changes DataFrame generated: 48.73 seconds
Pipeline complete. Total time: 741.61 seconds
Saving data to drive...
Skeleton visualizations saved to S:/pvd_analysis/DataSet01/exp240202_01_E_//visualizations
Outer segment visualizations saved to S:/pvd_analysis/DataSet01/exp240202_01_E_//visualizations
Volume changes D

### Matched Segment Summary

In [7]:
# import pandas as pd
# summary_file = "summary.txt"

# # Summary Report
# for ii, file in enumerate(files):
#     results_path = 'pvd_analysis'
#     dataset = datasets[ii]
#     session = sessions[ii]
#     output_path = f"{results_path}/{dataset}/{session}/"

#     csv = pd.read_csv(f"{output_path}segment_change.csv")
#     csv_length = csv.shape[1]-3  # Subtract 3 for index and core segment

#     with open(summary_file, 'a') as file:
#         file.write(f"{dataset} - {session}: {csv_length}\n")