### Process Tiff Stack

In [1]:
import os
import sys
import time
from pvd_io import *
from pvd_par import PVD
from pvd_metrics import data_summary
from IPython.display import clear_output

# Check on slurm job: list the jobs belonging to the current user in the Slurm queue.
# A header row with nothing beneath it means no jobs are listed for this user.
!squeue -u $USER

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)


### Get data paths

In [2]:
# Locate the tiff stacks under the data directory, together with the
# dataset and session each one belongs to.
data_dir = 'pvd_data/'
# 100 MB. NOTE(review): presumably a lower bound on stack file size — confirm in scan_directories.
min_file_size = 1e8
scan_result = scan_directories(data_dir, min_file_size)
datasets, sessions, files = scan_result
stack_count = len(files)
print(f"Located {stack_count} tiff stacks")

Located 53 tiff stacks


### Batch Process Data

In [3]:
# Execute pipeline
# Run every located tiff stack through the PVD pipeline, saving the results and
# the captured console log for each stack under pvd_analysis/<dataset>/<session>/.

# Root folders are identical for every stack, so set them once up front
data_path = 'pvd_data'
results_path = 'pvd_analysis'

for ii, file in enumerate(files):  # Slice like so to get a range of files[:1]
    clear_output(wait=True)

    # Set path
    dataset = datasets[ii]
    session = sessions[ii]
    output_path = f"{results_path}/{dataset}/{session}/"

    # Capture and display output
    output_capture = OutputCapture()
    sys.stdout = output_capture

    try:
        # Report a 1-based position so the final stack reads N/N rather than N-1/N
        print(f"Processing tiff stack {ii + 1}/{len(files)}")
        print(f"{dataset} {session}")
        pvd = PVD(data_path, dataset, session, file)
        pvd.run_pipeline()

        print("Saving data to drive...")
        pvd.save_results(output_path, save_tiff=False, save_numpy=False, save_plotly=True, save_labeled_tiff=False)

    except Exception as err:
        # Record the failure in the captured log before letting it propagate,
        # so output.txt explains why this stack stopped
        print(f"Pipeline failed: {err!r}")
        raise

    finally:
        # Always hand stdout back to the notebook, even when the pipeline raises
        sys.stdout = output_capture.original_stdout

        # Save cell output here (not after the try) so the log survives a failure.
        # The folder may not exist yet if the stack failed before anything was saved.
        os.makedirs(output_path, exist_ok=True)
        with open(f"{output_path}output.txt", 'w') as f:
            f.write(str(output_capture.get_output()))

    # Clear memory
    del pvd

    time.sleep(1)

# Create summary file recording MIP similarity between timepoints and segments matched
data_summary(files, datasets, sessions)

### Single Stack

In [3]:
# Run the pipeline on the original test file
data_path = 'pvd_data'
results_path = 'pvd_analysis'
dataset = 'DataSet01'
session = 'exp240202_01_E_'
file = 'exp240202_01_E.tif'
# NOTE(review): tiff_stack_path is not used anywhere in this cell, and because
# `session` already ends in "_" the "{session}_" below yields a doubled
# underscore — confirm whether this variable is still needed.
tiff_stack_path = f"{data_path}/{dataset}/{session}_/{file}"
output_path = f"{results_path}/{dataset}/{session}/"

# Process stack
pvd = PVD(data_path, dataset, session, file)
pvd.run_pipeline()

# Write results to output_path; of the optional outputs only save_plotly is
# switched on (tiff, numpy and labeled-tiff saving are all disabled)
print("Saving data to drive...")
pvd.save_results(
    output_path,
    save_tiff=False,
    save_numpy=False,
    save_plotly=True,
    save_labeled_tiff=False,
)

time.sleep(1)

Starting pipeline.
Data loaded. Shape: (4, 188, 2044, 2042): 4.51 seconds
Data cropped. Shape: (4, 188, 2000, 2000): 8.81 seconds
Preprocessing complete: 149.39 seconds
Data skeletonized: 268.53 seconds
Number of tips per timepoint: [99, 104, 105, 106]
Number of knots per timepoint: [266, 281, 277, 271]
Outer segments found. Number of outer segments per timepoint: [52, 52, 48, 55]: 5.00 seconds
Matched 27 segments across all timepoints.
Segments matched. Number of matched segments per timepoint: [27, 27, 27, 27]: 0.16 seconds


  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  length, _ = quad(length_element, dist[0], dist[-1])


Segment length changes of time logged: 114.09 seconds
Unmatched segments grouped: 9.38 seconds
All timepoints labeled successfully
Number of unique labels: 28
Processed data labeled: 36.87 seconds
Volume changes DataFrame generated: 44.03 seconds
Pipeline complete. Total time: 640.78 seconds
Saving data to drive...

Quality Score: 0.7897
Skeleton visualizations saved to pvd_analysis/DataSet01/exp240202_01_E_//visualizations
Outer segment visualizations saved to pvd_analysis/DataSet01/exp240202_01_E_//visualizations
Volume changes DataFrame saved to pvd_analysis/DataSet01/exp240202_01_E_//segment_change.csv


In [3]:
# Run the pipeline on a *bad* test file
data_path = 'pvd_data'
results_path = 'pvd_analysis'
dataset = 'DataSet00'
session = 'exp240129_01_03_'
file = 'exp240129_01_03.tif'
# NOTE(review): tiff_stack_path is not used anywhere in this cell, and because
# `session` already ends in "_" the "{session}_" below yields a doubled
# underscore — confirm whether this variable is still needed.
tiff_stack_path = f"{data_path}/{dataset}/{session}_/{file}"
output_path = f"{results_path}/{dataset}/{session}/"

# Process stack
pvd = PVD(data_path, dataset, session, file)
pvd.run_pipeline()

# Write results to output_path; of the optional outputs only save_plotly is
# switched on (tiff, numpy and labeled-tiff saving are all disabled)
print("Saving data to drive...")
pvd.save_results(
    output_path,
    save_tiff=False,
    save_numpy=False,
    save_plotly=True,
    save_labeled_tiff=False,
)

time.sleep(1)

Starting pipeline.
Data loaded. Shape: (4, 87, 2044, 2042): 3.11 seconds
Data cropped. Shape: (4, 87, 2000, 2000): 4.12 seconds
Preprocessing complete: 68.18 seconds
Data skeletonized: 125.85 seconds
Number of tips per timepoint: [145, 153, 161, 137]
Number of knots per timepoint: [336, 352, 345, 284]
Outer segments found. Number of outer segments per timepoint: [47, 43, 45, 47]: 2.64 seconds
Matched 20 segments across all timepoints.
Segments matched. Number of matched segments per timepoint: [20, 20, 20, 20]: 0.13 seconds


  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  length, _ = quad(length_element, dist[0], dist[-1])


Segment length changes over timepoints logged: 63.51 seconds
Unmatched segments grouped: 4.37 seconds
All timepoints labeled successfully
Number of unique labels: 21
Processed data labeled: 17.88 seconds
Volume changes DataFrame generated: 19.66 seconds
Pipeline complete. Total time: 309.45 seconds
Saving data to drive...

Quality Score: 0.6833
Skeleton visualizations saved to pvd_analysis/DataSet00/exp240129_01_03_//visualizations
Outer segment visualizations saved to pvd_analysis/DataSet00/exp240129_01_03_//visualizations
Volume changes DataFrame saved to pvd_analysis/DataSet00/exp240129_01_03_//segment_change.csv


In [9]:
# Inspect the last entry of the voxel-based length changes.
# NOTE(review): presumably one timepoint-by-timepoint matrix of length
# differences per matched segment — confirm against PVD.
pvd.length_change_voxels[-1]

array([[  0,  -6, -10,  -7],
       [  6,   0,  -4,  -1],
       [ 10,   4,   0,   3],
       [  7,   1,  -3,   0]])

In [10]:
# Inspect the last entry of the spline-based length changes, for comparison
# with the voxel-based values.
# NOTE(review): presumably one timepoint-by-timepoint matrix of length
# differences per matched segment — confirm against PVD.
pvd.length_change_spline[-1]

array([[  0.        ,  -6.71317871, -15.31028191,  -9.95582884],
       [  6.71317871,   0.        ,  -8.59710321,  -3.24265013],
       [ 15.31028191,   8.59710321,   0.        ,   5.35445308],
       [  9.95582884,   3.24265013,  -5.35445308,   0.        ]])

In [12]:
# pvd.matched_segments[0][9][-1]

(np.int64(49), np.int64(1598), np.int64(656))

### Matched Segment Summary

In [6]:
# add a line to extract the quality score <-- add this to pvd_metrics.py
import pandas as pd

# Create a file with a tally of matched segments per tiff stack

def data_summary(files, datasets, sessions, summary_file="data_summary_100324.txt",
                 results_path="pvd_analysis"):
    """Append one summary line per tiff stack to ``summary_file``.

    For every stack, reads ``segment_change.csv`` and
    ``mip_cosine_similarity.csv`` from ``<results_path>/<dataset>/<session>/``
    and records the segment count and the quality score.

    All lines are built before the summary file is opened, so a missing or
    malformed CSV raises without leaving a partial set of rows behind.

    Note: defining this function in the notebook shadows the ``data_summary``
    imported from ``pvd_metrics``.

    Args:
        files: Sequence of tiff stack file names; only its length is used.
        datasets: Dataset name for each stack, aligned with ``files``.
        sessions: Session name for each stack, aligned with ``files``.
        summary_file: Path of the text file to append to. Existing content is
            kept, so calling this twice with the same inputs duplicates rows.
        results_path: Root folder holding the per-session result folders.

    Raises:
        FileNotFoundError: If a result CSV is missing for any stack.
        KeyError: If the similarity CSV has no row labelled 5 or no 't1' column.
    """
    summary_lines = []
    for ii, _ in enumerate(files):
        dataset = datasets[ii]
        session = sessions[ii]
        output_path = f"{results_path}/{dataset}/{session}/"

        segment_csv = pd.read_csv(f"{output_path}segment_change.csv")
        quality_csv = pd.read_csv(f"{output_path}mip_cosine_similarity.csv")
        csv_length = segment_csv.shape[1] - 3  # Subtract 3 for index and core segment
        # Get avg. cosine similarity score from csv
        # NOTE(review): relies on the average being stored at row label 5 — confirm
        quality_score = quality_csv.loc[5, 't1']

        summary_lines.append(
            f"{dataset} - {session} -- Segments: {csv_length}  Quality: {float(quality_score):.2f}\n"
        )

    # Open the summary once, and only when there is something to write
    if summary_lines:
        with open(summary_file, 'a') as summary:
            summary.writelines(summary_lines)

In [7]:
# Write the per-stack summary to the default file (data_summary_100324.txt).
# The file is opened in append mode, so re-running this cell adds duplicate rows.
data_summary(files, datasets, sessions)