In [1]:
from sleap_roots import Series, find_all_series
from sleap_roots import MultipleDicotPipeline
from sleap_roots.trait_pipelines import Pipeline

import numpy as np
import pandas as pd
import json

from pathlib import Path

In [2]:
# --- Input locations ---
# Folder holding the h5 files and predictions
folder_path = "tests/data/multiple_arabidopsis_11do"
# CSV with the sample information (count, group)
csv_path = "tests/data/multiple_arabidopsis_11do/merged_proofread_samples_03122024.csv"

# --- Prediction names ---
# Name used when loading the primary root predictions
primary_name = "primary"
# Name used when loading the lateral root predictions
lateral_name = "lateral"

In [3]:
# Find all h5 files in the folder given by `folder_path`;
# the bare name on the last line displays the result as the cell output.
all_h5s = find_all_series(folder_path)
all_h5s

['tests/data/multiple_arabidopsis_11do/6039_1.h5',
 'tests/data/multiple_arabidopsis_11do/7327_2.h5',
 'tests/data/multiple_arabidopsis_11do/9535_1.h5',
 'tests/data/multiple_arabidopsis_11do/997_1.h5']

In [4]:
# Build one Series per h5 file, passing the primary/lateral prediction names
# and the sample-information CSV to every load call.
all_series = [
    Series.load(
        h5_path=h5_file,
        primary_name=primary_name,
        lateral_name=lateral_name,
        csv_path=csv_path,
    )
    for h5_file in all_h5s
]
all_series

[Series(h5_path='tests/data/multiple_arabidopsis_11do/6039_1.h5', primary_labels=Labels(labeled_frames=67, videos=1, skeletons=1, tracks=0), lateral_labels=Labels(labeled_frames=68, videos=1, skeletons=1, tracks=0), crown_labels=None, video=Video(filename="tests/data/multiple_arabidopsis_11do/6039_1.h5", shape=(72, 1088, 2048, 1), dataset=vol, backend=HDF5Video), csv_path='tests/data/multiple_arabidopsis_11do/merged_proofread_samples_03122024.csv'),
 Series(h5_path='tests/data/multiple_arabidopsis_11do/7327_2.h5', primary_labels=Labels(labeled_frames=43, videos=1, skeletons=1, tracks=0), lateral_labels=Labels(labeled_frames=31, videos=1, skeletons=1, tracks=0), crown_labels=None, video=Video(filename="tests/data/multiple_arabidopsis_11do/7327_2.h5", shape=(72, 1088, 2048, 1), dataset=vol, backend=HDF5Video), csv_path='tests/data/multiple_arabidopsis_11do/merged_proofread_samples_03122024.csv'),
 Series(h5_path='tests/data/multiple_arabidopsis_11do/9535_1.h5', primary_labels=Labels(labe

In [5]:
# Get the first series in the list (index 0 of `all_series`);
# it is used as the single-sample example in the cells that follow.
series = all_series[0]

In [6]:
# Report the name and the group of the selected series.
# The message labels `series.group` as the genotype.
print(f"First sample has name {series.series_name}")
print(f"First sample has genotype {series.group}")

First sample has name 6039_1
First sample has genotype 6039


In [7]:
# Initialize the pipeline (constructed with no arguments);
# the same instance is reused for every trait computation below.
pipeline = MultipleDicotPipeline()

In [8]:
# Compute the traits of the first sample.
# Both write flags are enabled, so the JSON and CSV outputs are also saved.
first_sample_traits = pipeline.compute_multiple_dicots_traits(
    series=series,
    write_json=True,
    write_csv=True,
)

Aggregated traits saved to 6039_1.all_frames_traits.json
Summary statistics saved to 6039_1.all_frames_summary.csv


In [9]:
# Display the first sample's "summary_stats" entry as a one-row DataFrame
# (the single-element list yields exactly one row).
pd.DataFrame([first_sample_traits["summary_stats"]])

Unnamed: 0,lateral_count_min,lateral_count_max,lateral_count_mean,lateral_count_median,lateral_count_std,lateral_count_p5,lateral_count_p25,lateral_count_p75,lateral_count_p95,lateral_lengths_min,...,network_distribution_ratio_p95,network_solidity_min,network_solidity_max,network_solidity_mean,network_solidity_median,network_solidity_std,network_solidity_p5,network_solidity_p25,network_solidity_p75,network_solidity_p95
0,1,7,5.08209,5.0,1.240176,3.0,4.0,6.0,7.0,3.777593,...,0.757133,0.041121,0.150504,0.062255,0.057276,0.01982,0.042815,0.048231,0.070095,0.098175


In [10]:
# Compute the traits grouped by genotype across all loaded series.
# A copy of the series list is passed; JSON and CSV outputs are written as well.
grouped_traits = pipeline.compute_multiple_dicots_traits_for_groups(
    series_list=list(all_series),
    write_json=True,
    write_csv=True,
)

Processing series '6039_1'
Finished processing group '6039'
Aggregated traits for group 6039 saved to 6039.grouped_traits.json
Finished processing group '6039'
Summary statistics for group 6039 saved to 6039.grouped_summary.csv
Processing series '7327_2'
Finished processing group '7327'
Aggregated traits for group 7327 saved to 7327.grouped_traits.json
Finished processing group '7327'
Summary statistics for group 7327 saved to 7327.grouped_summary.csv
Processing series '9535_1'
Finished processing group '9535'
Aggregated traits for group 9535 saved to 9535.grouped_traits.json
Finished processing group '9535'
Summary statistics for group 9535 saved to 9535.grouped_summary.csv
Processing series '997_1'
Finished processing group '997'
Aggregated traits for group 997 saved to 997.grouped_traits.json
Finished processing group '997'
Summary statistics for group 997 saved to 997.grouped_summary.csv


In [11]:
# Number of entries in `grouped_traits`
len(grouped_traits)

4

In [12]:
# Display the "summary_stats" of the first entry in `grouped_traits`
# as a one-row DataFrame.
pd.DataFrame([grouped_traits[0]["summary_stats"]])

Unnamed: 0,lateral_count_min,lateral_count_max,lateral_count_mean,lateral_count_median,lateral_count_std,lateral_count_p5,lateral_count_p25,lateral_count_p75,lateral_count_p95,lateral_lengths_min,...,network_distribution_ratio_p95,network_solidity_min,network_solidity_max,network_solidity_mean,network_solidity_median,network_solidity_std,network_solidity_p5,network_solidity_p25,network_solidity_p75,network_solidity_p95
0,1,7,5.08209,5.0,1.240176,3.0,4.0,6.0,7.0,3.777593,...,0.757133,0.041121,0.150504,0.062255,0.057276,0.01982,0.042815,0.048231,0.070095,0.098175


In [13]:
# Stack the "summary_stats" of every grouped result into a single DataFrame,
# one row per entry of `grouped_traits`.
grouped_summary_df = pd.DataFrame(
    [group_result["summary_stats"] for group_result in grouped_traits]
)

In [14]:
# Display the combined summary table held in `grouped_summary_df`
grouped_summary_df

Unnamed: 0,lateral_count_min,lateral_count_max,lateral_count_mean,lateral_count_median,lateral_count_std,lateral_count_p5,lateral_count_p25,lateral_count_p75,lateral_count_p95,lateral_lengths_min,...,network_distribution_ratio_p95,network_solidity_min,network_solidity_max,network_solidity_mean,network_solidity_median,network_solidity_std,network_solidity_p5,network_solidity_p25,network_solidity_p75,network_solidity_p95
0,1,7,5.08209,5.0,1.240176,3.0,4.0,6.0,7.0,3.777593,...,0.757133,0.041121,0.150504,0.062255,0.057276,0.01982,0.042815,0.048231,0.070095,0.098175
1,1,9,3.434109,1.0,2.82526,1.0,1.0,6.0,8.0,4.345694,...,0.67984,0.024168,0.293489,0.09292,0.087395,0.062009,0.030521,0.041196,0.125539,0.214581
2,1,13,6.007937,6.0,3.02764,1.0,4.0,8.0,11.0,4.431438,...,0.677514,0.032377,0.166538,0.055098,0.048888,0.023023,0.033393,0.03847,0.06584,0.092981
3,4,9,7.0,7.5,1.914854,4.25,5.5,8.75,9.0,17.140351,...,0.550392,0.017635,0.028867,0.021103,0.019285,0.004037,0.017699,0.017987,0.022816,0.027564


In [17]:
# Run the batch variant of the grouped computation over all loaded series,
# with JSON and CSV writing enabled.
# NOTE(review): this rebinds `grouped_summary_df`, replacing the DataFrame that
# an earlier cell built from `grouped_traits` -- consider a distinct name.
grouped_summary_df = pipeline.compute_batch_multiple_dicots_traits_for_groups(
    all_series=list(all_series),
    write_json=True,
    write_csv=True,
)

Processing series '6039_1'
Finished processing group '6039'
Aggregated traits for group 6039 saved to 6039.grouped_traits.json
Finished processing group '6039'
Processing series '7327_2'
Finished processing group '7327'
Aggregated traits for group 7327 saved to 7327.grouped_traits.json
Finished processing group '7327'
Processing series '9535_1'
Finished processing group '9535'
Aggregated traits for group 9535 saved to 9535.grouped_traits.json
Finished processing group '9535'
Processing series '997_1'
Finished processing group '997'
Aggregated traits for group 997 saved to 997.grouped_traits.json
Finished processing group '997'
Computed traits for all groups saved to group_summarized_traits.csv


In [16]:
# Group label stored under the "group" key of the first grouped result
grouped_traits[0]["group"]

'6039'

In [18]:
# Dimensions of `grouped_summary_df`.
# NOTE(review): `grouped_summary_df` is assigned in two different cells, so the
# shape shown here depends on which of them ran last; the execution counts of
# the final cells are also out of order -- re-run with Restart & Run All.
grouped_summary_df.shape

(4, 316)