In [1]:
import os
import ray
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from parsers.track_parser import TrackParserDistributed
from imaris.imaris import ImarisDataObject
from typing import List, Dict

In [2]:
data_path = "C:/Users/perer/Downloads/FIX/FIX/F1/GFP #1 Sec2 Roi1 2x2 1h30min.ims"
os.path.isfile(data_path)

True

In [3]:
parser1 = TrackParserDistributed.remote(data_path, surface_id=0)
parser2 = TrackParserDistributed.remote(data_path, surface_id=0)

2023-11-22 22:41:21,118	INFO worker.py:1673 -- Started a local Ray instance.


In [4]:
# get generated df
final_df = parser1.inspect.remote(surface_id=0)
final_df = ray.get(final_df)
generated_df = final_df["stats_df"]

# get stats dict that maps ID to Name
stats_dict = parser2.get_surface_track_stats_info.remote(surface_id=0)
stats_dict = ray.get(stats_dict)

# get original data
ims_obj = ImarisDataObject(data_path)
raw_stats = ims_obj.get_stats_values(final_df["surface_name"])

In [None]:
raw_stats

In [5]:
def validate(
    gen_stats: pd.DataFrame,
    raw_obj_stats: pd.DataFrame,
    stats_dict: Dict,
) -> None:
    """
    Given a single row of statistics ie: all the stats for a single
    objectid and the raw stats values dictionary for the same object id
    this function will compare each statistics value between the generated
    version and the raw stats values dictionary obtained directly from the
    imaris file to ensure every stat values match for the given object id.

    Args:
        gen_stats (pd.DataFrame): dataframe for all the stats for a single object id
        raw_obj_stats (pd.DataFrame): raw stats values for a single object id
        stats_dict (Dict): dictionary that maps Stats ID Number to Stat Name

    Raises:
        ValueError: _description_
    """
    raw_obj_id = raw_obj_stats["ID_Object"].unique().item()
    gen_obj_id = gen_stats["Object_ID"].item()
    assert raw_obj_id == gen_obj_id, "Object IDs Do NOT Match"

    gen_values = {}
    # key = stats name, value=generated stats value for stats name
    for stats_name in gen_stats.columns.to_list():
        gen_values[stats_name] = gen_stats[stats_name].item()
    gen_values.pop("Object_ID")

    raw_values = {}
    for stat_id in raw_obj_stats["ID_StatisticsType"].to_list():
        stats_name = stats_dict[stat_id]
        stats_value = raw_obj_stats[raw_obj_stats["ID_StatisticsType"] == stat_id]
        raw_values[stats_name] = stats_value["Value"].item()

    # double check all keys in both dicts are the same
    print(gen_values.keys())
    print(raw_values.keys())
    assert list(gen_values.keys()) == list(raw_values.keys())

    for key in gen_values.keys():
        raw_value = raw_values[key]
        gen_value = gen_values[key]
        if raw_value != gen_value:
            print(key, raw_value, gen_value)
            raise ValueError

    print(f"[info] -- confirmed all values for object {raw_obj_id}")

In [6]:
def run_stats_validation(
    generated_df: pd.DataFrame,
    raw_stats: pd.DataFrame,
    stats_dict: Dict,
) -> None:
    """_summary_

    Args:
        generated_df (pd.DataFrame): _description_
        raw_stats (pd.DataFrame): _description_
        stats_dict (Dict): _description_
    """
    for object_id in tqdm(generated_df["Object_ID"].to_list()):
        # get generated stats info for object id
        gen_stats = generated_df[generated_df["Object_ID"] == object_id]
        # get raw stats values for object id
        raw_obj_stats = raw_stats[raw_stats["ID_Object"] == object_id]

        validate(gen_stats, raw_obj_stats, stats_dict)

In [7]:
run_stats_validation(generated_df, raw_stats, stats_dict[0])

  0%|          | 0/2783 [00:00<?, ?it/s]

dict_keys(['Track Ar1 Mean', 'Track Ar1 X', 'Track Ar1 Y', 'Track Ar1 Z', 'Track Area Mean', 'Track Center of Homogeneous Mass X', 'Track Center of Homogeneous Mass Y', 'Track Center of Homogeneous Mass Z', 'Track Center of Image Mass X Channel_1', 'Track Center of Image Mass X Channel_2', 'Track Center of Image Mass X Channel_3', 'Track Center of Image Mass X Channel_4', 'Track Center of Image Mass X Channel_5', 'Track Center of Image Mass Y Channel_1', 'Track Center of Image Mass Y Channel_2', 'Track Center of Image Mass Y Channel_3', 'Track Center of Image Mass Y Channel_4', 'Track Center of Image Mass Y Channel_5', 'Track Center of Image Mass Z Channel_1', 'Track Center of Image Mass Z Channel_2', 'Track Center of Image Mass Z Channel_3', 'Track Center of Image Mass Z Channel_4', 'Track Center of Image Mass Z Channel_5', 'Track Displacement Length', 'Track Displacement X', 'Track Displacement Y', 'Track Displacement Z', 'Track Duration', 'Track Ellipsoid Axis A X Mean', 'Track Elli

AssertionError: 

In [9]:
generated_df[generated_df["Object_ID"] == 1000221198]

Unnamed: 0,Track Ar1 Mean,Track Ar1 X,Track Ar1 Y,Track Ar1 Z,Track Area Mean,Track Center of Homogeneous Mass X,Track Center of Homogeneous Mass Y,Track Center of Homogeneous Mass Z,Track Center of Image Mass X Channel_1,Track Center of Image Mass X Channel_2,...,Track Shortest Distance to Surfaces Min_Surfaces Unnamed 4 CG.GC.C-.G-.icsx_[ibrx_2023-09-27T11-04-45.364],Track Speed Max,Track Speed Mean,Track Speed Min,Track Speed StdDev,Track Speed Variation,Track Sphericity Mean,Track Straightness,Track Volume Mean,Object_ID
1000221198,-0.56268,-0.532461,-0.496516,-0.659063,233.107971,56470.480469,37532.207031,11.191582,56470.46875,56470.546875,...,21.172438,0.017622,0.01373,0.009269,0.002958,0.215454,0.74188,0.490289,214.955383,1000221198
