In [1]:
import h5py
import numpy as np
import pandas as pd
import re
import yaml
import os
from collections.abc import MutableMapping
from typing import Dict, List, Tuple
from exceptions import *
from copy import deepcopy
import collections
from parser_base import Parser
from imaris import ImarisDataObject
import ray
from functools import partial
import time
import polars as pl

In [38]:
# @ray.remote
class SurfaceParser(Parser):
    """
    Extracts Surface Level Information From Imaris File

    Args:
        Parser (ABCMeta): Parser Abstract Base Class
    """

    def __init__(self, ims_file_path: str) -> None:
        self.ims_file_path = ims_file_path
        self.ims = ImarisDataObject(self.ims_file_path)
        self.configure_instance()

    def configure_instance(self) -> None:
        """
        Extracts relevant information from ims object and
        instantiates it as instance variables for fast recall.

        Currently Extracts:
            - all the surface names -- List
            - all the stats_names -- {id: pd.DataFrame}
            - all the stats values -- {id: pd.DataFrame}
            - all the factor info -- {id: pd.DataFrame}
        """
        # extract all information and saves it as a instance var
        self.surface_names = self.ims.get_object_names("Surface")

        # get all the stats names for every surface {surf_id: stats_name_df}
        self.stats_names = {
            surface_id: self.ims.get_stats_names(surface_name)
            for surface_id, surface_name in enumerate(self.surface_names)
        }

        # get all the stats values for every surface {surf_id: stats_values_df}
        self.stats_values = {
            surface_id: self.ims.get_stats_values(surface_name)
            for surface_id, surface_name in enumerate(self.surface_names)
        }

        # get all the factor table info for every surface {surf_id: factor_df}
        self.factors = {
            surface_id: self.ims.get_object_factor(surface_name)
            for surface_id, surface_name in enumerate(self.surface_names)
        }

        # get all the factor table info for every surface {surf_id: factor_df}
        self.object_ids = {
            surface_id: self.ims.get_object_ids(surface_name)
            for surface_id, surface_name in enumerate(self.surface_names)
        }

    def organize_stats(self, stats_values: pd.DataFrame) -> Dict:
        """Organized the data such that it looks like
        {ID_Object: {Stats Name: Value}}

        Args:
            surface_stats_values (pd.DataFrame): a single dataframe
            that contains the statistics for a single surface

        Returns:
            Dict: _description_
        """
        grouped_stats = (
            stats_values.groupby("ID_Object")[["ID_StatisticsType", "Value"]]
            .apply(lambda x: x.set_index("ID_StatisticsType").to_dict(orient="dict"))
            .to_dict()
        )
        grouped_stats = {k: v["Value"] for k, v in grouped_stats.items()}
        return grouped_stats

    def generate_csv(
        self, stats_values: Dict, stat_names: pd.DataFrame
    ) -> pd.DataFrame:
        """_summary_

        Args:
            organized_stats (Dict): _description_

        Returns:
            pd.DataFrame: _description_
        """
        # create a dict that maps stat_id to stat_name
        column_names_dict = dict(zip(stat_names["ID"], stat_names["Name"]))
        dataframe = pd.DataFrame(stats_values).transpose()

        # replaces id columns with respective stat name and add idx
        dataframe = dataframe.rename(column_names_dict, axis=1)
        dataframe["Object_ID"] = dataframe.index

        return dataframe

    def save_csv(self):
        # a function to write csv information to disk
        pass

    def process(self, surface_id: int) -> None:
        """
        Runs a single end to end parser pipeline on a single surface
        Steps:
            - get stat names for a single surface
            - get stat values for a single surface
            - filter stat values to keep only track ids
            - filter stats values to remove track level stat information
            - rename certian columns (if needed)(need a custom func for this to add channel info)
            - organize the filtered stats
            - generate csv
            - save csv

        Args:
            surface_id (int): _description_
        """
        # gather info for current surface
        start = time.perf_counter()
        surface_name = self.surface_names[surface_id]
        print(f"surface_name: {time.perf_counter() - start}")
        stat_names = self.stats_names.get(surface_id)
        print(f"stat_names: {time.perf_counter() - start}")
        stat_values = self.stats_values.get(surface_id)
        print(f"stat_values: {time.perf_counter() - start}")
        object_id = self.object_ids.get(surface_id)
        print(f"object_id: {time.perf_counter() - start}")
        factor = self.factors.get(surface_id)
        print(f"factor: {time.perf_counter() - start}")

        # update channel and surface names
        stat_names = self.update_channel_info(stats_names=stat_names, factor=factor)
        print(f"stat_names_channel: {time.perf_counter() - start}")
        stat_names = self.update_surface_info(stats_names=stat_names, factor=factor)
        print(f"stat_names_surfaces: {time.perf_counter() - start}")

        # filter stats values by object ids (ie: ignore info related to trackids)
        stat_values = self.filter_stats(
            stats_values=stat_values,
            filter_col_names=["ID_Object"],
            filter_values=[object_id],
        )
        print(f"filtered_stat_values: {time.perf_counter() - start}")

        # organize stats values
        organized_stats = self.organize_stats(stat_values)
        print(f"organized_stats: {time.perf_counter() - start}")

        # generate csv
        stats_df = self.generate_csv(organized_stats, stat_names=stat_names)
        print(f"stats_df: {time.perf_counter() - start}")

        # add in track level info for each object
        object_info = self.ims.get_object_info(surface_name)
        track_info = self.ims.get_track_info(surface_name)
        database = self.create_track_id_database(
            object_data=object_info,
            track_data=track_info,
        )

        return stats_df, database

    def filter_stats(
        self,
        stats_values: pd.DataFrame,
        filter_col_names: List[str],
        filter_values: List[pd.Series],
    ) -> pd.DataFrame:
        """
        Filters the stats values dataframe. It keeps information
        from col_names and filter_values that is passed in as arguments.

        Args:
            stats_values (pd.DataFrame): _description_
            filter_col_name (str): name of the column we want to use to filter
            filter_values (str): values that we want to keep

        Returns:
            pd.DataFrame: _description_
        """
        # for surface parser need to filter out track id information
        # and statistics related to track information.
        for col_names, values in zip(filter_col_names, filter_values):
            stats_values = stats_values[stats_values[col_names].isin(values=values)]

        return stats_values

    def extract_and_save(self):
        # this function is the funtion that gets called externally
        # we can have this function as a ray method to help with distributed execution
        pass

    def get_available_stat_names(self):
        # interacts with data object and returns requested data for inspection
        pass

    def update_stats_with_real_names(
        self, surface_name: str, stats_names: Dict, user_defined_list: List
    ) -> Dict:
        """
        Update the stats names according to the real surface names found
        inside Contents->SurfaceName->Factor

        Args:
            surface_name (str): the name of the surface to extract data from
            stats_names (Dict): stats_names dictionary
            user_defined_list (List): list of stats name given by ...
                ...the user to be replaced by the real surface names

        Returns:
            Dict: stats name dict with the updated surface names
        """
        real_stats_names = self.ims.get_real_surface_names(surface_name)
        filtered_dicts = [
            self.get_filtered_stat_names(stats_names, keyword, exact=True)
            for keyword in user_defined_list
        ]
        for dict in filtered_dicts:
            for idx, (k, _) in enumerate(dict.items()):
                stats_names[k] = real_stats_names[idx]

        return stats_names

    def update_channel_info(
        self, stats_names: pd.DataFrame, factor: pd.DataFrame
    ) -> pd.DataFrame:
        """
        Updates the channel information for the relavent rows
        based on th ID_FactorList information in stats_names

        Args:
            stats_names (pd.DataFrame): _description_
            factor (pd.DataFrame): _description_

        Returns:
            pd.DataFrame: _description_
        """

        # create function get channel number from a pandas row from stats_names
        # inner func
        def get_channel_id(row_info, factor: pd.DataFrame):
            factor_id = row_info["ID_FactorList"]  # factor id
            name = row_info["Name"]  # stat name

            # filter factor to only include items related to Channel
            channel_info = factor[factor["Name"] == "Channel"]

            # main logic to select the right channel given the factor id
            if factor_id in channel_info["ID_List"].to_list():
                channel = channel_info[channel_info["ID_List"] == factor_id][
                    "Level"
                ].item()
                return f"{name} Channel_{channel}"
            # if factor id is not in the channel list no channel info is needed
            else:
                return name

        # create partial
        get_channel_id_partial = partial(get_channel_id, factor=factor)

        # update stats name with the newly mapped stats names values
        stats_names["Name"] = stats_names.apply(func=get_channel_id_partial, axis=1)

        return stats_names

    def update_surface_info(
        self,
        stats_names: pd.DataFrame,
        factor: pd.DataFrame,
    ) -> pd.DataFrame:
        """
        Updates the surface name information for the relavent rows
        based on th ID_FactorList information in stats_names

        Args:
            stats_names (pd.DataFrame): _description_
            factor (pd.DataFrame): _description_

        Returns:
            pd.DataFrame: _description_
        """

        # create function get channel number from a pandas row from stats_names
        # inner func
        def get_surface_name(row_info, factor: pd.DataFrame):
            factor_id = row_info["ID_FactorList"]  # factor id
            name = row_info["Name"]  # stat name

            # filter factor to only include items related to Channel
            channel_info = factor[factor["Name"] == "Surfaces"]

            # main logic to select the right channel given the factor id
            if factor_id in channel_info["ID_List"].to_list():
                channel = channel_info[channel_info["ID_List"] == factor_id][
                    "Level"
                ].item()
                return channel
            # if factor id is not in the channel list no channel info is needed
            else:
                return name

        # create partial
        get_surface_name_partial = partial(get_surface_name, factor=factor)

        # update stats name with the newly mapped stats names values
        stats_names["Name"] = stats_names.apply(func=get_surface_name_partial, axis=1)

        return stats_names

    def inspect(self, surface_id: int) -> Dict:
        """
        Used to inspect intermediate steps in the
        parser's process.

        Args:
            surface_id (int): _description_

        Returns:
            Dict: _description_
        """
        storage = {}
        surface_name = self.surface_names[surface_id]
        storage["surface_name"] = surface_name
        stat_names = self.stats_names.get(surface_id)
        storage["stat_names_raw"] = deepcopy(stat_names)
        stat_values = self.stats_values.get(surface_id)
        storage["stat_values_raw"] = stat_values
        object_id = self.object_ids.get(surface_id)
        factor = self.factors.get(surface_id)
        storage["factor"] = factor

        # update channel and surface names
        stat_names = self.update_channel_info(stats_names=stat_names, factor=factor)
        storage["stat_names_channel_added"] = deepcopy(stat_names)
        stat_names = self.update_surface_info(stats_names=stat_names, factor=factor)
        storage["stat_names_surface_added"] = deepcopy(stat_names)

        # # filter stats values by object ids (ie: ignore info related to trackids)
        stat_values = self.filter_stats(
            stats_values=stat_values,
            filter_col_names=["ID_Object"],
            filter_values=[object_id],
        )

        # organize stats values
        organized_stats = self.organize_stats(stat_values)
        storage["organized_stats"] = organized_stats

        # generate csv
        stats_df = self.generate_csv(organized_stats, stat_names=stat_names)
        storage["stats_df"] = stats_df

        # add track id information for each object
        stats_df = self.update_track_id_info(surface_name, stats_df)

        storage["final_df"] = stats_df

        return storage

    def update_track_id_info(self, surface_name, dataframe) -> pd.DataFrame:
        """Returns the track id an object belongs to

        Args:
            object_id (int): _description_

        Returns:
            int: _description_
        """
        object_info = self.ims.get_object_info(surface_name)
        track_info = self.ims.get_track_info(surface_name)
        database = self.create_track_id_database(
            object_data=object_info,
            track_data=track_info,
        )

        # create database to make obj to track matching efficient
        storage = {}
        for idx in range(len(track_info)):
            data = track_info.iloc[idx]
            start = data["IndexTrackObjectBegin"]
            end = data["IndexTrackObjectEnd"]
            track_id = data["ID"]
            for i in range(start, end):
                obj_id = object_info.iloc[i]["ID"]
                storage[obj_id] = track_id

        dataframe["Track_ID"] = dataframe.apply(
            func=lambda x: database[x["Object_ID"].item()],
            axis=1,
        )

        return dataframe

In [39]:
# data_path = "../../data/surface_parser_dev_data/P1 DHBR Roi2 6x6_TileScan_001_Merging_Crop_0_batch.ims"
# data_path = "../../data/surface_parser_dev_data/temp/Live P1 Sec2 2 pos x 2 pos 4h ev 2min_DHCR_001_S001_0_batch.ims"
# data_path = "../../data/temp/ILN with VLP Tiled 12x18 incomplete.ims"
# data_path = "../../data/surface_parser_dev_data/P1 DHBR Roi2 6x6_TileScan_001_Merging_Crop_0_batch.ims"
data_path = (
    "../../data/multi_surface_track_parser_dev_data/GFP #1 Sec1 Roi2 2x2 1h30min.ims"
)
os.path.isfile(data_path)

True

In [40]:
parser = SurfaceParser(data_path)
df = parser.inspect(surface_id=0)

creating database


In [16]:
list(df[0].keys())

['surface_name',
 'stat_names_raw',
 'stat_values_raw',
 'factor',
 'stat_names_channel_added',
 'stat_names_surface_added',
 'organized_stats',
 'stats_df']

In [17]:
df[0]["stats_df"]

Unnamed: 0,Acceleration,Acceleration X,Acceleration Y,Acceleration Z,Area,BoundingBoxAA Length X,BoundingBoxAA Length Y,BoundingBoxAA Length Z,BoundingBoxOO Length A,BoundingBoxOO Length B,...,Time Index,Time Since Track Start,Velocity Angle X,Velocity Angle Y,Velocity Angle Z,Velocity X,Velocity Y,Velocity Z,Volume,Object_ID
0,0.0,0.0,0.0,0.0,355.246979,9.097656,8.191406,22.753899,6.984375,9.031250,...,1.0,0.000000,101.478523,11.475876,90.002655,-0.001512,0.007449,0.000000,469.004700,0
2,0.0,0.0,0.0,0.0,1192.548340,20.929688,16.378906,45.507805,12.687500,19.205078,...,1.0,0.000000,80.397675,124.701721,36.385479,0.000952,-0.003248,0.004594,1495.459473,2
3,0.0,0.0,0.0,0.0,225.840988,8.187500,8.191406,22.753902,6.119141,7.179688,...,1.0,0.000000,97.669685,172.338272,90.002655,-0.000784,-0.005824,0.000000,231.663483,3
4,0.0,0.0,0.0,0.0,820.118103,13.652344,11.828125,45.507805,10.781250,11.408203,...,1.0,0.000000,28.869730,72.922729,112.535538,0.073142,0.024530,-0.032006,1502.495117,4
6,0.0,0.0,0.0,0.0,191.851913,6.371094,9.101562,22.753899,5.906250,8.093750,...,1.0,0.000000,41.890392,48.112255,90.002655,0.027162,0.024362,0.000000,151.075226,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
218125,0.0,0.0,0.0,0.0,731.304199,12.742188,10.011719,45.507805,8.015625,11.960938,...,80.0,5544.541992,74.092293,111.013573,153.175903,0.018318,-0.023959,-0.059631,1279.479248,218125
218126,0.0,0.0,0.0,0.0,471.288635,8.191406,8.187500,34.130852,7.062500,7.593750,...,80.0,5544.541992,90.642616,105.883980,164.110428,-0.000112,-0.002737,-0.009618,594.885376,218126
218127,0.0,0.0,0.0,0.0,592.725220,10.917969,10.007812,34.130852,9.566406,10.851562,...,80.0,5544.541992,81.735985,76.363876,163.973068,0.002792,0.004579,-0.018666,1050.720581,218127
218128,0.0,0.0,0.0,0.0,543.955444,9.101562,9.101562,34.130852,7.609375,9.021484,...,80.0,5544.541992,154.623962,90.649757,115.374573,-0.004468,-0.000056,-0.002119,792.800781,218128


In [33]:
original_df = df[0]["stats_df"]
database = df[1]

In [34]:
original_df

Unnamed: 0,Acceleration,Acceleration X,Acceleration Y,Acceleration Z,Area,BoundingBoxAA Length X,BoundingBoxAA Length Y,BoundingBoxAA Length Z,BoundingBoxOO Length A,BoundingBoxOO Length B,...,Time Index,Time Since Track Start,Velocity Angle X,Velocity Angle Y,Velocity Angle Z,Velocity X,Velocity Y,Velocity Z,Volume,Object_ID
0,0.0,0.0,0.0,0.0,355.246979,9.097656,8.191406,22.753899,6.984375,9.031250,...,1.0,0.000000,101.478523,11.475876,90.002655,-0.001512,0.007449,0.000000,469.004700,0
2,0.0,0.0,0.0,0.0,1192.548340,20.929688,16.378906,45.507805,12.687500,19.205078,...,1.0,0.000000,80.397675,124.701721,36.385479,0.000952,-0.003248,0.004594,1495.459473,2
3,0.0,0.0,0.0,0.0,225.840988,8.187500,8.191406,22.753902,6.119141,7.179688,...,1.0,0.000000,97.669685,172.338272,90.002655,-0.000784,-0.005824,0.000000,231.663483,3
4,0.0,0.0,0.0,0.0,820.118103,13.652344,11.828125,45.507805,10.781250,11.408203,...,1.0,0.000000,28.869730,72.922729,112.535538,0.073142,0.024530,-0.032006,1502.495117,4
6,0.0,0.0,0.0,0.0,191.851913,6.371094,9.101562,22.753899,5.906250,8.093750,...,1.0,0.000000,41.890392,48.112255,90.002655,0.027162,0.024362,0.000000,151.075226,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
218125,0.0,0.0,0.0,0.0,731.304199,12.742188,10.011719,45.507805,8.015625,11.960938,...,80.0,5544.541992,74.092293,111.013573,153.175903,0.018318,-0.023959,-0.059631,1279.479248,218125
218126,0.0,0.0,0.0,0.0,471.288635,8.191406,8.187500,34.130852,7.062500,7.593750,...,80.0,5544.541992,90.642616,105.883980,164.110428,-0.000112,-0.002737,-0.009618,594.885376,218126
218127,0.0,0.0,0.0,0.0,592.725220,10.917969,10.007812,34.130852,9.566406,10.851562,...,80.0,5544.541992,81.735985,76.363876,163.973068,0.002792,0.004579,-0.018666,1050.720581,218127
218128,0.0,0.0,0.0,0.0,543.955444,9.101562,9.101562,34.130852,7.609375,9.021484,...,80.0,5544.541992,154.623962,90.649757,115.374573,-0.004468,-0.000056,-0.002119,792.800781,218128


In [35]:
original_df["Track ID"] = original_df.apply(
    func=lambda x: database[x["Object_ID"].item()], axis=1
)

In [37]:
original_df

Unnamed: 0,Acceleration,Acceleration X,Acceleration Y,Acceleration Z,Area,BoundingBoxAA Length X,BoundingBoxAA Length Y,BoundingBoxAA Length Z,BoundingBoxOO Length A,BoundingBoxOO Length B,...,Time Since Track Start,Velocity Angle X,Velocity Angle Y,Velocity Angle Z,Velocity X,Velocity Y,Velocity Z,Volume,Object_ID,Track ID
0,0.0,0.0,0.0,0.0,355.246979,9.097656,8.191406,22.753899,6.984375,9.031250,...,0.000000,101.478523,11.475876,90.002655,-0.001512,0.007449,0.000000,469.004700,0,1000000000
2,0.0,0.0,0.0,0.0,1192.548340,20.929688,16.378906,45.507805,12.687500,19.205078,...,0.000000,80.397675,124.701721,36.385479,0.000952,-0.003248,0.004594,1495.459473,2,1000000000
3,0.0,0.0,0.0,0.0,225.840988,8.187500,8.191406,22.753902,6.119141,7.179688,...,0.000000,97.669685,172.338272,90.002655,-0.000784,-0.005824,0.000000,231.663483,3,1000000000
4,0.0,0.0,0.0,0.0,820.118103,13.652344,11.828125,45.507805,10.781250,11.408203,...,0.000000,28.869730,72.922729,112.535538,0.073142,0.024530,-0.032006,1502.495117,4,1000000000
6,0.0,0.0,0.0,0.0,191.851913,6.371094,9.101562,22.753899,5.906250,8.093750,...,0.000000,41.890392,48.112255,90.002655,0.027162,0.024362,0.000000,151.075226,6,1000000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
218125,0.0,0.0,0.0,0.0,731.304199,12.742188,10.011719,45.507805,8.015625,11.960938,...,5544.541992,74.092293,111.013573,153.175903,0.018318,-0.023959,-0.059631,1279.479248,218125,1000215663
218126,0.0,0.0,0.0,0.0,471.288635,8.191406,8.187500,34.130852,7.062500,7.593750,...,5544.541992,90.642616,105.883980,164.110428,-0.000112,-0.002737,-0.009618,594.885376,218126,1000215663
218127,0.0,0.0,0.0,0.0,592.725220,10.917969,10.007812,34.130852,9.566406,10.851562,...,5544.541992,81.735985,76.363876,163.973068,0.002792,0.004579,-0.018666,1050.720581,218127,1000215677
218128,0.0,0.0,0.0,0.0,543.955444,9.101562,9.101562,34.130852,7.609375,9.021484,...,5544.541992,154.623962,90.649757,115.374573,-0.004468,-0.000056,-0.002119,792.800781,218128,1000215677


Get Track and Object ID Pairs

In [5]:
ims_data = parser.ims
print(parser.ims.contains_tracks("MegaSurfaces0"))

True


In [6]:
np.array(ims_data.data["Scene8"]["Content"]["MegaSurfaces0"])

array(['BlockData', 'BlockInfo', 'BlockPath', 'Category',
       'CategoryFunction', 'CreationParameters', 'Factor',
       'FactorFunction', 'FactorList', 'FactorListFunction', 'LabelColor',
       'LabelColorData', 'LabelColorLabelGroupNames',
       'LabelColorLabelValues', 'LabelGroupNames', 'LabelSetLabelIDs',
       'LabelSetObjectIDs', 'LabelSets', 'LabelValues', 'LevelInfo',
       'MainTrackSegmentTable', 'MainTrackSegmentTable_Focus',
       'MainTrackTable', 'SplitOffset', 'StatisticsType',
       'StatisticsTypeFunction', 'StatisticsValue',
       'StatisticsValueFunction', 'StatisticsValueTimeOffset',
       'StatisticsValueTimeOffsetFunction', 'SurfaceModel',
       'SurfaceModelInfo', 'SurfaceTimeOffset', 'Time', 'TimeBegin',
       'Track0', 'TrackEdge0', 'TrackObject0', 'TrackSegment0',
       'TrackSegment0_Focus'], dtype='<U33')

In [7]:
pd.DataFrame(
    np.asarray(ims_data.data["Scene8"]["Content"]["MegaSurfaces0"]["SurfaceModel"])
)

Unnamed: 0,ID_Time,ID,NumberOfLevels,SizeX,SizeY,SizeZ,ExtendMinX,ExtendMinY,ExtendMinZ,ExtendMaxX,ExtendMaxY,ExtendMaxZ,DataCountPerChildNode,Threshold,LevelInfoBegin,LevelInfoEnd,BlockInfoBegin,BlockInfoEnd
0,0,0,4,10,9,2,61265.500000,44933.863281,51.196281,61274.597656,44942.054688,73.950180,8,0.500061,0,4,0,4
1,0,2,5,23,18,4,61260.949219,44903.835938,-5.688475,61281.878906,44920.214844,39.819328,8,0.500061,4,9,4,9
2,0,3,4,9,9,2,61225.460938,44935.683594,-5.688475,61233.648438,44943.875000,17.065428,8,0.500061,9,13,9,13
3,0,4,4,15,13,4,61269.136719,44859.246094,5.688477,61282.789062,44871.074219,51.196281,8,0.500061,13,17,13,17
4,0,6,4,7,10,2,61300.078125,44794.636719,51.196281,61306.449219,44803.738281,73.950180,8,0.500061,17,21,17,21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100564,79,218125,4,14,11,4,60533.871094,44118.519531,5.688477,60546.613281,44128.531250,51.196281,8,0.500061,431868,431872,431868,431872
100565,79,218126,4,9,9,3,60499.292969,44148.550781,28.442377,60507.484375,44156.738281,62.573227,8,0.500061,431872,431876,431872,431876
100566,79,218127,4,12,11,3,60540.242188,44103.960938,-5.688475,60551.160156,44113.968750,28.442377,8,0.500061,431876,431880,431876,431880
100567,79,218128,4,10,10,3,60532.960938,44089.398438,28.442377,60542.062500,44098.500000,62.573227,8,0.500061,431880,431884,431880,431884


In [8]:
pd.DataFrame(np.asarray(ims_data.data["Scene8"]["Content"]["MegaSurfaces0"]["Track0"]))

Unnamed: 0,ID,IndexTrackObjectBegin,IndexTrackObjectEnd,IndexTrackEdgeBegin,IndexTrackEdgeEnd
0,1000000000,0,80,0,79
1,1000000002,80,160,79,158
2,1000000003,160,239,158,236
3,1000000004,239,256,236,252
4,1000000006,256,292,252,287
...,...,...,...,...,...
4027,1000215541,100555,100557,96528,96529
4028,1000215563,100557,100560,96529,96531
4029,1000215594,100560,100563,96531,96533
4030,1000215663,100563,100566,96533,96535


In [9]:
pd.DataFrame(
    np.asarray(ims_data.data["Scene8"]["Content"]["MegaSurfaces0"]["SurfaceModel"])
).iloc[79]

ID_Time                      0.000000
ID                         106.000000
NumberOfLevels               4.000000
SizeX                        8.000000
SizeY                       11.000000
SizeZ                        2.000000
ExtendMinX               60913.335938
ExtendMinY               44942.054688
ExtendMinZ                  17.065428
ExtendMaxX               60920.613281
ExtendMaxY               44952.062500
ExtendMaxZ                  39.819328
DataCountPerChildNode        8.000000
Threshold                    0.500061
LevelInfoBegin             324.000000
LevelInfoEnd               328.000000
BlockInfoBegin             324.000000
BlockInfoEnd               328.000000
Name: 79, dtype: float64

In [10]:
id_info = pd.DataFrame(
    np.asarray(ims_data.data["Scene8"]["Content"]["MegaSurfaces0"]["SurfaceModel"])
)

Problem: we have a bunch of object ids, we need to map it to the track it belongs to 
Need to be fast

In [None]:
map_data = pd.DataFrame(
    np.asarray(ims_data.data["Scene8"]["Content"]["MegaSurfaces0"]["Track0"])
)
map_data.head(3)

In [None]:
storage = {}
for idx in range(len(map_data)):
    data = map_data.iloc[idx]
    start = data["IndexTrackObjectBegin"]
    end = data["IndexTrackObjectEnd"]
    track_id = data["ID"]
    for i in range(start, end):
        obj_id = id_info.iloc[i]["ID"]
        storage[obj_id] = track_id

In [None]:
def create_track_id_database(
    track_data: pd.DataFrame, object_data: pd.DataFrame
) -> Dict:
    """Creates a dictionary that maps object id to track id

    Args:
        track_data (pd.DataFrame): _description_
        object_data (pd.DataFrame): _description_

    Returns:
        Dict: _description_
    """
    storage = {}
    for idx in range(len(track_data)):
        data = map_data.iloc[idx]
        start = data["IndexTrackObjectBegin"]
        end = data["IndexTrackObjectEnd"]
        track_id = data["ID"]
        for i in range(start, end):
            obj_id = object_data.iloc[i]["ID"]
            storage[obj_id] = track_id

    return storage

In [None]:
# create a func that takes in a object id and maps it to track id
def obj_to_track_id(object_id: int) -> int:
    """Returns the track id an object belongs to

    Args:
        object_id (int): _description_

    Returns:
        int: _description_
    """

Stats Names

In [None]:
(np.sort(np.array(df["stat_values_raw"]["ID_Object"].unique())) < 10000000).sum()

Factor

In [None]:
def get_object_ids(data, object_name: str) -> pd.Series:
    """_summary_

    Args:
        object_name (str): _description_

    Returns:
        pd.Series: _description_
    """
    try:
        object_ids = pd.DataFrame(
            np.asarray(data.get("Scene8").get("Content")[object_name]["SurfaceModel"])
        )
        return object_ids["ID"]
    except:
        raise ValueError("No Objects Present")

In [None]:
np.array(parser.ims.data["Scene8"]["Content"]["MegaSurfaces0"])

In [None]:
pd.DataFrame(
    np.array(parser.ims.data["Scene8"]["Content"]["MegaSurfaces0"]["SurfaceModel"])
)

In [None]:
pd.DataFrame(
    np.array(parser.ims.data["Scene8"]["Content"]["MegaSurfaces0"]["SurfaceModel"])
)["ID"].unique().shape