In [None]:
import utils
import numpy as np
import glob
import os
import pandas as pd
import ray

# Turn off HDF5 file locking for this kernel process. It is set in the first
# cell so it is already in place when the cells below open files.
# NOTE(review): `import utils` above runs before this line; if that module
# opens HDF5 files at import time the setting may come too late — confirm.
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"  # disables read filelock


Get Available Categories

In [None]:
def generate_available_categories(config_path: str):
    """Run the category extractor on one ``.ims`` file from the configured data dirs.

    The config is loaded from ``config_path``; the extractor returned by
    ``utils.get_category_function`` for the configured ``parser_type`` is then
    called once, on the first ``.ims`` file found (directories are searched in
    the order listed, files in sorted order). Only one file is processed.

    Args:
        config_path: Path of the YAML configuration file. The loaded config
            must provide ``parser_type``, ``data_dir`` (iterable of directory
            paths) and ``valid_surface`` (1-based surface number, or a list
            of them, in which case the first entry is used).
    """
    # load config path
    # (the path argument must be used here: ``parser_type`` is only known
    # after the config has been loaded)
    config = utils.load_yaml(config_path)

    # type of parser
    parser_type = config["parser_type"]

    # extract category func
    extract_categories = utils.get_category_function(parser_type)

    # files to scan
    directories = config["data_dir"]

    # valid surface: the config value is 1-based, the extractor gets a 0-based
    # index. generate_statistics iterates over this same key, so a list is
    # accepted too and its first entry is used.
    surface_setting = config["valid_surface"]
    if isinstance(surface_setting, (list, tuple)):
        surface_setting = surface_setting[0]
    valid_surface = int(surface_setting) - 1

    for directory in directories:
        # grab all the files in the directory w/ .ims (sorted so the file
        # that gets picked does not depend on filesystem ordering)
        filenames = sorted(glob.glob(os.path.join(directory, "*.ims")))

        if not filenames:
            # keep looking instead of giving up on the first empty directory
            continue

        # get and save the available categories csv file
        extract_categories(filenames[0], valid_surface)

        print("[info] Please Edit The Statistics File")
        return

    print("[warn] No .ims files found in data_dir; nothing was generated")

In [None]:
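# RUN GENERATE CATEGORIES
# Uncomment the call below to generate the available-categories file, then
# edit the statistics file as prompted by the function's final message.
# generate_available_categories("config.yaml")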

Generate Statistics 

In [None]:
def generate_statistics(config_path: str):
    """Extract statistics from every configured ``.ims`` file and save them via Ray.

    For each directory in ``data_dir`` and each ``.ims`` file inside it, the
    file is loaded once and ``extract_data`` is called for every configured
    surface. Each non-empty result is handed to ``process_and_save.remote``;
    the function blocks on ``ray.get`` until all launched tasks finish.

    Args:
        config_path: Path of the YAML configuration file. The loaded config
            must provide ``parser_type`` ("track", "surface" or "first"),
            ``data_dir`` and ``save_dir`` (parallel sequences: ``save_dir[i]``
            is the output root for ``data_dir[i]``), ``stats_category_path``
            and ``valid_surface`` (iterable of 1-based surface numbers).

    Raises:
        ValueError: If ``parser_type`` is not one of the supported values.
    """
    # load config path
    config = utils.load_yaml(config_path)

    # ray.init() raises if Ray is already running in this process, which is
    # exactly what happens when this cell is re-run in a live kernel.
    if not ray.is_initialized():
        ray.init()

    # type of parser
    parser_type = config["parser_type"]

    # the parser modules are imported lazily so only the selected one is loaded
    if parser_type == "track":
        from track_parser import extract_data, process_and_save

    elif parser_type == "surface":
        from surface_parser import extract_data, process_and_save

    elif parser_type == "first":
        from first_surface_parser import extract_data, process_and_save

    else:
        raise ValueError("Invalid Parser Type")

    # files to scan
    directories = config["data_dir"]

    # get the stats categories
    stats_categories = utils.read_txt(config["stats_category_path"])

    # valid surface
    valid_surfaces = config["valid_surface"]

    # create a list to hold ray subprocess
    processes = []

    for idx, directory in enumerate(directories):
        # save dir (paired with the data directory by position)
        save_dir = config["save_dir"][idx]

        # grab all the files in the directory w/ .ims
        filenames = sorted(glob.glob(os.path.join(directory, "*.ims")))

        for filename in filenames:
            # load the imaris file
            # NOTE(review): the returned handle is never closed here; confirm
            # whether utils.load_ims returns something that needs closing.
            data = utils.load_ims(filename)

            for surface in valid_surfaces:
                # surfaces are 1-based in the config and in file names,
                # 0-based when passed to the parser
                surface_number = int(surface)
                current_surface = surface_number - 1

                # create folder (one per surface, shared by all files)
                folder_path = os.path.join(save_dir, str(surface))
                os.makedirs(folder_path, exist_ok=True)

                # save_file_path
                save_path = utils.get_save_filepath(
                    parser_type, folder_path, filename, surface_number
                )

                # process data
                data_dict = extract_data(filename, data, current_surface, save_path)

                if not data_dict:
                    # nothing was extracted for this surface in this file;
                    # the (shared) surface folder is left in place
                    continue

                processes.append(
                    process_and_save.remote(
                        ims_file_path=filename,
                        surface_stats_values=data_dict["surface_stats_values"],
                        surface_stats_names=data_dict["surface_stats_names"],
                        categories_list=stats_categories,
                        save_path=save_path,
                    )
                )

    print("\ninfo -- running subprocesses:")
    print(f"info -- num processes launched {len(processes)}\n")

    # block until every launched task has finished
    ray.get(processes)

In [None]:
# RUN GENERATE STATISTICS
# Driver cell: runs the full pipeline with the settings in "config.yaml" and
# blocks until every launched Ray task has finished.
generate_statistics(config_path="config.yaml")

In [None]:
# Working