In [1]:
import utils
import numpy as np
import glob
import os
import pandas as pd
from cell_parser import extract_and_save
import ray
import time

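Get Available Categories

List the statistic names available for the configured surface and save them to `stats_categories.txt`, so the file can be edited before statistics are generated.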

In [2]:
def available_categories(
    ims_file_path: str,
    valid_surface: int,
    output_path: str = "stats_categories.txt",
):
    """Write the statistic names of one surface to a text file and return them.

    Args:
        ims_file_path: Path of the Imaris (.ims) file to load.
        valid_surface: Index used to select one entry from the object names
            returned by ``utils.get_object_names(..., search_for="Cell")``.
        output_path: Text file to write, one category name per line. Defaults
            to ``"stats_categories.txt"`` (resolved against the current working
            directory), which is the location that used to be hard-coded.

    Returns:
        list: The category names, in the same order they were written.
    """
    # load the imaris file
    data = utils.load_ims(ims_file_path)

    # pick the surface we want to parse
    surface_names = utils.get_object_names(full_data_file=data, search_for="Cell")
    surface_name = surface_names[valid_surface]

    # get all the statistics names for that surface
    surface_stats_names = utils.get_statistics_names(
        full_data_file=data, object_name=surface_name
    )

    # invert dictionary + name modifications (cosmetic step)
    inverted_stats_names = utils.flatten(utils.invert_stats_dict(surface_stats_names))

    # build the list once so the saved file and the return value cannot diverge
    categories = list(inverted_stats_names.keys())
    np.savetxt(output_path, categories, fmt="%s")

    return categories

In [3]:
def generate_available_categories(config_path: str):
    """Generate the statistics-category file from the first .ims file found.

    Reads the YAML config at ``config_path``, looks through the directories
    listed under ``data_dir`` in order, and calls ``available_categories`` on
    the first ``*.ims`` file (alphabetically) of the first directory that
    contains one.

    Args:
        config_path: Path of the YAML config file. The keys read here are
            ``data_dir`` (a directory or a list of directories) and
            ``valid_surface``.

    Returns:
        None. Prints a status line describing the outcome.
    """
    # honour the caller-supplied path instead of a hard-coded "config.yaml"
    config = utils.load_yaml(config_path)

    # directories to scan; accept a single directory given as a plain string,
    # which would otherwise be iterated character by character
    directories = config["data_dir"]
    if isinstance(directories, str):
        directories = [directories]

    # the config value is reduced by one before being used as an index
    valid_surface = int(config["valid_surface"]) - 1

    for directory in directories:
        # sorted so the chosen file does not depend on filesystem ordering
        ims_files = sorted(glob.glob(os.path.join(directory, "*.ims")))
        if ims_files:
            # one representative file is enough to list the categories
            available_categories(ims_files[0], valid_surface)
            break
    else:
        # nothing was written, so do not ask the user to edit a missing file
        print("[warning] No .ims files found; statistics file was not generated")
        return

    print("[info] Please Edit The Statistics File")

In [4]:
# Build the statistics-category list from the project config.
generate_available_categories(config_path="config.yaml")

[info] Please Edit The Statistics File


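Generate Statistics

Export the statistics listed in the edited categories file for every `.ims` file in the configured data directories, using one CSV output path per input file.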

In [5]:
def generate_statistics(config_path: str):
    """Submit one ``extract_and_save`` Ray task per .ims file and wait for all.

    Args:
        config_path: Path of the YAML config file. The keys read here are
            ``data_dir`` (a directory or a list of directories),
            ``stats_category_path`` and ``valid_surface``.

    Returns:
        None. The function blocks until ``ray.get`` returns for every
        submitted task.
    """
    # honour the caller-supplied path instead of a hard-coded "config.yaml"
    config = utils.load_yaml(config_path)

    # directories to scan; accept a single directory given as a plain string,
    # which would otherwise be iterated character by character
    directories = config["data_dir"]
    if isinstance(directories, str):
        directories = [directories]

    # get the stats categories
    stats_categories = utils.read_txt(config["stats_category_path"])

    # the config value is reduced by one before being passed on as an index;
    # the unmodified config value is what appears in the output file name
    valid_surface = int(config["valid_surface"]) - 1
    csv_suffix = "_cell_" + str(config["valid_surface"]) + ".csv"

    # handles returned by the remote calls, resolved together at the end
    pending = []

    for directory in directories:
        # every .ims file directly inside the directory
        for file_path in glob.glob(os.path.join(directory, "*.ims")):
            # output sits next to the input: <input stem>_cell_<surface>.csv
            save_path = os.path.splitext(file_path)[0] + csv_suffix

            pending.append(
                extract_and_save.remote(
                    file_path, valid_surface, stats_categories, save_path
                )
            )

    print("info -- running subprocesses:")

    ray.get(pending)

In [6]:
# Run the statistics export and report its wall-clock duration.
start = time.perf_counter()
generate_statistics(config_path="config.yaml")
stop = time.perf_counter()

elapsed = stop - start
print("total run time: ", elapsed)

info -- running subprocesses:
[2m[36m(extract_and_save pid=2405432)[0m [info] working on file ../data/cell.ims
[2m[36m(extract_and_save pid=2405462)[0m [info] working on file ../data/new_cell.ims
[2m[36m(extract_and_save pid=2405462)[0m [info] finished! 
[2m[36m(extract_and_save pid=2405462)[0m 
[2m[36m(extract_and_save pid=2405432)[0m [info] finished! 
[2m[36m(extract_and_save pid=2405432)[0m 
total run time:  16.47063789423555


In [7]:
# Working