In [1]:
import utils
import numpy as np
import glob
import os
import pandas as pd
import ray

Get Available Categories

In [2]:
def generate_available_categories(config_path: str):
    """Run the category extractor on one ``.ims`` file from the configured data.

    The configuration is read with ``utils.load_yaml`` and must provide the
    keys ``parser_type``, ``data_dir`` (an iterable of directories) and
    ``valid_surface``. Only a single file is processed: the first ``*.ims``
    match in the first directory that contains one.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        None. The extractor returned by ``utils.get_category_function`` is
        called for its side effect (per the original comment it saves an
        "available categories" CSV -- the helper is not visible here).
    """
    # Load the configuration from the path the caller passed in. (The original
    # passed the not-yet-assigned ``parser_type`` here, which raised
    # UnboundLocalError on every call.)
    config = utils.load_yaml(config_path)

    # type of parser, and the matching category-extraction function
    parser_type = config["parser_type"]
    extract_categories = utils.get_category_function(parser_type)

    # directories to scan
    directories = config["data_dir"]

    # The configured surface number is shifted down by one before being handed
    # to the parser (presumably 1-based in the config, 0-based in the parser).
    valid_surface = int(config["valid_surface"]) - 1

    for directory in directories:
        # grab all the files in the directory w/ .ims
        filenames = glob.glob(os.path.join(directory, "*.ims"))

        if filenames:
            # One file is enough to list the categories; stop after it.
            extract_categories(filenames[0], valid_surface)
            break
    else:
        # No directory held an .ims file, so nothing was generated.
        print("[warning] No .ims files found in the configured data directories")
        return

    print("[info] Please Edit The Statistics File")

In [3]:
# RUN GENERATE CATEGORIES
# generate_available_categories("config.yaml")

Generate Statistics 

In [4]:
def generate_statistics(config_path: str):
    # load config path
    yaml = utils.load_yaml(config_path)

    ray.init()

    # type of parser
    parser_type = yaml["parser_type"]

    if parser_type == "track":
        from track_parser import extract_and_save

    elif parser_type == "surface":
        from surface_parser import extract_and_save

    elif parser_type == "first":
        from first_surface_parser import extract_and_save

    else:
        raise ValueError("Invalid Parser Type")

    # files to scan
    directories = yaml["data_dir"]

    # get the stats categories
    stats_categories = utils.read_txt(yaml["stats_category_path"])

    # save dir
    save_dir = yaml["save_dir"][0]

    # valid surface
    valid_surface = int(yaml["valid_surface"])

    # folder name to save data to
    folder_name = yaml["valid_surface_dict"][str(valid_surface)]
    valid_surface -= valid_surface

    # create folder
    folder_path = os.path.join(save_dir, folder_name)
    if not os.path.isdir(folder_path):
        os.makedirs(folder_path)

    # create a list to hold ray subprocess
    processes = []

    for directory in directories:
        # grab all the files in the directory w/ .ims
        filenames = list(glob.glob(os.path.join(directory, "*.ims")))

        for filename in filenames:
            # file path
            file_path = filename  # os.path.join(directory, filename)

            # save_file_path
            save_path = utils.get_save_filepath(parser_type, folder_path, filename)

            # extract and save
            processes.append(
                extract_and_save.remote(
                    file_path, valid_surface, stats_categories, save_path
                )
            )
            print(save_path)

    print("info -- running subprocesses:")

    ray.get(processes)

In [5]:
# Kick off the statistics run using the settings in config.yaml
generate_statistics("config.yaml")

2023-08-15 11:47:59,888	INFO worker.py:1636 -- Started a local Ray instance.


surface_track_parser/temp/Blue/ALN Sec2 Roi1 2x2 1h20min_first.csv
info -- running subprocesses:
[2m[36m(extract_and_save pid=114357)[0m [info] working on file ../../data/surface_track_parser_dev_data/ALN Sec2 Roi1 2x2 1h20min.ims


In [6]:
# Working

[2m[36m(extract_and_save pid=114357)[0m [info] finished! 
[2m[36m(extract_and_save pid=114357)[0m 


[2m[33m(raylet)[0m [2023-08-15 11:48:09,789 E 114277 114290] (raylet) file_system_monitor.cc:111: /tmp/ray/session_2023-08-15_11-47-58_296637_114092 is over 95% full, available space: 51585593344; capacity: 1958874054656. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-08-15 11:48:19,801 E 114277 114290] (raylet) file_system_monitor.cc:111: /tmp/ray/session_2023-08-15_11-47-58_296637_114092 is over 95% full, available space: 51585576960; capacity: 1958874054656. Object creation will fail if spilling is required.
