In [1]:
import glob
import re
import numpy as np
import polars as pl
from collections import defaultdict
import os

filename = "random"
inPath = "../blossom_cpp/outputs/"
outputPath = "./parquet_files/"

dtype_agent = np.dtype([
    ('id', 'u4'), #unused
    ('biomass', 'f4'), # unused
    ('tick', 'u2'), 
    ('x', 'u2'),
    ('y', 'u2'),
    ('type', 'u1'),
    ('age', 'u1') #unused
])

dtype_agent_small = np.dtype([
    ('tick', 'u2'), 
    ('x', 'u2'),
    ('y', 'u2'),
    ('type', 'u1'),
])

dtype_som = np.dtype([
    ("som_value", "f4"),
    ("tick", "u2"),
    ("x", "u2"),
    ("y", "u2"),
])

# Code for local runs

In [3]:
for sub in ["agent/", "som/"]:
    files = glob.glob(inPath + sub + filename + "_*.bin")

    grouped_files = defaultdict(list)

    for file in files:
        match = re.match(filename + r"_(\d+)_(\d+)\.bin", os.path.basename(file))
        if match:
            setup_id, rotation_id = match.groups()
            grouped_files[int(setup_id)].append((int(rotation_id), file))

    for setup_id, file_list in grouped_files.items():
        print(f"Processing setup {setup_id} with {len(file_list)} rotated files...")

        file_list.sort()

        dfs = []
        for _, path in file_list:
            data = np.fromfile(
                path, dtype=dtype_agent_small if sub == "agent/" else dtype_som
            )
            df = pl.DataFrame(data)
            dfs.append(df)

        full_df = pl.concat(dfs)

        output_filename = (
            f"{filename}_{setup_id}{'_SOM' if sub == 'som/' else ''}.parquet"
        )
        full_df.write_parquet(outputPath + output_filename, compression="zstd")

Processing setup 0 with 1 rotated files...
Processing setup 1 with 2 rotated files...
Processing setup 0 with 2 rotated files...


# Code for HPC runs

In [5]:
inPath = "../../../outputs/"
for sub in ["agent/", "som/"]:
    # Updated glob pattern to match files like 0_random_0_0.bin
    files = glob.glob(inPath + sub + "*_" + filename + "_*_*.bin")
    print(files)

    grouped_files = defaultdict(list)

    for file in files:
        base = os.path.basename(file)
        match = re.match(r"(\d+)_" + filename + r"_(\d+)_(\d+)\.bin", base)
        if match:
            trial_id, _, rotation_id = match.groups()
            grouped_files[int(trial_id)].append((int(rotation_id), file))

    for trial_id, file_list in grouped_files.items():
        print(f"Processing setup {trial_id} with {len(file_list)} rotated files...")

        file_list.sort()

        dfs = []
        for _, path in file_list:
            data = np.fromfile(
                path, dtype=dtype_agent if sub == "agent/" else dtype_som
            )
            df = pl.DataFrame(data)
            dfs.append(df)

        full_df = pl.concat(dfs)

        output_filename = (
            f"{filename}_{trial_id}{'_SOM' if sub == 'som/' else ''}.parquet"
        )
        full_df.write_parquet(outputPath + output_filename, compression="zstd")

['../../../outputs/agent/7_random_0_0.bin', '../../../outputs/agent/22_random_0_0.bin', '../../../outputs/agent/37_random_0_0.bin', '../../../outputs/agent/46_random_0_0.bin', '../../../outputs/agent/30_random_0_0.bin', '../../../outputs/agent/25_random_0_0.bin', '../../../outputs/agent/0_random_0_0.bin', '../../../outputs/agent/41_random_0_0.bin', '../../../outputs/agent/8_random_0_0.bin', '../../../outputs/agent/38_random_0_0.bin', '../../../outputs/agent/39_random_0_0.bin', '../../../outputs/agent/9_random_0_0.bin', '../../../outputs/agent/1_random_0_0.bin', '../../../outputs/agent/24_random_0_0.bin', '../../../outputs/agent/31_random_0_0.bin', '../../../outputs/agent/40_random_0_0.bin', '../../../outputs/agent/36_random_0_0.bin', '../../../outputs/agent/23_random_0_0.bin', '../../../outputs/agent/6_random_0_0.bin', '../../../outputs/agent/29_random_0_0.bin', '../../../outputs/agent/45_random_0_0.bin', '../../../outputs/agent/4_random_0_0.bin', '../../../outputs/agent/34_random_0_0.