In [1]:
import glob
import re
import numpy as np
import polars as pl
from collections import defaultdict
import os

filename = "random"
inPath = "../blossom_cpp/outputs/"
outputPath = "./parquet_files/"

dtype_agent = np.dtype(
    [
        ("tick", "i4"),
        ("id", "i4"),
        ("type", "i4"),
        ("x", "i4"),
        ("y", "i4"),
        ("age", "i4"),
        ("biomass", "f4"),
    ]
)

dtype_som = np.dtype(
    [
        ("tick", "i4"),
        ("x", "i4"),
        ("y", "i4"),
        ("som_value", "f4"),
    ]
)

for sub in ["agent/", "som/"]:
    files = glob.glob(inPath + sub + filename + "_*.bin")

    grouped_files = defaultdict(list)

    for file in files:
        match = re.match(filename + r"_(\d+)_(\d+)\.bin", os.path.basename(file))
        if match:
            setup_id, rotation_id = match.groups()
            grouped_files[int(setup_id)].append((int(rotation_id), file))

    for setup_id, file_list in grouped_files.items():
        print(f"Processing setup {setup_id} with {len(file_list)} rotated files...")

        file_list.sort()

        dfs = []
        for _, path in file_list:
            data = np.fromfile(
                path, dtype=dtype_agent if sub == "agent/" else dtype_som
            )
            df = pl.DataFrame(data)
            dfs.append(df)

        full_df = pl.concat(dfs)

        output_filename = (
            f"{filename}_{setup_id}{'_SOM' if sub == 'som/' else ''}.parquet"
        )
        full_df.write_parquet(outputPath + output_filename, compression="zstd")

Processing setup 1 with 1 rotated files...
Processing setup 3 with 1 rotated files...
Processing setup 4 with 1 rotated files...
Processing setup 2 with 1 rotated files...
Processing setup 0 with 1 rotated files...
Processing setup 1 with 1 rotated files...
Processing setup 3 with 1 rotated files...
Processing setup 4 with 1 rotated files...
Processing setup 2 with 1 rotated files...
Processing setup 0 with 1 rotated files...
