In [40]:
import os
from pathlib import Path
import pandas as pd
import numpy as np
from tqdm.auto import tqdm

In [41]:
def extract_exp_data_from_name(name: str):
    """Parse ``key=value`` pairs, joined by ``+``, out of a file name.

    Everything after the last ``.`` is dropped first (the file extension).
    If the name has no extension but contains a dot, the text after that
    last dot is dropped as well. Each ``+``-separated segment is split on
    its first ``=``; a segment without ``=`` raises ``ValueError``.

    Args:
        name: File name such as ``"lr=0.001+wd=0.1.csv"``.

    Returns:
        Dict mapping each key to its value; both are strings. When a key
        occurs more than once, the last occurrence wins.
    """
    stem, dot, _suffix = name.rpartition(".")
    if not dot:
        # No dot anywhere: the whole name is the stem.
        stem = name

    fields = {}
    for segment in stem.split("+"):
        key, value = segment.split("=", 1)
        fields[key] = value
    return fields


In [42]:
def eval_ctx(path: str, seq_len: int, avg_over: int):
    """Summarise a CSV of per-column values by its leading and trailing windows.

    The file is read without a header and its first row is skipped; the
    remaining cells are converted to floats. Only the first ``seq_len``
    columns are used.

    Args:
        path: CSV file to read.
        seq_len: Maximum number of leading columns to use; extra columns
            are ignored.
        avg_over: Width, in columns, of the leading and trailing windows.

    Returns:
        Tuple ``(first, last, total, diff)`` of Python floats:
        ``first`` is the mean over the first ``avg_over`` used columns,
        ``last`` the mean over the final ``avg_over`` used columns,
        ``total`` the mean over all used columns, and
        ``diff`` is ``first - last``.
    """
    arr = pd.read_csv(path, header=None, skiprows=1).to_numpy(dtype=float)
    print(f"shape: {arr.shape}")

    # use first seq_len columns (in case csv has extra)
    x = arr[:, :seq_len]

    # Anchor the trailing window on the number of columns actually present.
    # Slicing with seq_len-avg_over:seq_len silently shortens the window
    # (or empties it) whenever the file has fewer than seq_len columns.
    n_cols = x.shape[1]
    tail_start = max(n_cols - avg_over, 0)

    first = float(np.mean(x[:, :avg_over]))
    last = float(np.mean(x[:, tail_start:n_cols]))
    total = float(np.mean(x))
    diff = float(first - last)

    return first, last, total, diff


In [43]:
# Directory that is globbed for the per-run CSV files.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
grid_folder = Path("/home/janek/Downloads/lr_grid/csvs")

# seq_len:  number of leading CSV columns passed to eval_ctx.
# avg_over: width (in columns) of the leading/trailing averaging windows.
seq_len, avg_over = 2048, 150

In [44]:
# Summary CSV, written relative to the current working directory.
out_path = "parsed_names.csv"

# Skip the summary file itself in case it ends up inside grid_folder.
# Both sides are resolved to absolute paths before comparing: a Path never
# compares equal to a plain str, so `p != out_path` would filter nothing
# and a re-run could try to parse the summary file as an experiment CSV.
out_file = Path(out_path).resolve()
files = sorted(p for p in grid_folder.glob("*.csv") if p.resolve() != out_file)

# One record per CSV: the key=value fields parsed from the file name plus
# the window statistics returned by eval_ctx.
rows = []
for p in tqdm(
    files,
    desc="Parsing/evaluating",
    unit="file",
    dynamic_ncols=True,
    smoothing=0.05,
):
    d = extract_exp_data_from_name(p.name)
    first, last, total, diff = eval_ctx(str(p), seq_len=seq_len, avg_over=avg_over)
    d.update({"first": first, "last": last, "total": total, "diff": diff,
              "file": p.name, "path": str(p)})
    rows.append(d)

# Build the frame once from the collected records, then persist it.
df = pd.DataFrame(rows)
df.to_csv(out_path, index=False)


Parsing/evaluating:   0%|          | 0/20 [00:00<?, ?file/s]

shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
shape: (8192, 2047)
