In [1]:
import argparse
from pathlib import Path
import pandas as pd
from tqdm import tqdm
from transformers import AutoTokenizer

In [2]:
ROOT = Path("data/inference_outputs")

In [3]:
arc_challenge = pd.read_csv("data/inference_outputs/arc_challenge/inference_data_batch_1_20250502_233812.csv")

In [4]:
arc_challenge.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 15 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   doc_id                     100 non-null    int64  
 1   input_text                 100 non-null    object 
 2   benchmark_name             100 non-null    object 
 3   label_small                100 non-null    float64
 4   acc_norm_small             100 non-null    float64
 5   energy_consumption_small   100 non-null    float64
 6   inference_time_small       100 non-null    float64
 7   label_medium               100 non-null    float64
 8   acc_norm_medium            100 non-null    float64
 9   energy_consumption_medium  100 non-null    float64
 10  inference_time_medium      100 non-null    float64
 11  label_large                100 non-null    float64
 12  acc_norm_large             100 non-null    float64
 13  energy_consumption_large   100 non-null    float64


In [5]:
TOKENIZER_NAME = "meta-llama/Llama-3.1-8B-Instruct"              
tok = AutoTokenizer.from_pretrained(TOKENIZER_NAME, use_fast=True)

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

In [6]:
def prompt_tokens_llama3(prompt: str) -> int:
    ids = tok.apply_chat_template(
        [{"role": "user", "content": prompt}],
        tokenize=True,
        add_generation_prompt=False
    )
    return len(ids)

In [7]:
for fp in ROOT.rglob("*.csv"):
    df = pd.read_csv(fp)

    if {"tokens_small", "tokens_medium", "tokens_large"}.issubset(df.columns):
        print(f"{fp.name}: already done")
        continue

    tqdm.pandas(desc=f"{fp.name}: prompt-tokenise")
    p_tok = df["input_text"].progress_apply(prompt_tokens_llama3)

    df["tokens_small"]  = p_tok
    df["tokens_medium"] = p_tok
    df["tokens_large"]  = p_tok

    df.to_csv(fp, index=False)
    print(f"{fp.name}: token columns written ✓")

inference_data_batch_10_20250503_013912.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 1261.03it/s]


inference_data_batch_10_20250503_013912.csv: token columns written ✓


inference_data_batch_11_20250503_015234.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3276.52it/s]


inference_data_batch_11_20250503_015234.csv: token columns written ✓


inference_data_batch_1_20250502_233812.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3420.57it/s]


inference_data_batch_1_20250502_233812.csv: token columns written ✓


inference_data_batch_2_20250502_235133.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3582.24it/s]


inference_data_batch_2_20250502_235133.csv: token columns written ✓


inference_data_batch_3_20250503_000455.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3556.88it/s]


inference_data_batch_3_20250503_000455.csv: token columns written ✓


inference_data_batch_4_20250503_001822.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3777.87it/s]


inference_data_batch_4_20250503_001822.csv: token columns written ✓


inference_data_batch_5_20250503_003149.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3308.15it/s]


inference_data_batch_5_20250503_003149.csv: token columns written ✓


inference_data_batch_6_20250503_004516.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 2254.25it/s]


inference_data_batch_6_20250503_004516.csv: token columns written ✓


inference_data_batch_7_20250503_005849.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3250.59it/s]


inference_data_batch_7_20250503_005849.csv: token columns written ✓


inference_data_batch_8_20250503_011213.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 2727.98it/s]


inference_data_batch_8_20250503_011213.csv: token columns written ✓


inference_data_batch_9_20250503_012544.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 2246.88it/s]


inference_data_batch_9_20250503_012544.csv: token columns written ✓


inference_data_final_20250503_020214.csv: prompt-tokenise: 100%|██████████████████████| 72/72 [00:00<00:00, 1031.00it/s]


inference_data_final_20250503_020214.csv: token columns written ✓


inference_data_batch_10_20250503_013814.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4149.74it/s]


inference_data_batch_10_20250503_013814.csv: token columns written ✓


inference_data_batch_10_20250503_120206.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3979.53it/s]


inference_data_batch_10_20250503_120206.csv: token columns written ✓


inference_data_batch_11_20250503_015132.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3496.94it/s]


inference_data_batch_11_20250503_015132.csv: token columns written ✓


inference_data_batch_11_20250503_121519.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2792.18it/s]


inference_data_batch_11_20250503_121519.csv: token columns written ✓


inference_data_batch_12_20250503_020449.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2923.86it/s]


inference_data_batch_12_20250503_020449.csv: token columns written ✓


inference_data_batch_12_20250503_122828.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3499.13it/s]


inference_data_batch_12_20250503_122828.csv: token columns written ✓


inference_data_batch_13_20250503_021804.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3232.90it/s]


inference_data_batch_13_20250503_021804.csv: token columns written ✓


inference_data_batch_13_20250503_124144.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3099.36it/s]


inference_data_batch_13_20250503_124144.csv: token columns written ✓


inference_data_batch_14_20250503_023121.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2852.34it/s]


inference_data_batch_14_20250503_023121.csv: token columns written ✓


inference_data_batch_14_20250503_125452.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3469.29it/s]


inference_data_batch_14_20250503_125452.csv: token columns written ✓


inference_data_batch_15_20250503_024439.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3967.75it/s]


inference_data_batch_15_20250503_024439.csv: token columns written ✓


inference_data_batch_15_20250503_130809.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3575.65it/s]


inference_data_batch_15_20250503_130809.csv: token columns written ✓


inference_data_batch_16_20250503_025759.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3259.51it/s]


inference_data_batch_16_20250503_025759.csv: token columns written ✓


inference_data_batch_16_20250503_132118.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3752.55it/s]


inference_data_batch_16_20250503_132118.csv: token columns written ✓


inference_data_batch_17_20250503_031115.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3019.53it/s]

inference_data_batch_17_20250503_031115.csv: token columns written ✓



inference_data_batch_17_20250503_133423.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3957.56it/s]


inference_data_batch_17_20250503_133423.csv: token columns written ✓


inference_data_batch_18_20250503_032432.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3559.59it/s]


inference_data_batch_18_20250503_032432.csv: token columns written ✓


inference_data_batch_18_20250503_134730.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3733.65it/s]


inference_data_batch_18_20250503_134730.csv: token columns written ✓


inference_data_batch_19_20250503_033747.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3675.09it/s]


inference_data_batch_19_20250503_033747.csv: token columns written ✓


inference_data_batch_19_20250503_140036.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3636.60it/s]


inference_data_batch_19_20250503_140036.csv: token columns written ✓


inference_data_batch_1_20250502_233834.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3178.76it/s]


inference_data_batch_1_20250502_233834.csv: token columns written ✓


inference_data_batch_1_20250503_100308.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 2368.39it/s]


inference_data_batch_1_20250503_100308.csv: token columns written ✓


inference_data_batch_20_20250503_035102.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3203.25it/s]


inference_data_batch_20_20250503_035102.csv: token columns written ✓


inference_data_batch_20_20250503_141343.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3629.20it/s]


inference_data_batch_20_20250503_141343.csv: token columns written ✓


inference_data_batch_21_20250503_142652.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3849.61it/s]


inference_data_batch_21_20250503_142652.csv: token columns written ✓


inference_data_batch_22_20250503_144002.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4150.31it/s]


inference_data_batch_22_20250503_144002.csv: token columns written ✓


inference_data_batch_23_20250503_145313.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4163.08it/s]


inference_data_batch_23_20250503_145313.csv: token columns written ✓


inference_data_batch_2_20250502_235151.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4171.61it/s]


inference_data_batch_2_20250502_235151.csv: token columns written ✓


inference_data_batch_2_20250503_101629.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3611.76it/s]


inference_data_batch_2_20250503_101629.csv: token columns written ✓


inference_data_batch_3_20250503_000512.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3983.01it/s]


inference_data_batch_3_20250503_000512.csv: token columns written ✓


inference_data_batch_3_20250503_102950.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3911.14it/s]


inference_data_batch_3_20250503_102950.csv: token columns written ✓


inference_data_batch_4_20250503_001829.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4424.14it/s]


inference_data_batch_4_20250503_001829.csv: token columns written ✓


inference_data_batch_4_20250503_104300.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4665.00it/s]


inference_data_batch_4_20250503_104300.csv: token columns written ✓


inference_data_batch_5_20250503_003148.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4532.42it/s]


inference_data_batch_5_20250503_003148.csv: token columns written ✓


inference_data_batch_5_20250503_105614.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3246.97it/s]


inference_data_batch_5_20250503_105614.csv: token columns written ✓


inference_data_batch_6_20250503_004507.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3393.94it/s]


inference_data_batch_6_20250503_004507.csv: token columns written ✓


inference_data_batch_6_20250503_110927.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3517.68it/s]


inference_data_batch_6_20250503_110927.csv: token columns written ✓


inference_data_batch_7_20250503_005825.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4478.75it/s]


inference_data_batch_7_20250503_005825.csv: token columns written ✓


inference_data_batch_7_20250503_112243.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 2764.76it/s]


inference_data_batch_7_20250503_112243.csv: token columns written ✓


inference_data_batch_8_20250503_011140.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3893.28it/s]


inference_data_batch_8_20250503_011140.csv: token columns written ✓


inference_data_batch_8_20250503_113553.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4856.09it/s]


inference_data_batch_8_20250503_113553.csv: token columns written ✓


inference_data_batch_9_20250503_012456.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3353.54it/s]


inference_data_batch_9_20250503_012456.csv: token columns written ✓


inference_data_batch_9_20250503_114900.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3284.22it/s]


inference_data_batch_9_20250503_114900.csv: token columns written ✓


inference_data_final_20250503_150308.csv: prompt-tokenise: 100%|██████████████████████| 76/76 [00:00<00:00, 3607.19it/s]


inference_data_final_20250503_150308.csv: token columns written ✓


inference_data_batch_10_20250502_235022.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3942.90it/s]


inference_data_batch_10_20250502_235022.csv: token columns written ✓


inference_data_batch_11_20250502_235447.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3067.16it/s]


inference_data_batch_11_20250502_235447.csv: token columns written ✓


inference_data_batch_12_20250502_235910.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3930.08it/s]


inference_data_batch_12_20250502_235910.csv: token columns written ✓


inference_data_batch_13_20250503_000334.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 5210.90it/s]

inference_data_batch_13_20250503_000334.csv: token columns written ✓



inference_data_batch_14_20250503_000758.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4728.48it/s]


inference_data_batch_14_20250503_000758.csv: token columns written ✓


inference_data_batch_15_20250503_001223.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 5308.51it/s]


inference_data_batch_15_20250503_001223.csv: token columns written ✓


inference_data_batch_16_20250503_001647.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2486.12it/s]


inference_data_batch_16_20250503_001647.csv: token columns written ✓


inference_data_batch_17_20250503_002111.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2700.78it/s]


inference_data_batch_17_20250503_002111.csv: token columns written ✓


inference_data_batch_18_20250503_002535.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4218.48it/s]

inference_data_batch_18_20250503_002535.csv: token columns written ✓



inference_data_batch_19_20250503_003000.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2956.92it/s]


inference_data_batch_19_20250503_003000.csv: token columns written ✓


inference_data_batch_1_20250502_231033.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4454.21it/s]


inference_data_batch_1_20250502_231033.csv: token columns written ✓


inference_data_batch_20_20250503_003424.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4142.69it/s]


inference_data_batch_20_20250503_003424.csv: token columns written ✓


inference_data_batch_21_20250503_003848.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4189.07it/s]


inference_data_batch_21_20250503_003848.csv: token columns written ✓


inference_data_batch_22_20250503_004312.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 5196.18it/s]


inference_data_batch_22_20250503_004312.csv: token columns written ✓


inference_data_batch_23_20250503_004738.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 5258.59it/s]


inference_data_batch_23_20250503_004738.csv: token columns written ✓


inference_data_batch_24_20250503_005202.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3976.40it/s]


inference_data_batch_24_20250503_005202.csv: token columns written ✓


inference_data_batch_25_20250503_005626.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4257.31it/s]


inference_data_batch_25_20250503_005626.csv: token columns written ✓


inference_data_batch_26_20250503_010050.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3973.50it/s]

inference_data_batch_26_20250503_010050.csv: token columns written ✓



inference_data_batch_27_20250503_010514.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4908.78it/s]


inference_data_batch_27_20250503_010514.csv: token columns written ✓


inference_data_batch_28_20250503_010938.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 5321.98it/s]


inference_data_batch_28_20250503_010938.csv: token columns written ✓


inference_data_batch_29_20250503_011402.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3455.91it/s]


inference_data_batch_29_20250503_011402.csv: token columns written ✓


inference_data_batch_2_20250502_231502.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4138.76it/s]


inference_data_batch_2_20250502_231502.csv: token columns written ✓


inference_data_batch_30_20250503_011827.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 5292.90it/s]


inference_data_batch_30_20250503_011827.csv: token columns written ✓


inference_data_batch_31_20250503_012250.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 5504.19it/s]


inference_data_batch_31_20250503_012250.csv: token columns written ✓


inference_data_batch_32_20250503_012716.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 5406.77it/s]


inference_data_batch_32_20250503_012716.csv: token columns written ✓


inference_data_batch_3_20250502_231931.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3473.95it/s]


inference_data_batch_3_20250502_231931.csv: token columns written ✓


inference_data_batch_4_20250502_232359.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4103.86it/s]


inference_data_batch_4_20250502_232359.csv: token columns written ✓


inference_data_batch_5_20250502_232823.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 2570.95it/s]


inference_data_batch_5_20250502_232823.csv: token columns written ✓


inference_data_batch_6_20250502_233247.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4063.34it/s]


inference_data_batch_6_20250502_233247.csv: token columns written ✓


inference_data_batch_7_20250502_233710.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4808.88it/s]


inference_data_batch_7_20250502_233710.csv: token columns written ✓


inference_data_batch_8_20250502_234134.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 5140.58it/s]


inference_data_batch_8_20250502_234134.csv: token columns written ✓


inference_data_batch_9_20250502_234558.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 5858.62it/s]


inference_data_batch_9_20250502_234558.csv: token columns written ✓


inference_data_final_20250503_013020.csv: prompt-tokenise: 100%|██████████████████████| 70/70 [00:00<00:00, 4962.50it/s]


inference_data_final_20250503_013020.csv: token columns written ✓


inference_data_batch_10_20250503_144604.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2921.48it/s]


inference_data_batch_10_20250503_144604.csv: token columns written ✓


inference_data_batch_11_20250503_144928.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2595.36it/s]


inference_data_batch_11_20250503_144928.csv: token columns written ✓


inference_data_batch_12_20250503_145253.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2863.73it/s]


inference_data_batch_12_20250503_145253.csv: token columns written ✓


inference_data_batch_13_20250503_145617.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2711.88it/s]


inference_data_batch_13_20250503_145617.csv: token columns written ✓


inference_data_batch_14_20250503_145941.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2480.30it/s]


inference_data_batch_14_20250503_145941.csv: token columns written ✓


inference_data_batch_15_20250503_150305.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2387.28it/s]


inference_data_batch_15_20250503_150305.csv: token columns written ✓


inference_data_batch_16_20250503_150629.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 1571.32it/s]


inference_data_batch_16_20250503_150629.csv: token columns written ✓


inference_data_batch_17_20250503_150953.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2565.04it/s]


inference_data_batch_17_20250503_150953.csv: token columns written ✓


inference_data_batch_18_20250503_151316.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2365.23it/s]


inference_data_batch_18_20250503_151316.csv: token columns written ✓


inference_data_batch_19_20250503_151640.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2526.55it/s]


inference_data_batch_19_20250503_151640.csv: token columns written ✓


inference_data_batch_1_20250503_141535.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 2889.55it/s]


inference_data_batch_1_20250503_141535.csv: token columns written ✓


inference_data_batch_20_20250503_152002.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2466.54it/s]


inference_data_batch_20_20250503_152002.csv: token columns written ✓


inference_data_batch_21_20250503_152326.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 1285.49it/s]


inference_data_batch_21_20250503_152326.csv: token columns written ✓


inference_data_batch_22_20250503_152649.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 1847.14it/s]


inference_data_batch_22_20250503_152649.csv: token columns written ✓


inference_data_batch_23_20250503_153012.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 1602.20it/s]


inference_data_batch_23_20250503_153012.csv: token columns written ✓


inference_data_batch_24_20250503_153336.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2716.01it/s]


inference_data_batch_24_20250503_153336.csv: token columns written ✓


inference_data_batch_25_20250503_153700.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2202.12it/s]


inference_data_batch_25_20250503_153700.csv: token columns written ✓


inference_data_batch_26_20250503_154024.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2146.13it/s]


inference_data_batch_26_20250503_154024.csv: token columns written ✓


inference_data_batch_27_20250503_154348.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2660.87it/s]


inference_data_batch_27_20250503_154348.csv: token columns written ✓


inference_data_batch_28_20250503_154712.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2289.19it/s]


inference_data_batch_28_20250503_154712.csv: token columns written ✓


inference_data_batch_29_20250503_155037.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2997.41it/s]


inference_data_batch_29_20250503_155037.csv: token columns written ✓


inference_data_batch_2_20250503_141858.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 2083.13it/s]


inference_data_batch_2_20250503_141858.csv: token columns written ✓


inference_data_batch_30_20250503_155401.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2576.94it/s]


inference_data_batch_30_20250503_155401.csv: token columns written ✓


inference_data_batch_31_20250503_155725.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2849.24it/s]


inference_data_batch_31_20250503_155725.csv: token columns written ✓


inference_data_batch_32_20250503_160048.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2535.21it/s]


inference_data_batch_32_20250503_160048.csv: token columns written ✓


inference_data_batch_33_20250503_160412.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2287.21it/s]


inference_data_batch_33_20250503_160412.csv: token columns written ✓


inference_data_batch_34_20250503_160736.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 1988.44it/s]


inference_data_batch_34_20250503_160736.csv: token columns written ✓


inference_data_batch_35_20250503_161059.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2753.54it/s]


inference_data_batch_35_20250503_161059.csv: token columns written ✓


inference_data_batch_36_20250503_161422.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2010.57it/s]


inference_data_batch_36_20250503_161422.csv: token columns written ✓


inference_data_batch_37_20250503_161745.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2637.36it/s]


inference_data_batch_37_20250503_161745.csv: token columns written ✓


inference_data_batch_38_20250503_162108.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2786.32it/s]


inference_data_batch_38_20250503_162108.csv: token columns written ✓


inference_data_batch_39_20250503_162432.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2358.91it/s]


inference_data_batch_39_20250503_162432.csv: token columns written ✓


inference_data_batch_3_20250503_142221.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 2361.26it/s]


inference_data_batch_3_20250503_142221.csv: token columns written ✓


inference_data_batch_40_20250503_162755.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3125.60it/s]


inference_data_batch_40_20250503_162755.csv: token columns written ✓


inference_data_batch_41_20250503_163118.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2981.32it/s]


inference_data_batch_41_20250503_163118.csv: token columns written ✓


inference_data_batch_42_20250503_163441.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2801.19it/s]


inference_data_batch_42_20250503_163441.csv: token columns written ✓


inference_data_batch_43_20250503_163804.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2892.70it/s]


inference_data_batch_43_20250503_163804.csv: token columns written ✓


inference_data_batch_44_20250503_164128.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3018.33it/s]


inference_data_batch_44_20250503_164128.csv: token columns written ✓


inference_data_batch_45_20250503_164451.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2778.95it/s]


inference_data_batch_45_20250503_164451.csv: token columns written ✓


inference_data_batch_46_20250503_164814.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2523.66it/s]


inference_data_batch_46_20250503_164814.csv: token columns written ✓


inference_data_batch_47_20250503_165138.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2102.18it/s]


inference_data_batch_47_20250503_165138.csv: token columns written ✓


inference_data_batch_48_20250503_165501.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2693.27it/s]


inference_data_batch_48_20250503_165501.csv: token columns written ✓


inference_data_batch_49_20250503_165824.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 1993.55it/s]

inference_data_batch_49_20250503_165824.csv: token columns written ✓



inference_data_batch_4_20250503_142544.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 2499.44it/s]

inference_data_batch_4_20250503_142544.csv: token columns written ✓



inference_data_batch_50_20250503_170147.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2148.41it/s]


inference_data_batch_50_20250503_170147.csv: token columns written ✓


inference_data_batch_51_20250503_170511.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2624.92it/s]


inference_data_batch_51_20250503_170511.csv: token columns written ✓


inference_data_batch_5_20250503_142907.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3022.77it/s]


inference_data_batch_5_20250503_142907.csv: token columns written ✓


inference_data_batch_6_20250503_143230.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 2910.97it/s]


inference_data_batch_6_20250503_143230.csv: token columns written ✓


inference_data_batch_7_20250503_143554.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 2070.53it/s]


inference_data_batch_7_20250503_143554.csv: token columns written ✓


inference_data_batch_8_20250503_143917.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 2912.45it/s]


inference_data_batch_8_20250503_143917.csv: token columns written ✓


inference_data_batch_9_20250503_144240.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3091.87it/s]


inference_data_batch_9_20250503_144240.csv: token columns written ✓


inference_data_final_20250503_170659.csv: prompt-tokenise: 100%|██████████████████████| 53/53 [00:00<00:00, 2212.82it/s]


inference_data_final_20250503_170659.csv: token columns written ✓


inference_data_batch_1_20250502_233732.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4141.66it/s]


inference_data_batch_1_20250502_233732.csv: token columns written ✓


inference_data_batch_1_20250502_233744.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4239.63it/s]


inference_data_batch_1_20250502_233744.csv: token columns written ✓


inference_data_batch_1_20250502_234057.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4085.43it/s]


inference_data_batch_1_20250502_234057.csv: token columns written ✓


inference_data_batch_2_20250502_235042.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4948.04it/s]


inference_data_batch_2_20250502_235042.csv: token columns written ✓


inference_data_batch_2_20250502_235108.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 5281.70it/s]


inference_data_batch_2_20250502_235108.csv: token columns written ✓


inference_data_batch_2_20250502_235433.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 5208.12it/s]


inference_data_batch_2_20250502_235433.csv: token columns written ✓


inference_data_batch_3_20250503_000351.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3502.58it/s]


inference_data_batch_3_20250503_000351.csv: token columns written ✓


inference_data_batch_3_20250503_000434.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4643.36it/s]


inference_data_batch_3_20250503_000434.csv: token columns written ✓


inference_data_batch_3_20250503_000753.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3514.11it/s]


inference_data_batch_3_20250503_000753.csv: token columns written ✓


inference_data_batch_4_20250503_001657.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3346.13it/s]


inference_data_batch_4_20250503_001657.csv: token columns written ✓


inference_data_batch_4_20250503_001751.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4998.04it/s]


inference_data_batch_4_20250503_001751.csv: token columns written ✓


inference_data_batch_4_20250503_002112.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4842.02it/s]


inference_data_batch_4_20250503_002112.csv: token columns written ✓


inference_data_batch_5_20250503_002946.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4976.93it/s]

inference_data_batch_5_20250503_002946.csv: token columns written ✓



inference_data_batch_5_20250503_003051.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 5298.38it/s]


inference_data_batch_5_20250503_003051.csv: token columns written ✓


inference_data_batch_5_20250503_003408.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4343.28it/s]


inference_data_batch_5_20250503_003408.csv: token columns written ✓


inference_data_batch_6_20250503_004252.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3879.45it/s]


inference_data_batch_6_20250503_004252.csv: token columns written ✓


inference_data_batch_6_20250503_004409.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3790.91it/s]


inference_data_batch_6_20250503_004409.csv: token columns written ✓


inference_data_batch_6_20250503_004719.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4323.85it/s]


inference_data_batch_6_20250503_004719.csv: token columns written ✓


inference_data_final_20250503_004935.csv: prompt-tokenise: 100%|██████████████████████| 51/51 [00:00<00:00, 4329.54it/s]

inference_data_final_20250503_004935.csv: token columns written ✓



inference_data_final_20250503_005059.csv: prompt-tokenise: 100%|██████████████████████| 51/51 [00:00<00:00, 4438.42it/s]


inference_data_final_20250503_005059.csv: token columns written ✓


inference_data_final_20250503_005404.csv: prompt-tokenise: 100%|██████████████████████| 51/51 [00:00<00:00, 3287.98it/s]


inference_data_final_20250503_005404.csv: token columns written ✓


inference_data_batch_10_20250503_163557.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4387.71it/s]


inference_data_batch_10_20250503_163557.csv: token columns written ✓


inference_data_batch_11_20250503_164851.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4174.02it/s]


inference_data_batch_11_20250503_164851.csv: token columns written ✓


inference_data_batch_12_20250503_170146.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4139.82it/s]


inference_data_batch_12_20250503_170146.csv: token columns written ✓


inference_data_batch_13_20250503_171430.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 5399.33it/s]


inference_data_batch_13_20250503_171430.csv: token columns written ✓


inference_data_batch_14_20250503_172717.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3094.15it/s]


inference_data_batch_14_20250503_172717.csv: token columns written ✓


inference_data_batch_15_20250503_174022.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4181.59it/s]


inference_data_batch_15_20250503_174022.csv: token columns written ✓


inference_data_batch_1_20250503_143930.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3637.38it/s]


inference_data_batch_1_20250503_143930.csv: token columns written ✓


inference_data_batch_2_20250503_145219.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4055.05it/s]


inference_data_batch_2_20250503_145219.csv: token columns written ✓


inference_data_batch_3_20250503_150527.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4839.95it/s]


inference_data_batch_3_20250503_150527.csv: token columns written ✓


inference_data_batch_4_20250503_151827.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4298.28it/s]


inference_data_batch_4_20250503_151827.csv: token columns written ✓


inference_data_batch_5_20250503_153131.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4946.40it/s]


inference_data_batch_5_20250503_153131.csv: token columns written ✓


inference_data_batch_6_20250503_154423.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4724.65it/s]


inference_data_batch_6_20250503_154423.csv: token columns written ✓


inference_data_batch_7_20250503_155713.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 5075.58it/s]


inference_data_batch_7_20250503_155713.csv: token columns written ✓


inference_data_batch_8_20250503_160947.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4913.84it/s]


inference_data_batch_8_20250503_160947.csv: token columns written ✓


inference_data_batch_9_20250503_162249.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 5193.93it/s]


inference_data_batch_9_20250503_162249.csv: token columns written ✓


inference_data_final_20250503_174954.csv: prompt-tokenise: 100%|██████████████████████| 72/72 [00:00<00:00, 5382.87it/s]


inference_data_final_20250503_174954.csv: token columns written ✓


inference_data_batch_1_20250503_105806.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3953.35it/s]


inference_data_batch_1_20250503_105806.csv: token columns written ✓


inference_data_batch_10_20250503_003010.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 5181.10it/s]


inference_data_batch_10_20250503_003010.csv: token columns written ✓


inference_data_batch_11_20250503_003645.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4538.55it/s]


inference_data_batch_11_20250503_003645.csv: token columns written ✓


inference_data_batch_12_20250503_004315.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 5352.06it/s]


inference_data_batch_12_20250503_004315.csv: token columns written ✓


inference_data_batch_13_20250503_004945.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 5714.54it/s]


inference_data_batch_13_20250503_004945.csv: token columns written ✓


inference_data_batch_14_20250503_005622.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 5447.36it/s]


inference_data_batch_14_20250503_005622.csv: token columns written ✓


inference_data_batch_15_20250503_010253.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 5853.14it/s]


inference_data_batch_15_20250503_010253.csv: token columns written ✓


inference_data_batch_16_20250503_010926.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 5505.56it/s]


inference_data_batch_16_20250503_010926.csv: token columns written ✓


inference_data_batch_17_20250503_011557.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3621.02it/s]


inference_data_batch_17_20250503_011557.csv: token columns written ✓


inference_data_batch_18_20250503_012231.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4327.15it/s]


inference_data_batch_18_20250503_012231.csv: token columns written ✓


inference_data_batch_1_20250502_233107.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4342.60it/s]


inference_data_batch_1_20250502_233107.csv: token columns written ✓


inference_data_batch_2_20250502_233739.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4195.23it/s]


inference_data_batch_2_20250502_233739.csv: token columns written ✓


inference_data_batch_3_20250502_234413.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4308.48it/s]


inference_data_batch_3_20250502_234413.csv: token columns written ✓


inference_data_batch_4_20250502_235049.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3379.26it/s]


inference_data_batch_4_20250502_235049.csv: token columns written ✓


inference_data_batch_5_20250502_235720.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3826.29it/s]


inference_data_batch_5_20250502_235720.csv: token columns written ✓


inference_data_batch_6_20250503_000355.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 5392.59it/s]


inference_data_batch_6_20250503_000355.csv: token columns written ✓


inference_data_batch_7_20250503_001030.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 5248.19it/s]


inference_data_batch_7_20250503_001030.csv: token columns written ✓


inference_data_batch_8_20250503_001703.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 5524.56it/s]


inference_data_batch_8_20250503_001703.csv: token columns written ✓


inference_data_batch_9_20250503_002338.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4958.33it/s]


inference_data_batch_9_20250503_002338.csv: token columns written ✓


inference_data_final_20250503_012502.csv: prompt-tokenise: 100%|██████████████████████| 38/38 [00:00<00:00, 4361.53it/s]


inference_data_final_20250503_012502.csv: token columns written ✓


inference_data_batch_10_20250503_014213.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4851.77it/s]


inference_data_batch_10_20250503_014213.csv: token columns written ✓


inference_data_batch_1_20250502_233900.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4644.85it/s]


inference_data_batch_1_20250502_233900.csv: token columns written ✓


inference_data_batch_2_20250502_235237.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 2949.91it/s]


inference_data_batch_2_20250502_235237.csv: token columns written ✓


inference_data_batch_3_20250503_000615.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3914.20it/s]


inference_data_batch_3_20250503_000615.csv: token columns written ✓


inference_data_batch_4_20250503_002002.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3158.36it/s]


inference_data_batch_4_20250503_002002.csv: token columns written ✓


inference_data_batch_5_20250503_003342.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3520.57it/s]


inference_data_batch_5_20250503_003342.csv: token columns written ✓


inference_data_batch_6_20250503_004723.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4083.56it/s]


inference_data_batch_6_20250503_004723.csv: token columns written ✓


inference_data_batch_7_20250503_010108.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3830.45it/s]


inference_data_batch_7_20250503_010108.csv: token columns written ✓


inference_data_batch_8_20250503_011443.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4213.56it/s]


inference_data_batch_8_20250503_011443.csv: token columns written ✓


inference_data_batch_9_20250503_012819.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4154.96it/s]


inference_data_batch_9_20250503_012819.csv: token columns written ✓


inference_data_batch_10_20250503_010414.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4403.51it/s]


inference_data_batch_10_20250503_010414.csv: token columns written ✓


inference_data_batch_11_20250503_011408.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4159.24it/s]

inference_data_batch_11_20250503_011408.csv: token columns written ✓



inference_data_batch_12_20250503_012405.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4477.41it/s]


inference_data_batch_12_20250503_012405.csv: token columns written ✓


inference_data_batch_13_20250503_013403.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2641.68it/s]


inference_data_batch_13_20250503_013403.csv: token columns written ✓


inference_data_batch_14_20250503_014401.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4719.86it/s]


inference_data_batch_14_20250503_014401.csv: token columns written ✓


inference_data_batch_15_20250503_015401.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4763.33it/s]


inference_data_batch_15_20250503_015401.csv: token columns written ✓


inference_data_batch_16_20250503_020405.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 2918.18it/s]


inference_data_batch_16_20250503_020405.csv: token columns written ✓


inference_data_batch_17_20250503_021411.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 3892.08it/s]


inference_data_batch_17_20250503_021411.csv: token columns written ✓


inference_data_batch_18_20250503_022419.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4020.54it/s]


inference_data_batch_18_20250503_022419.csv: token columns written ✓


inference_data_batch_19_20250503_023423.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4558.68it/s]

inference_data_batch_19_20250503_023423.csv: token columns written ✓



inference_data_batch_1_20250502_233435.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4259.86it/s]


inference_data_batch_1_20250502_233435.csv: token columns written ✓


inference_data_batch_2_20250502_234432.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3516.76it/s]


inference_data_batch_2_20250502_234432.csv: token columns written ✓


inference_data_batch_3_20250502_235428.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 2651.62it/s]


inference_data_batch_3_20250502_235428.csv: token columns written ✓


inference_data_batch_4_20250503_000424.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3583.28it/s]


inference_data_batch_4_20250503_000424.csv: token columns written ✓


inference_data_batch_5_20250503_001420.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3926.66it/s]


inference_data_batch_5_20250503_001420.csv: token columns written ✓


inference_data_batch_6_20250503_002421.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3978.17it/s]


inference_data_batch_6_20250503_002421.csv: token columns written ✓


inference_data_batch_7_20250503_003418.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4516.12it/s]


inference_data_batch_7_20250503_003418.csv: token columns written ✓


inference_data_batch_8_20250503_004417.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4317.30it/s]


inference_data_batch_8_20250503_004417.csv: token columns written ✓


inference_data_batch_9_20250503_005415.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3136.05it/s]


inference_data_batch_9_20250503_005415.csv: token columns written ✓


inference_data_final_20250503_023954.csv: prompt-tokenise: 100%|██████████████████████| 54/54 [00:00<00:00, 4017.75it/s]

inference_data_final_20250503_023954.csv: token columns written ✓



inference_data_batch_10_20250503_003317.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4851.88it/s]


inference_data_batch_10_20250503_003317.csv: token columns written ✓


inference_data_batch_11_20250503_003956.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4283.27it/s]


inference_data_batch_11_20250503_003956.csv: token columns written ✓


inference_data_batch_12_20250503_004635.csv: prompt-tokenise: 100%|█████████████████| 100/100 [00:00<00:00, 4556.45it/s]


inference_data_batch_12_20250503_004635.csv: token columns written ✓


inference_data_batch_1_20250502_233333.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4702.24it/s]


inference_data_batch_1_20250502_233333.csv: token columns written ✓


inference_data_batch_2_20250502_234011.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4417.34it/s]


inference_data_batch_2_20250502_234011.csv: token columns written ✓


inference_data_batch_3_20250502_234650.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 5060.02it/s]


inference_data_batch_3_20250502_234650.csv: token columns written ✓


inference_data_batch_4_20250502_235328.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4805.41it/s]


inference_data_batch_4_20250502_235328.csv: token columns written ✓


inference_data_batch_5_20250503_000004.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 2674.89it/s]


inference_data_batch_5_20250503_000004.csv: token columns written ✓


inference_data_batch_6_20250503_000643.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4832.98it/s]


inference_data_batch_6_20250503_000643.csv: token columns written ✓


inference_data_batch_7_20250503_001322.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 3600.32it/s]

inference_data_batch_7_20250503_001322.csv: token columns written ✓



inference_data_batch_8_20250503_002001.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4724.22it/s]


inference_data_batch_8_20250503_002001.csv: token columns written ✓


inference_data_batch_9_20250503_002640.csv: prompt-tokenise: 100%|██████████████████| 100/100 [00:00<00:00, 4688.94it/s]


inference_data_batch_9_20250503_002640.csv: token columns written ✓


inference_data_final_20250503_005103.csv: prompt-tokenise: 100%|██████████████████████| 67/67 [00:00<00:00, 3760.30it/s]


inference_data_final_20250503_005103.csv: token columns written ✓


In [11]:
arc_challenge = pd.read_csv("data/inference_outputs/arc_challenge/inference_data_batch_1_20250502_233812.csv")

In [12]:
arc_challenge.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 18 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   doc_id                     100 non-null    int64  
 1   input_text                 100 non-null    object 
 2   benchmark_name             100 non-null    object 
 3   label_small                100 non-null    float64
 4   acc_norm_small             100 non-null    float64
 5   energy_consumption_small   100 non-null    float64
 6   inference_time_small       100 non-null    float64
 7   label_medium               100 non-null    float64
 8   acc_norm_medium            100 non-null    float64
 9   energy_consumption_medium  100 non-null    float64
 10  inference_time_medium      100 non-null    float64
 11  label_large                100 non-null    float64
 12  acc_norm_large             100 non-null    float64
 13  energy_consumption_large   100 non-null    float64
