In [1]:
# list all the runs in `https://wandb.ai/generating-faster/hf-bench`
import wandb

# Open a W&B public-API client and get the collection of runs in the project.
api = wandb.Api()
runs = api.runs("generating-faster/hf-bench")

# Index the runs by name; if two runs share a name, the later one wins.
run_name_to_run = dict((run.name, run) for run in runs)

In [2]:
# Submission ids to look up. They are kept in a list rather than written
# directly as dict keys: a repeated key in a dict literal silently collapses,
# so counting `set(d.keys())` could never reveal an accidental duplicate.
reported_submission_ids = [
    '2025-02-06_06-42-07_ea3ed4b',
    '2025-02-06_21-32-25_ab73cc7',
    '2025-02-06_06-40-46_ea3ed4b',
    '2025-02-06_17-07-07_ab73cc7',
    '2025-02-06_02-10-28_d8c80fe',
    '2025-02-06_06-54-21_ab73cc7',
    '2025-02-06_06-29-56_17d3c65',
    '2025-02-06_11-43-18_ab73cc7',
    '2025-02-04_18-20-41_bccb1bd',
    '2025-02-06_06-41-29_ea3ed4b',
    '2025-02-06_06-39-35_e14af1d',
    '2025-02-05_03-44-45_d7c02fb',
    '2025-02-06_08-19-02_ab73cc7',
    '2025-02-05_03-44-51_d7c02fb',
    '2025-02-06_10-44-45_ab73cc7',
    '2025-02-06_06-38-32_e14af1d',
    '2025-02-06_06-38-19_e14af1d',
    '2025-02-04_20-52-14_eb664c5',
    '2025-02-04_01-05-29_4c55336',
    '2025-02-06_16-28-26_ab73cc7',
    '2025-02-06_17-46-21_ab73cc7',
    '2025-02-05_03-18-59_d7c02fb',
    '2025-02-06_17-06-51_ab73cc7',
    '2025-02-06_02-07-38_ea3ed4b',
    '2025-02-06_16-26-53_ab73cc7',
    '2025-02-06_06-40-21_e14af1d',
]
# Fail loudly if an id was pasted twice instead of silently dropping it.
assert len(reported_submission_ids) == len(set(reported_submission_ids)), \
    "duplicate submission id in reported_submission_ids"

# Every id is mapped to the same hardware label, 'A100'.
submission_id_to_hardware = dict.fromkeys(reported_submission_ids, 'A100')
print(len(submission_id_to_hardware))

26


In [3]:
import pandas as pd


def get_last_lines(run, n_lines: int = 50) -> str:
    """
    Return the last `n_lines` lines of the `output.log` file of a run.

    Args:
        run: Object exposing `files()`, an iterable of file entries that each
            have a `name` attribute and a `download(root=..., replace=...)`
            method returning a readable text handle.
        n_lines: How many trailing lines to keep (default 50). A value <= 0
            yields an empty string.

    Returns:
        The trailing lines joined with a newline character, or an empty
        string when the run has no file named `output.log`.
    """
    if n_lines <= 0:
        # Guard: `lines[-0:]` would return the whole log, not nothing.
        return ""
    output_file = next((f for f in run.files() if f.name == 'output.log'), None)
    if output_file is None:
        return ""
    # `download` hands back an open file handle. Close it deterministically so
    # that calling this once per run does not leave one open handle per run.
    with output_file.download(root="./", replace=True) as file_obj:
        output_content = file_obj.read()
    return '\n'.join(output_content.splitlines()[-n_lines:])


# Fetch the tail of the output log for every run, keyed by the run's name.
run_name_to_log_tail = {run.name: get_last_lines(run) for run in runs}

# One row per (run_name, log_tail) pair.
df = pd.DataFrame(run_name_to_log_tail.items(), columns=['run_name', 'log_tail'])
df

Unnamed: 0,run_name,log_tail
0,Qwen-Qwen2.5-1.5B-Instruct_tau-scrolls_qasper_...,Extended Response-Intent Prediction (ERIP) $\&...
1,google-gemma-2-9b-it_tau-scrolls_qasper_test_3...,### Function Breakdown:\nThe function `filter_...
2,microsoft-phi-4_tau-scrolls_qasper_test_30_mic...,from typing import List\n\n\n# The `filter_by...
3,meta-llama-Llama-3.1-8B-Instruct_tau-scrolls_q...,Assistants: 100%|##########| 4/4 [4:41:01<00:0...
4,codellama-CodeLlama-13b-Instruct-hf_tau-scroll...,"del assistant_obj\n ^^^^^^\n File ""/ho..."
5,meta-llama-Llama-3.1-70B-Instruct_tau-scrolls_...,
6,meta-llama-Llama-3.1-70B_tau-scrolls_qasper_te...,">>> filter_by_prefix(['abc', 'bcd', 'cde',..."
7,mistralai-Mixtral-8x22B-Instruct-v0.1_tau-scro...,\nYour code:\n\n==============================...
8,meta-llama-Llama-3.1-70B-Instruct_tau-scrolls_...,wandb: WARNING Serializing object of type str ...
9,deepseek-ai-DeepSeek-R1-Distill-Qwen-32B_tau-s...,"Compared to the hybrid model baseline, MISSA p..."


In [4]:
import re


def get_submission_id(log_text: str) -> str:
    """
    Extract the submission id from the first `benchmark_results/{submission_id}/`
    occurrence in `log_text`.

    The id is the run of non-slash characters between `benchmark_results/` and
    the next `/`. Returns an empty string when the text contains no such path.
    """
    found = re.search(r"benchmark_results/([^/]+)/", log_text)
    return found.group(1) if found else ""

# Build the augmented frame without mutating the existing object in place:
# missing values become "" and a `submission_id` column is derived from each
# row's `log_tail`. Rebinding `df` keeps the name later cells rely on.
df = df.fillna("").assign(
    submission_id=lambda frame: frame["log_tail"].apply(get_submission_id)
)
df

Unnamed: 0,run_name,log_tail,submission_id
0,Qwen-Qwen2.5-1.5B-Instruct_tau-scrolls_qasper_...,Extended Response-Intent Prediction (ERIP) $\&...,2025-02-04_07-28-02_d6d09bd
1,google-gemma-2-9b-it_tau-scrolls_qasper_test_3...,### Function Breakdown:\nThe function `filter_...,2025-02-06_06-38-32_e14af1d
2,microsoft-phi-4_tau-scrolls_qasper_test_30_mic...,from typing import List\n\n\n# The `filter_by...,2025-02-06_17-07-07_ab73cc7
3,meta-llama-Llama-3.1-8B-Instruct_tau-scrolls_q...,Assistants: 100%|##########| 4/4 [4:41:01<00:0...,
4,codellama-CodeLlama-13b-Instruct-hf_tau-scroll...,"del assistant_obj\n ^^^^^^\n File ""/ho...",
5,meta-llama-Llama-3.1-70B-Instruct_tau-scrolls_...,,
6,meta-llama-Llama-3.1-70B_tau-scrolls_qasper_te...,">>> filter_by_prefix(['abc', 'bcd', 'cde',...",2025-02-06_06-41-36_ea3ed4b
7,mistralai-Mixtral-8x22B-Instruct-v0.1_tau-scro...,\nYour code:\n\n==============================...,2025-02-06_21-32-25_ab73cc7
8,meta-llama-Llama-3.1-70B-Instruct_tau-scrolls_...,wandb: WARNING Serializing object of type str ...,
9,deepseek-ai-DeepSeek-R1-Distill-Qwen-32B_tau-s...,"Compared to the hybrid model baseline, MISSA p...",


In [5]:
# Keep only the rows whose submission id is a key of `submission_id_to_hardware`,
# then renumber the surviving rows from 0.
is_reported = df["submission_id"].isin(submission_id_to_hardware.keys())
df_reported = df.loc[is_reported].reset_index(drop=True)
df_reported

Unnamed: 0,run_name,log_tail,submission_id
0,google-gemma-2-9b-it_tau-scrolls_qasper_test_3...,### Function Breakdown:\nThe function `filter_...,2025-02-06_06-38-32_e14af1d
1,microsoft-phi-4_tau-scrolls_qasper_test_30_mic...,from typing import List\n\n\n# The `filter_by...,2025-02-06_17-07-07_ab73cc7
2,mistralai-Mixtral-8x22B-Instruct-v0.1_tau-scro...,\nYour code:\n\n==============================...,2025-02-06_21-32-25_ab73cc7
3,deepseek-ai-DeepSeek-R1-Distill-Llama-70B_open...,\nSo the function will loop through each strin...,2025-02-04_01-05-29_4c55336
4,deepseek-ai-DeepSeek-R1-Distill-Qwen-32B_tau-s...,=============...,2025-02-06_06-41-29_ea3ed4b
5,deepseek-ai-DeepSeek-R1-Distill-Llama-70B_tau-...,Setting `pad_token_id` to `eos_token_id`:12800...,2025-02-04_20-52-14_eb664c5
6,deepseek-ai-DeepSeek-R1-Distill-Llama-8B_tau-s...,"Alright, let's outline this:\n\n- Central topi...",2025-02-06_17-06-51_ab73cc7
7,deepseek-ai-DeepSeek-R1-Distill-Llama-8B_opena...,"\nSo, the code is straightforward.\n\nSo, to i...",2025-02-06_16-26-53_ab73cc7
8,deepseek-ai-DeepSeek-R1-Distill-Qwen-7B_tau-sc...,I should note that this ad aims to spread awar...,2025-02-06_08-19-02_ab73cc7
9,deepseek-ai-DeepSeek-R1-Distill-Qwen-7B_openai...,Setting `pad_token_id` to `eos_token_id`:15164...,2025-02-06_10-44-45_ab73cc7


In [9]:
def _describe_hardware(run) -> str:
    """Format a run's metadata as "<gpu_count> * <gpu>"; "Unknown" if the metadata is empty."""
    if not run.metadata:
        return "Unknown"
    gpu_count = run.metadata.get('gpu_count', 'unknown')
    gpu_type = run.metadata.get('gpu', 'unknown')
    return f"{gpu_count} * {gpu_type}"


def _shorten_hardware(run_name: str) -> str:
    """Look up a run's hardware string and strip the " Ada Generation" and "NVIDIA " substrings."""
    full_name = run_name_to_hardware[run_name]
    return full_name.replace(" Ada Generation", "").replace("NVIDIA ", "")


# Hardware description for every run, keyed by run name.
run_name_to_hardware = {run.name: _describe_hardware(run) for run in runs}

# Attach the shortened hardware label to each row of df_reported.
df_reported["hardware"] = df_reported["run_name"].apply(_shorten_hardware)
df_reported

Unnamed: 0,run_name,log_tail,submission_id,hardware
0,google-gemma-2-9b-it_tau-scrolls_qasper_test_3...,### Function Breakdown:\nThe function `filter_...,2025-02-06_06-38-32_e14af1d,1 * H100 NVL
1,microsoft-phi-4_tau-scrolls_qasper_test_30_mic...,from typing import List\n\n\n# The `filter_by...,2025-02-06_17-07-07_ab73cc7,1 * H100 NVL
2,mistralai-Mixtral-8x22B-Instruct-v0.1_tau-scro...,\nYour code:\n\n==============================...,2025-02-06_21-32-25_ab73cc7,4 * H100 NVL
3,deepseek-ai-DeepSeek-R1-Distill-Llama-70B_open...,\nSo the function will loop through each strin...,2025-02-04_01-05-29_4c55336,2 * A100 80GB PCIe
4,deepseek-ai-DeepSeek-R1-Distill-Qwen-32B_tau-s...,=============...,2025-02-06_06-41-29_ea3ed4b,1 * H100 NVL
5,deepseek-ai-DeepSeek-R1-Distill-Llama-70B_tau-...,Setting `pad_token_id` to `eos_token_id`:12800...,2025-02-04_20-52-14_eb664c5,2 * H100 NVL
6,deepseek-ai-DeepSeek-R1-Distill-Llama-8B_tau-s...,"Alright, let's outline this:\n\n- Central topi...",2025-02-06_17-06-51_ab73cc7,1 * H100 NVL
7,deepseek-ai-DeepSeek-R1-Distill-Llama-8B_opena...,"\nSo, the code is straightforward.\n\nSo, to i...",2025-02-06_16-26-53_ab73cc7,1 * H100 NVL
8,deepseek-ai-DeepSeek-R1-Distill-Qwen-7B_tau-sc...,I should note that this ad aims to spread awar...,2025-02-06_08-19-02_ab73cc7,1 * H100 NVL
9,deepseek-ai-DeepSeek-R1-Distill-Qwen-7B_openai...,Setting `pad_token_id` to `eos_token_id`:15164...,2025-02-06_10-44-45_ab73cc7,1 * H100 NVL


In [10]:
# Display a dictionary of the form {submission_id: hardware}, built by pairing
# the two columns of df_reported row by row (a repeated id keeps its last row).
submission_id_to_hardware_known = dict(
    zip(df_reported["submission_id"], df_reported["hardware"])
)
submission_id_to_hardware_known

{'2025-02-06_06-38-32_e14af1d': '1 * H100 NVL',
 '2025-02-06_17-07-07_ab73cc7': '1 * H100 NVL',
 '2025-02-06_21-32-25_ab73cc7': '4 * H100 NVL',
 '2025-02-04_01-05-29_4c55336': '2 * A100 80GB PCIe',
 '2025-02-06_06-41-29_ea3ed4b': '1 * H100 NVL',
 '2025-02-04_20-52-14_eb664c5': '2 * H100 NVL',
 '2025-02-06_17-06-51_ab73cc7': '1 * H100 NVL',
 '2025-02-06_16-26-53_ab73cc7': '1 * H100 NVL',
 '2025-02-06_08-19-02_ab73cc7': '1 * H100 NVL',
 '2025-02-06_10-44-45_ab73cc7': '1 * H100 NVL',
 '2025-02-06_06-39-35_e14af1d': '1 * H100 NVL',
 '2025-02-06_16-28-26_ab73cc7': '2 * H100 NVL',
 '2025-02-06_17-46-21_ab73cc7': '2 * H100 NVL',
 '2025-02-06_02-10-28_d8c80fe': '1 * RTX 6000',
 '2025-02-06_06-29-56_17d3c65': '1 * RTX 6000',
 '2025-02-06_02-07-38_ea3ed4b': '1 * RTX 6000'}

In [11]:
# Ids that are keys of submission_id_to_hardware but are absent from
# submission_id_to_hardware_known, in the original key order.
submission_id_unknown = list(
    filter(
        lambda sid: sid not in submission_id_to_hardware_known,
        submission_id_to_hardware.keys(),
    )
)
print(len(submission_id_unknown))
submission_id_unknown

10


['2025-02-06_06-42-07_ea3ed4b',
 '2025-02-06_06-40-46_ea3ed4b',
 '2025-02-06_06-54-21_ab73cc7',
 '2025-02-06_11-43-18_ab73cc7',
 '2025-02-04_18-20-41_bccb1bd',
 '2025-02-05_03-44-45_d7c02fb',
 '2025-02-05_03-44-51_d7c02fb',
 '2025-02-06_06-38-19_e14af1d',
 '2025-02-05_03-18-59_d7c02fb',
 '2025-02-06_06-40-21_e14af1d']