In [24]:
from collections import defaultdict
from pprint import pprint

from datasets import load_dataset

In [2]:
# Short task key -> dataset identifier that is later handed to `load_dataset`.
# Every task comes in a base variant and a "_large" variant.
collabllm_dataset_ids = dict(
    code="collabllm/collabllm-multiturn-bigcodebench",
    code_large="collabllm/collabllm-multiturn-bigcodebench-large",
    math="collabllm/collabllm-multiturn-math-hard",
    math_large="collabllm/collabllm-multiturn-math-hard-large",
    doc_edit="collabllm/collabllm-multiturn-medium",
    doc_edit_large="collabllm/collabllm-multiturn-medium-large",
)

In [None]:
# Load the "train" split of every configured dataset, keyed by the same short
# task name used in `collabllm_dataset_ids`.
collab_llm_datasets = dict(
    (task_name, load_dataset(dataset_id, split="train"))
    for task_name, dataset_id in collabllm_dataset_ids.items()
)

In [17]:
# Display the summary (feature names and row count) of the "code" dataset.
collab_llm_datasets["code"]

Dataset({
    features: ['prompt', 'completion', 'conv_id', 'score', 'single_turn_prompt', 'single_turn_completion', 'single_turn_metadata', 'turn_id', 'sessions', 'rewards'],
    num_rows: 2331
})

In [None]:
# count unique conv_id per dataset
def count_unique_conv_ids(dataset):
    """Return how many distinct values appear in the ``conv_id`` column.

    Args:
        dataset: Any object for which ``dataset["conv_id"]`` yields an
            iterable of hashable conversation ids.

    Returns:
        int: The number of distinct ``conv_id`` values.
    """
    distinct_conv_ids = {conv_id for conv_id in dataset["conv_id"]}
    return len(distinct_conv_ids)


# Report the number of distinct conversations found in each loaded dataset.
for task_name, task_dataset in collab_llm_datasets.items():
    print(f"{task_name}: {count_unique_conv_ids(task_dataset)} unique conv_ids")

code: 200 unique conv_ids
code_large: 500 unique conv_ids
math: 100 unique conv_ids
math_large: 500 unique conv_ids
doc_edit: 190 unique conv_ids
doc_edit_large: 500 unique conv_ids


In [20]:
# Display the "code" dataset summary (feature names and row count) for reference.
collab_llm_datasets["code"]

Dataset({
    features: ['prompt', 'completion', 'conv_id', 'score', 'single_turn_prompt', 'single_turn_completion', 'single_turn_metadata', 'turn_id', 'sessions', 'rewards'],
    num_rows: 2331
})

In [27]:
# Use the first row's `single_turn_metadata` of each dataset as a sample.
example_metadata = {}
for task_name, task_dataset in collab_llm_datasets.items():
    example_metadata[task_name] = task_dataset[0]["single_turn_metadata"]

# List the metadata fields of each sample; pass an entry to pprint(...) to
# inspect the values as well.
for task_name, metadata in example_metadata.items():
    field_names = list(metadata.keys())
    print(f"metadata for {task_name} has keys: {field_names}")

metadata for code has keys: ['entry_point', 'extraction_requirement', 'task_id', 'test']
metadata for code_large has keys: ['entry_point', 'extraction_requirement', 'task_id', 'test']
metadata for math has keys: ['level', 'type']
metadata for math_large has keys: ['level', 'type']
metadata for doc_edit has keys: ['authors', 'num_tokens', 'tags', 'timestamp', 'url']
metadata for doc_edit_large has keys: ['authors', 'num_tokens', 'tags', 'timestamp', 'url']


In [28]:
# Inspect the `task_id` field of the sample metadata taken from the "code" dataset.
example_metadata["code"]["task_id"]

'BigCodeBench/4'

In [8]:
# Hypothesis: every turn_id is odd. Confirm it per dataset by searching for any
# turn_id that is not.
for task_name, task_dataset in collab_llm_datasets.items():
    has_non_odd = any(turn_id % 2 != 1 for turn_id in task_dataset["turn_id"])
    print(f"{task_name}: all turn_ids odd? {not has_non_odd}")

code: all turn_ids odd? True
code_large: all turn_ids odd? True
math: all turn_ids odd? True
math_large: all turn_ids odd? True
doc_edit: all turn_ids odd? True
doc_edit_large: all turn_ids odd? True


In [None]:
# Group rows by conv_id and tabulate how many conversations reach each maximum
# turn_id (original author's note: each goes up to 13 max turns).

task = "doc_edit"  # key into collab_llm_datasets; change it to inspect another task
task_dataset = collab_llm_datasets[task]  # look the dataset up once for both columns

# conv_id -> every turn_id recorded for that conversation
conv_id_to_turn_ids = defaultdict(list)
for conv_id, turn_id in zip(task_dataset["conv_id"], task_dataset["turn_id"]):
    conv_id_to_turn_ids[conv_id].append(turn_id)

# maximum turn_id -> number of conversations whose largest turn_id is that value
max_turn_id_counts = defaultdict(int)
for turn_ids in conv_id_to_turn_ids.values():
    max_turn_id_counts[max(turn_ids)] += 1

# The header is derived from `task`; it used to be hard-coded to 'code', which
# mislabelled the output whenever a different task was selected.
print(f"Distribution of max turn_id per conv_id in '{task}' dataset:")
for max_turn_id, count in sorted(max_turn_id_counts.items()):
    print(f"Max turn_id {max_turn_id}: {count} conv_ids")


Distribution of max turn_id per conv_id in 'code' dataset:
Max turn_id 1: 7 conv_ids
Max turn_id 3: 20 conv_ids
Max turn_id 5: 39 conv_ids
Max turn_id 7: 29 conv_ids
Max turn_id 9: 27 conv_ids
Max turn_id 11: 12 conv_ids
Max turn_id 13: 56 conv_ids
