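# Test data: HarmBench standard test behaviors

Downloads the HarmBench text behaviors, keeps the test-split rows whose `FunctionalCategory` is `standard`, and exports them to `./data/questions/harmbench.jsonl`.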

In [1]:
import pandas as pd

In [2]:
def _get_harmbench_behaviors():
    df_val_behaviors = pd.read_csv(
        "https://raw.githubusercontent.com/centerforaisafety/HarmBench/"
        "e2d308a711b77f5e6096ef7991e6b295a14f79d8"
        "/data/behavior_datasets/harmbench_behaviors_text_val.csv"
    )
    df_val_behaviors["split"] = "val"

    df_test_behaviors = pd.read_csv(
        "https://raw.githubusercontent.com/centerforaisafety/HarmBench/"
        "e2d308a711b77f5e6096ef7991e6b295a14f79d8"
        "/data/behavior_datasets/harmbench_behaviors_text_test.csv"
    )
    df_test_behaviors["split"] = "test"

    df_behaviors = pd.concat([df_val_behaviors, df_test_behaviors]).reset_index(drop=True)
    return df_behaviors

# Load the combined val + test behaviors once; the cells below filter this frame.
df_behaviors = _get_harmbench_behaviors()

In [4]:
# Restrict to the test split, keeping only the "standard" functional category.
is_test_split = df_behaviors["split"] == "test"
is_standard = df_behaviors["FunctionalCategory"] == "standard"
test_standard = df_behaviors[is_test_split & is_standard]

# Tally how many behaviors fall under each SemanticCategory label.
semantic_counts = test_standard["SemanticCategory"].value_counts()
print("SemanticCategory counts for test split with FunctionalCategory == 'standard':")
print(semantic_counts)

# Cell result: the total across all categories.
semantic_counts.sum()

SemanticCategory counts for test split with FunctionalCategory == 'standard':
SemanticCategory
illegal                          47
cybercrime_intrusion             33
misinformation_disinformation    27
chemical_biological              19
harmful                          17
harassment_bullying              16
Name: count, dtype: int64


159

In [5]:
# output into a jsonl with question, id, and semantic_category
import json
import os

# Destination for the exported questions. The parent directory is derived from
# this path so the directory that gets created always matches the file written.
output_file = "./data/questions/harmbench.jsonl"
os.makedirs(os.path.dirname(output_file), exist_ok=True)

# One record per row of test_standard. "id" is the 0-based position within the
# filtered frame, not the original DataFrame index label.
output_data = [
    {"question": behavior, "id": i, "semantic_category": category}
    for i, (behavior, category) in enumerate(
        zip(test_standard["Behavior"], test_standard["SemanticCategory"])
    )
]

# Write one JSON object per line (JSONL); the encoding is explicit so the file
# does not depend on the platform default.
with open(output_file, "w", encoding="utf-8") as f:
    f.writelines(json.dumps(item) + "\n" for item in output_data)

print(f"Exported {len(output_data)} questions to {output_file}")

Exported 159 questions to ./data/questions/harmbench.jsonl
