In [None]:
import sys
sys.path.append('..')

from src.data.dataset_loader import DatasetLoader
from configs.datasets.dataset_config import DATASETS, CATEGORIES
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's 'whitegrid' style to the plots drawn in this notebook
sns.set_style('whitegrid')
# IPython magic: render matplotlib figures inline in the cell output
%matplotlib inline

## 1. Load Datasets

In [None]:
loader = DatasetLoader()

# Load AdvBench
print("Loading AdvBench...")
advbench = loader.load_advbench()
# \u2713 is the check mark; written as an escape so the status line survives
# any re-encoding of the notebook file (it had previously been garbled).
print(f"\u2713 Loaded {len(advbench)} examples\n")

# Show the first example, guarding against an empty dataset so this cell
# does not fail with an indexing error before the reader sees the count above.
print("Example:")
if len(advbench) > 0:
    print(advbench[0])
else:
    print("(dataset is empty)")

## 2. Dataset Statistics

In [None]:
# Wrap the loaded examples in a DataFrame so pandas can summarise them
df = pd.DataFrame(advbench)

# Number of whitespace-separated tokens in each prompt
df['prompt_length'] = df['prompt'].str.split().str.len()

print("Prompt Length Statistics:")
print(df['prompt_length'].describe())

# Histogram of prompt lengths, drawn on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(df['prompt_length'], bins=30, edgecolor='black')
ax.set_xlabel('Prompt Length (words)')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Prompt Lengths in AdvBench')
plt.show()

## 3. Random Sample Prompts

In [None]:
# Show random samples.
# A fixed seed keeps the displayed prompts identical across Restart & Run All;
# change SAMPLE_SEED to look at a different draw.
N_SAMPLES = 5
SAMPLE_SEED = 42

# Cap the request at len(df): DataFrame.sample raises ValueError when n is
# larger than the number of rows and sampling is without replacement (default).
sampled_prompts = df.sample(n=min(N_SAMPLES, len(df)), random_state=SAMPLE_SEED)

print("Random Adversarial Prompts:\n")
for i, example in enumerate(sampled_prompts.itertuples(), 1):
    print(f"{i}. {example.prompt}")
    print()

## 4. Compare Datasets

In [None]:
# Gather one summary record for every dataset listed in DATASETS
datasets_info = []

for dataset_key, dataset_cfg in DATASETS.items():
    try:
        loaded = loader.load_dataset(dataset_key)
        record = {
            'name': dataset_cfg['name'],
            'size': len(loaded),
            'license': dataset_cfg['license'],
        }
    except Exception as err:
        # Best effort: report the failure and move on to the next dataset
        print(f"Could not load {dataset_key}: {err}")
    else:
        datasets_info.append(record)

# Tabulate the collected records; the bare name renders the table as cell output
comparison_df = pd.DataFrame(datasets_info)
comparison_df

## 5. Prepare Preference Pairs

In [None]:
# Demonstrate building chosen/rejected preference pairs for a handful of prompts
from src.data.prepare_datasets import DPODatasetPreparator

preparator = DPODatasetPreparator()

# Take the first five prompts as a small sample and build pairs from them
sample_prompts = df['prompt'].iloc[:5].tolist()
preferences = preparator.create_safety_preferences(sample_prompts)

# Print the first generated pair, one field per paragraph
print("Example Preference Pair:\n")
first_pair = preferences[0]
print(f"Prompt: {first_pair['prompt']}")
print(f"\nChosen (Safe): {first_pair['chosen']}")
print(f"\nRejected (Unsafe): {first_pair['rejected']}")

## Next Steps

1. Prepare full datasets: `python src/data/prepare_datasets.py --dataset all`
2. Explore training configurations in `configs/training/`
3. See model selection in `configs/models/model_config.py`