# Hate Speech Dataset Exploration

**Dataset:** ucberkeley-dlab/measuring-hate-speech

This notebook explores the hate speech dataset to find examples for filling placeholders:
- `[group]` - Target identity groups
- `[stereotype]` - Stereotypical statements
- `[slur]` - Actual slurs and offensive language
- `[ethnic group]` - Ethnic/racial group references
- `[industry]` - Industry/occupation stereotypes
- `[list of actual slurs used in study]` - Collection of slurs from research context

## Setup & Login

In [1]:
# Install required packages
!uv pip install datasets huggingface_hub pandas -q

In [2]:
# Login to HuggingFace (if needed)
from huggingface_hub import login

# This dataset is public, but login may help with rate limits
# Uncomment if you want to login:
# login()

## Load Dataset

In [3]:
from datasets import load_dataset
import pandas as pd
from collections import Counter

# Hub identifier of the dataset; only its "train" split is used below.
dataset_id = "ucberkeley-dlab/measuring-hate-speech"

print(f"Loading {dataset_id} dataset...")
print("This may take a few minutes on first load (will cache for future use)\n")

ds = load_dataset(dataset_id)
data = ds["train"]

# Two-line confirmation: success marker, then the thousands-separated row count.
print("✓ Dataset loaded successfully!")
print("Total examples: {:,}".format(len(data)))

Loading ucberkeley-dlab/measuring-hate-speech dataset...
This may take a few minutes on first load (will cache for future use)



README.md: 0.00B [00:00, ?B/s]

measuring-hate-speech.parquet:   0%|          | 0.00/14.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/135556 [00:00<?, ? examples/s]

✓ Dataset loaded successfully!
Total examples: 135,556


## Explore Schema

In [4]:
# Banner for this section of the output.
rule = "=" * 80
print(rule)
print("DATASET SCHEMA")
print(rule)

all_columns = data.column_names
print(f"\nTotal columns: {len(all_columns)}")

# Report which of the expected annotation columns are actually present.
print("\nKey columns:")
expected_columns = ('text', 'hate_speech_score', 'insult', 'dehumanize', 'violence')
for name in (c for c in expected_columns if c in all_columns):
    print(f"  - {name}")

# Every column whose name starts with "target_" is treated as a target-identity
# column; only the first few are listed, the rest are summarised as a count.
target_cols = [name for name in all_columns if name.startswith('target_')]
preview_count = 15

print(f"\nTarget identity columns ({len(target_cols)}):")
for name in target_cols[:preview_count]:
    print(f"  - {name}")

remaining = len(target_cols) - preview_count
if remaining > 0:
    print(f"  ... and {remaining} more")

DATASET SCHEMA

Total columns: 131

Key columns:
  - text
  - hate_speech_score
  - insult
  - dehumanize
  - violence

Target identity columns (53):
  - target_race_asian
  - target_race_black
  - target_race_latinx
  - target_race_middle_eastern
  - target_race_native_american
  - target_race_pacific_islander
  - target_race_white
  - target_race_other
  - target_race
  - target_religion_atheist
  - target_religion_buddhist
  - target_religion_christian
  - target_religion_hindu
  - target_religion_jewish
  - target_religion_mormon
  ... and 38 more


## Sample Data

In [5]:
rule = "=" * 80
print(rule)
print("SAMPLE EXAMPLES")
print(rule)

# Inspect the first row of the dataset in detail.
first = data[0]
text = first['text']
# Texts longer than 200 characters are cut and marked with an ellipsis.
shown_text = text if len(text) <= 200 else f"{text[:200]}..."

print("\nExample 1:")
print(f"Text: {shown_text}")
for label, field in (
    ("Hate speech score", 'hate_speech_score'),
    ("Insult", 'insult'),
    ("Dehumanize", 'dehumanize'),
):
    print(f"{label}: {first[field]}")

# Collect the target columns flagged True for this row, with "target_" stripped
# from their names.
targeted_groups = []
for col in target_cols:
    if first.get(col) == True:
        targeted_groups.append(col.replace('target_', ''))

if targeted_groups:
    print(f"Target groups: {', '.join(targeted_groups)}")

SAMPLE EXAMPLES

Example 1:
Text: Yes indeed. She sort of reminds me of the elder lady that played the part in the movie "Titanic" who was telling her story!!! And I wouldn't have wanted to cover who I really am!! I would be proud!!!!...
Hate speech score: -3.9
Insult: 0.0
Dehumanize: 0.0
Target groups: race_asian, race_black, race_latinx, race_middle_eastern, race_native_american, race_pacific_islander, race_white, race


## Analyze Hate Speech Scores

In [6]:
print("="*80)
print("HATE SPEECH SCORE DISTRIBUTION")
print("="*80)

# Read the score column directly. Iterating over `data` row by row builds a
# dict holding every column of each example just to read this one field.
scores = [s for s in data['hate_speech_score'] if s is not None]

if not scores:
    # min()/max() and the mean below are undefined for an empty list.
    print("\nNo hate speech scores available.")
else:
    print(f"\nMin score: {min(scores):.2f}")
    print(f"Max score: {max(scores):.2f}")
    print(f"Mean score: {sum(scores)/len(scores):.2f}")

    # Bucket every score in a single pass. The boundaries match the labels
    # printed below: >2.0, [1.0, 2.0], [0.5, 1.0), <0.5.
    high = moderate = mild = low = 0
    for s in scores:
        if s > 2.0:
            high += 1
        elif s >= 1.0:
            moderate += 1
        elif s >= 0.5:
            mild += 1
        elif s < 0.5:
            low += 1

    # Count by severity
    print("\nBy severity:")
    print(f"  High hate (>2.0): {high:,}")
    print(f"  Moderate (1.0-2.0): {moderate:,}")
    print(f"  Mild (0.5-1.0): {mild:,}")
    print(f"  Low/neutral (<0.5): {low:,}")

HATE SPEECH SCORE DISTRIBUTION

Min score: -8.34
Max score: 6.30
Mean score: -0.57

By severity:
  High hate (>2.0): 20,338
  Moderate (1.0-2.0): 20,382
  Mild (0.5-1.0): 8,553
  Low/neutral (<0.5): 86,283


## Find Examples by Target Group

In [None]:
print("="*80)
print("EXAMPLES BY TARGET GROUP")
print("="*80)

# Count the flagged examples for each target column. Each column is read once
# as a column; the previous version re-iterated the entire dataset row by row
# for every target column, which made this cell impractically slow.
# NOTE: target_cols also contains category-level columns such as `target_race`
# (visible in the schema listing), so those are counted alongside their
# subgroup columns.
target_counts = {}
for col in target_cols:
    count = sum(1 for flag in data[col] if flag == True)
    if count > 0:
        target_counts[col.replace('target_', '')] = count

# Sort by count, largest first
sorted_targets = sorted(target_counts.items(), key=lambda item: item[1], reverse=True)

print("\nTop 15 target groups by example count:")
for group, count in sorted_targets[:15]:
    print(f"  {group}: {count:,}")

EXAMPLES BY TARGET GROUP


## Find Examples for Each Placeholder Type

In [None]:
print("="*80)
print("PLACEHOLDER EXAMPLES")
print("="*80)

# Each section below filters the dataset with one candidate rule, prints how
# many rows match, and previews the first 150 characters of the first match.

# 1. [slur] - High insult + high hate speech score
print("\n1. [slur] - Looking for high insult scores...")
slurs = data.filter(lambda x: x['insult'] >= 4 and x['hate_speech_score'] > 2.0)
print(f"   Found {len(slurs):,} examples with high insult scores")
if len(slurs) > 0:
    print(f"   Example: {slurs[0]['text'][:150]}...")

# 2. [stereotype] - Moderate hate speech
print("\n2. [stereotype] - Looking for moderate hate speech...")
stereotypes = data.filter(lambda x: 0.5 <= x['hate_speech_score'] <= 1.5)
print(f"   Found {len(stereotypes):,} examples with moderate scores")
if len(stereotypes) > 0:
    print(f"   Example: {stereotypes[0]['text'][:150]}...")

# 3. [ethnic group] - Race/ethnicity targets
print("\n3. [ethnic group] - Looking for race/ethnicity targets...")
race_cols = [col for col in target_cols if 'race' in col]
ethnic = data.filter(lambda x: any(x.get(col) == True for col in race_cols))
print(f"   Found {len(ethnic):,} examples targeting race/ethnicity")
if len(ethnic) > 0:
    # Show which race. race_cols also contains the category-level column
    # `target_race`, which has no subgroup suffix; stripping 'target_race_'
    # from it would leave the raw column name in the label, so only the
    # `target_race_<subgroup>` columns are used to build the label.
    ex = ethnic[0]
    race_prefix = 'target_race_'
    races = [
        col[len(race_prefix):]
        for col in race_cols
        if col.startswith(race_prefix) and ex.get(col) == True
    ]
    # Fall back to a generic label when no subgroup column is flagged.
    race_label = ', '.join(races) if races else 'race (unspecified)'
    print(f"   Example targeting {race_label}: {ex['text'][:150]}...")

# 4. [group] - Any target group
print("\n4. [group] - General target groups...")
has_target = data.filter(lambda x: any(x.get(col) == True for col in target_cols))
print(f"   Found {len(has_target):,} examples with any target group")

# 5. Dehumanizing language
print("\n5. Dehumanizing language...")
dehumanize = data.filter(lambda x: x['dehumanize'] >= 3)
print(f"   Found {len(dehumanize):,} examples with dehumanizing language")
if len(dehumanize) > 0:
    print(f"   Example: {dehumanize[0]['text'][:150]}...")

## Test Filtering Combinations

In [None]:
rule = "=" * 80
print(rule)
print("FILTERING EXAMPLES")
print(rule)


def _report_filter(heading, predicate):
    """Print `heading`, filter `data` with `predicate`, and report the hits.

    Prints the number of matching rows and, when there is at least one, the
    first 150 characters of the first match followed by "...".
    Returns the filtered dataset.
    """
    print(f"\n{heading}")
    matches = data.filter(predicate)
    print(f"Found {len(matches)} examples")
    if len(matches) > 0:
        print(f"Sample: {matches[0]['text'][:150]}...")
    return matches


# Example 1: Anti-Muslim hate speech
muslim_hate = _report_filter(
    "Example 1: Anti-Muslim content (high hate score)",
    lambda row: row.get('target_religion_muslim') == True and row['hate_speech_score'] > 1.5,
)

# Example 2: Anti-Black hate speech
black_hate = _report_filter(
    "Example 2: Anti-Black content (high hate score)",
    lambda row: row.get('target_race_black') == True and row['hate_speech_score'] > 1.5,
)

# Example 3: LGBTQ+ targeting -- any target column whose name mentions
# sexuality or transgender, with a lower score threshold than the two above.
lgbtq_cols = [name for name in target_cols if 'sexuality' in name or 'transgender' in name]
lgbtq_hate = _report_filter(
    "Example 3: LGBTQ+ targeting",
    lambda row: any(row.get(name) == True for name in lgbtq_cols) and row['hate_speech_score'] > 1.0,
)

## Export Summary

In [None]:
import json

rule = "=" * 80
print(rule)
print("SUMMARY FOR SCRIPT DEVELOPMENT")
print(rule)

# Names of the columns a downstream script is expected to read.
key_fields = {
    'text_field': 'text',
    'hate_score_field': 'hate_speech_score',
    'insult_field': 'insult',
    'dehumanize_field': 'dehumanize',
}

# Human-readable description of the filter tried for each placeholder.
filtering_strategies = {
    '[slur]': 'insult >= 4 AND hate_speech_score > 2.0',
    '[stereotype]': '0.5 <= hate_speech_score <= 1.5',
    '[ethnic group]': 'target_race_* == True',
    '[group]': 'any target_* == True',
    'dehumanizing': 'dehumanize >= 3',
}

# Counts reuse `scores` and `has_target` computed by earlier cells.
example_counts = {
    'high_hate': sum(1 for s in scores if s > 2.0),
    'moderate_hate': sum(1 for s in scores if 1.0 <= s <= 2.0),
    'with_target_group': len(has_target),
}

summary = {
    'dataset_name': 'ucberkeley-dlab/measuring-hate-speech',
    'total_examples': len(data),
    'columns': data.column_names,
    'key_fields': key_fields,
    'target_group_columns': target_cols,
    'filtering_strategies': filtering_strategies,
    'example_counts': example_counts,
}

# Show the summary in the notebook output ...
print(json.dumps(summary, indent=2))

# ... and save the same structure to a file in the working directory.
output_path = 'hate_speech_schema.json'
with open(output_path, 'w') as f:
    json.dump(summary, f, indent=2)

print("\n✓ Schema saved to hate_speech_schema.json")

## Random Samples for Quality Check

In [None]:
print("="*80)
print("RANDOM SAMPLES (Quality Check)")
print("="*80)

# Get 5 random high-hate examples
import random
random.seed(42)

# Find the qualifying rows by index from the score column alone, instead of
# materialising every high-hate row as a full dict just to sample a handful.
# Rows without a score are skipped rather than raising on the comparison.
high_hate_indices = [
    i for i, score in enumerate(data['hate_speech_score'])
    if score is not None and score > 2.0
]
# Lazy view over the qualifying rows, in the same order as the index list
# above (a Dataset, not a list).
high_hate_list = data.select(high_hate_indices)

# Sample positions within the view; only the sampled rows are then loaded.
sample_positions = random.sample(range(len(high_hate_list)), min(5, len(high_hate_list)))
samples = [high_hate_list[pos] for pos in sample_positions]

for idx, ex in enumerate(samples, 1):
    print(f"\nSample {idx}:")
    print(f"  Score: {ex['hate_speech_score']:.2f}")
    # Texts longer than 200 characters are cut and marked with an ellipsis.
    text = ex['text']
    print(f"  Text: {text[:200]}..." if len(text) > 200 else f"  Text: {text}")
    targets = [col.replace('target_', '') for col in target_cols if ex.get(col) == True]
    if targets:
        print(f"  Targets: {', '.join(targets)}")