<a href="https://colab.research.google.com/github/montben/ContentModAPI/blob/main/notebooks/simplified_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Simplified Content Moderation Training

This notebook trains a BERT model using a **single comprehensive dataset** (Civil Comments).

**🚀 Best run on Google Colab for free GPU!**

## What Changed?
- ✅ **ONE dataset** instead of 5+ mixed datasets
- ✅ **6 labels** instead of 8 (removed self_harm and spam)
- ✅ **Consistent labeling** from a single source
- ✅ **Better training** with cleaner data

## Labels:
1. Toxicity - General toxic language
2. Hate Speech - Identity-based attacks
3. Harassment - Personal attacks
4. Violence - Threats and violent content
5. Sexual - Sexual content
6. Profanity - Explicit language

---

## ⚙️ Colab Setup Instructions

1. **Enable GPU**: Runtime → Change runtime type → Hardware accelerator → GPU
2. **Run all cells** in order
3. Training will take ~30-45 minutes with GPU


In [None]:
# ====================================
# STEP 0: Clone Repository (Colab Only)
# ====================================
# Run this cell if you're on Colab

import os
import sys

# Check if we're in Colab
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    print("🔵 Running on Google Colab")

    # Clone the repository if not already cloned
    if not os.path.exists('/content/ContentModAPI'):
        print("📥 Cloning repository...")
        !git clone https://github.com/YOUR_GITHUB_USERNAME/ContentModAPI.git /content/ContentModAPI
        print("✅ Repository cloned!")
    else:
        print("✅ Repository already cloned!")

    # Change to project directory
    os.chdir('/content/ContentModAPI')
    print(f"📁 Working directory: {os.getcwd()}")

    # Install dependencies
    print("\n📦 Installing dependencies...")
    !pip install -q transformers datasets torch scikit-learn pandas numpy matplotlib seaborn
    print("✅ Dependencies installed!")
else:
    print("🟢 Running locally")

# Check GPU
import torch
print(f"\n🚀 GPU available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"   GPU device: {torch.cuda.get_device_name(0)}")
else:
    print("   ⚠️  No GPU detected. Training will be slow.")
    print("   💡 In Colab: Runtime → Change runtime type → GPU")


In [None]:
# ====================================
# STEP 1: Import Libraries
# ====================================

import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

# Put the current working directory at the front of sys.path (once) so the
# project's `scripts.*` packages can be imported by the cells below.
project_root = os.getcwd()
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

print("✅ All imports loaded!")
print(f"📁 Working from: {os.getcwd()}")


## Step 2: Download Dataset

Download Civil Comments dataset from HuggingFace (~5-10 minutes for full dataset).

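**Options** (set via `sample_size` in the cell below, which currently uses `sample_size=15000` with `balanced=True`):
- `sample_size=10000` - Quick test (recommended for first run)
- `sample_size=None` - Full dataset (~300k samples, best results)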


In [None]:
# ====================================
# STEP 2: Download Civil Comments Dataset
# ====================================

from scripts.data_collection.download_civil_comments import download_civil_comments

# CSV whose presence decides whether the download step is skipped
dataset_path = Path("data/datasets/civil_comments/processed_data.csv")

if dataset_path.exists():
    print("✅ Dataset already downloaded!")
else:
    print("📥 Downloading Civil Comments dataset from HuggingFace...")
    print("This will take ~5-10 minutes depending on sample size...")
    print()

    # Balanced sampling: intended to mix toxic and safe samples (nominally
    # ~60% toxic / 40% safe — the summary printed below shows the real ratio).
    download_civil_comments(
        output_dir="data/datasets",
        sample_size=15000,  # With balanced=True, 15k is plenty
        balanced=True,  # ⭐ Balance toxic vs safe samples
    )
    print("\n✅ Download complete!")

# Summarise what is on disk (skipped if the file is still missing)
if dataset_path.exists():
    df_check = pd.read_csv(dataset_path)
    total_rows = len(df_check)
    print(f"\n📊 Dataset ready: {total_rows:,} samples")

    # A row counts as "toxic" when at least one label column is truthy
    label_cols = ['toxicity', 'hate_speech', 'harassment', 'violence', 'sexual', 'profanity']
    has_any_label = df_check[label_cols].any(axis=1)
    toxic_rows = has_any_label.sum()
    safe_rows = (~has_any_label).sum()
    print(f"   Toxic samples: {toxic_rows:,} ({toxic_rows/total_rows*100:.1f}%)")
    print(f"   Safe samples: {safe_rows:,} ({safe_rows/total_rows*100:.1f}%)")
    # Free the temporary frame; the next cell loads the data again as `df`
    del df_check


## Step 3: Load and Explore Data


In [None]:
# Read the processed Civil Comments CSV into the working DataFrame
df = pd.read_csv("data/datasets/civil_comments/processed_data.csv")

n_rows = len(df)
column_names = list(df.columns)

print(f"📊 Dataset size: {n_rows:,} samples")
print(f"\n📋 Columns: {column_names}")
print(f"\n🔍 First few rows:")
# Last expression in the cell -> rendered as a rich table
df.head()


In [None]:
# Count positive rows per label and chart them
from scripts.preprocessing.label_schema import LABEL_SCHEMA
import matplotlib.pyplot as plt

# Label names come from the project's schema, in its declared order
label_columns = list(LABEL_SCHEMA.keys())
label_counts = df[label_columns].sum()

total_rows = len(df)
print("📊 Label Distribution:")
for label, count in label_counts.items():
    print(f"  {label}: {count:,} ({(count / total_rows) * 100:.2f}%)")

# Bar chart of the per-label counts
fig, ax = plt.subplots(figsize=(10, 6))
label_counts.plot(kind='bar', ax=ax)
ax.set_title('Label Distribution - Civil Comments Dataset')
ax.set_xlabel('Label')
ax.set_ylabel('Count')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


## Training Overview

The remaining steps split the data (Step 4) and then use the project's training script to train BERT on this single dataset (Step 5).


## Step 4: Create Train/Val/Test Splits


In [None]:
# ====================================
# STEP 4: Create Data Splits
# ====================================

from sklearn.model_selection import train_test_split

# 80% train, then the remaining 20% is halved into 10% val / 10% test.
# Fixed random_state keeps the split reproducible across runs.
train_df, temp_df = train_test_split(df, test_size=0.2, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Tag each partition; .assign returns a new frame, so `df` is left untouched
train_df = train_df.assign(split='train')
val_df = val_df.assign(split='val')
test_df = test_df.assign(split='test')

# Recombine into one frame with a fresh 0..n-1 index
final_df = pd.concat([train_df, val_df, test_df], ignore_index=True)

print(f"📈 Data Split:")
for heading, part in (("Train:", train_df), ("Val:", val_df), ("Test:", test_df)):
    share = len(part) / len(final_df) * 100
    print(f"  {heading:<6} {len(part):6,} samples ({share:.1f}%)")

# Persist the tagged dataset for the training step
output_file = "data/datasets/civil_comments_with_splits.csv"
final_df.to_csv(output_file, index=False)
print(f"\n💾 Saved split dataset to: {output_file}")


## Step 5: Train BERT Model

Now we'll train the BERT model on our dataset. This will take ~30-45 minutes with GPU.


In [None]:
# ====================================
# STEP 5: Train BERT Model
# ====================================

from scripts.training.train_bert import MultiLabelTrainer

# Build the project's trainer wrapper around bert-base-uncased
print("🤖 Initializing BERT model...")
trainer = MultiLabelTrainer(model_name="bert-base-uncased", num_labels=6)  # 6-label schema

# Load the model, then report what was loaded
trainer.load_model()
print("✅ Model loaded!")
print(f"   Model: {trainer.model_name}")
print(f"   Labels: {trainer.num_labels}")
param_count = sum(p.numel() for p in trainer.model.parameters())
print(f"   Parameters: {param_count:,}")


In [None]:
# Hand the split-tagged CSV written in Step 4 to the trainer
print("📊 Preparing dataset...")
dataset = trainer.prepare_dataset(output_file)
print("✅ Dataset prepared!")

# Report how many samples landed in each split
print(f"\nDataset splits:")
for split_name, split_data in dataset.items():
    print(f"  {split_name:12} {len(split_data):6,} samples")


In [None]:
# Hyperparameters passed to trainer.train()
config = dict(
    epochs=2,  # Start with 2 epochs for testing
    batch_size=16,  # Reduce to 8 if you run out of GPU memory
    eval_batch_size=32,
    learning_rate=2e-5,
    warmup_steps=500,
    weight_decay=0.01,
    use_wandb=False,  # Set to True if you want experiment tracking
)

# Directory handed to the trainer; later cells load the model from here
output_dir = "artifacts/models/bert-multilabel"

print("🏋️ Starting training...")
print(f"Configuration:")
print("\n".join(f"  {key}: {value}" for key, value in config.items()))
print(f"\nOutput directory: {output_dir}")
print("\n⏰ This will take ~30-45 minutes with GPU...")
print("☕ Grab some coffee!\n")

# Run training; the returned object is used for evaluation in Step 6
trained_model = trainer.train(dataset, output_dir, config)

print("\n✅ Training complete!")

## Step 6: Evaluate Model

Let's see how well our model performed!


In [None]:
# ====================================
# STEP 6: Evaluate Model
# ====================================

print("📊 Evaluating on test set...")
test_results = trained_model.evaluate(dataset['test'])

print("\n📈 Test Results:")
print("="*60)

# Keep only the F1 entries and list them alphabetically by metric name
f1_metrics = {name: score for name, score in test_results.items() if 'f1' in name}
for metric in sorted(f1_metrics):
    value = f1_metrics[metric]
    metric_name = metric.replace('eval_', '')
    stars = "⭐" * int(value * 5)  # 0-5 star visual rating
    print(f"{metric_name:20} {value:.4f} {stars}")

# Write the full metrics dict (not just F1) to output_dir
import json
results_path = Path(output_dir) / "test_results.json"
with open(results_path, 'w') as results_file:
    json.dump(test_results, results_file, indent=2)

print(f"\n💾 Results saved to: {results_path}")


## Step 7: Test the Model

Try the model on some example texts!


In [None]:
# ====================================
# STEP 7: Test the Model
# ====================================

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Reload model and tokenizer from the training output directory
print("📦 Loading trained model...")
model = AutoModelForSequenceClassification.from_pretrained(output_dir)
tokenizer = AutoTokenizer.from_pretrained(output_dir)
model.eval()  # inference mode

# Prefer the GPU when one is present, otherwise stay on CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
model.to(device)
print(f"✅ Model loaded on: {device}")

def predict(text, threshold=0.5):
    """Score a single text against every label in ``LABEL_SCHEMA``.

    Uses the notebook-level ``tokenizer``, ``model``, ``device`` and
    ``LABEL_SCHEMA`` objects defined in earlier cells.

    Args:
        text: Text to classify. Inputs longer than 512 tokens are truncated.
        threshold: A label is flagged when its probability is strictly
            greater than this value. Defaults to 0.5.

    Returns:
        dict mapping each label name to
        ``{'probability': float, 'flagged': bool}``.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    # Tensors must live on the same device as the model
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        # Multi-label: each logit gets its own sigmoid, so scores are independent
        predictions = torch.sigmoid(outputs.logits)

    # One device-to-host transfer for the single input row
    probabilities = predictions[0].tolist()

    # NOTE(review): assumes the model's output columns follow the key order of
    # LABEL_SCHEMA — confirm against the training script.
    results = {}
    for i, label in enumerate(LABEL_SCHEMA.keys()):
        prob = probabilities[i]
        results[label] = {
            'probability': prob,
            'flagged': prob > threshold
        }

    return results

# Hand-written sample inputs: two benign, two insulting
test_texts = [
    "I love this community, everyone is so helpful and kind!",
    "You're a complete idiot and should just shut up",
    "I disagree with your opinion but respect your viewpoint",
    "This is absolute garbage and you're an incompetent moron",
]

print("\n🧪 Testing model on example texts:\n")
print("="*70)

for i, text in enumerate(test_texts, start=1):
    print(f"\n📝 Example {i}:")
    print(f"   Text: \"{text}\"")

    # Keep only the labels the model flagged, with their probabilities
    flagged = {
        label: info['probability']
        for label, info in predict(text).items()
        if info['flagged']
    }

    if not flagged:
        print(f"   ✅ SAFE (no flags)")
        continue

    print(f"   ⚠️  FLAGGED: {', '.join(flagged)}")
    for label, probability in flagged.items():
        print(f"      - {label}: {probability:.3f}")

print("\n" + "="*70)


In [None]:
# For full training, use the command line:
# python scripts/training/train_bert.py --data-path data/datasets/civil_comments/processed_data.csv

# Closing message, printed one entry per line
closing_lines = [
    "✅ Training simplified!",
    "\nTo train the model, run:",
    "  python scripts/training/train_bert.py \\",
    "    --data-path data/datasets/civil_comments/processed_data.csv \\",
    "    --epochs 3 \\",
    "    --batch-size 16",
    "\nThis will train a BERT model on the Civil Comments dataset.",
]
for line in closing_lines:
    print(line)
