# Feature Analysis: Universal Features

This notebook analyzes which SAE features activate for every prompt across different categories of input.

In [1]:
import json
import torch
import os
import numpy as np
import pandas as pd
from pathlib import Path
from typing import List, Dict, Tuple
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import hf_hub_download
from dictionary_learning.utils import load_dictionary
from tqdm.auto import tqdm

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


## Configs

In [37]:
# =============================================================================
# MODEL SELECTION - Change this to switch between models
# =============================================================================
MODEL_TYPE = "llama"  # Options: "qwen" or "llama"
TOKEN_TYPE = "asst"  # Options: "asst", "newline", "endheader" (endheader only for llama)
SAE_LAYER = 15
SAE_TRAINER = 1

# =============================================================================
# FEATURE DASHBOARD URL - Global variable for links
# =============================================================================
FEATURE_DASHBOARD_BASE_URL = "https://completely-touched-platypus.ngrok-free.app/"

# =============================================================================
# AUTO-CONFIGURED SETTINGS BASED ON MODEL TYPE
# =============================================================================
if MODEL_TYPE == "qwen":
    MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
    SAE_RELEASE = "andyrdt/saes-qwen2.5-7b-instruct"
    ASSISTANT_HEADER = "<|im_start|>assistant"
    TOKEN_OFFSETS = {"asst": -1, "newline": 0}
    SAE_BASE_PATH = "/workspace/sae/qwen-2.5-7b-instruct/saes"
    
elif MODEL_TYPE == "llama":
    MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
    SAE_RELEASE = "andyrdt/saes-llama-3.1-8b-instruct"
    ASSISTANT_HEADER = "<|start_header_id|>assistant<|end_header_id|>"
    TOKEN_OFFSETS = {"asst": -2, "endheader": -1, "newline": 0}
    SAE_BASE_PATH = "/workspace/sae/llama-3.1-8b-instruct/saes"
    
else:
    raise ValueError(f"Unknown MODEL_TYPE: {MODEL_TYPE}. Use 'qwen' or 'llama'")

# Validate token type
if TOKEN_TYPE not in TOKEN_OFFSETS:
    raise ValueError(f"TOKEN_TYPE '{TOKEN_TYPE}' not available for {MODEL_TYPE}. Available: {list(TOKEN_OFFSETS.keys())}")

# =============================================================================
# DERIVED CONFIGURATIONS
# =============================================================================
SAE_CONFIG = {
    "release": SAE_RELEASE,
    "layer": SAE_LAYER,
    "trainer": SAE_TRAINER
}
SAE_PATH = f"{SAE_BASE_PATH}/resid_post_layer_{SAE_LAYER}/trainer_{SAE_TRAINER}"
LAYER_INDEX = SAE_LAYER
TOKEN_OFFSET = TOKEN_OFFSETS[TOKEN_TYPE]

# Data paths
PROMPTS_PATH = "./prompts"

# Output directory with clear naming
OUTPUT_DIR = f"./{MODEL_TYPE}_trainer{SAE_TRAINER}_layer{SAE_LAYER}_{TOKEN_TYPE}"

# Processing parameters
BATCH_SIZE = 8
MAX_LENGTH = 512
TOP_FEATURES = 100

# =============================================================================
# SUMMARY
# =============================================================================
print(f"Configuration Summary:")
print(f"  Model: {MODEL_NAME}")
print(f"  SAE Layer: {SAE_LAYER}, Trainer: {SAE_TRAINER}")
print(f"  Token extraction: {TOKEN_TYPE} (offset: {TOKEN_OFFSET})")
print(f"  Assistant header: {ASSISTANT_HEADER}")
print(f"  Output directory: {OUTPUT_DIR}")
print(f"  SAE Release: {SAE_RELEASE}")
print(f"  Dashboard base URL: {FEATURE_DASHBOARD_BASE_URL}")

Configuration Summary:
  Model: meta-llama/Llama-3.1-8B-Instruct
  SAE Layer: 15, Trainer: 1
  Token extraction: asst (offset: -2)
  Assistant header: <|start_header_id|>assistant<|end_header_id|>
  Output directory: ./llama_trainer1_layer15_asst
  SAE Release: andyrdt/saes-llama-3.1-8b-instruct
  Dashboard base URL: https://completely-touched-platypus.ngrok-free.app/


## Load Data

In [3]:
def load_prompts(filepath: str) -> pd.DataFrame:
    """Load prompts with labels from JSONL file."""
    prompts = []
    labels = []
    with open(filepath, 'r') as f:
        for line in f:
            data = json.loads(line.strip())
            prompts.append(data['content'])
            labels.append(data['label'])
    return pd.DataFrame({'prompt': prompts, 'label': labels})

# Load prompts from multiple .jsonl files in PROMPTS_PATH into one dataframe
prompts_df = pd.DataFrame()
for file in os.listdir(PROMPTS_PATH):
    if file.endswith(".jsonl"):
        df = load_prompts(os.path.join(PROMPTS_PATH, file))
        prompts_df = pd.concat([prompts_df, df])

print(f"Loaded {prompts_df.shape[0]} prompts")

Loaded 140 prompts


## Load Model and SAE

In [4]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

print(f"Tokenizer loaded: {tokenizer.__class__.__name__}")

# Test chat template formatting
test_messages = [{"role": "user", "content": "What's it like to be you?"}]
formatted_test = tokenizer.apply_chat_template(test_messages, tokenize=False, add_generation_prompt=True)
print(f"\nChat template test:")
print(f"Original: What's it like to be you?")
print(f"Formatted: {repr(formatted_test)}")
print(f"Formatted (readable):\n{formatted_test}")

# Test tokenization of assistant header to understand positioning
print(f"\n" + "="*60)
print("ASSISTANT HEADER TOKENIZATION ANALYSIS")
print("="*60)

assistant_tokens = tokenizer.encode(ASSISTANT_HEADER, add_special_tokens=False)
assistant_token_texts = [tokenizer.decode([token]) for token in assistant_tokens]

print(f"Assistant header: {ASSISTANT_HEADER}")
print(f"Number of tokens: {len(assistant_tokens)}")
print(f"Token IDs: {assistant_tokens}")
print(f"Individual tokens: {assistant_token_texts}")

# Test with a full formatted prompt
full_tokens = tokenizer.encode(formatted_test, add_special_tokens=False)
full_token_texts = [tokenizer.decode([token]) for token in full_tokens]

print(f"\nFull prompt tokens: {len(full_tokens)}")
print("All tokens with positions:")
for i, token_text in enumerate(full_token_texts):
    print(f"  {i:2d}: '{token_text}'")

# Find where assistant header appears in full prompt
assistant_start_pos = None
for i in range(len(full_tokens) - len(assistant_tokens) + 1):
    if full_tokens[i:i+len(assistant_tokens)] == assistant_tokens:
        assistant_start_pos = i
        break

if assistant_start_pos is not None:
    assistant_end_pos = assistant_start_pos + len(assistant_tokens) - 1
    print(f"\nAssistant header found at positions {assistant_start_pos} to {assistant_end_pos}")
    print(f"Assistant header tokens: {full_token_texts[assistant_start_pos:assistant_end_pos+1]}")
    
    # Show what the extraction function will actually extract
    extraction_pos = assistant_start_pos + len(assistant_tokens) + TOKEN_OFFSET
    print(f"\nExtraction calculation:")
    print(f"  assistant_start_pos: {assistant_start_pos}")
    print(f"  + len(assistant_tokens): {len(assistant_tokens)}")  
    print(f"  + TOKEN_OFFSET ('{TOKEN_TYPE}'): {TOKEN_OFFSET}")
    print(f"  = extraction_pos: {extraction_pos}")
    
    if 0 <= extraction_pos < len(full_token_texts):
        print(f"✓ Token at extraction position {extraction_pos}: '{full_token_texts[extraction_pos]}'")
    else:
        print(f"❌ Extraction position {extraction_pos} is out of bounds (valid range: 0-{len(full_token_texts)-1})")
else:
    print("❌ Assistant header not found in full prompt")

Tokenizer loaded: PreTrainedTokenizerFast

Chat template test:
Original: What's it like to be you?
Formatted: "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat's it like to be you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
Formatted (readable):
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

What's it like to be you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>



ASSISTANT HEADER TOKENIZATION ANALYSIS
Assistant header: <|start_header_id|>assistant<|end_header_id|>
Number of tokens: 3
Token IDs: [128006, 78191, 128007]
Individual tokens: ['<|start_header_id|>', 'assistant', '<|end_header_id|>']

Full prompt tokens: 43
All tokens with positions:
   0: '<|begin_of_text|>'
   1: '<|start_header_id|>

In [5]:
# Load model
device_map_value = device.index if device.type == 'cuda' and device.index is not None else str(device)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map={"": device_map_value}
)
model.eval()

print(f"Model loaded: {model.__class__.__name__}")
print(f"Model device: {next(model.parameters()).device}")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Model loaded: LlamaForCausalLM
Model device: cuda:0


In [25]:
# Load SAE
ae_file_path = os.path.join(SAE_PATH, "ae.pt")
config_file_path = os.path.join(SAE_PATH, "config.json")

if os.path.exists(ae_file_path) and os.path.exists(config_file_path):
    print(f"✓ Found SAE files at: {os.path.dirname(ae_file_path)}")
else:
    print(f"SAE not found locally, downloading from {SAE_RELEASE}...")
    os.makedirs(os.path.dirname(ae_file_path), exist_ok=True)
    sae_path = f"resid_post_layer_{SAE_LAYER}/trainer_{SAE_TRAINER}"
    local_dir = SAE_BASE_PATH
    ae_file = hf_hub_download(repo_id=SAE_RELEASE, filename=f"{sae_path}/ae.pt", local_dir=local_dir)
    config_file = hf_hub_download(repo_id=SAE_RELEASE, filename=f"{sae_path}/config.json", local_dir=local_dir)

sae, _ = load_dictionary(SAE_PATH, device=device)
sae.eval()

print(f"SAE loaded with {sae.dict_size} features")
print(f"SAE device: {next(sae.parameters()).device}")

SAE not found locally, downloading from andyrdt/saes-llama-3.1-8b-instruct...


ae.pt:   0%|          | 0.00/4.30G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/951 [00:00<?, ?B/s]

SAE loaded with 131072 features
SAE device: cuda:0


## Activation Extraction Functions

In [38]:
class StopForward(Exception):
    """Exception to stop forward pass after target layer."""
    pass

@torch.no_grad()
def extract_activations(prompts: List[str], layer_idx: int) -> torch.Tensor:
    """Extract activations from specified layer for given prompts."""
    all_activations = []
    
    # Get target layer
    target_layer = model.model.layers[layer_idx]
    
    # Process in batches
    for i in tqdm(range(0, len(prompts), BATCH_SIZE), desc="Processing batches"):
        batch_prompts = prompts[i:i+BATCH_SIZE]
        
        # Format prompts as chat messages
        formatted_prompts = []
        for prompt in batch_prompts:
            messages = [{"role": "user", "content": prompt}]
            formatted_prompt = tokenizer.apply_chat_template(
                messages, 
                tokenize=False, 
                add_generation_prompt=True
            )
            formatted_prompts.append(formatted_prompt)
        
        # Tokenize batch
        batch_inputs = tokenizer(
            formatted_prompts,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=MAX_LENGTH
        )
        
        # Move to device
        batch_inputs = {k: v.to(device) for k, v in batch_inputs.items()}
        
        # Hook to capture activations
        activations = None
        
        def hook_fn(module, input, output):
            nonlocal activations
            # Output is tuple, take first element (hidden states)
            activations = output[0] if isinstance(output, tuple) else output
            raise StopForward()
        
        # Register hook
        handle = target_layer.register_forward_hook(hook_fn)
        
        try:
            # Forward pass (will be stopped by hook)
            _ = model(**batch_inputs)
        except StopForward:
            pass
        finally:
            handle.remove()
        
        # Extract assistant token positions
        batch_activations = []
        for j, formatted_prompt in enumerate(formatted_prompts):
            # Get attention mask for this item
            attention_mask = batch_inputs["attention_mask"][j]
            
            # Find assistant header position
            assistant_tokens = tokenizer.encode(ASSISTANT_HEADER, add_special_tokens=False)
            input_ids = batch_inputs["input_ids"][j]
            
            # Find where assistant section starts
            assistant_pos = None
            for k in range(len(input_ids) - len(assistant_tokens) + 1):
                if torch.equal(input_ids[k:k+len(assistant_tokens)], torch.tensor(assistant_tokens).to(device)):
                    assistant_pos = k + len(assistant_tokens) + TOKEN_OFFSET
                    break
            
            if assistant_pos is None:
                # Fallback to last non-padding token
                assistant_pos = attention_mask.sum().item() - 1
            
            # Ensure position is within bounds
            max_pos = attention_mask.sum().item() - 1
            assistant_pos = min(assistant_pos, max_pos)
            assistant_pos = max(assistant_pos, 0)
            
            # Extract activation at assistant position
            assistant_activation = activations[j, assistant_pos, :]  # [hidden_dim]
            batch_activations.append(assistant_activation.cpu())
        
        all_activations.extend(batch_activations)
    
    return torch.stack(all_activations, dim=0)

print("Activation extraction functions defined")

Activation extraction functions defined


## Extract Activations

In [39]:
# Extract activations for prompts
print("Extracting activations for prompts...")
activations = extract_activations(prompts_df['prompt'], LAYER_INDEX)
print(f"Activations shape: {activations.shape}")




Extracting activations for prompts...


Processing batches:   0%|          | 0/18 [00:00<?, ?it/s]

Activations shape: torch.Size([140, 4096])


## Apply SAE to Get Feature Activations

In [40]:
@torch.no_grad()
def get_sae_features(activations: torch.Tensor) -> torch.Tensor:
    """Apply SAE to get feature activations."""
    activations = activations.to(device)
    
    # Process in batches to avoid memory issues
    feature_activations = []
    
    for i in range(0, activations.shape[0], BATCH_SIZE):
        batch = activations[i:i+BATCH_SIZE]
        features = sae.encode(batch)  # [batch, num_features]
        feature_activations.append(features.cpu())
    
    return torch.cat(feature_activations, dim=0)

# Get SAE feature activations
print("Computing SAE features for all prompts...")
features = get_sae_features(activations)
print(f"Features shape: {features.shape}")


Computing SAE features for all prompts...
Features shape: torch.Size([140, 131072])


In [41]:
@torch.no_grad()
def find_universally_active_features(features: torch.Tensor, activation_threshold: float = 0.01) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Find features that are active (above threshold) for every single prompt.
    
    Args:
        features: Feature activations tensor of shape [num_prompts, num_features]
        activation_threshold: Minimum activation value to consider a feature "active"
    
    Returns:
        universal_features: Indices of features that are active for all prompts
        universal_activations: Mean activation values for universal features
    """
    # Check which features are active (above threshold) for each prompt
    active_features = features > activation_threshold  # [num_prompts, num_features]
    
    # Find features that are active for ALL prompts
    universal_mask = torch.all(active_features, dim=0)  # [num_features]
    universal_features = torch.where(universal_mask)[0]  # Indices of universal features
    
    # Get mean activation values for universal features
    universal_activations = features[:, universal_features].mean(dim=0)
    
    return universal_features, universal_activations

# Find universally active features
print("Finding features that activate for every single prompt...")
universal_features, universal_activations = find_universally_active_features(features)

print(f"Found {len(universal_features)} features that are active for all {features.shape[0]} prompts")
print(f"Universal features (indices): {universal_features.tolist()}")
print(f"Mean activation values: {universal_activations.tolist()}")

# Create a summary dataframe
if len(universal_features) > 0:
    universal_df = pd.DataFrame({
        'feature_index': universal_features.tolist(),
        'mean_activation': universal_activations.tolist()
    })
    
    # Sort by mean activation (descending)
    universal_df = universal_df.sort_values('mean_activation', ascending=False)
    
    print("\nUniversal features summary (sorted by mean activation):")
    print(universal_df.to_string(index=False))
    
    # Show distribution of activations for top universal features
    print("\nDetailed activations for top 5 universal features:")
    for i, (feature_idx, mean_act) in enumerate(zip(universal_df['feature_index'].head(5), 
                                                   universal_df['mean_activation'].head(5))):
        feature_activations = features[:, feature_idx]
        print(f"Feature {feature_idx}: mean={mean_act:.4f}, min={feature_activations.min():.4f}, max={feature_activations.max():.4f}")
        print(f"  Activations: {feature_activations.tolist()}")
        print()
else:
    print("No features are active for all prompts with the current threshold.")
    

Finding features that activate for every single prompt...
Found 2 features that are active for all 140 prompts
Universal features (indices): [84223, 128740]
Mean activation values: [1.8158377408981323, 3.660419464111328]

Universal features summary (sorted by mean activation):
 feature_index  mean_activation
        128740         3.660419
         84223         1.815838

Detailed activations for top 5 universal features:
Feature 128740: mean=3.6604, min=2.5141, max=4.5422
  Activations: [4.178600788116455, 3.9269800186157227, 4.542189121246338, 4.380805969238281, 4.497568607330322, 4.102511405944824, 4.526898384094238, 3.919839859008789, 4.296817779541016, 4.348295211791992, 3.8865933418273926, 4.36292839050293, 4.065620422363281, 3.9093990325927734, 4.011754989624023, 4.290411949157715, 4.210723400115967, 4.361583709716797, 4.098278522491455, 4.20365047454834, 3.6394309997558594, 3.058962345123291, 3.7149276733398438, 3.9324755668640137, 3.44687557220459, 3.8433456420898438, 4.159667

In [42]:
# Save results to CSV
results_dir = "./results/general"
os.makedirs(results_dir, exist_ok=True)

# Create filename with specified format
csv_filename = f"{MODEL_TYPE}_trainer1_layer{SAE_LAYER}_{TOKEN_TYPE}.csv"
csv_path = os.path.join(results_dir, csv_filename)

# Prepare data for CSV - now including label-specific universal features
all_results = []

# 1. Find universal features across ALL prompts
print("Finding universal features across all prompts...")
universal_features_all, universal_activations_all = find_universally_active_features(features)

# Store global universal features for exclusion from per-label results
global_universal_features = set()

if len(universal_features_all) > 0:
    global_universal_features = set(universal_features_all.tolist())
    for feature_idx in universal_features_all:
        feature_activations = features[:, feature_idx]
        feature_id = feature_idx.item()
        all_results.append({
            'feature_id': feature_id,
            'activation_mean': feature_activations.mean().item(),
            'activation_max': feature_activations.max().item(),
            'activation_min': feature_activations.min().item(),
            'label': 'universal',
            'chat_desc': '',
            'pt_desc': '',
            'type': '',
            'link': f"{FEATURE_DASHBOARD_BASE_URL}?model={MODEL_TYPE}&layer={SAE_LAYER}&trainer=1&fids={feature_id}"
        })
elif len(universal_features_low) > 0:
    # Use lower threshold results if original threshold found nothing
    global_universal_features = set(universal_features_low.tolist())
    for feature_idx in universal_features_low:
        feature_activations = features[:, feature_idx]
        feature_id = feature_idx.item()
        all_results.append({
            'feature_id': feature_id,
            'activation_mean': feature_activations.mean().item(),
            'activation_max': feature_activations.max().item(),
            'activation_min': feature_activations.min().item(),
            'label': 'universal',
            'chat_desc': '',
            'pt_desc': '',
            'type': '',
            'link': f"{FEATURE_DASHBOARD_BASE_URL}?model={MODEL_TYPE}&layer={SAE_LAYER}&trainer=1&fids={feature_id}"
        })
    print("Note: Using results from lower threshold (0.001) for universal features")

# 2. Find universal features within each label category
print("\nFinding universal features within each label category...")
unique_labels = prompts_df['label'].unique()

for label in unique_labels:
    print(f"  Processing label: {label}")
    # Get indices for this label
    label_indices = prompts_df[prompts_df['label'] == label].index.tolist()
    
    if len(label_indices) > 1:  # Only analyze if there are multiple prompts
        # Get features for this label
        label_features = features[label_indices]
        
        # Find universal features within this label
        label_universal_features, label_universal_activations = find_universally_active_features(label_features)
        
        if len(label_universal_features) == 0:
            # Try with lower threshold
            label_universal_features, label_universal_activations = find_universally_active_features(label_features, activation_threshold=0.001)
            if len(label_universal_features) > 0:
                print(f"    Found {len(label_universal_features)} universal features for '{label}' (threshold 0.001)")
            else:
                print(f"    No universal features found for '{label}' even with lower threshold")
        else:
            print(f"    Found {len(label_universal_features)} universal features for '{label}'")
        
        # Add to results, but exclude global universal features
        label_specific_count = 0
        for feature_idx in label_universal_features:
            feature_id = feature_idx.item()
            # Skip if this feature is already in global universal features
            if feature_id not in global_universal_features:
                feature_activations = label_features[:, feature_idx]
                all_results.append({
                    'feature_id': feature_id,
                    'activation_mean': feature_activations.mean().item(),
                    'activation_max': feature_activations.max().item(),
                    'activation_min': feature_activations.min().item(),
                    'label': label,
                    'chat_desc': '',
                    'pt_desc': '',
                    'type': '',
                    'link': f"{FEATURE_DASHBOARD_BASE_URL}?model={MODEL_TYPE}&layer={SAE_LAYER}&trainer=1&fids={feature_id}"
                })
                label_specific_count += 1
        
        if label_specific_count < len(label_universal_features):
            excluded_count = len(label_universal_features) - label_specific_count
            print(f"    Excluded {excluded_count} features that are already global universal features")
        
    else:
        print(f"    Skipping '{label}' - only {len(label_indices)} prompt(s)")

# Create DataFrame and sort with universal features on top
if all_results:
    results_df = pd.DataFrame(all_results)
    # Create a custom sort key to put 'universal' first, then alphabetical
    results_df['sort_key'] = results_df['label'].apply(lambda x: '0' if x == 'universal' else '1' + x)
    results_df = results_df.sort_values(['sort_key', 'activation_mean'], ascending=[True, False])
    # Drop the temporary sort key column
    results_df = results_df.drop('sort_key', axis=1)
    
    print(f"\nTotal universal features found: {len(results_df)}")
    print("Summary by label:")
    label_counts = results_df['label'].value_counts()
    for label, count in label_counts.items():
        print(f"  {label}: {count} features")
    
else:
    # No universal features found at all
    results_df = pd.DataFrame(columns=['feature_id', 'activation_mean', 'activation_max', 'activation_min', 'label', 'chat_desc', 'pt_desc', 'type', 'link'])
    print("Warning: No universal features found")

# Save to CSV
results_df.to_csv(csv_path, index=False)

print(f"\nResults saved to: {csv_path}")
print(f"Number of universal features saved: {len(results_df)}")

if len(results_df) > 0:
    print(f"\nPreview of saved data:")
    print(results_df.head(10).to_string(index=False))
    
    # Show sample link
    if len(results_df) > 0:
        sample_link = results_df.iloc[0]['link']
        print(f"\nSample dashboard link: {sample_link}")

Finding universal features across all prompts...

Finding universal features within each label category...
  Processing label: code
    Found 6 universal features for 'code'
    Excluded 2 features that are already global universal features
  Processing label: math
    Found 6 universal features for 'math'
    Excluded 2 features that are already global universal features
  Processing label: medical
    Found 6 universal features for 'medical'
    Excluded 2 features that are already global universal features
  Processing label: analysis
    Found 6 universal features for 'analysis'
    Excluded 2 features that are already global universal features
  Processing label: therapy
    Found 6 universal features for 'therapy'
    Excluded 2 features that are already global universal features
  Processing label: creative
    Found 6 universal features for 'creative'
    Excluded 2 features that are already global universal features
  Processing label: trivia
    Found 6 universal features for