In [1]:
# EarShot Project Starter Notebook
# ---------------------------------
# Sections: Data Loading, Preprocessing, Embedding, Vector DB Creation

import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
import chromadb
from chromadb.config import Settings
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch.nn.functional as F
import yake

In [2]:
# -------------------------------
# Config
# -------------------------------
DATASET_NAME = "SALT-NLP/silent_signals"
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
VECTOR_DB_PATH = "./vector_store"
CLASSIFIER_MODEL = "tomh/toxigen_hatebert"
# unitary/toxic-bert didnt seem to work that well

In [3]:
# -------------------------------
# Load Dataset
# -------------------------------
print("Loading Silent Signals dataset...")
dataset = load_dataset(DATASET_NAME, split='train')
df = pd.DataFrame(dataset)
# Clean Dataset
# drop the party, chamber, and speaker columns
df = df.drop(columns=['party', 'chamber', 'speaker'])
# Filter if necessary (e.g., only Reddit posts, or only dog-whistle-labeled)


print(f"Loaded {len(df)} labeled entries.")

Loading Silent Signals dataset...
Loaded 16258 labeled entries.


In [4]:
# -------------------------------
# Sentence Embeddings
# -------------------------------
print("Generating embeddings...")
model = SentenceTransformer(EMBEDDING_MODEL_NAME)
texts = df['content'].tolist()  # Adjust column name if needed
embeddings = model.encode(texts, batch_size=32, show_progress_bar=True, convert_to_tensor=True)

Generating embeddings...


Batches:   0%|          | 0/509 [00:00<?, ?it/s]

In [5]:
print("Creating ChromaDB vector store...")
chroma_client = chromadb.Client(Settings(persist_directory=VECTOR_DB_PATH))
# reset the collection if it exists
try:
    chroma_client.delete_collection("earshot")
except Exception as e:
    print("Collection deletion failed or doesn't exist yet:", e)
collection = chroma_client.create_collection("earshot")

# Add embeddings to vector DB (skip numpy by converting to list via torch.tolist())
for i, (text, emb) in enumerate(zip(texts, embeddings)):
    collection.add(
        documents=[text],
        embeddings=[emb.cpu().tolist()],
        ids=[f"doc_{i}"]
    )

print(f"Stored {len(texts)} vectors in ChromaDB.")

Creating ChromaDB vector store...
Collection deletion failed or doesn't exist yet: Collection [earshot] does not exists
Stored 16258 vectors in ChromaDB.


In [6]:
# -------------------------------
# Nearest Neighbor Retrieval
# -------------------------------
def get_neighbors(query_text, k=5):
    query_emb = model.encode([query_text], convert_to_tensor=True)[0].cpu().tolist()
    results = collection.query(query_embeddings=[query_emb], n_results=k)
    return results

# Example usage
example_query = texts[12]
neighbors = get_neighbors(example_query)
print("Nearest Neighbors for:", example_query)
for doc in neighbors['documents'][0]:
    print(" -", doc)

Nearest Neighbors for: I'm buying a copy of this book and gifting it to every centipede I know in BASED Tuscola County for every Milo hater in this thread. It looks like I'm already up to quite a few copies.
 - I'm buying a copy of this book and gifting it to every centipede I know in BASED Tuscola County for every Milo hater in this thread. It looks like I'm already up to quite a few copies.
 - gay centipedes have every reason to be proud today. We have based Milo who is not going to back down in the face of gun violence.
 - I pre-ordered his book hours after it was announced. I urge all other centipedes to support the cause :-)
 - Milo, as every homosexual, is a broken person, a product of a broken family and childhood environment. The result of that brokenness is depravity.... I'm not a fan of milo in any sense of the word.
 - same here ~ *Milo the Fabulous* taught me about **Andrew Breitbart** rather early on during #GamerGate, and I'm so glad he did ❤🎮❤ USA 🏴 🏁 🏳


In [None]:
# -------------------------------
# PREDICT Pipeline: Filter + Keyword Extraction
# -------------------------------

# Load classifier manually to bypass numpy issue
print("Loading custom toxicity classifier model...")
tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_MODEL)
# Add padding token if missing
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Load model and resize its embedding matrix
classifier_model = AutoModelForSequenceClassification.from_pretrained(CLASSIFIER_MODEL)
classifier_model.resize_token_embeddings(len(tokenizer))
classifier_model.eval()

# Initialize YAKE keyword extractor
kw_extractor = yake.KeywordExtractor(top=5, stopwords=None)

def is_toxic(text, threshold=0.5):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = classifier_model(**inputs)
        probs = F.softmax(outputs.logits, dim=-1)[0]
    toxic_score = probs[1].item()  # assuming label 1 = toxic
    return toxic_score >= threshold

def extract_keywords(text):
    return [kw for kw, score in kw_extractor.extract_keywords(text)]

# Run PREDICT on neighbors of example query
print("Running PREDICT on nearest neighbors...")
example_query = texts[12]
neighbors = get_neighbors(example_query, k=5)
predict_results = []

print(f"Retrieved {len(neighbors['documents'][0])} neighbor posts.")

def get_toxic_score(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = classifier_model(**inputs)
        probs = F.softmax(outputs.logits, dim=-1)[0]
    return probs[1].item()

for i, post in enumerate(neighbors['documents'][0]):
    score_raw = get_toxic_score(post)
    print(f"Neighbor {i+1} | Toxicity Score: {score_raw:.3f}")
    if score_raw >= 0.3:
        print(f"Neighbor {i+1} | Toxic: True")
        keywords = extract_keywords(post)
        predict_results.append({
            'post': post,
            'keywords': keywords
        })
    else:
        print(f"Neighbor {i+1} | Toxic: False")

print(f"PREDICT returned {len(predict_results)} toxic posts with keywords.")
for result in predict_results:
    print("Post:", result['post'])
    print("Extracted Keywords:", result['keywords'])

Loading custom toxicity classifier model...


The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Running PREDICT on nearest neighbors...
Retrieved 5 neighbor posts.
Neighbor 1 | Toxicity Score: 0.970
Neighbor 1 | Toxic: True
Neighbor 2 | Toxicity Score: 0.710
Neighbor 2 | Toxic: True
Neighbor 3 | Toxicity Score: 0.008
Neighbor 3 | Toxic: False
Neighbor 4 | Toxicity Score: 0.155
Neighbor 4 | Toxic: False
Neighbor 5 | Toxicity Score: 0.030
Neighbor 5 | Toxic: False
PREDICT returned 2 toxic posts with keywords.
Post: I'm buying a copy of this book and gifting it to every centipede I know in BASED Tuscola County for every Milo hater in this thread. It looks like I'm already up to quite a few copies.
Extracted Keywords: ['BASED Tuscola County', 'BASED Tuscola', 'Tuscola County', 'Milo hater', 'buying a copy']
Post: gay centipedes have every reason to be proud today. We have based Milo who is not going to back down in the face of gun violence.
Extracted Keywords: ['gay centipedes', 'proud today', 'gay', 'today', 'based Milo']


In [None]:
# -------------------------------
# DIRECT Pipeline: LLM Prompting for Dog Whistle Detection
# -------------------------------
import openai
import os
from dotenv import load_dotenv
load_dotenv()
client = openai.OpenAI()

def call_llm_direct(post):
    prompt = f"""
You are a content moderation assistant. Identify whether the following post contains any covert hate speech or dog whistles.

Return your response in the following JSON format:
{{
  "is_dog_whistle": true/false,
  "dog_whistle_terms": ["term1", "term2", ...],
  "explanation": "brief explanation of detection or why it's not a dog whistle"
}}

Post: "{post}"
"""

    response = client.chat.completions.create(
        model="gpt-4.1-nano",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )
    return response.choices[0].message.content

# Run DIRECT on neighbors of example query
print("\nRunning DIRECT on nearest neighbors...")
direct_results = []

for post in neighbors['documents'][0]:
    try:
        analysis = call_llm_direct(post)
        direct_results.append({
            'post': post,
            'llm_response': analysis
        })
    except Exception as e:
        print("Error calling LLM:", e)

# Show results
for result in direct_results:
    print("\nPost:", result['post'])
    print("LLM Response:", result['llm_response'])


Running DIRECT on nearest neighbors...

Post: I'm buying a copy of this book and gifting it to every centipede I know in BASED Tuscola County for every Milo hater in this thread. It looks like I'm already up to quite a few copies.
LLM Response: ```json
{
  "is_dog_whistle": true,
  "dog_whistle_terms": ["centipede", "BASED", "Milo"],
  "explanation": "The term 'centipede' is often used in alt-right circles to refer to supporters of Donald Trump, originating from a meme. 'BASED' is slang used in online communities to describe someone who is seen as being true to themselves and not caring about how others perceive them, often used in a context that aligns with far-right ideologies. 'Milo' likely refers to Milo Yiannopoulos, a controversial figure known for his far-right views. The combination of these terms suggests an alignment with alt-right or far-right ideologies, which can be considered a form of covert hate speech or dog whistle."
}
```

Post: gay centipedes have every reason to b