In [15]:
# EarShot Project Starter Notebook
# ---------------------------------
# Sections: Data Loading, Preprocessing, Embedding, Vector DB Creation

import os

import chromadb
import openai
import pandas as pd
import torch
import torch.nn.functional as F
import yake
from chromadb.config import Settings
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [16]:
# -------------------------------
# Config
# -------------------------------
# Name handed to load_dataset() when the corpus is fetched.
DATASET_NAME = "SALT-NLP/silent_signals"

# Name handed to SentenceTransformer() for the embedding step.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# On-disk location handed to the ChromaDB client.
VECTOR_DB_PATH = "./vector_store"

# Name handed to AutoTokenizer / AutoModelForSequenceClassification.
# (unitary/toxic-bert was also tried but didn't seem to work that well.)
CLASSIFIER_MODEL = "cardiffnlp/twitter-roberta-base-hate"

In [17]:
# -------------------------------
# Load Dataset
# -------------------------------
print("Loading Silent Signals dataset...")
dataset = load_dataset(DATASET_NAME, split='train')

# Clean Dataset
# Materialize the train split as a DataFrame and discard the party, chamber,
# and speaker columns in a single expression.
df = pd.DataFrame(dataset).drop(columns=['party', 'chamber', 'speaker'])

# Filter if necessary (e.g., only Reddit posts, or only dog-whistle-labeled)

print(f"Loaded {len(df)} labeled entries.")

Loading Silent Signals dataset...
Loaded 16258 labeled entries.


In [18]:
# -------------------------------
# Sentence Embeddings
# -------------------------------
# Encodes every entry of df['content'] (adjust the column name if needed).
# `model`, `texts` and `embeddings` are reused by the vector-store and
# retrieval cells further down.
print("Generating embeddings...")
model = SentenceTransformer(EMBEDDING_MODEL_NAME)
texts = df['content'].to_list()
embeddings = model.encode(
    texts,
    batch_size=32,
    show_progress_bar=True,
    convert_to_tensor=True,
)

Generating embeddings...


Batches:   0%|          | 0/509 [00:00<?, ?it/s]

In [5]:
print("Creating ChromaDB vector store...")
# Use the persistent client so the store is actually written to VECTOR_DB_PATH.
# NOTE(review): on chromadb >= 0.4, chromadb.Client(Settings(persist_directory=...))
# without is_persistent=True builds an in-memory store and the directory is
# ignored - confirm against the installed chromadb version.
chroma_client = chromadb.PersistentClient(path=VECTOR_DB_PATH)

# Reset the collection if it exists. Deleting a missing collection raises
# (the exception type differs between chromadb versions), so this stays a
# best-effort step that only reports the failure.
try:
    chroma_client.delete_collection("earshot")
except Exception as e:
    print("Collection deletion failed or doesn't exist yet:", e)
collection = chroma_client.create_collection("earshot")

# zip()-style pairing would silently drop rows on a length mismatch; fail loudly instead.
assert len(texts) == len(embeddings), "texts and embeddings must have the same length"

# Convert the embedding tensor to plain Python lists once (skips numpy), then
# insert in batches instead of issuing one add() call per document.
# NOTE(review): chromadb caps the number of records per add() call; 1000 is
# assumed to be safely below that cap - confirm for the installed version.
CHROMA_ADD_BATCH_SIZE = 1000
doc_ids = [f"doc_{i}" for i in range(len(texts))]
doc_vectors = embeddings.cpu().tolist()
for start in range(0, len(texts), CHROMA_ADD_BATCH_SIZE):
    stop = start + CHROMA_ADD_BATCH_SIZE
    collection.add(
        documents=texts[start:stop],
        embeddings=doc_vectors[start:stop],
        ids=doc_ids[start:stop],
    )

# Report what the collection actually holds rather than what we intended to add.
print(f"Stored {collection.count()} vectors in ChromaDB.")

Creating ChromaDB vector store...
Collection deletion failed or doesn't exist yet: Collection [earshot] does not exists
Stored 16258 vectors in ChromaDB.


In [20]:
# -------------------------------
# Nearest Neighbor Retrieval
# -------------------------------
def get_neighbors(query_text, k=5):
    """Look up the ``k`` stored documents closest to ``query_text``.

    The query is embedded with the notebook-level ``model``, converted to a
    plain list, and sent to the notebook-level ``collection``. The value
    returned by ``collection.query`` is passed back unchanged.
    """
    encoded_batch = model.encode([query_text], convert_to_tensor=True)
    query_vector = encoded_batch[0].cpu().tolist()
    return collection.query(query_embeddings=[query_vector], n_results=k)

# Example usage: query with the first post of the corpus and list what comes back.
example_query = texts[0]
neighbors = get_neighbors(example_query)
print("Nearest Neighbors for:", example_query)
# 'documents' holds one list per query; only one query was sent, so take index 0.
neighbor_docs = neighbors['documents'][0]
for neighbor_doc in neighbor_docs:
    print(" -", neighbor_doc)

Nearest Neighbors for: Far left SJW/Woke activists use a variety of words with double meanings intentionally, including privilege, inclusion, comfort, and safety. Equity is no different. In the past, equity meant fairness or debt paid off, but under Theory, it brings neo-marxist intent.
 - Far left SJW/Woke activists use a variety of words with double meanings intentionally, including privilege, inclusion, comfort, and safety. Equity is no different. In the past, equity meant fairness or debt paid off, but under Theory, it brings neo-marxist intent.
 - It is time for a revolution but not the lefts version of equity, CRT, socialism, etc, one where people learn the games of the rich to make yourself a weapon of equality.
 - There were always two alt rights. There was Spencer and the Alt-Right of ethnonationalism and there was the catch-all alternative right who were conservatives with ideas that put them outside of the mainstream. Spencer coined the term in 2008.
 - Privilege as used by 

In [21]:
# -------------------------------
# PREDICT Pipeline: Filter + Keyword Extraction
# -------------------------------

# Load the tokenizer and classifier by hand to bypass the numpy issue.
print("Loading custom toxicity classifier model...")
tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_MODEL)
# eval() returns the module itself, so it can be chained onto the load call.
classifier_model = AutoModelForSequenceClassification.from_pretrained(CLASSIFIER_MODEL).eval()

# YAKE keyword extractor configured to return the top 5 keywords per text.
kw_extractor = yake.KeywordExtractor(stopwords=None, top=5)

def is_toxic(text, threshold=0.5):
    """Return True when the classifier's class-1 probability for ``text`` reaches ``threshold``.

    Scoring is delegated to ``get_toxic_score`` (defined later in this cell and
    resolved at call time) so the tokenize/softmax logic exists in one place
    instead of being duplicated here.

    Parameters
    ----------
    text : str
        Post to classify.
    threshold : float, default 0.5
        Minimum class-1 probability for the post to count as toxic.

    Returns
    -------
    bool
        ``True`` if the score is greater than or equal to ``threshold``.
    """
    # assuming label 1 = toxic (same assumption get_toxic_score makes)
    return get_toxic_score(text) >= threshold

def extract_keywords(text):
    """Return only the keyword strings that ``kw_extractor`` finds in ``text``.

    ``kw_extractor.extract_keywords`` yields ``(keyword, score)`` pairs; the
    scores are discarded and the keywords are kept in the order produced.
    """
    keywords = []
    for keyword, _score in kw_extractor.extract_keywords(text):
        keywords.append(keyword)
    return keywords
# Run PREDICT on neighbors of example query
print("Running PREDICT on nearest neighbors...")
predict_results = []  # filled by the scoring loop below
example_query = texts[0]
neighbors = get_neighbors(example_query, k=5)

retrieved_count = len(neighbors['documents'][0])
print(f"Retrieved {retrieved_count} neighbor posts.")

def get_toxic_score(text, max_length=512):
    """Return the classifier's probability for class index 1 on ``text``.

    Parameters
    ----------
    text : str
        Post to score.
    max_length : int, default 512
        Token limit applied when truncating. The tokenizer reports no
        predefined maximum length (see the "Default to no truncation" warning
        in this cell's recorded output), so ``truncation=True`` alone is a
        no-op and over-long posts would be fed to the model untruncated.
        NOTE(review): 512 is presumed to be the sequence limit of this
        RoBERTa-base checkpoint - confirm against the model config.

    Returns
    -------
    float
        Softmax probability of class 1 (assumed to be the toxic/hate label).
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=max_length,
    )
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = classifier_model(**inputs).logits
    # A single text was passed in, so take row 0 of the batch.
    probs = F.softmax(logits, dim=-1)[0]
    return probs[1].item()

# Score every retrieved neighbor. Posts scoring at least 0.3 are recorded
# together with their extracted keywords; the rest are only reported.
for rank, neighbor_post in enumerate(neighbors['documents'][0], start=1):
    toxicity = get_toxic_score(neighbor_post)
    flagged = toxicity >= 0.3
    print(f"Neighbor {rank} | Toxicity Score: {toxicity:.3f}")
    print(f"Neighbor {rank} | Toxic: {flagged}")
    if not flagged:
        continue
    predict_results.append({
        'post': neighbor_post,
        'keywords': extract_keywords(neighbor_post),
    })

# Summary of everything that passed the threshold.
print(f"PREDICT returned {len(predict_results)} toxic posts with keywords.")
for entry in predict_results:
    print("Post:", entry['post'])
    print("Extracted Keywords:", entry['keywords'])

Loading custom toxicity classifier model...


config.json:   0%|          | 0.00/700 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Running PREDICT on nearest neighbors...
Retrieved 5 neighbor posts.
Neighbor 1 | Toxicity Score: 0.085
Neighbor 1 | Toxic: False


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Neighbor 2 | Toxicity Score: 0.084
Neighbor 2 | Toxic: False
Neighbor 3 | Toxicity Score: 0.038
Neighbor 3 | Toxic: False
Neighbor 4 | Toxicity Score: 0.056
Neighbor 4 | Toxic: False
Neighbor 5 | Toxicity Score: 0.085
Neighbor 5 | Toxic: False
PREDICT returned 0 toxic posts with keywords.


In [None]:
# -------------------------------
# DIRECT Pipeline: LLM Prompting for Dog Whistle Detection
# -------------------------------

# Set OpenAI API Key from environment
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    # Surface the misconfiguration here instead of as one error per post later.
    print("Warning: OPENAI_API_KEY is not set; DIRECT LLM calls will fail.")

# Chat model used by call_llm_direct. It was referenced but never defined in
# this notebook, so it is set here and can be overridden via the environment.
# NOTE(review): the default below is a placeholder - confirm the intended model.
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")

def call_llm_direct(post):
    """Ask the OpenAI chat model whether ``post`` contains covert hate speech or dog whistles.

    Parameters
    ----------
    post : str
        Post text. It is interpolated verbatim into the prompt, so it is
        untrusted input from the model's point of view.

    Returns
    -------
    str
        The message content of the first completion choice. The prompt asks
        for a JSON object, but the reply is returned as-is and is not parsed
        or validated here.

    Notes
    -----
    Uses the module-level ``OPENAI_MODEL`` and the API key configured on the
    ``openai`` module. Works with both openai>=1.0 (``openai.chat.completions``)
    and the legacy 0.x SDK (``openai.ChatCompletion``), which was removed in 1.0.
    """
    prompt = f"""
You are a content moderation assistant. Identify whether the following post contains any covert hate speech or dog whistles.

Return your response in the following JSON format:
{{
  "is_dog_whistle": true/false,
  "dog_whistle_terms": ["term1", "term2", ...],
  "explanation": "brief explanation of detection or why it's not a dog whistle"
}}

Post: "{post}"
"""

    request = {
        "model": OPENAI_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0,  # keep the moderation verdict as repeatable as possible
    }

    # The OpenAI client class only exists in openai>=1.0; use it as the version probe.
    if hasattr(openai, "OpenAI"):
        response = openai.chat.completions.create(**request)
        return response.choices[0].message.content

    # Legacy 0.x SDK path.
    response = openai.ChatCompletion.create(**request)
    return response.choices[0].message['content']

# Run DIRECT on neighbors of example query
print("\nRunning DIRECT on nearest neighbors...")
direct_results = []

# Best effort: a failed LLM call is reported and that post is skipped.
for neighbor_post in neighbors['documents'][0]:
    try:
        llm_reply = call_llm_direct(neighbor_post)
    except Exception as err:
        print("Error calling LLM:", err)
    else:
        direct_results.append({
            'post': neighbor_post,
            'llm_response': llm_reply,
        })

# Show results
for entry in direct_results:
    print("\nPost:", entry['post'])
    print("LLM Response:", entry['llm_response'])