In [24]:
# Standard library
import json
import os

# Numerical processing
import numpy as np

# Supabase client
from supabase import create_client

# Connection settings for the Supabase project.
# Environment variables take precedence so credentials can be supplied
# without editing (or committing) the notebook; the literals below are only
# fallbacks that keep the notebook runnable exactly as before.
# NOTE(review): the fallback key looks like the project's anon (public) key —
# confirm, and never paste a service-role key here. Prefer setting the env
# vars and dropping the literals before sharing this notebook.
SUPABASE_URL = os.environ.get(
    "SUPABASE_URL",
    "https://rfcmhcxcfnlbfgmlcabi.supabase.co",
)
SUPABASE_ANON_KEY = os.environ.get(
    "SUPABASE_ANON_KEY",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InJmY21oY3hjZm5sYmZnbWxjYWJpIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NTA2MzIzOTMsImV4cCI6MjA2NjIwODM5M30.yi0SMovFIIfQRnzlj1dswkLKfebg2YmFhBbgzIuqVxI",
)

# Create Supabase client
supabase = create_client(SUPABASE_URL, SUPABASE_ANON_KEY)

# ----------------------------
# Paginated fetch (Data API)
# ----------------------------
# Downloads the "embedding", "metadata" and "content" columns of every row in
# "status_watch_feed_embeddings" into `rows`, one page at a time.
# The accumulators are re-initialised here, so re-running this cell starts
# from scratch instead of appending a second copy of the table.
# NOTE(review): no .order() is applied, so paging relies on the server
# returning rows in a stable order between requests — consider ordering by a
# unique column to rule out duplicated/missed rows.
rows = []              # all rows fetched so far
page_size = 1000       # rows requested per call
offset = 0             # index of the first row of the next request

while True:
    # Fetch a page of rows; the upper bound passed to .range() is inclusive,
    # hence the "- 1".
    res = (
        supabase
        .table("status_watch_feed_embeddings")
        .select("embedding, metadata, content")
        .range(offset, offset + page_size - 1)
        .execute()
    )

    batch = res.data

    # Stop when no more rows are returned
    if not batch:
        break

    rows.extend(batch)

    # Advance by the number of rows actually received rather than by the
    # number requested: if the server caps a response below `page_size`,
    # stepping by `page_size` would silently skip the rows in between.
    offset += len(batch)

In [25]:
# ----------------------------
# Parse embeddings
# ----------------------------
# Each row's "embedding" field is a JSON-encoded string, not a list, so it
# has to be decoded before NumPy can work with it.
decoded_vectors = [json.loads(row["embedding"]) for row in rows]

# Stack the decoded vectors into a single matrix (one row per fetched item)
# and force a float dtype.
X = np.vstack(decoded_vectors).astype(float)

# Final sanity check
print(X.shape)   # (n_rows, 1536)

(100, 1536)


In [26]:
# Similarity computation
from sklearn.metrics.pairwise import cosine_similarity

# Pick one reference vector (index 0)
i = 0

# How many nearest neighbours to keep
top_k = 5

# Compute cosine similarity between vector i and all vectors
# (the result has one row; [0] flattens it to one score per row of X)
sims = cosine_similarity([X[i]], X)[0]

# Rank all rows from most to least similar, then remove the reference row by
# its index. Slicing off "the last element" of the sorted order is not safe:
# an exact duplicate of row i (or a floating-point tie) can sort after it,
# which would leave row i in the result and drop a genuine neighbour.
ranked = sims.argsort()[::-1]
top_indices = ranked[ranked != i][:top_k]

# Row indices of the nearest neighbours, most similar first
top_indices


array([18, 14, 32, 68, 38])

In [27]:
# Peek at the fields available on a fetched row (using the first one)
first_row = rows[0]
first_row.keys()

dict_keys(['embedding', 'metadata', 'content'])

In [28]:
# View output
# Iterate over the computed neighbour indices instead of a pasted copy of
# them, so this cell stays correct when the data or the reference row
# changes. A separate loop name also keeps the reference index `i` intact.
for neighbor_idx in top_indices:
    print(neighbor_idx, rows[neighbor_idx]["content"])

18 Feds give Tesla another five weeks to respond to FSD probe Regulators want to know why Tesla's system ignores red lights and runs into traffic.
14 Under Musk, the Grok disaster was inevitable This is The Stepback, a weekly newsletter breaking down one essential story from the tech world. For more on dystopian developments in AI, follow Hayden Field. The Stepback arrives in our subscribers' inboxes at 8AM ET. Opt in for The Stepback here. How it started You could say it all started with Elon Musk's AI [&#8230;]
32 Rocket Report: Ariane 64 to debut soon; India has a Falcon 9 clone too? "We are fundamentally shifting our approach to securing our munitions supply chain."
68 Chinese EVs inch closer to the US as Canada slashes tariffs The country is dropping its import tax from 100% to just 6.1%, with an initial annual cap of 49,000 cars.
38 ChatGPT Go subscription rolls out worldwide at $8, but it'll show you ads OpenAI's $8 ChatGPT Go subscription, which gives you 10x more messages, is 

In [30]:
# K-means
from sklearn.cluster import KMeans

# How many clusters to look for (start small)
k = 5

# Fit the model on the embedding matrix; the seed is fixed so repeated runs
# give the same assignment
kmeans = KMeans(random_state=42, n_clusters=k)
kmeans.fit(X)

# labels[i] = cluster id for row i
labels = kmeans.labels_
print(labels)


[2 4 2 4 4 2 2 2 1 2 1 2 2 2 2 4 3 2 2 0 2 2 2 3 4 1 4 0 0 2 3 0 4 1 3 0 0
 0 2 3 2 0 0 0 0 0 0 0 0 3 2 2 2 2 2 2 4 2 3 2 2 1 0 0 1 3 2 4 4 3 3 3 3 4
 4 2 3 3 3 3 3 4 4 4 2 4 4 4 1 3 3 3 3 1 1 1 1 1 1 1]


In [31]:
# Print the content of every row, grouped by its cluster id (0 .. k-1)
for cluster_id in range(k):
    print(f"\nCLUSTER {cluster_id}")
    # Positions in `labels` (and therefore in `rows`) assigned to this cluster
    member_positions = np.flatnonzero(labels == cluster_id)
    for position in member_positions:
        print("-", rows[position]["content"])



CLUSTER 0
- Mandiant releases rainbow table that cracks weak admin password in 12 hours Windows laggards still using the vulnerable hashing function: Your days are numbered.
- Judge orders Anna’s Archive to delete scraped data; no one thinks it will comply WorldCat operator hopes default judgment will convince web hosts to take action.
- Rackspace customers grapple with “devastating” email hosting price hike Reseller says Rackspace plans to charge it 706 percent more.
- Why I’m withholding certainty that “precise” US cyber-op disrupted Venezuelan electricity NYT says US hackers were able to turn off power and then quickly turn it back on.
- Microsoft: Some Windows PCs fail to shut down after January update Microsoft has confirmed a new issue that prevents Windows 11 23H2 devices with System Guard Secure Launch enabled from shutting down. [...]
- Cisco finally fixes AsyncOS zero-day exploited since November ​Cisco finally patched a maximum-severity AsyncOS zero-day exploited in attacks

In [34]:
from sklearn.cluster import DBSCAN
import numpy as np
import json

# Reuses the embedding matrix X built earlier: shape (n_rows, 1536)

# --- DBSCAN ---
# Tunable knobs, named so they are easy to find and adjust
neighborhood_radius = 0.8   # eps; loosen if needed: try 0.6–1.0
min_neighbors = 3           # min_samples

dbscan = DBSCAN(min_samples=min_neighbors, eps=neighborhood_radius)

# One label per row of X
labels = dbscan.fit_predict(X)

# --- See what happened ---
print("Unique labels:", set(labels))


Unique labels: {np.int64(0), np.int64(1), np.int64(-1)}


In [35]:
# --- Print clusters ---
# Print the content of every row, grouped by its DBSCAN label.
# np.unique yields the distinct labels in ascending order, so clusters are
# always printed in the same order; iterating a plain set() gives no such
# guarantee.
for cluster_id in np.unique(labels):
    if cluster_id == -1:
        continue  # noise

    print(f"\nCLUSTER {cluster_id}")
    # Positions in `labels` (and therefore in `rows`) carrying this label
    for position in np.flatnonzero(labels == cluster_id):
        print("-", rows[position]["content"])


CLUSTER 0
- OpenAI to test ads in ChatGPT as it burns through billions Ads coming to free tier and new $8/month ChatGPT Go plan in US.
- ChatGPT Go subscription rolls out worldwide at $8, but it'll show you ads OpenAI's $8 ChatGPT Go subscription, which gives you 10x more messages, is now available in the United States and other regions. [...]
- OpenAI says its new ChatGPT ads won't influence answers OpenAI has confirmed ChatGPT is getting ads in the coming weeks, but it promises that ads won't influence answers generated by ChatGPT. [...]
- ChatGPT users are about to get hit with targeted ads OpenAI says that users impacted by the ads will have some control over what they see.
- OpenAI brings advertising to ChatGPT in push for new revenue $500bn start-up seeks ways to fund expansion and fend off competition from rivals Google and Anthropic

CLUSTER 1
- Mother of one of Elon Musk’s offspring sues xAI over sexualized deepfakes Claims Grok chatbot created "countless" sexual images of he