In [15]:
# Supabase client
from supabase import create_client

# Numerical processing
import json
import numpy as np

SUPABASE_URL = "https://rfcmhcxcfnlbfgmlcabi.supabase.co"
SUPABASE_ANON_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InJmY21oY3hjZm5sYmZnbWxjYWJpIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NTA2MzIzOTMsImV4cCI6MjA2NjIwODM5M30.yi0SMovFIIfQRnzlj1dswkLKfebg2YmFhBbgzIuqVxI"

# Create Supabase client
supabase = create_client(SUPABASE_URL, SUPABASE_ANON_KEY)

# ----------------------------
# Paginated fetch
# ----------------------------
rows = []
page_size = 1000
offset = 0

while True:
    res = (
        supabase
        .table("status_watch_incidents")
        .select("""
            id,
            incident_id,
            source_count,
            topic_weight,
            engine_weight,
            story_size,
            cross_topic_flag,
            regulatory_flag
        """)
        .range(offset, offset + page_size - 1)
        .execute()
    )

    batch = res.data
    if not batch:
        break

    rows.extend(batch)
    offset += page_size

print(f"Fetched {len(rows)} rows")

Fetched 50 rows


In [17]:
# For EXPLORATION only
import pandas as pd

# Convert the list of dicts returned from Supabase into a DataFrame
# Each dict becomes a row, keys become columns (id, score)
df = pd.DataFrame(rows)

# Preview the first few rows to visually inspect the data
df.head()

Unnamed: 0,id,incident_id,source_count,topic_weight,engine_weight,story_size,cross_topic_flag,regulatory_flag
0,bcecb920-0339-4551-ae51-7134741c02d3,windows11-january-2026-update-boot-failure,5,0.8,0.7,5,False,False
1,a4b73344-5d2a-4207-8c3c-5466c7cd0abf,tiktok-data-center-power-outage-us-ownership,2,0.8,0.7,2,False,False
2,d47515fe-58db-41ad-a357-8ff7165831e2,starlink-free-internet-iran-2026-01,3,0.8,1.0,3,False,False
3,21fec133-0b81-4603-99a2-fc43a67a3fc2,anthropic-claude-constitution-update-2026-01-21,3,0.9,0.7,2,False,False
4,e28a69a0-2a92-46e3-a43d-7c7daa0394b3,microsoft-ai-chip-announcement-2026-01-26,3,0.9,0.7,3,False,False


In [19]:
# OR stuff starts
# -----------------------------

import numpy as np
from scipy.optimize import linprog

# ----------------------------
# STEP 1 — Decision variables
# ----------------------------
# Each incident gets a decision variable x_i
# x_i = 1 → we select / surface this incident
# x_i = 0 → we do not select it
n = len(df)

# ----------------------------
# STEP 2 — Objective
# ----------------------------
# Instead of a single "score", we combine multiple explainable features.
# This creates a transparent "value" for each incident.
#
# Think of this as:
# - topic_weight      → how important this topic is (policy decision)
# - engine_weight     → how strong the signal source is
# - source_count      → how well-supported / corroborated the incident is
#
# The weights below are NOT learned.
# They are simple, adjustable policy knobs.

combined_value = (
    1.0 * df["topic_weight"] +     # topic importance
    0.8 * df["engine_weight"] +    # signal strength
    0.6 * df["source_count"]       # corroboration
)

# linprog minimizes by default,
# so we negate the values to simulate maximization
c = -combined_value.values

# ----------------------------
# STEP 3 — Constraints
# ----------------------------
# Reality constraint: we can only handle K incidents at once
K = 10

# Constraint form: sum(x_i) ≤ K
A_ub = [np.ones(n)]
b_ub = [K]

# (Optional later)
# Additional constraints could go here:
# - max 1 per topic
# - penalize very large stories
# - force at least 1 cybersecurity incident
# We are intentionally NOT doing that yet.

# ----------------------------
# STEP 4 — Bounds
# ----------------------------
# Each decision variable must be between 0 and 1
# 0 = not selected, 1 = selected
bounds = [(0, 1)] * n


In [20]:
# ----------------------------
# STEP 5 — Solve the optimization problem
# ----------------------------
# The solver finds the values of x_i that:
# - Maximize total score
# - Respect the constraint (≤ K incidents)
# - Respect variable bounds (0 ≤ x_i ≤ 1)
result = linprog(
    c,
    A_ub=A_ub,
    b_ub=b_ub,
    bounds=bounds,
    method="highs"
)

In [21]:
# ----------------------------
# STEP 6 — Interpret the solution
# ----------------------------
# result.x contains one value per incident (x_i)
# Values close to 1 → selected
# Values close to 0 → not selected
selected_incidents = df.loc[
    result.x > 0.5,
    [
        "incident_id",
        "topic_weight",
        "engine_weight",
        "source_count",
        "story_size"
    ]
]

selected_incidents


Unnamed: 0,incident_id,topic_weight,engine_weight,source_count,story_size
0,windows11-january-2026-update-boot-failure,0.8,0.7,5,5
5,apple-ai-wearable-development-2026-01-21,0.9,0.7,4,2
15,ice_protests_and_controversies_jan2026,0.4,0.7,8,8
19,us_tiktok_takeover_and_upscrolled_surge_jan2026,0.4,0.7,5,4
22,ice_protests_and_tech_industry_response_jan_2026,0.4,0.7,7,7
25,nvidia-invests-2b-coreweave-ai-compute-2026-01,0.9,0.7,4,4
27,tiktok_us_takeover_and_shift_to_upscrolled_jan...,0.4,0.7,5,3
38,xai-eu-investigation-grok-sexualized-deepfakes...,0.9,0.7,5,5
44,games-workshop-ai-usage-ban-2026-01-13,0.9,1.0,3,3
49,voidlink-linux-malware-2026-01,1.0,1.0,3,3
