# Setup

In [1]:
from datetime import datetime

print(
    f"‚úÖ Finished running signals feed at "
    f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
)

‚úÖ Finished running signals feed at 2026-02-06 19:22:34


## Import libraries

In [2]:
from dotenv import load_dotenv
load_dotenv()

import requests
import re
import json
import pandas as pd
import numpy as np
import asyncio
import anthropic
from anthropic import AsyncAnthropic
import pyperclip
from supabase import create_client, Client
import time
from typing import List, Dict
from google import genai
from openai import OpenAI
import asyncio
from openai import AsyncOpenAI
import math
from datetime import datetime, timezone
import os
from bs4 import BeautifulSoup
from newspaper import Article
from playwright.sync_api import sync_playwright
import json5
import ast
import asyncio
import google.generativeai as genai


# Supabase API
SUPABASE_URL = os.environ["SUPABASE_URL"]
SUPABASE_KEY = os.environ["SUPABASE_KEY"]
SERVICE_ROLE_KEY = os.environ["SUPABASE_SERVICE_ROLE_KEY"]

supabase: Client = create_client(SUPABASE_URL, SERVICE_ROLE_KEY)

## Clean up JSON function

In [3]:
def safe_json_loads(x):
    if not isinstance(x, str):
        return x
    
    # Fix invalid \uXXXX escapes ‚Äî replace with a safe placeholder
    x = re.sub(r'\\u(?![0-9a-fA-F]{4})', r'\\uFFFF', x)

    try:
        return json.loads(x)
    except Exception as e:
        print("‚ùå JSON decode failed:", e)
        print("Offending value:", x[:300])
        return None

## Initiate AI models

In [4]:
ANTHROPIC_MODEL = "claude-haiku-4-5-20251001"
OPENAI_MODEL = "gpt-5-mini"

# OpenAI key
openai_api_key = os.environ["OPENAI_API_KEY"]

# Anthropic
anthropic_api_key = os.environ["ANTHROPIC_API_KEY"]
client = anthropic.Anthropic(
    api_key=anthropic_api_key,
)

# Gemini Key
google_api_key = os.environ["GOOGLE_API_KEY"]

# System prompt
system = f"""

You are a research analyst doing competitive intelligence research for a client.

"""

# Grab data

In [5]:
def fetch_all_rows(table, filters=None, batch_size=1000):
    all_rows = []
    start = 0

    while True:
        query = supabase.table(table).select("*").order("id", desc=True)

        # Apply filters if provided
        if filters:
            for col, val in filters.items():
                if isinstance(val, list):
                    query = query.in_(col, val)
                else:
                    query = query.eq(col, val)

        # Pagination block
        query = query.range(start, start + batch_size - 1)

        resp = query.execute()
        data = resp.data or []
        all_rows.extend(data)

        # Stop if fewer than batch_size returned
        if len(data) < batch_size:
            break

        start += batch_size

    return all_rows

tables = ["news_feed", "linkedin_feed", "reddit_posts", "jobs", "ad_library", "companies", "competitors", "signals"]
feeds = {}

for table in tables:
    # Apply filters only for content tables
    if table not in ["companies", "competitors", "signals"]:
        filters = {
            "relevant": True,
            "processed": False
        }
    else:
        filters = None

    rows = fetch_all_rows(table, filters=filters)
    feeds[table] = pd.DataFrame(rows)

    print(f"‚úÖ Retrieved {len(rows)} rows from {table}." if rows else f"‚ö†Ô∏è No rows in {table}.")

# ---------- Assign DataFrames ----------
news_feed = feeds["news_feed"]
linkedin_feed = feeds["linkedin_feed"]
reddit_posts = feeds["reddit_posts"]
jobs = feeds["jobs"]
ads = feeds["ad_library"]
existing_signals = feeds["signals"]

companies = feeds["companies"]
competitors = feeds["competitors"]

‚úÖ Retrieved 55 rows from news_feed.
‚úÖ Retrieved 7 rows from linkedin_feed.
‚úÖ Retrieved 4 rows from reddit_posts.
‚úÖ Retrieved 79 rows from jobs.
‚úÖ Retrieved 111 rows from ad_library.
‚úÖ Retrieved 12 rows from companies.
‚úÖ Retrieved 60 rows from competitors.
‚úÖ Retrieved 931 rows from signals.


# Combine data feeds into one dataframe

## Add company id to dfs

In [6]:
# Merge company status into competitors
competitors_with_status = competitors.merge(
    companies[["id", "status"]],
    left_on="company_id",
    right_on="id",
    how="left",
    suffixes=("", "_company")
)

# Build lookup
competitor_lookup = competitors_with_status.set_index("id")[["company_id", "competitor_name", "status"]].to_dict(orient="index")

def append_company_and_competitor(df):
    # üß§ Handle empty DataFrame early
    if df.empty:
        print(f"‚ö†Ô∏è DataFrame is empty ‚Äî skipping append.")
        return df

    if "competitor_id" not in df.columns:
        print("‚ö†Ô∏è No 'competitor_id' column ‚Äî skipping append.")
        return df

    df["company_id"] = df["competitor_id"].map(lambda x: competitor_lookup.get(x, {}).get("company_id", 0))
    df["competitor_name"] = df["competitor_id"].map(lambda x: competitor_lookup.get(x, {}).get("competitor_name", "Unknown"))
    df["status"] = df["competitor_id"].map(lambda x: competitor_lookup.get(x, {}).get("status", "Unknown"))
    df["company_id"] = df["company_id"].astype("Int64")
    return df

# Apply to feeds
news_feed = append_company_and_competitor(news_feed)
linkedin_feed = append_company_and_competitor(linkedin_feed)
jobs = append_company_and_competitor(jobs)
ads = append_company_and_competitor(ads)
print(f"Company ids added to dataframes...")

Company ids added to dataframes...


## Convert news feed to json ready df

In [7]:
news_feed_json = pd.DataFrame()

if news_feed is not None and not news_feed.empty:
    # Ensure required columns exist
    required_cols = ["id", "competitor_id", "company_id", "url", "title", "content", "description"]
    missing_cols = [col for col in required_cols if col not in news_feed.columns]

    if missing_cols:
        print(f"‚ö†Ô∏è Missing columns in news_feed: {missing_cols}")
    else:
        news_feed_json["id"] = news_feed["id"]
        news_feed_json["competitor_id"] = news_feed["competitor_id"]
        news_feed_json["company_id"] = news_feed["company_id"]
        news_feed_json["content"] = news_feed["content"]
        news_feed_json["source"] = "news"

        news_feed_json["content_json"] = news_feed.apply(
            lambda row: json.dumps({
                "content_id": row["id"],
                "url": row["url"],
                "title": row["title"],
                "content": row["content"],
                "description": row["description"],
                "competitor_id": row["competitor_id"],
                "source": "news"
            }),
            axis=1
        )

        print(f"‚úÖ Newsfeed content converted to json for {len(news_feed_json)} records.")
else:
    print("‚ö†Ô∏è news_feed is empty ‚Äî skipping JSON conversion.")

‚úÖ Newsfeed content converted to json for 55 records.


## Convert linkedin feed to json ready df

In [8]:
linkedin_feed_json = pd.DataFrame()

if linkedin_feed is not None and not linkedin_feed.empty:
    # Ensure required columns exist
    required_cols = ["id", "competitor_id", "author_fullName", "text", "postUrl"]
    missing_cols = [col for col in required_cols if col not in linkedin_feed.columns]
    
    if missing_cols:
        print(f"‚ö†Ô∏è Missing columns in linkedin_feed: {missing_cols}")
    else:
        linkedin_feed_json["id"] = linkedin_feed["id"]
        linkedin_feed_json["competitor_id"] = linkedin_feed["competitor_id"]
        linkedin_feed_json["company_id"] = linkedin_feed["company_id"]
        linkedin_feed_json["source"] = "linkedin"

        linkedin_feed_json["content_json"] = linkedin_feed.apply(
            lambda row: json.dumps({
                "content_id": row["id"],
                "author_fullName": row["author_fullName"],
                "text": row["text"],
                "url": row["postUrl"],
                "competitor_id": row["competitor_id"],
                "source": "linkedin"
            }),
            axis=1
        )
        print("‚úÖ Linkedin content converted to json...")
else:
    print("‚ö†Ô∏è linkedin_feed is empty ‚Äî skipping JSON conversion")

‚úÖ Linkedin content converted to json...


## Convert reddit feed to json ready df

In [9]:
reddit_posts_json = pd.DataFrame()

if reddit_posts is not None and not reddit_posts.empty:
    required_cols = ["id", "company_id", "post_selftext", "post_url"]
    missing_cols = [col for col in required_cols if col not in reddit_posts.columns]

    if missing_cols:
        print(f"‚ö†Ô∏è Missing columns in reddit_posts: {missing_cols}")
    else:
        reddit_posts_json["id"] = reddit_posts["id"]
        reddit_posts_json["competitor_id"] = reddit_posts["company_id"]
        reddit_posts_json["company_id"] = reddit_posts["company_id"] 
        reddit_posts_json["source"] = "reddit"

        reddit_posts_json["content_json"] = reddit_posts.apply(
            lambda row: json.dumps({
                "content_id": row["id"],
                "text": row["post_selftext"],
                "url": row["post_url"],
                "competitor_id": row["company_id"],
                "company_id": row["company_id"],
                "source": "reddit"
            }),
            axis=1
        )

        print(f"‚úÖ {len(reddit_posts_json)} Reddit records converted...")
else:
    print("‚ö†Ô∏è reddit_posts is empty ‚Äî skipping JSON conversion.")

‚úÖ 4 Reddit records converted...


## Convert jobs feed to json ready df

In [10]:
jobs_json = pd.DataFrame()

if jobs is not None and not jobs.empty:
    required_cols = ["id", "competitor_id", "company_id", "title", "description", "url"]
    missing_cols = [col for col in required_cols if col not in jobs.columns]

    if missing_cols:
        print(f"‚ö†Ô∏è Missing columns in jobs: {missing_cols}")
    else:
        jobs_json["id"] = jobs["id"]
        jobs_json["competitor_id"] = jobs["competitor_id"]
        jobs_json["company_id"] = jobs["company_id"]
        jobs_json["source"] = "jobs"

        jobs_json["content_json"] = jobs.apply(
            lambda row: json.dumps({
                "content_id": row["id"],
                "title": row["title"],
                "description": row["description"],
                "url": row["url"],
                "competitor_id": row["competitor_id"],
                "source": "jobs"
            }),
            axis=1
        )

        print(f"‚úÖ {len(jobs_json)} job postings converted to json...")
else:
    print("‚ö†Ô∏è jobs is empty ‚Äî skipping JSON conversion.")

‚úÖ 79 job postings converted to json...


## Convert ads feed to json ready df

In [11]:
ads_json = pd.DataFrame()

if ads is not None and not ads.empty:
    required_cols = ["id", "competitor_id", "json_response", ]
    missing_cols = [col for col in required_cols if col not in ads.columns]

    if missing_cols:
        print(f"‚ö†Ô∏è Missing columns in jobs: {missing_cols}")
    else:
        ads_json["id"] = ads["id"]
        ads_json["competitor_id"] = ads["competitor_id"]
        ads_json["company_id"] = ads["company_id"]
        ads_json["source"] = "ads"

        ads_json["content_json"] = ads.apply(
            lambda row: json.dumps({
                "content_id": row["id"],
                "json_response": row["json_response"],
                "source": "ads"
            }),
            axis=1
        )

        print(f"‚úÖ {len(ads_json)} ads converted to json...")
else:
    print("‚ö†Ô∏è ads is empty ‚Äî skipping JSON conversion.")

‚úÖ 111 ads converted to json...


## Combine dataframes into one

In [12]:
data_feed_combined = pd.concat([
    news_feed_json,
    linkedin_feed_json,
    reddit_posts_json,
    jobs_json,
    ads_json
], ignore_index=True)
print(f"‚úÖ Data frame combination completed with {len(data_feed_combined)} rows created")

‚úÖ Data frame combination completed with 256 rows created


## Add company description to df

In [13]:
df = data_feed_combined.merge(
    companies[['id', 'company_custom_prompt']],
    left_on='company_id',
    right_on='id',
    how='left'
)

# Drop id_y and rename id_x back to id
df = (
    df
    .drop(columns=['id_y'])
    .rename(columns={'id_x': 'id'})
)
df
data_feed_combined = df
data_feed_combined

Unnamed: 0,id,competitor_id,company_id,content,source,content_json,company_custom_prompt
0,62011,158,66,Every item on this page was chosen by an edito...,news,"{""content_id"": 62011, ""url"": ""https://www.thep...",Toast POS is a cloud-based point-of-sale and r...
1,62009,158,66,"Now through Feb. 14, get $10 off your first re...",news,"{""content_id"": 62009, ""url"": ""https://www.cnet...",Toast POS is a cloud-based point-of-sale and r...
2,62003,158,66,A generic bouquet wrapped in plastic and a box...,news,"{""content_id"": 62003, ""url"": ""https://shopping...",Toast POS is a cloud-based point-of-sale and r...
3,62002,158,66,[Skip to content](https://thebeat951.com/music...,news,"{""content_id"": 62002, ""url"": ""https://thebeat9...",Toast POS is a cloud-based point-of-sale and r...
4,62000,158,66,50 Cent is delivering the beef directly to his...,news,"{""content_id"": 62000, ""url"": ""https://thegrio....",Toast POS is a cloud-based point-of-sale and r...
...,...,...,...,...,...,...,...
251,82248,163,72,,ads,"{""content_id"": 82248, ""json_response"": ""{\""adv...",MX Build combines field service management wit...
252,82168,166,72,,ads,"{""content_id"": 82168, ""json_response"": ""{\""adv...",MX Build combines field service management wit...
253,82166,166,72,,ads,"{""content_id"": 82166, ""json_response"": ""{\""adv...",MX Build combines field service management wit...
254,82160,166,72,,ads,"{""content_id"": 82160, ""json_response"": ""{\""adv...",MX Build combines field service management wit...


# Map new content against existing insight(s)

## Assign data_feed a new name

In [14]:
signal_match_candidates = (
    data_feed_combined
    .groupby(["source", "competitor_id", "company_id"], as_index=False)
    .agg({"content_json": list})
)
print(f"‚úÖ Dataframe grouped together with {len(signal_match_candidates)} rows created...")
signal_match_candidates.head(1)

‚úÖ Dataframe grouped together with 38 rows created...


Unnamed: 0,source,competitor_id,company_id,content_json
0,ads,152,23,"[{""content_id"": 82441, ""json_response"": ""{\""cr..."


## Append signals to data_feed dataframe

In [15]:
# Ensure signals_df has the columns we need
signals_subset = existing_signals[["id", "title", "summary","details", "company_id"]]
signals_subset.head(5)

Unnamed: 0,id,title,summary,details,company_id
0,52646,DoorDash is hiring Tesla's robotics leader to ...,DoorDash has appointed former Tesla robotics a...,## What You Need to Know\n- DoorDash has appoi...,
1,2809,Skanska is hiring sustainability roles to win ...,Skanska is emphasizing climate‚Äëneutral and 'cl...,## Overview\n\n* Skanska is embedding **sust...,73.0
2,2808,Skanska is building Skanska Direkt to capture ...,Skanska is launching Skanska Direkt to focus o...,## Overview\n* Skanska is executing a **dual...,73.0
3,2807,Skanska is hiring cloud and IT talent to moder...,"Skanska is building cloud, ERP, and field IT c...",## Overview\n\n* Skanska is embedding **sust...,73.0
4,2806,Skanska is hiring heavy‚Äëcivil leaders to expan...,Skanska is recruiting senior project managers ...,## Overview\n\n* Skanska is **aggressively r...,73.0


In [16]:
# Group signals by company_id into list of dicts 
signals_map = (
    signals_subset
    .groupby("company_id")
    .apply(lambda g: g.to_dict(orient="records"), include_groups=False)
    .to_dict()
)

# Add new column to data_feed
signal_match_candidates["signals_json"] = signal_match_candidates["company_id"].map(signals_map).apply(
    lambda x: x if isinstance(x, list) else []
)

signal_match_candidates.head(5)

Unnamed: 0,source,competitor_id,company_id,content_json,signals_json
0,ads,152,23,"[{""content_id"": 82441, ""json_response"": ""{\""cr...","[{'id': 2751, 'title': 'Freelance writers are ..."
1,ads,156,66,"[{""content_id"": 82387, ""json_response"": ""{\""ad...","[{'id': 2800, 'title': 'DoorDash is blaming me..."
2,ads,157,66,"[{""content_id"": 82397, ""json_response"": ""{\""ad...","[{'id': 2800, 'title': 'DoorDash is blaming me..."
3,ads,158,66,"[{""content_id"": 82420, ""json_response"": ""{\""ad...","[{'id': 2800, 'title': 'DoorDash is blaming me..."
4,ads,162,72,"[{""content_id"": 82503, ""json_response"": ""{\""cr...","[{'id': 2802, 'title': 'MX Build is offering b..."


## Run against LLM to check signal match

In [17]:
client = AsyncOpenAI(api_key=openai_api_key)
MODEL_NAME = OPENAI_MODEL
MAX_CONCURRENCY = 100

semaphore = asyncio.Semaphore(MAX_CONCURRENCY)
lock = asyncio.Lock()

async def fetch_response(prompt, row_id, company_id, source, signals_json, progress):
    async with semaphore:
        try:
            response = await client.chat.completions.create(
                model=MODEL_NAME,
                messages=[{"role": "user", "content": prompt}]
            )
            text = response.choices[0].message.content
        except Exception as e:
            text = None
            print(f"‚ö†Ô∏è Error for id {row_id}: {e}")

        # Update progress safely
        async with lock:
            progress["done"] += 1
            done = progress["done"]
            total = progress["total"]
            print(f"‚úÖ Completed {done}/{total} ({done/total:.0%})")

        return {
            "competitor_id": row_id,
            "company_id": company_id,
            "source": source,
            "signals_json": signals_json,
            "response": text
        }

async def process_all(df):
    total = len(df)
    progress = {"done": 0, "total": total}
    tasks = []

    for _, row in df.iterrows():
        prompt = f"""
            You are a senior competitive intelligence analyst reporting to C-suite executives.
            
            Your job is to evaluate EACH new content item independently and decide whether it should be mapped to an existing signal.
            
            You must follow these strict rules:

            1. **Be extremely selective. Default to NOT mapping.**
            2. Only map a content item to a signal if there is a **clear, direct, and explicit overlap**
               in (a) topic, (b) company action, or (c) strategic theme.
            3. Weak, indirect, vague, or broad connections DO NOT count.
            4. If you are unsure, the answer is False.
            5. You MUST still select the closest signal (best thematic fit) even if the result is False.
            6. Output one result PER CONTENT ITEM.
            
            STRICT STRUCTURE + VALIDATION RULES (MANDATORY):
            
            7. You MUST return structured JSON only.
            8. For each content item, you MUST output the fields:
               - "content_id": the NUMERIC ID from the input.
               - "signal_id": the NUMERIC signal ID you consider the closest thematic fit.
               - "should_map": a boolean.
            
            9. You MUST follow these strict constraints for "content_id" and "signal_id":
               - They MUST be integers.
               - They MUST come directly from the input.
               - You MUST NOT generate, guess, invent, or hallucinate IDs.
               - You MUST NOT substitute titles, summaries, strings, or text in place of IDs.
               - If you cannot determine the numeric ID from the input, you MUST set it to null.
            
            10. NEVER output:
                - article titles
                - summaries
                - URLs
                - descriptions
                - category labels
                - string content
                in the "content_id" or "signal_id" fields.  
                These fields must be strictly integer or null.
            
            This is not optional. Any violation of these numeric rules invalidates the output.

            Here is the list of new content items:
            {row['content_json']}
            
            Here are the existing signals:
            {row['signals_json']}
            
            Respond ONLY with a JSON array.
            Each element must be structured like:
            
            {{
              "should_map": true/false,
              "signal_id": "<closest matching signal id>",
              "content_id": "<content_id>",
              "signal_title": "<closest signal title>",
              "content_title": "<title of this content item>",
              "reason": "Short explanation. If false, explain why the overlap is weak."
            }}
            
            Return EXACTLY one object per content item, in the same order they appear.
            """
        # print(prompt)
        tasks.append(fetch_response(prompt, row["competitor_id"], row["company_id"], row["source"], row["signals_json"], progress))

    results = await asyncio.gather(*tasks)
    return results

signal_match_results = await process_all(signal_match_candidates)
signal_match_results_df = pd.DataFrame(signal_match_results)

‚úÖ Completed 1/38 (3%)
‚úÖ Completed 2/38 (5%)
‚úÖ Completed 3/38 (8%)
‚úÖ Completed 4/38 (11%)
‚úÖ Completed 5/38 (13%)
‚úÖ Completed 6/38 (16%)
‚úÖ Completed 7/38 (18%)
‚úÖ Completed 8/38 (21%)
‚úÖ Completed 9/38 (24%)
‚úÖ Completed 10/38 (26%)
‚úÖ Completed 11/38 (29%)
‚úÖ Completed 12/38 (32%)
‚úÖ Completed 13/38 (34%)
‚úÖ Completed 14/38 (37%)
‚úÖ Completed 15/38 (39%)
‚úÖ Completed 16/38 (42%)
‚úÖ Completed 17/38 (45%)
‚úÖ Completed 18/38 (47%)
‚úÖ Completed 19/38 (50%)
‚úÖ Completed 20/38 (53%)
‚úÖ Completed 21/38 (55%)
‚úÖ Completed 22/38 (58%)
‚úÖ Completed 23/38 (61%)
‚úÖ Completed 24/38 (63%)
‚úÖ Completed 25/38 (66%)
‚úÖ Completed 26/38 (68%)
‚úÖ Completed 27/38 (71%)
‚úÖ Completed 28/38 (74%)
‚úÖ Completed 29/38 (76%)
‚úÖ Completed 30/38 (79%)
‚úÖ Completed 31/38 (82%)
‚úÖ Completed 32/38 (84%)
‚úÖ Completed 33/38 (87%)
‚úÖ Completed 34/38 (89%)
‚úÖ Completed 35/38 (92%)
‚úÖ Completed 36/38 (95%)
‚úÖ Completed 37/38 (97%)
‚úÖ Completed 38/38 (100%)


In [18]:
# Clean up JSON
def safe_load(x):
    if not x or str(x).strip() == "":
        return None

    try:
        return json.loads(x)
    except:
        pass

    try:
        return json5.loads(x)
    except:
        pass

    try:
        return ast.literal_eval(x)
    except:
        return None

signal_match_results_df["response_cleaned"] = signal_match_results_df["response"].apply(safe_load)
signal_match_results_df.head(5)

Unnamed: 0,competitor_id,company_id,source,signals_json,response,response_cleaned
0,152,23,ads,"[{'id': 2751, 'title': 'Freelance writers are ...","[\n {\n ""should_map"": true,\n ""signal_i...","[{'should_map': True, 'signal_id': 2145, 'cont..."
1,156,66,ads,"[{'id': 2800, 'title': 'DoorDash is blaming me...","[\n {\n ""should_map"": true,\n ""signal_i...","[{'should_map': True, 'signal_id': 2532, 'cont..."
2,157,66,ads,"[{'id': 2800, 'title': 'DoorDash is blaming me...","[\n {\n ""should_map"": true,\n ""signal_i...","[{'should_map': True, 'signal_id': 2054, 'cont..."
3,158,66,ads,"[{'id': 2800, 'title': 'DoorDash is blaming me...","[\n {\n ""should_map"": true,\n ""signal_i...","[{'should_map': True, 'signal_id': 2640, 'cont..."
4,162,72,ads,"[{'id': 2802, 'title': 'MX Build is offering b...","[\n {\n ""should_map"": true,\n ""signal_i...","[{'should_map': True, 'signal_id': 2719, 'cont..."


## Update supabase, map new content to existing signals

In [19]:
mapped_signal_links = []

for _, row in signal_match_results_df.iterrows():
    source = row.get("source")
    competitor_id = row.get("competitor_id")
    company_id = row.get("company_id")

    # response_cleaned is a list of mapping dicts
    mappings = row.get("response_cleaned", [])

    if not isinstance(mappings, list):
        continue

    for m in mappings:
        # Only keep mappings where should_map=True
        if m.get("should_map") is True:
            mapped_signal_links.append({
                "source": source,
                "competitor_id": competitor_id,
                "company_id": company_id,
                "content_id": m.get("content_id"),
                "signal_id": m.get("signal_id")
            })

if not mapped_signal_links:
    print("‚ö†Ô∏è No signal links created")
else:
    # Group by source
    grouped = {}
    for row in mapped_signal_links:
        src = row.get("source")
        grouped.setdefault(src, []).append(row)

    for source, rows in grouped.items():
        # Correct table map
        table_map = {
            "news": ("news_feed_signals", "news_feed_id"),
            "linkedin": ("linkedin_feed_signals", "linkedin_feed_id"),
            "reddit": ("reddit_feed_signals", "reddit_feed_id"),
            "jobs": ("jobs_feed_signals", "jobs_feed_id"),
            "ads": ("ad_library_signals", "ad_library_feed_id"),
        }

        if source not in table_map:
            print(f"‚ö†Ô∏è Skipping unknown source '{source}'")
            continue

        table_name, content_field = table_map[source]

        # Build insert rows
        insert_rows = []
        for r in rows:
            signal_id = r.get("signal_id")
            content_id = r.get("content_id")

            if signal_id is None or content_id is None:
                continue

            insert_rows.append({
                "signal_id": signal_id,
                content_field: content_id
            })

        if not insert_rows:
            print(f"‚ö†Ô∏è No valid rows for source '{source}'")
            continue

        # INSERT (no upsert)
        try:
            supabase.table(table_name).insert(insert_rows).execute()
            print(f"‚úÖ Inserted {len(insert_rows)} {source} ‚Üí signal links into {table_name}")
        except Exception as e:
            print(f"‚ùå Error inserting into {table_name}: {e}")

‚úÖ Inserted 78 ads ‚Üí signal links into ad_library_signals
‚úÖ Inserted 69 jobs ‚Üí signal links into jobs_feed_signals
‚úÖ Inserted 4 linkedin ‚Üí signal links into linkedin_feed_signals
‚ùå Error inserting into news_feed_signals: {'code': '23503', 'details': 'Key (news_feed_id)=(61956) is not present in table "news_feed".', 'hint': None, 'message': 'insert or update on table "news_feed_signals" violates foreign key constraint "news_feed_signals_2_news_feed_id_fkey"'}
‚úÖ Inserted 3 reddit ‚Üí signal links into reddit_feed_signals


## Update signal details with new content sources

In [20]:
# Convert mapped signal links to a df
updated_signals = pd.DataFrame(mapped_signal_links)

# Attach content_json
updated_signals_with_json = updated_signals.merge(
    data_feed_combined[["id", "source", "content_json"]],
    how="left",
    left_on=["content_id", "source"],
    right_on=["id", "source"]
).drop(columns=["id"])

# Prepare signal metadata
signal_meta = signals_subset.rename(columns={"id": "signal_id"})

# Merge signal metadata ‚Üí updated_signals
merged = updated_signals_with_json.merge(
    signal_meta,
    how="left",
    on="signal_id"
)

# Build JSON field
merged["signals_json"] = merged.apply(
    lambda row: {
        "signal_id": row["signal_id"],
        "title": row["title"],
        "summary": row["summary"],
        "details": row["details"],
    },
    axis=1
)

updated_signals_with_json = merged

# Group by signals
updated_signals_with_json_grouped = (
    updated_signals_with_json
    .groupby("signal_id")
    .agg({
        "signals_json": "first",
        "content_json": list
    })
    .reset_index()
)
updated_signals_with_json_grouped.head(1)

Unnamed: 0,signal_id,signals_json,content_json
0,1851,"{'signal_id': 1851, 'title': 'Nabla is hiring ...","[{""content_id"": 8848, ""title"": ""Mid-Market Cus..."


## Run through LLM

In [21]:
import google.generativeai as genai

genai.configure(api_key=google_api_key)

MAX_CONCURRENCY = 50

semaphore = asyncio.Semaphore(MAX_CONCURRENCY)
lock = asyncio.Lock()

# 2. Initialize the model with JSON mode enabled
model = genai.GenerativeModel(
    model_name="gemini-2.5-pro" ,
    generation_config={
        "temperature": 0,
        # "max_output_tokens": 8192,
        "response_mime_type": "application/json"
    }
)

async def fetch_response(prompt, signal_id, signals_json, progress):
    async with semaphore:
        text = None
        try:
            # 3. Gemini Async Call
            # Note: We use the `await` syntax for the async version of the method
            response = await model.generate_content_async(prompt)
            text = response.text
            
        except Exception as e:
            text = None
            print(f"‚ö†Ô∏è Error on signal {signal_id}: {e}")

        async with lock:
            progress["done"] += 1
            print(f"‚úÖ Completed {progress['done']}/{progress['total']}")

        return {
            "signal_id": signal_id,
            "signals_json": signals_json,
            "response": text
        }

async def process_all(df):
    total = len(df)
    progress = {"done": 0, "total": total}
    tasks = []

    for _, row in df.iterrows():
        prompt = f"""
            You are updating an existing competitive-intelligence signal.
            
            Below is the existing signal JSON, which contains a summary and a 'details' field written in Markdown using the exact CI structure from the extraction prompt:
            
            {row['signals_json']}
            
            Below is NEW content that must be incorporated *into* this existing signal without altering the structure:
            
            {row['content_json']}
            
            YOUR TASK:
            Integrate the new insights into the existing signal by **appending new bullets** into the appropriate sections, while preserving the original structure and wording.
            
            ### CRITICAL RULES (DO NOT VIOLATE)
            
            1. **You can edit, rewrite, reorder, or rephrase ANY existing text if needed.**
            
            2. **Maintain the exact four-section structure:**
               - ## What You Need to Know
               - ## The Threat to Watch
               - ## What to Monitor
               - ## Bottom Line 
               (These already exist in the details; do not recreate them.)
            
            3. **APPEND ONLY**
               - Add or modify bullets in correct section.
               - Use the exact same Markdown bullet style (hyphen at start).
               - Match tone and bullet length from the original extraction prompt.
               - Bold key phrases using **double asterisks** to enable skimming
               - Make sure to cite specific facts, quotes, or claims, include inline source links using this format:
                   - Example: [POS ad promotion](https://www.linkedin.com/ad-library/detail/968519826?trk=ad_library_ad_preview_content_image)
                   - Format: [link text](https://example.com)
            - Link format: competitor claims/quotes should link to original source; financial data should link to the report/filing
            - Example: "Freshworks reported **15% constant currency revenue growth** [Q3 earnings](https://...)"
               
            4. **SUMMARY**
               - Update the existing summary to *include the new idea*.
               - Summary must remain 1‚Äì2 sentences, maximum.
               - Do NOT delete the original meaning or rewrite from scratch.
               - When referring to a competitor, do not say 'they' or 'them', refer to the competitor by their name.
            
            5. **CONTENT INTEGRITY**
               - Keep all existing Markdown and links.
               - Do not add new sections.
               - Do not say ‚Äúnew‚Äù or ‚Äúadditional.‚Äù
               - Try to keeping each section to about 3-5 bullets.

            6. For update_sig indicate if you just Enriched the content or there is actually a significant new update that my clinet should know about.
                If there is not a big update, just put 'enriched', if there is put 'updated'.
            
            ### OUTPUT FORMAT
            Respond ONLY with valid JSON, a list containing exactly ONE object:
            
            [
              {{
                "summary": "<updated summary that keeps all original meaning but adds new insight>",
                "details": "<update content with new sources integrated>",
                "update_sig": "<enriched OR updated>",
                "update_descrip": "<if updated, what the major update was, state the before and after so we know the difference>",
              }}
            ]
            
            Do NOT include commentary, explanation, or text outside the JSON.
            """

        tasks.append(
            fetch_response(
                prompt,
                row["signal_id"],
                row["signals_json"],
                progress
            )
        )

    results = await asyncio.gather(*tasks)
    return results

# Execution
updated_signal_details = await process_all(updated_signals_with_json_grouped)
updated_signal_details_df = pd.DataFrame(updated_signal_details)

‚úÖ Completed 1/65
‚úÖ Completed 2/65
‚úÖ Completed 3/65
‚úÖ Completed 4/65
‚úÖ Completed 5/65
‚úÖ Completed 6/65
‚úÖ Completed 7/65
‚úÖ Completed 8/65
‚úÖ Completed 9/65
‚úÖ Completed 10/65
‚úÖ Completed 11/65
‚úÖ Completed 12/65
‚úÖ Completed 13/65
‚úÖ Completed 14/65
‚úÖ Completed 15/65
‚úÖ Completed 16/65
‚úÖ Completed 17/65
‚úÖ Completed 18/65
‚úÖ Completed 19/65
‚úÖ Completed 20/65
‚úÖ Completed 21/65
‚úÖ Completed 22/65
‚úÖ Completed 23/65
‚úÖ Completed 24/65
‚úÖ Completed 25/65
‚úÖ Completed 26/65
‚úÖ Completed 27/65
‚úÖ Completed 28/65
‚úÖ Completed 29/65
‚úÖ Completed 30/65
‚úÖ Completed 31/65
‚úÖ Completed 32/65
‚úÖ Completed 33/65
‚úÖ Completed 34/65
‚úÖ Completed 35/65
‚úÖ Completed 36/65
‚úÖ Completed 37/65
‚úÖ Completed 38/65
‚úÖ Completed 39/65
‚úÖ Completed 40/65
‚úÖ Completed 41/65
‚úÖ Completed 42/65
‚úÖ Completed 43/65
‚úÖ Completed 44/65
‚úÖ Completed 45/65
‚úÖ Completed 46/65
‚úÖ Completed 47/65
‚úÖ Completed 48/65
‚úÖ Completed 49/65
‚úÖ Completed 50/65
‚úÖ Compl

In [22]:
# Clean json
updated_signal_details_df["response_cleaned"] = updated_signal_details_df["response"].apply(safe_load)
updated_signal_details_df.head(1)

Unnamed: 0,signal_id,signals_json,response,response_cleaned
0,1851,"{'signal_id': 1851, 'title': 'Nabla is hiring ...","[\n {\n ""summary"": ""Nabla is rapidly hirin...",[{'summary': 'Nabla is rapidly hiring clinical...


## Update supabase with new details content for these signals

In [23]:
def extract_summary_and_details(cleaned):
    if not cleaned or not isinstance(cleaned, list) or len(cleaned) == 0:
        return None, None, None, None

    item = cleaned[0] if isinstance(cleaned[0], dict) else {}
    return (
        item.get("summary"),
        item.get("details"),
        item.get("update_sig"),
        item.get("update_descrip"),
    )

def batch_update_signals(df, batch_size=100):
    total_rows = len(df)
    num_batches = math.ceil(total_rows / batch_size)

    print(f"üöÄ Updating {total_rows} signals in {num_batches} batches...")

    updated_count = 0

    for i in range(num_batches):
        batch = df.iloc[i * batch_size : (i + 1) * batch_size]
        update_rows = []

        now_utc = datetime.now(timezone.utc).isoformat(sep=" ", timespec="microseconds")

        for _, row in batch.iterrows():
            signal_id = int(row["signal_id"])
            cleaned = row.get("response_cleaned")

            summary, details, update_sig, update_descrip = extract_summary_and_details(cleaned)

            update_rows.append({
                "id": signal_id,
                "summary": summary,
                "details": details,
                "update_sig": update_sig,
                "update_descrip": update_descrip,
                "last_updated": now_utc
            })

        resp = (
            supabase
            .table("signals")
            .upsert(update_rows, on_conflict="id")
            .execute()
        )

        updated_count += len(resp.data or [])
        print(f"   ‚úÖ Batch {i+1}/{num_batches}: {len(update_rows)} rows")

    print("======================================")
    print(f"üéâ FINISHED ‚Äî {updated_count} total rows updated")
    print("======================================")

batch_update_signals(updated_signal_details_df, batch_size=100)

üöÄ Updating 65 signals in 1 batches...
   ‚úÖ Batch 1/1: 65 rows
üéâ FINISHED ‚Äî 65 total rows updated


## Remove content_ids from dataframe where true [remove this?]

In [24]:
# 1. Build set of mapped content_ids
mapped_content_ids = {
    int(row["content_id"]) 
    for row in mapped_signal_links
}

# 2. Drop those rows from data_feed_combined
before = len(data_feed_combined)

data_feed_combined = data_feed_combined[
    ~data_feed_combined["id"].astype(int).isin(mapped_content_ids)
].reset_index(drop=True)

after = len(data_feed_combined)

print(f"‚úÖ Removed {before - after} processed content rows")
print(f"‚úÖ Data feed now has {len(data_feed_combined)} rows remaining")

‚úÖ Removed 203 processed content rows
‚úÖ Data feed now has 53 rows remaining


# Create new insights using LLM

## Group datafeeds together to prep for LLM

In [25]:
data_feed = (
    data_feed_combined
    .groupby(["source", "competitor_id", "company_id", "company_custom_prompt"], as_index=False)
    .agg({"content_json": list})
)
print(f"‚úÖ Dataframe grouped together with {len(data_feed)} rows created...")

‚úÖ Dataframe grouped together with 11 rows created...


### Run through LLM

In [26]:
client = AsyncOpenAI(api_key=openai_api_key)
MODEL_NAME = OPENAI_MODEL
MAX_CONCURRENCY = 100

semaphore = asyncio.Semaphore(MAX_CONCURRENCY)
lock = asyncio.Lock()

async def fetch_response(prompt, row_id, company_id, progress):
    async with semaphore:
        try:
            response = await client.chat.completions.create(
                model=MODEL_NAME,
                messages=[{"role": "user", "content": prompt}]
            )
            text = response.choices[0].message.content
        except Exception as e:
            text = None
            print(f"‚ö†Ô∏è Error for id {row_id}: {e}")

        # Update progress safely
        async with lock:
            progress["done"] += 1
            done = progress["done"]
            total = progress["total"]
            print(f"‚úÖ Completed {done}/{total} ({done/total:.0%})")

        return {
            "competitor_id": row_id,
            "company_id": company_id,
            "response": text
        }

async def process_all(df):
    total = len(df)
    progress = {"done": 0, "total": total}
    tasks = []

    for _, row in df.iterrows():
        prompt = f"""
            You are a competitive intelligence analyst reporting to C-suite executives on behalf of your client.

            About your client: {row['company_custom_prompt']}.

            **Your task:**
            Each piece of content you are being given is from one of your clients competitors.
            Analyze the following content (news articles, LinkedIn posts, Reddit posts, social ads, job postings, etc) to identify key strategic and competitive insights that are relevant or related to your client.
            You do not have to include every piece of content in your analysis.
            There will be pieces of content that are not important or relevant. You can leave those out.
            
            For each insight you identify:
            - Create a headline that states **what they're doing AND why** (the strategic endgame)
            - Write a one sentence insight that explains the implication for the reader
            - List the supporting sources below (title and URL)
            - You can use the same source in different themes if needed
            - However, only use each source a maximum of once per theme
            - If possible, include the competitors name in the headline
            
            ## Headline Formula:
            **"[action/method] to [strategic goal/endgame]"**
            
            A good headline answers: What are they doing, and what are they trying to achieve or protect?
            
            ## Headline Guidelines:
            ‚úÖ DO name both the **method** and the **goal/endgame**
            ‚úÖ DO imply the threat or opportunity for the reader
            ‚úÖ DO use verbs that show transition or intent (shifting, locking in, betting on, racing to, pivoting)
            ‚úÖ DO infer strategy from job postings (hiring patterns reveal GTM shifts, capability bets)
            ‚úÖ OK to use competitor name
            
            ‚ùå DON'T just list activities without the "why"
            ‚ùå DON'T describe topics‚Äîstate the strategic play
            ‚ùå DON'T use vague jargon ("AI Platform," "Integration Central")
            ‚ùå DON'T write headlines that could apply to any company
            
            ## Good Examples:
            - Ambience Healthcare relocating its global HQ to Nashville to embed inside the U.S. healthcare
            - Nabla scaling executive and technical leadership to accelerate enterprise rollout
            - Microsoft Healthcare building voice-based AI stress detection to enter clinical remote monitoring
            - Square building proprietary POS hardware to increase lock‚Äëin
            
            ## Bad Examples:
            - AI Platform at Scale" (no method, no goal)
            - Scaling commercial GTM and building market credibility" (lists activities, doesn't say why)
            - Owning integrations: developer APIs, hardware and deeper partner ties" (describes what, not why it matters)
            - New Product Features" (too generic)
            - Investing in data, streaming, and observability (too generic)
            
            ## Insight Guidelines:
            ‚úÖ 1 sentence max, preferably no more than 5-7 words, any longer its hard to quickly skim
            ‚úÖ State the implication for the reader (threat, opportunity, or vulnerability)
            ‚úÖ If there's a weakness in the competitor's play, name it
            ‚úÖ Do not say "They are" or "our", when referring to a competitor use their name.
            
            **Your job:** Synthesize patterns into strategic insights that tell executives what a competitor is trying to achieve and what that means for them.

            ## Output Schema
            Return only valid JSON matching this structure:
                {{
                  "insight_id": 1,
                  "company_id": {row['company_id']},
                  "headline": "Title here",
                  "insight": "Concise strategic insight",
                  "supporting_content": [
                    {{
                      "title": "Title 1",
                      "url": "https://example.com/article1",
                      "relevance": "Brief note on how this article supports the theme",
                      "content_id": "content_id from the data",
                      "source": "source from the data",
                    }}
                  ]
                }}
            
            ## Content
            Here is the content: {row['content_json']}

            ‚ö†Ô∏è Important:
            - **Do not fabricate** any fields like `source`, `url`, `company_id` or `content_id`. Use them exactly as provided in the input data.
            - Input the competitor_id field as an integer, not a string
            - If multiple items support the same theme, include each under `supporting_content`.
            - Return only valid JSON matching the exact schema below.
            - Each "supporting_content" item must be unique.
            - Do NOT repeat the same source, URL, or content_id more than once within a theme.
            - If multiple sentences or mentions refer to the same source, merge them into a single supporting_content entry with a concise combined "relevance" summary.

            ## Output Format Example
            This should be the structure of the output:
                {{
                  "insight_id": 1,
                  "company_id": source from the data field `company_id`,
                  "headline": "Strategic title here",
                  "insight": "A concise strategic insight or interpretation of what this theme means for the competitive landscape",
                  "supporting_content": [
                    {{
                      "title": "Title 1",
                      "url": "https://example.com/article1",
                      "relevance": "Brief note on how this article supports the theme",
                      "content_id": "content_id from the data",
                      "source": "source from the data",
                    }},
                    {{
                      "title": "Title 2",
                      "url": "https://example.com/article2",
                      "relevance": "Brief note on how this article supports the theme",
                      "content_id": "content_id from the data",
                      "source": "source from the data",
                    }}
                  ]
                }},
                {{
                  "insight_id": 2,
                  "company_id": source from the data field `company_id`,
                  "headline": "Another strategic theme",
                  "insight": "Strategic interpretation of this pattern",
                  "supporting_content": [
                    {{
                      "title": "Title 3",
                      "url": "https://example.com/article3",
                      "relevance": "Connection to theme",
                      "source": "source from the data",
                      "content_id": "content_id from the data",
                    }}
                      ]
                }}
            """
        tasks.append(fetch_response(prompt, row["competitor_id"], row["company_id"], progress))

    results = await asyncio.gather(*tasks)
    return results

results = await process_all(data_feed)
results_df = pd.DataFrame(results)

‚úÖ Completed 1/11 (9%)
‚úÖ Completed 2/11 (18%)
‚úÖ Completed 3/11 (27%)
‚úÖ Completed 4/11 (36%)
‚úÖ Completed 5/11 (45%)
‚úÖ Completed 6/11 (55%)
‚úÖ Completed 7/11 (64%)
‚úÖ Completed 8/11 (73%)
‚úÖ Completed 9/11 (82%)
‚úÖ Completed 10/11 (91%)
‚úÖ Completed 11/11 (100%)


## Clean up json

In [27]:
results_df["signals_v1"] = results_df["response"].apply(safe_json_loads)
results_df.head(50)

Unnamed: 0,competitor_id,company_id,response,signals_v1
0,165,72,"[\n {\n ""insight_id"": 1,\n ""company_id""...","[{'insight_id': 1, 'company_id': 1726411658241..."
1,173,23,"[\n {\n ""insight_id"": 1,\n ""company_id""...","[{'insight_id': 1, 'company_id': 1506665873936..."
2,170,73,"[\n {\n ""insight_id"": 1,\n ""company_id""...","[{'insight_id': 1, 'company_id': 170, 'headlin..."
3,171,73,"[\n {\n ""insight_id"": 1,\n ""company_id""...","[{'insight_id': 1, 'company_id': 171, 'headlin..."
4,172,73,"[\n {\n ""insight_id"": 1,\n ""company_id""...","[{'insight_id': 1, 'company_id': 172, 'headlin..."
5,80,23,"[\n {\n ""insight_id"": 1,\n ""company_id""...","[{'insight_id': 1, 'company_id': 80, 'headline..."
6,152,23,"[\n {\n ""insight_id"": 1,\n ""company_id""...","[{'insight_id': 1, 'company_id': 152, 'headlin..."
7,158,66,"[\n {\n ""insight_id"": 1,\n ""company_id""...","[{'insight_id': 1, 'company_id': 158, 'headlin..."
8,160,71,"[\n {\n ""insight_id"": 1,\n ""company_id""...","[{'insight_id': 1, 'company_id': 160, 'headlin..."
9,170,73,"[\n {\n ""insight_id"": 1,\n ""company_id""...","[{'insight_id': 1, 'company_id': 170, 'headlin..."


# Combine similar themes and content via LLM

In [28]:
draft_signal_report = (
    results_df
    .groupby("company_id", as_index=False)
    .agg({
        "competitor_id": list,
        "signals_v1": list,
    })
)

## Prompt for LLM

In [29]:
final_signals_prompt = f"""
You have been given a JSON list of key insights from a competitive intelligence report for you client.

Each object in the list represents an insight about your client's competitor.

Your task is to combine and clean up redundant or overlapping insights **within each competitor's list** while keeping the JSON format clean and consistent.

---

### üß† Instructions

1. **Input Format**
   - The input JSON is a list of objects or lists, each representing one competitor's collection of insights.
   - Each insight object contains:
     - `theme_id`
     - `company_id`
     - `headline`
     - `insight`
     - `supporting_content` (an array of source objects)

2. **What to Do**
   - For each competitor:
     * Identify insights that are highly similar or cover the same topic.
       - Example: ‚ÄúSqures AI adoption threatens scribe headcount‚Äù and ‚ÄúSquares AI scribes replacing human scribes‚Äù are redundant.
     * Merge those into a single, stronger insight.
     * Each insight can have a mix of different types of sources (news, linkedin posts, reddit posts, etc). They do not have to contain all the same type of source like they do in the initial data file.

3. **How to Merge**
   - **`headline`:** Write a new, concise, and accurate headline summarizing the merged idea.
   - **`insight`:** Combine and synthesize the text of all related insights into a single coherent paragraph.
   - **`supporting_content`:** Merge the arrays from all related insights (deduplicate identical links if possible).
   - **IDs:** Keep the `theme_id` and `company_id` from the first theme in the merged group.

4. **Keep Unique Insights**
   - Insights that are distinct should remain unmerged and unchanged.

5. **Writing a great headline**
             
## Headline Formula:
**"[Company] is [doing X] to [achieve Y]"**
**"[Insight] is [happening] to [achieve Y]"**

A good headline answers: What are they doing, and why does it matter?

## Headline Guidelines:
‚úÖ DO write in plain language‚Äîif it sounds like jargon, rewrite it
‚úÖ DO state the action and the consequence in one sentence
‚úÖ DO keep it short (under 15 words ideal)
‚úÖ DO use present progressive ("is using," "is targeting," "is pushing")
‚úÖ When possible, use name the company at the start if the company name is available

‚ùå DON'T use filler adjectives (aggressively, strategically, actively)
‚ùå DON'T use business jargon (GTM, leverage, scale, accelerate, lock-in)
‚ùå DON'T use colons or dashes to separate company from headline
‚ùå DON'T list tactics‚Äîstate what they're doing and who it affects
‚ùå DON'T write anything you wouldn't say out loud to a colleague

## Good Examples:
- DoorDash is using vague drop-off policies to avoid refunds and shift costs to restaurants
- DoorDash is recruiting liquor stores with paid ads to grow selection and block rival growth
- Nabla is hiring sales leaders in the US to push into enterprise healthcare
- Healthcare AI startups are losing customer trust as AI tools falter

## Bad Examples:
- "DoorDash: Leveraging Policy Ambiguity to Reduce Payout Exposure" (jargon, colon format)
- "Scaling commercial GTM and building market credibility" (no company, no plain language)
- "Aggressively targeting alcohol merchants to expand assortment" (filler adjective, missing company)
- "Square building proprietary POS hardware to increase lock-in" (missing "is", jargon)

---

### ‚öôÔ∏è Output Format Requirements

* The final output must be valid JSON.
* It should remain a **list**, where each element represents one company's full list of insights.
* Do **not** reintroduce a `"strategic_themes"` wrapper or any extra keys.
* Each insight should include exactly:
  - `insight_id`
  - `company_id`
  - `headline` (keep this short, ideally only about 5-7 words)
  - `insight` (this should be one sentence max)
  - `supporting_content` (with title, url, relevance, content_id, source)

Example structure:

[
  {{
    "insight_id": 1,
    "company_id": 78,
    "headline": "Ambience prioritizing remote-friendly hiring",
    "insight": "Ambience is positioning itself as hybrid/remote-friendly...",
    "supporting_content": [
      {{
        "title": "Ambience Healthcare - Jobs",
        "url": "https://jobs.ashbyhq.com/ambiencehealthcare...",
        "relevance": "Careers page highlights...",
        "content_id": 33573,
        "source": "news"
      }}
    ]
  }},
  [
    {{
      "insight_id": 1,
      "company_id": 23,
      "headline": "AI adoption threatens scribe headcount",
      "insight": "Healthcare organizations and EHR vendors adopting AI scribing...",
      "supporting_content": [...]
    }},
    {{
      "insight_id": 2,
      "company_id": 23,
      "headline": "Scribe burnout driving turnover",
      "insight": "High patient volumes and low pay create burnout...",
      "supporting_content": [...]
    }}
  ]
]

---

Please analyze and return the updated, merged JSON using **only** this structure.
"""
print("‚úÖ Prompt has been locked and loaded.")

‚úÖ Prompt has been locked and loaded.


## Run through LLM

In [30]:
async def fetch_signals_v2(prompt, company_id, progress):
    async with semaphore:
        try:
            response = await client.chat.completions.create(
                model="gpt-5-mini",
                messages=[{"role": "user", "content": prompt}]
            )
            text = response.choices[0].message.content

        except Exception as e:
            text = None
            print(f"‚ö†Ô∏è Error for company_id {company_id}: {e}")

        # ---- Update progress safely ----
        async with lock:
            progress["done"] += 1
            done = progress["done"]
            total = progress["total"]
            print(f"‚úÖ Completed {done}/{total} ({done/total:.0%})")

        # ---- Return standardized row ----
        return {
            "company_id": company_id,
            "signals_v2": text
        }
        
async def process_all_signals(df, final_signals_prompt):
    total = len(df)
    progress = {"done": 0, "total": total}
    tasks = []

    for _, row in df.iterrows():
        company_id = row["company_id"]
        signals_text = row["signals_v1"]

        prompt = f"""
        {final_signals_prompt}

        **Key Insights JSON:**
        {signals_text}
        """

        tasks.append(
            fetch_signals_v2(prompt, company_id, progress)
        )

    # Run all tasks
    results = await asyncio.gather(*tasks)
    return results
results = await process_all_signals(draft_signal_report, final_signals_prompt)

# Convert to DataFrame
signal_v2 = pd.DataFrame(results)
print(f"‚úÖ Created signal_v2 DataFrame with {len(signal_v2)} rows")

‚úÖ Completed 1/5 (20%)
‚úÖ Completed 2/5 (40%)
‚úÖ Completed 3/5 (60%)
‚úÖ Completed 4/5 (80%)
‚úÖ Completed 5/5 (100%)
‚úÖ Created signal_v2 DataFrame with 5 rows


# Prep data to write back to Supabase

## Convert to dataframe

In [31]:
records = []

for _, row in signal_v2.iterrows():
    company_id = row["company_id"]
    themes_raw = row["signals_v2"]

    # Parse JSON string safely
    try:
        themes = json.loads(themes_raw)
    except (TypeError, json.JSONDecodeError):
        continue

    # Skip if not a list
    if not isinstance(themes, list):
        continue

    # Handle nested list-of-lists structure
    for inner in themes:
        # if a single theme dict was wrapped in another list, flatten it
        if isinstance(inner, list):
            inner_themes = inner
        else:
            inner_themes = [inner]

        for theme in inner_themes:
            if not isinstance(theme, dict):
                continue

            headline = theme.get("headline")
            insight = theme.get("insight")
            theme_id = theme.get("insight_id")

            for item in theme.get("supporting_content", []):
                records.append({
                    "insight_id": theme_id,
                    "company_id": company_id,
                    "competitor_id": item.get("competitor_id"),
                    "title": headline,
                    "summary": insight,
                    "content_title": item.get("title"),
                    "url": item.get("url"),
                    "relevance": item.get("relevance"),
                    "content_id": item.get("content_id"),
                    "source": item.get("source")
                })

# Convert to DataFrame
content_data_feed = pd.DataFrame(records)

print(f"‚úÖ Created flattened DataFrame with {len(content_data_feed)} rows.")
content_data_feed.head(1)

‚úÖ Created flattened DataFrame with 33 rows.


Unnamed: 0,insight_id,company_id,competitor_id,title,summary,content_title,url,relevance,content_id,source
0,1,23,,Epical Health is running sustained Google Ads,Epical Health is running sustained Google ad c...,Epical Health creative CR04920972368668524545,https://adstransparency.google.com/advertiser/...,Ad entry shows active campaign starting 2025-0...,82348,ads


In [32]:
content_data_feed.head()

Unnamed: 0,insight_id,company_id,competitor_id,title,summary,content_title,url,relevance,content_id,source
0,1,23,,Epical Health is running sustained Google Ads,Epical Health is running sustained Google ad c...,Epical Health creative CR04920972368668524545,https://adstransparency.google.com/advertiser/...,Ad entry shows active campaign starting 2025-0...,82348,ads
1,1,23,,Epical Health is running sustained Google Ads,Epical Health is running sustained Google ad c...,Epical Health image creative (archive/simgad/1...,https://adstransparency.google.com/advertiser/...,Image-format creative active 2025-07-06 to 202...,82341,ads
2,1,23,,Epical Health is running sustained Google Ads,Epical Health is running sustained Google ad c...,Epical Health creative CR04201105845217394689,https://adstransparency.google.com/advertiser/...,Text-format creative with start 2026-01-24 and...,82330,ads
3,1,23,,Abridge is being grouped with workforce‚Äëamplif...,Analyst coverage groups Abridge with other wor...,Key takeaways from the First Analysis Healthca...,https://www.linkedin.com/posts/first-analysis_...,Groups Abridge with other 'LaborProductivity /...,30179,linkedin
4,2,23,,Suki is hiring public‚Äëhealth talent for govern...,Suki is recruiting hires with public‚Äëhealth ex...,Beyond excited to be spending Valentine‚Äôs Day ...,https://www.linkedin.com/posts/sara-e-lamb_kel...,A Suki employee highlights prior PEPFAR/USAID ...,30174,linkedin


### Drop duplicates

In [33]:
# Drop duplicates by headline to avoid inserting twice
unique_signals = content_data_feed[["title", "summary", "company_id"]].drop_duplicates().to_dict(orient="records")
print(f"{len(unique_signals)} signals created...")

18 signals created...


## Write insights to db, grab ids

In [34]:
# Insert headlines and grab ids
insert_response = supabase.table("signals").insert(unique_signals).execute()
signals_inserted = insert_response.data
print(f"‚úÖ Inserted {len(signals_inserted)} signals.")

‚úÖ Inserted 18 signals.


In [35]:
headline_to_id = {row["title"]: row["id"] for row in signals_inserted}
print(f"Associated ids with headlines...")

Associated ids with headlines...


## Update db associating content to insights

In [36]:
# --- Step 3: Create join table entries for `news_feed_signals` ---
join_rows = []
for _, row in content_data_feed.iterrows():
    headline = row["title"]
    if headline not in headline_to_id:
        continue
    join_rows.append({
        "headline": headline,
        "signal_id": headline_to_id[headline],
        "content_id": row["content_id"],
        "source": row["source"]
    })
print(f" Updated db with {len(join_rows)} insights...")

 Updated db with 33 insights...


## Send to supabase

In [37]:
if not join_rows:
    print("‚ö†Ô∏è No join records created")
else:
    # Group join_rows by source
    grouped = {}
    for row in join_rows:
        src = row.get("source")
        grouped.setdefault(src, []).append(row)

    for source, rows in grouped.items():
        # Correct table map
        table_map = {
            "news": ("news_feed_signals", "news_feed_id"),
            "linkedin": ("linkedin_feed_signals", "linkedin_feed_id"),
            "reddit": ("reddit_feed_signals", "reddit_feed_id"),
            "jobs": ("jobs_feed_signals", "jobs_feed_id"),
            "ads": ("ad_library_signals", "ad_library_feed_id"),
        }

        if source not in table_map:
            print(f"‚ö†Ô∏è Skipping unknown source '{source}'")
            continue

        table_name, content_field = table_map[source]

        # Build insert rows
        insert_rows = []
        for r in rows:
            signal_id = r.get("signal_id")
            content_id = r.get("content_id")

            if signal_id is None or content_id is None:
                continue

            insert_rows.append({
                "signal_id": signal_id,
                content_field: content_id
            })

        if not insert_rows:
            print(f"‚ö†Ô∏è No valid rows for source '{source}'")
            continue

        # INSERT (no upsert)
        try:
            supabase.table(table_name).insert(insert_rows).execute()
            print(f"‚úÖ Inserted {len(insert_rows)} {source} ‚Üí signal links into {table_name}")
        except Exception as e:
            print(f"‚ùå Error inserting into {table_name}: {e}")

‚úÖ Inserted 9 ads ‚Üí signal links into ad_library_signals
‚úÖ Inserted 3 linkedin ‚Üí signal links into linkedin_feed_signals
‚úÖ Inserted 1 reddit ‚Üí signal links into reddit_feed_signals
‚úÖ Inserted 7 news ‚Üí signal links into news_feed_signals
‚úÖ Inserted 13 jobs ‚Üí signal links into jobs_feed_signals


# Enhance signal records with details

## Add Headline and signal_id

In [38]:
# Convert join_rows to df
join_df = pd.DataFrame(join_rows)
print(f"There are currently {join_df["signal_id"].nunique()} signals...")

There are currently 18 signals...


In [39]:
# Convert both fields to integers so they can map
join_df["content_id"] = pd.to_numeric(join_df["content_id"], errors="coerce")
data_feed_combined["id"] = pd.to_numeric(data_feed_combined["id"], errors="coerce")

# Merge dataframes
data_feed_with_headlines = join_df.merge(
    data_feed_combined[["id", "content_json", "company_id", "competitor_id"]],
    left_on="content_id",
    right_on="id",
    how="left"
)
print(f"There are are now {data_feed_with_headlines["signal_id"].nunique()} signals...")

There are are now 18 signals...


## Group by signal_id

In [40]:
fields_to_group = [
    "headline",
    "content_id",
    "source",
    "id",
    "content_json",
    "company_id",
    "competitor_id",
]

# Group by signal_id and aggregate each field into lists
signal_content_grouped = (
    data_feed_with_headlines
    .groupby("signal_id")[fields_to_group]
    .agg(list)
    .reset_index()
)

signal_content_grouped.head()

Unnamed: 0,signal_id,headline,content_id,source,id,content_json,company_id,competitor_id
0,2810,[Epical Health is running sustained Google Ads...,"[82348, 82341, 82330]","[ads, ads, ads]","[82348, 82341, 82330]","[{""content_id"": 82348, ""json_response"": ""{\""ad...","[23, 23, 23]","[173, 173, 173]"
1,2811,[Abridge is being grouped with workforce‚Äëampli...,[30179],[linkedin],[30179],"[{""content_id"": 30179, ""author_fullName"": ""Fir...",[23],[80]
2,2812,[Suki is hiring public‚Äëhealth talent for gover...,[30174],[linkedin],[30174],"[{""content_id"": 30174, ""author_fullName"": ""Sar...",[23],[80]
3,2813,[Research report is grouping cloud and EHR ven...,[30131],[linkedin],[30131],"[{""content_id"": 30131, ""author_fullName"": ""Joh...",[23],[152]
4,2814,[Clinicians are choosing iPad Minis for bedsid...,[24944],[reddit],[24944],"[{""content_id"": 24944, ""text"": ""I recently acc...",[23],[23]


# Send to LLM to add details

## Hit the LLM

In [41]:
import google.generativeai as genai

genai.configure(api_key=google_api_key)

# SWITCHED TO PRO
MODEL_NAME = "gemini-2.5-pro"

MAX_CONCURRENCY = 50

semaphore = asyncio.Semaphore(MAX_CONCURRENCY)
lock = asyncio.Lock()

# 2. Initialize the model with JSON mode enabled
model = genai.GenerativeModel(
    model_name=MODEL_NAME,
    generation_config={
        "temperature": 0,
        "response_mime_type": "text/plain"
    }
)

async def fetch_response(prompt, headline, signal_id, progress):
    async with semaphore:
        text = None
        try:
            response = await model.generate_content_async(prompt)
            text = response.text
             
        except Exception as e:
            text = None
            print(f"‚ö†Ô∏è Error for id {headline}: {e}")

        async with lock:
            progress["done"] += 1
            print(f"‚úÖ Completed {progress['done']}/{progress['total']}")

        return {
            "headline": headline,
            "signal_id" : signal_id,
            "response": text
        }

async def process_all(df):
    total = len(df)
    progress = {"done": 0, "total": total}
    tasks = []

    for _, row in df.iterrows():
        prompt = f"""
            Analyze the following content and produce a detailed competitive-intelligence extract written in clean, valid Markdown.
            Don't say things like "we" or "our", this is for a client, you aren't writing this as if you are part of the team.
            Use bullets for everything, do not number anything.
            
            You are a competitive analyst extracting actionable intelligence for strategic decision-makers who need to understand:
            1. What the competitor is doing
            2. Why it matters to us
            3. What we should watch or do about it
            4. Make sure the insights are focused on the competitor(s) mentioned in the title and summary section
            
            Your output must follow these formatting rules:
            - Use ## for all major section headings
            - Use standard markdown bullet points (- or *) for all lists
            - Keep bullets SHORT - one clear point per bullet, ideally one sentence max
            - Bold key phrases using **double asterisks** to enable skimming
            - each heading should only have 3-5 key points

            CITATION STRUCTURE - STRICT RULE
            When citing specific facts, quotes, or claims, you MUST use this exact format:
            
            [text](URL)
            
            Do NOT use any other format as it will break the frontend of the app.
            
            - Example: "Freshworks reports [15% revenue growth](https://...) during 2025 Q3.*"
            
            REQUIRED STRUCTURE (in this exact order):

            ## Overview
            
            [2-3 bullet points the strategic implication, their vulnerabilities, and recommended competitive response]
            
            ## What You Need to Know
            
            [3-4 bullet points that captures the competitive situation, momentum, and key context]
            
            ## The Threat to Watch

            - Short, punchy bullets (1-2 sentences each)
            - 3-5 key competitive threats or moves
            - Focus on impact to your business
            - Call out strategic bets, resource allocation, pricing/GTM tactics
            - Note capability gaps or weaknesses
            
            ## What to Monitor
            
            - Short bulleted items - one specific signal per line
            - 3-5 concrete, actionable monitoring points
            - Each should be scannable at a glance
            
            ADDITIONAL GUIDANCE:
            - Include relevant financial metrics, growth rates, or market position data
            - Note product/technology bets and positioning claims
            - Highlight partnership or GTM initiatives
            - Identify execution risks or organizational challenges
            - Every bullet should be independently useful - no filler
            - Source links should be linked to the actual text inline
                - Example: "Freshworks reports [15% revenue growth](https://...) during 2025 Q3.*"

            CITATION FORMAT (STRICT ‚Äî WRAP THE CLAIM TEXT)
            
            ‚úÖ Correct:
            - Freshworks reports [15% revenue growth](https://...) during 2025 Q3.
            - Oracle is hiring to scale [a global, personalized health ecosystem](https://...)
            - The company launched [‚ÄúAutopilot for Finance‚Äù](https://...) for mid-market teams.
            
            Aim for 250-350 words total. Optimize for speed-reading and scannability.
            
            Now analyze this content:

            {row['content_json']}
            """
        # print(prompt)
        tasks.append(fetch_response(prompt, row["headline"], row["signal_id"], progress))

    results = await asyncio.gather(*tasks)
    return results

signal_enrichment = await process_all(signal_content_grouped)
signal_enrichment_df = pd.DataFrame(signal_enrichment)

‚úÖ Completed 1/18
‚úÖ Completed 2/18
‚úÖ Completed 3/18
‚úÖ Completed 4/18
‚úÖ Completed 5/18
‚úÖ Completed 6/18
‚úÖ Completed 7/18
‚úÖ Completed 8/18
‚úÖ Completed 9/18
‚úÖ Completed 10/18
‚úÖ Completed 11/18
‚úÖ Completed 12/18
‚úÖ Completed 13/18
‚úÖ Completed 14/18
‚úÖ Completed 15/18
‚úÖ Completed 16/18
‚úÖ Completed 17/18
‚úÖ Completed 18/18


In [42]:
signal_enrichment_df.head(1)

Unnamed: 0,headline,signal_id,response
0,[Epical Health is running sustained Google Ads...,2810,## Overview\n\n* Epical Health is executing ...


## Update signals in supabase

In [43]:
def update_signal_details(df):
    for _, row in df.iterrows():
        signal_id = int(row["signal_id"])
        details = row["response"]
        # summary = row["summary"]

        if not signal_id or pd.isna(signal_id):
            print(f"‚ö†Ô∏è Skipping row with no signal_id: {row}")
            continue

        try:
            resp = (
                supabase.table("signals")
                .update({
                        "details": details,
                        # "summary": summary
                    })
                .eq("id", signal_id)
                .execute()
            )
            print(f"‚úÖ Updated signal_id {signal_id}")
        except Exception as e:
            print(f"‚ùå Error updating signal_id {signal_id}: {e}")

# Run updates
update_signal_details(signal_enrichment_df)

‚úÖ Updated signal_id 2810
‚úÖ Updated signal_id 2811
‚úÖ Updated signal_id 2812
‚úÖ Updated signal_id 2813
‚úÖ Updated signal_id 2814
‚úÖ Updated signal_id 2815
‚úÖ Updated signal_id 2816
‚úÖ Updated signal_id 2817
‚úÖ Updated signal_id 2818
‚úÖ Updated signal_id 2819
‚úÖ Updated signal_id 2820
‚úÖ Updated signal_id 2821
‚úÖ Updated signal_id 2822
‚úÖ Updated signal_id 2823
‚úÖ Updated signal_id 2824
‚úÖ Updated signal_id 2825
‚úÖ Updated signal_id 2826
‚úÖ Updated signal_id 2827


## Update all IDs to processed == true

In [44]:
# --- Tables you want to update ---
update_tables = ["news_feed", "linkedin_feed", "reddit_posts", "jobs", "ad_library"]

for table in update_tables:
    df = feeds.get(table)

    if df is None or df.empty:
        print(f"‚ö†Ô∏è No data to update for {table}")
        continue

    # Extract all IDs returned in your earlier query
    ids_to_update = df["id"].tolist()

    print(f"üîß Updating {len(ids_to_update)} rows in {table}...")

    # Batch update using .in_()
    resp = (
        supabase.table(table)
        .update({"processed": True})
        .in_("id", ids_to_update)
        .execute()
    )

    print(f"‚úÖ Updated processed = TRUE for {table}.")

üîß Updating 55 rows in news_feed...
‚úÖ Updated processed = TRUE for news_feed.
üîß Updating 7 rows in linkedin_feed...
‚úÖ Updated processed = TRUE for linkedin_feed.
üîß Updating 4 rows in reddit_posts...
‚úÖ Updated processed = TRUE for reddit_posts.
üîß Updating 79 rows in jobs...
‚úÖ Updated processed = TRUE for jobs.
üîß Updating 111 rows in ad_library...
‚úÖ Updated processed = TRUE for ad_library.


In [45]:
print(f"‚úÖ Finished running signals feed at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

‚úÖ Finished running signals feed at 2026-02-06 19:29:21


# Update signals

In [46]:
existing_signals = feeds["signals"]
len(existing_signals)

931

In [47]:
client = AsyncAnthropic(api_key=anthropic_api_key)
MODEL_NAME = "claude-sonnet-4-5-20250929"
MAX_CONCURRENCY = 100

semaphore = asyncio.Semaphore(MAX_CONCURRENCY)
lock = asyncio.Lock()

async def fetch_response(prompt, title, id, progress):
    async with semaphore:
        try:
            response = await client.messages.create(
                model=MODEL_NAME,
                max_tokens=1200,
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )

            # Claude returns content as a list of blocks
            text = response.content[0].text if response.content else None
            
        except Exception as e:
            text = None
            print(f"‚ö†Ô∏è Error for id {headline}: {e}")

        # Update progress safely
        async with lock:
            progress["done"] += 1
            done = progress["done"]
            total = progress["total"]
            print(f"‚úÖ Completed {done}/{total} ({done/total:.0%})")

        return {
            "title": title,
            "id" : id,
            "response": text
        }

async def process_all(df):
    total = len(df)
    progress = {"done": 0, "total": total}
    tasks = []

    for _, row in df.iterrows():
        prompt = f"""
## Task

You are a competitive intelligence analyst. Your job is to write a clear, scannable headline that tells the reader what a competitor is doing and why it matters.

## Headline Guidelines

- One sentence, under 12 words
- Start with "[Company] is [verb-ing]..."
- State what they're doing and the consequence‚Äîskip the mechanism/how
- Write in plain language‚Äîif it sounds like jargon, rewrite it
- No filler adjectives (aggressively, strategically, actively)
- No business jargon (GTM, leverage, scale, accelerate, lock-in)
- No period at the end
- Do not say "our" or "we"‚Äîthis is for a client
- Be specific‚Äîgeneralizations don't help

Just provide the title as your response, nothing else.

## Good Examples

- Square is freezing merchant accounts and holding funds for 90+ days
- DoorDash is using vague policies to avoid refunds and shift costs to restaurants
- DoorDash is recruiting liquor stores to grow selection and block rivals
- Uber Eats is using its scale to pressure restaurants into higher fees

## Bad Examples

- "Square is freezing merchant accounts with automated triggers and holding funds for 90+ days to limit fraud risk." (too long, includes mechanism)
- "DoorDash: Leveraging Policy Ambiguity to Reduce Payout Exposure" (jargon, colon format)
- "Aggressively targeting alcohol merchants" (filler adjective, no company, no consequence)
- "New product features announced." (generic, passive, has period)

            ---
            
            ## Brief to Summarize

            {row['details']}
            """
        # print(prompt)
        tasks.append(fetch_response(prompt, row["title"], row["id"], progress))

    results = await asyncio.gather(*tasks)
    return results

signal_details = await process_all(existing_signals)
signal_details_df = pd.DataFrame(signal_details)
signal_details_df.head(50)

‚úÖ Completed 1/931 (0%)
‚úÖ Completed 2/931 (0%)
‚úÖ Completed 3/931 (0%)
‚úÖ Completed 4/931 (0%)
‚úÖ Completed 5/931 (1%)
‚úÖ Completed 6/931 (1%)
‚úÖ Completed 7/931 (1%)
‚úÖ Completed 8/931 (1%)
‚úÖ Completed 9/931 (1%)
‚úÖ Completed 10/931 (1%)
‚úÖ Completed 11/931 (1%)
‚úÖ Completed 12/931 (1%)
‚úÖ Completed 13/931 (1%)
‚úÖ Completed 14/931 (2%)
‚úÖ Completed 15/931 (2%)
‚úÖ Completed 16/931 (2%)
‚úÖ Completed 17/931 (2%)
‚úÖ Completed 18/931 (2%)
‚úÖ Completed 19/931 (2%)
‚úÖ Completed 20/931 (2%)
‚úÖ Completed 21/931 (2%)
‚úÖ Completed 22/931 (2%)
‚úÖ Completed 23/931 (2%)
‚úÖ Completed 24/931 (3%)
‚úÖ Completed 25/931 (3%)
‚úÖ Completed 26/931 (3%)
‚úÖ Completed 27/931 (3%)
‚úÖ Completed 28/931 (3%)
‚úÖ Completed 29/931 (3%)
‚úÖ Completed 30/931 (3%)
‚úÖ Completed 31/931 (3%)
‚úÖ Completed 32/931 (3%)
‚úÖ Completed 33/931 (4%)
‚úÖ Completed 34/931 (4%)
‚úÖ Completed 35/931 (4%)
‚úÖ Completed 36/931 (4%)
‚úÖ Completed 37/931 (4%)
‚úÖ Completed 38/931 (4%)
‚úÖ Completed 39/931 

Unnamed: 0,title,id,response
0,DoorDash is hiring Tesla's robotics leader to ...,52646,DoorDash is adding Tesla's former robotics VP ...
1,Skanska is hiring sustainability roles to win ...,2809,Skanska is embedding sustainability roles in p...
2,Skanska is building Skanska Direkt to capture ...,2808,Skanska is targeting small projects in Sweden ...
3,Skanska is hiring cloud and IT talent to moder...,2807,Skanska is hiring project leaders to deliver c...
4,Skanska is hiring heavy‚Äëcivil leaders to expan...,2806,Skanska is recruiting senior civil constructio...
5,Turner is reinforcing project engineering with...,2805,Turner is hiring senior engineers with BIM ski...
6,Turner is hiring field security and survey tec...,2804,Turner is posting confused job descriptions th...
7,Turner is hiring HR admins to scale regional p...,2803,Turner is hiring HR staff to capture early-car...
8,MX Build is offering brand-voice AI templates,2802,"SMBs are comparing Claude, Gemini and Perplexi..."
9,MX Build is consolidating AI for quoting and o...,2801,Small businesses are abandoning ChatGPT for AI...


In [48]:
def update_signal_details(df):
    for _, row in df.iterrows():
        signal_id = int(row["id"])
        output = row["response"]
        # summary = row["summary"]

        if not signal_id or pd.isna(signal_id):
            print(f"‚ö†Ô∏è Skipping row with no signal_id: {row}")
            continue

        try:
            resp = (
                supabase.table("signals")
                .update({
                        "title": output,
                        # "details": output,
                    })
                .eq("id", signal_id)
                .execute()
            )
            # print(f"‚úÖ Updated signal_id {signal_id}")
        except Exception as e:
            print(f"‚ùå Error updating signal_id {signal_id}: {e}")

# Run updates
update_signal_details(signal_details_df)
print("Signals updated...")

Signals updated...


### 