# Setup

In [1]:
# NOTE(review): limit_number is not referenced anywhere else in the visible
# notebook — confirm it is still needed (e.g. as an injected run parameter).
limit_number = 100

In [2]:
from datetime import datetime

# Prints a status line stamped with the current local time (naive datetime.now()).
# NOTE(review): the message says "Finished" although this cell sits in the Setup
# section, ahead of the pipeline — confirm whether it belongs at the end of the
# notebook or should read "Started".
print(
    f"‚úÖ Finished running signals feed at "
    f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
)

‚úÖ Finished running signals feed at 2026-02-09 09:09:52


## Import libraries

In [3]:
from dotenv import load_dotenv
load_dotenv()

import requests
import re
import json
import pandas as pd
import numpy as np
import asyncio
import anthropic
from anthropic import AsyncAnthropic
import pyperclip
from supabase import create_client, Client
import time
from typing import List, Dict
from google import genai
from openai import OpenAI
import asyncio
from openai import AsyncOpenAI
import math
from datetime import datetime, timezone
import os
from bs4 import BeautifulSoup
from newspaper import Article
from playwright.sync_api import sync_playwright
import json5
import ast
import asyncio
import google.generativeai as genai


# Supabase API
# All three values are read from the environment; a missing variable raises
# KeyError right here instead of failing later on the first request.
SUPABASE_URL = os.environ["SUPABASE_URL"]
# NOTE(review): SUPABASE_KEY is not used in the visible notebook — the client
# below is created with SERVICE_ROLE_KEY instead.
SUPABASE_KEY = os.environ["SUPABASE_KEY"]
SERVICE_ROLE_KEY = os.environ["SUPABASE_SERVICE_ROLE_KEY"]

# Shared client used by every read and write in this notebook.
# NOTE(review): presumably the service-role key bypasses row-level security —
# confirm that this level of access is intended for this job.
supabase: Client = create_client(SUPABASE_URL, SERVICE_ROLE_KEY)

## Clean up JSON function

In [4]:
def safe_json_loads(x):
    r"""Parse a JSON string, tolerating malformed ``\u`` escapes.

    Non-string inputs are returned unchanged so the function can be mapped
    over columns that mix raw JSON text with already-decoded values.

    A ``\u`` that is not followed by four hex digits is rewritten to
    ``\uFFFF`` before decoding so that one bad escape does not make the whole
    document unparseable. Only genuine escapes are touched: a ``\u`` whose
    backslash is itself escaped (``\\users`` in the JSON text, i.e. the
    literal characters ``\users``) is left alone.

    Args:
        x: A JSON document as ``str``, or any other object.

    Returns:
        The decoded value for a string input, ``x`` itself for a non-string
        input, or ``None`` if decoding fails (the error and the first 300
        characters of the repaired text are printed).
    """
    if not isinstance(x, str):
        return x

    # Match `\u` only when its backslash starts an escape, i.e. when it is
    # preceded by an even number of backslashes. The lookbehind anchors the
    # match at the start of a backslash run; group 1 consumes complete `\\`
    # pairs (escaped backslashes) and is re-emitted untouched.
    x = re.sub(
        r'(?<!\\)((?:\\\\)*)\\u(?![0-9a-fA-F]{4})',
        r'\1\\uFFFF',
        x,
    )

    try:
        return json.loads(x)
    except Exception as e:
        # Best-effort by design: report and return None rather than abort the run.
        print("‚ùå JSON decode failed:", e)
        print("Offending value:", x[:300])
        return None

## Initialize AI models

In [5]:
# Model identifiers used when calling the respective provider APIs.
ANTHROPIC_MODEL = "claude-haiku-4-5-20251001"
OPENAI_MODEL = "gpt-5-mini"

# OpenAI key
openai_api_key = os.environ["OPENAI_API_KEY"]

# Anthropic
anthropic_api_key = os.environ["ANTHROPIC_API_KEY"]
# NOTE(review): the name `client` is rebound to an AsyncOpenAI client in the
# "Run through LLM" cell, so this Anthropic client is unreachable after that
# cell runs — confirm whether it is still needed.
client = anthropic.Anthropic(
    api_key=anthropic_api_key,
)

# Gemini Key
# NOTE(review): only referenced from the commented-out Gemini cell at the end
# of the visible notebook.
google_api_key = os.environ["GOOGLE_API_KEY"]

# System prompt
# NOTE(review): this f-string has no placeholders and `system` is not passed
# to any call in the visible notebook — confirm it is still used.
system = f"""

You are a research analyst doing competitive intelligence research for a client.

"""

# Grab data

In [6]:
def fetch_all_rows(table, filters=None, batch_size=1000):
    """Fetch every matching row of a Supabase table by paging through it.

    Rows are requested ordered by ``id`` descending, ``batch_size`` rows per
    request, until a page comes back shorter than ``batch_size``.

    Args:
        table: Name of the table to read.
        filters: Optional mapping of column name to value. A list value is
            applied as an ``in_`` filter, anything else as an ``eq`` filter.
        batch_size: Number of rows requested per page; must be at least 1.

    Returns:
        A list containing the ``data`` payload of every page, concatenated in
        the order the pages were returned.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. Without this guard
            the "short page" stop condition could never be met and the loop
            would not terminate.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")

    all_rows = []
    start = 0

    while True:
        # The query is rebuilt for every page so that each request carries its
        # own filters and range.
        query = supabase.table(table).select("*").order("id", desc=True)

        # Apply filters if provided
        for col, val in (filters or {}).items():
            if isinstance(val, list):
                query = query.in_(col, val)
            else:
                query = query.eq(col, val)

        # range() bounds are inclusive, hence the -1 on the upper bound.
        # NOTE(review): offset paging can skip or repeat rows if the table is
        # written to while it is being read — confirm that is acceptable.
        resp = query.range(start, start + batch_size - 1).execute()
        data = resp.data or []
        all_rows.extend(data)

        # A short (or empty) page means the table has been exhausted.
        if len(data) < batch_size:
            break

        start += batch_size

    return all_rows

# Every table pulled for this run; each is stored in `feeds` as a DataFrame keyed by table name.
tables = ["news_feed", "linkedin_feed", "reddit_posts", "jobs", "ad_library", "companies", "competitors", "feed_summaries"]
feeds = {}

for table in tables:
    # Apply filters only for content tables; the reference tables
    # (companies, competitors, feed_summaries) are loaded in full.
    if table not in ["companies", "competitors", "feed_summaries"]:
        filters = {
            "relevant": True,
            # "processed": False
        }
    else:
        filters = None

    rows = fetch_all_rows(table, filters=filters)
    # An empty result yields an empty DataFrame with no columns.
    feeds[table] = pd.DataFrame(rows)

    print(f"‚úÖ Retrieved {len(rows)} rows from {table}." if rows else f"‚ö†Ô∏è No rows in {table}.")

# ---------- Assign DataFrames ----------
# Content feeds (filtered to relevant=True above).
news_feed = feeds["news_feed"]
linkedin_feed = feeds["linkedin_feed"]
reddit_posts = feeds["reddit_posts"]
jobs = feeds["jobs"]
ads = feeds["ad_library"]

# Reference tables (unfiltered).
feed_summaries = feeds["feed_summaries"]
companies = feeds["companies"]
competitors = feeds["competitors"]

‚úÖ Retrieved 5388 rows from news_feed.
‚úÖ Retrieved 3897 rows from linkedin_feed.
‚úÖ Retrieved 508 rows from reddit_posts.
‚úÖ Retrieved 2561 rows from jobs.
‚úÖ Retrieved 4564 rows from ad_library.
‚úÖ Retrieved 12 rows from companies.
‚úÖ Retrieved 60 rows from competitors.
‚úÖ Retrieved 140 rows from feed_summaries.


# Combine data feeds into one dataframe

## Add company id to dfs

In [7]:
# Merge company status into competitors
competitors_with_status = competitors.merge(
    companies[["id", "status"]],
    left_on="company_id",
    right_on="id",
    how="left",
    suffixes=("", "_company")
)

# Build lookup
competitor_lookup = competitors_with_status.set_index("id")[["company_id", "competitor_name", "status"]].to_dict(orient="index")

def append_company_and_competitor(df):
    """Annotate a feed with the owning company, competitor name and status.

    Each row's ``competitor_id`` is resolved through the module-level
    ``competitor_lookup`` mapping. Ids that are not in the mapping fall back
    to ``company_id`` 0 and ``"Unknown"`` for the name and status.

    The frame is modified in place and also returned. A frame that is empty
    or has no ``competitor_id`` column is returned untouched after printing a
    notice.
    """
    # Nothing to annotate.
    if df.empty:
        print("‚ö†Ô∏è DataFrame is empty ‚Äî skipping append.")
        return df

    # Without competitor ids there is nothing to look up.
    if "competitor_id" not in df.columns:
        print("‚ö†Ô∏è No 'competitor_id' column ‚Äî skipping append.")
        return df

    competitor_ids = df["competitor_id"]

    def _resolve(field, fallback):
        # One lookup pass per output column; unknown ids yield the fallback.
        return competitor_ids.map(
            lambda cid: competitor_lookup.get(cid, {}).get(field, fallback)
        )

    df["company_id"] = _resolve("company_id", 0)
    df["competitor_name"] = _resolve("competitor_name", "Unknown")
    df["status"] = _resolve("status", "Unknown")

    # Nullable integer dtype so the ids stay integers even if some are missing.
    df["company_id"] = df["company_id"].astype("Int64")
    return df

# Apply to feeds
news_feed = append_company_and_competitor(news_feed)
linkedin_feed = append_company_and_competitor(linkedin_feed)
jobs = append_company_and_competitor(jobs)
ads = append_company_and_competitor(ads)
print(f"Company ids added to dataframes...")

Company ids added to dataframes...


## Convert news feed to json ready df

In [8]:
# Display the first row of news_feed to inspect the available columns.
news_feed.head(1)

Unnamed: 0,id,created_at,title,url,description,competitor_id,published_date,push_to_feed,relevance_descrip,relevant,processed,content,thumbnail,insight,company_id,publisher,display_date,competitor_name,status
0,63752,2026-02-09T11:01:22.955102+00:00,Brooks Nader‚Äôs DoorDash Ad Has Alix Earle Sayi...,https://www.yahoo.com/entertainment/celebrity/...,Brooks Nader's latest ad for DoorDash has ever...,158.0,2026-02-07T13:15:45+00:00,,Ad featuring DoorDash,True,False,Powered by Yahoo Scout. Yahoo is using AI to g...,https://news.google.com/api/attachments/CC8iK0...,DoorDash continues to enhance brand visibility...,66,Yahoo,2026-02-07,DoorDash,trial


In [9]:
# Per-article frame that is later concatenated with the other feeds. It stays
# an empty DataFrame when news_feed is empty or is missing a required column.
news_feed_json = pd.DataFrame()

if news_feed is not None and not news_feed.empty:
    # Every column read below is listed here so that a schema gap is reported
    # by the warning branch instead of surfacing as a KeyError mid-conversion.
    required_cols = [
        "id", "competitor_id", "company_id", "url", "title",
        "content", "description", "display_date",
    ]
    missing_cols = [col for col in required_cols if col not in news_feed.columns]

    if missing_cols:
        print(f"‚ö†Ô∏è Missing columns in news_feed: {missing_cols}")
    else:
        # Columns shared by all feeds (used for grouping further down).
        news_feed_json["id"] = news_feed["id"]
        news_feed_json["competitor_id"] = news_feed["competitor_id"]
        news_feed_json["company_id"] = news_feed["company_id"]
        news_feed_json["content"] = news_feed["content"]
        news_feed_json["display_date"] = news_feed["display_date"]
        news_feed_json["source"] = "news"

        # One JSON document per article. The full article body is deliberately
        # left out of the payload (see the commented-out key below).
        # NOTE(review): json.dumps serialises missing values as the bare token
        # NaN, which is not strict JSON — confirm downstream consumers accept it.
        news_feed_json["content_json"] = news_feed.apply(
            lambda row: json.dumps({
                "content_id": row["id"],
                "url": row["url"],
                "title": row["title"],
                # "content": row["content"],
                "description": row["description"],
                "competitor_id": row["competitor_id"],
                "source": "news"
            }),
            axis=1
        )

        print(f"‚úÖ Newsfeed content converted to json for {len(news_feed_json)} records.")
else:
    print("‚ö†Ô∏è news_feed is empty ‚Äî skipping JSON conversion.")

‚úÖ Newsfeed content converted to json for 5388 records.


## Convert linkedin feed to json ready df

In [10]:
# Display the first row of linkedin_feed to inspect the available columns.
# NOTE(review): the rendered output contains author names and profile URLs —
# consider clearing it before sharing the notebook.
linkedin_feed.head(1)

Unnamed: 0,id,created_at,postUrl,author_id,text,author_fullName,author_profile_pic,competitor_id,headline,postedDate,...,alert_response,processed,relevant,relevance_descrip,insight,push_to_feed,display_date,company_id,competitor_name,status
0,30362,2026-02-09T11:05:19.833765+00:00,https://www.linkedin.com/posts/louise-wills-09...,83358831.0,Organizations that are deploying specialized A...,Louise Wills,https://media.licdn.com/dms/image/v2/C5603AQFF...,152.0,Helping my customers to be the best that they ...,2026-02-09T00:00:00+00:00,...,,False,True,AI adoption related to health,Emphasis on investment in AI roles accelerates...,,2026-02-09,23,Oracle Health,active


In [11]:
# Per-post frame that is later concatenated with the other feeds. It stays an
# empty DataFrame when linkedin_feed is empty or is missing a required column.
linkedin_feed_json = pd.DataFrame()

if linkedin_feed is not None and not linkedin_feed.empty:
    # Every column read below is listed here (including company_id and
    # display_date) so that a schema gap is reported by the warning branch
    # instead of surfacing as a KeyError mid-conversion.
    required_cols = [
        "id", "competitor_id", "company_id", "author_fullName",
        "text", "postUrl", "display_date",
    ]
    missing_cols = [col for col in required_cols if col not in linkedin_feed.columns]

    if missing_cols:
        print(f"‚ö†Ô∏è Missing columns in linkedin_feed: {missing_cols}")
    else:
        # Columns shared by all feeds (used for grouping further down).
        linkedin_feed_json["id"] = linkedin_feed["id"]
        linkedin_feed_json["competitor_id"] = linkedin_feed["competitor_id"]
        linkedin_feed_json["company_id"] = linkedin_feed["company_id"]
        linkedin_feed_json["display_date"] = linkedin_feed["display_date"]
        linkedin_feed_json["source"] = "linkedin"

        # One JSON document per post; postUrl is exposed under the common "url" key.
        linkedin_feed_json["content_json"] = linkedin_feed.apply(
            lambda row: json.dumps({
                "content_id": row["id"],
                "author_fullName": row["author_fullName"],
                "text": row["text"],
                "url": row["postUrl"],
                "competitor_id": row["competitor_id"],
                "source": "linkedin"
            }),
            axis=1
        )
        print("‚úÖ Linkedin content converted to json...")
else:
    print("‚ö†Ô∏è linkedin_feed is empty ‚Äî skipping JSON conversion")

‚úÖ Linkedin content converted to json...


## Convert reddit feed to json ready df

In [12]:
# Display the first row of reddit_posts to inspect the available columns.
reddit_posts.head(1)

Unnamed: 0,id,created_at,post_title,post_url,post_created_utc,post_id,company_id,subreddit,relevant,post_selftext,post_author,push_to_feed,matched_keywords,processed,relevance_descrip,insight,display_date
0,26796,2026-02-09T11:17:17.220735+00:00,[For Hire] HealthTech dev team ready to suppor...,https://www.reddit.com/r/HealthTech/comments/1...,2026-02-09T09:06:30+00:00,1qzzfz7,23,healthtech,True,"Hi everyone,\n\nI work with a HealthTech-focus...",Feisty_Honeydew_2866,,[healthtech],False,"Discusses EHR integrations, relevant expertise",EHR integrations and AI-driven workflows are k...,2026-02-09


In [13]:
# Per-post frame that is later concatenated with the other feeds. It stays an
# empty DataFrame when reddit_posts is empty or is missing a required column.
reddit_posts_json = pd.DataFrame()

if reddit_posts is not None and not reddit_posts.empty:
    # Every column read below is listed here (including display_date) so that
    # a schema gap is reported by the warning branch instead of surfacing as a
    # KeyError mid-conversion.
    required_cols = ["id", "company_id", "post_selftext", "post_url", "display_date"]
    missing_cols = [col for col in required_cols if col not in reddit_posts.columns]

    if missing_cols:
        print(f"‚ö†Ô∏è Missing columns in reddit_posts: {missing_cols}")
    else:
        reddit_posts_json["id"] = reddit_posts["id"]
        # competitor_id is filled from company_id here, both in the frame and
        # in the JSON payload below.
        # NOTE(review): presumably a placeholder because Reddit posts are not
        # tied to a competitor — confirm nothing downstream treats this value
        # as a real competitor id.
        reddit_posts_json["competitor_id"] = reddit_posts["company_id"]
        reddit_posts_json["company_id"] = reddit_posts["company_id"]
        reddit_posts_json["display_date"] = reddit_posts["display_date"]
        reddit_posts_json["source"] = "reddit"

        # One JSON document per post; only the self-text is included
        # (post_title is not part of the payload).
        reddit_posts_json["content_json"] = reddit_posts.apply(
            lambda row: json.dumps({
                "content_id": row["id"],
                "text": row["post_selftext"],
                "url": row["post_url"],
                "competitor_id": row["company_id"],
                "company_id": row["company_id"],
                "source": "reddit"
            }),
            axis=1
        )

        print(f"‚úÖ {len(reddit_posts_json)} Reddit records converted...")
else:
    print("‚ö†Ô∏è reddit_posts is empty ‚Äî skipping JSON conversion.")

‚úÖ 508 Reddit records converted...


## Convert jobs feed to json ready df

In [14]:
# Display the first row of jobs to inspect the available columns.
jobs.head(1)

Unnamed: 0,id,created_at,title,description,url,job_id,competitor_id,postedAt,key_insights,push_to_feed,processed,relevant,relevance_descrip,insight,display_date,company_id,competitor_name,status
0,9214,2026-02-09T11:05:31.776446+00:00,Territory Sales Representative / Restaurant Sp...,About SpotOn We‚Äôre not just building restauran...,https://www.linkedin.com/jobs/view/4368047543/,,157,2026-02-02,,,False,True,Job related to restaurant services,SpotOn emphasizes high-touch service to boost ...,2026-02-02,66,SpotOn,trial


In [15]:
# Per-posting frame that is later concatenated with the other feeds. It stays
# an empty DataFrame when jobs is empty or is missing a required column.
jobs_json = pd.DataFrame()

if jobs is not None and not jobs.empty:
    # Every column read below is listed here (including display_date) so that
    # a schema gap is reported by the warning branch instead of surfacing as a
    # KeyError mid-conversion.
    required_cols = [
        "id", "competitor_id", "company_id", "title",
        "description", "url", "display_date",
    ]
    missing_cols = [col for col in required_cols if col not in jobs.columns]

    if missing_cols:
        print(f"‚ö†Ô∏è Missing columns in jobs: {missing_cols}")
    else:
        # Columns shared by all feeds (used for grouping further down).
        jobs_json["id"] = jobs["id"]
        jobs_json["competitor_id"] = jobs["competitor_id"]
        jobs_json["company_id"] = jobs["company_id"]
        jobs_json["display_date"] = jobs["display_date"]
        jobs_json["source"] = "jobs"

        # One JSON document per job posting.
        jobs_json["content_json"] = jobs.apply(
            lambda row: json.dumps({
                "content_id": row["id"],
                "title": row["title"],
                "description": row["description"],
                "url": row["url"],
                "competitor_id": row["competitor_id"],
                "source": "jobs"
            }),
            axis=1
        )

        print(f"‚úÖ {len(jobs_json)} job postings converted to json...")
else:
    print("‚ö†Ô∏è jobs is empty ‚Äî skipping JSON conversion.")

‚úÖ 2561 job postings converted to json...


## Convert ads feed to json ready df

In [16]:
# Per-ad frame that is later concatenated with the other feeds. It stays an
# empty DataFrame when ads is empty or is missing a required column.
ads_json = pd.DataFrame()

if ads is not None and not ads.empty:
    # Every column read below is listed here (including company_id and
    # display_date) so that a schema gap is reported by the warning branch
    # instead of surfacing as a KeyError mid-conversion.
    required_cols = ["id", "competitor_id", "company_id", "json_response", "display_date"]
    missing_cols = [col for col in required_cols if col not in ads.columns]

    if missing_cols:
        # The warning names this table (it previously said "jobs").
        print(f"‚ö†Ô∏è Missing columns in ads: {missing_cols}")
    else:
        # Columns shared by all feeds (used for grouping further down).
        ads_json["id"] = ads["id"]
        ads_json["competitor_id"] = ads["competitor_id"]
        ads_json["company_id"] = ads["company_id"]
        ads_json["display_date"] = ads["display_date"]
        ads_json["source"] = "ads"

        # One JSON document per ad, wrapping the stored json_response value.
        # NOTE(review): unlike the other feeds this payload carries no
        # top-level "url" or "competitor_id" — confirm the summary prompt can
        # still find a source link inside json_response.
        ads_json["content_json"] = ads.apply(
            lambda row: json.dumps({
                "content_id": row["id"],
                "json_response": row["json_response"],
                "source": "ads"
            }),
            axis=1
        )

        print(f"‚úÖ {len(ads_json)} ads converted to json...")
else:
    print("‚ö†Ô∏è ads is empty ‚Äî skipping JSON conversion.")

‚úÖ 4564 ads converted to json...


## Combine dataframes into one

In [17]:
# Stack the five per-source frames into one. ignore_index=True renumbers the
# rows 0..n-1. A source whose conversion was skipped contributes an empty
# DataFrame and therefore no rows.
data_feed_combined = pd.concat([
    news_feed_json,
    linkedin_feed_json,
    reddit_posts_json,
    jobs_json,
    ads_json
], ignore_index=True)
print(f"‚úÖ Data frame combination completed with {len(data_feed_combined)} rows created")

‚úÖ Data frame combination completed with 16918 rows created


## Add company description to df

In [18]:
# Attach each company's custom prompt to its feed rows.
# Both sides have an `id` column, so the left join produces `id_x` (feed row
# id) and `id_y` (company id); the latter duplicates `company_id` and is
# dropped, and `id_x` is restored to `id`.
# NOTE(review): re-running this cell without re-running the concat above would
# merge `company_custom_prompt` in a second time.
df = (
    data_feed_combined
    .merge(
        companies[['id', 'company_custom_prompt']],
        how='left',
        left_on='company_id',
        right_on='id',
    )
    .drop(columns=['id_y'])
    .rename(columns={'id_x': 'id'})
)
data_feed_combined = df
print(f"{len(data_feed_combined)}")

16918


## Group datafeeds together to prep for LLM

In [19]:
# Collapse the combined feed to one row per (display_date, company_id), with
# every content_json document of that day and company collected into a list.
# Only content_json is aggregated, so the other columns (including
# company_custom_prompt) are not carried forward.
# NOTE(review): pandas groupby drops rows whose key is missing by default, so
# rows without a display_date or company_id would disappear here — confirm.
data_feed = (
    data_feed_combined
    .groupby(["display_date","company_id"], as_index=False)
    .agg({"content_json": list})
)
print(f"‚úÖ Dataframe grouped together with {len(data_feed)} rows created...")

‚úÖ Dataframe grouped together with 3045 rows created...


In [20]:
# Order newest-first by display_date, then preview the top ten groups.
data_feed = data_feed.sort_values(
    by="display_date",
    ascending=False
)
data_feed.head(10)

Unnamed: 0,display_date,company_id,content_json
3044,2026-02-09,73,"[{""content_id"": 63476, ""url"": ""https://news.ci..."
3043,2026-02-09,72,"[{""content_id"": 9135, ""title"": ""Mobile QA Auto..."
3042,2026-02-09,71,"[{""content_id"": 9165, ""title"": ""Volunteer Seni..."
3041,2026-02-09,66,"[{""content_id"": 63617, ""url"": ""https://www.pro..."
3040,2026-02-09,23,"[{""content_id"": 63201, ""url"": ""https://www.web..."
3039,2026-02-08,73,"[{""content_id"": 73620, ""json_response"": ""{\""cr..."
3038,2026-02-08,72,"[{""content_id"": 9036, ""title"": ""SEO/GEO Specia..."
3037,2026-02-08,71,"[{""content_id"": 62868, ""url"": ""https://www.new..."
3036,2026-02-08,66,"[{""content_id"": 63737, ""url"": ""https://tribune..."
3035,2026-02-08,23,"[{""content_id"": 62995, ""url"": ""https://www.gam..."


### Pull the latest date on which the feed summary was run

In [21]:
# Latest `date` value already stored in feed_summaries; used in the next cell
# as the cut-off for what still needs to be summarised.
# NOTE(review): this is a single maximum across all companies, so a company
# whose summaries lag behind the newest one would not be caught up — confirm.
# NOTE(review): an empty feed_summaries table has no `date` column, so this
# lookup would raise on a first run — confirm that case cannot occur.
most_recent_date = feed_summaries["date"].max()
most_recent_date

'2026-02-06'

In [22]:
# Keep only the groups dated strictly after the last summarised date.
# NOTE(review): both sides appear to be ISO "YYYY-MM-DD" strings (see the cell
# outputs), for which string comparison matches date order — confirm the
# dtypes stay that way.
most_recent_data_feed = data_feed[
    data_feed["display_date"] > most_recent_date
]
most_recent_data_feed.head(10)

Unnamed: 0,display_date,company_id,content_json
3044,2026-02-09,73,"[{""content_id"": 63476, ""url"": ""https://news.ci..."
3043,2026-02-09,72,"[{""content_id"": 9135, ""title"": ""Mobile QA Auto..."
3042,2026-02-09,71,"[{""content_id"": 9165, ""title"": ""Volunteer Seni..."
3041,2026-02-09,66,"[{""content_id"": 63617, ""url"": ""https://www.pro..."
3040,2026-02-09,23,"[{""content_id"": 63201, ""url"": ""https://www.web..."
3039,2026-02-08,73,"[{""content_id"": 73620, ""json_response"": ""{\""cr..."
3038,2026-02-08,72,"[{""content_id"": 9036, ""title"": ""SEO/GEO Specia..."
3037,2026-02-08,71,"[{""content_id"": 62868, ""url"": ""https://www.new..."
3036,2026-02-08,66,"[{""content_id"": 63737, ""url"": ""https://tribune..."
3035,2026-02-08,23,"[{""content_id"": 62995, ""url"": ""https://www.gam..."


### Run through LLM

In [23]:
# NOTE(review): this rebinds the name `client`, which the "Initiate AI models"
# cell assigned to an Anthropic client.
client = AsyncOpenAI(api_key=openai_api_key)
MODEL_NAME = OPENAI_MODEL
# Upper bound on the number of requests in flight at once (enforced through
# the semaphore below).
MAX_CONCURRENCY = 100

semaphore = asyncio.Semaphore(MAX_CONCURRENCY)
# Serialises updates to the shared progress counter in fetch_response.
lock = asyncio.Lock()

async def fetch_response(prompt, company_id, display_date, progress):
    """Send one prompt to the chat model and report progress.

    The call runs under the module-level ``semaphore``. Any exception raised
    while requesting or unpacking the completion is printed and swallowed, in
    which case the returned ``response`` is ``None``.

    Args:
        prompt: Text sent as the single user message.
        company_id: Echoed back in the result and used in the error message.
        display_date: Echoed back in the result.
        progress: Shared dict with ``"done"`` and ``"total"`` counters;
            ``"done"`` is incremented once per call, success or failure.

    Returns:
        A dict with the keys ``display_date``, ``company_id`` and ``response``.
    """
    async with semaphore:
        text = None
        try:
            completion = await client.chat.completions.create(
                messages=[{"role": "user", "content": prompt}],
                model=MODEL_NAME,
            )
            text = completion.choices[0].message.content
        except Exception as e:
            print(f"‚ö†Ô∏è Error for company_id {company_id}: {e}")

        # The counter is bumped and printed under the lock so the progress
        # lines stay consistent across concurrent tasks.
        async with lock:
            progress["done"] += 1
            done, total = progress["done"], progress["total"]
            print(f"‚úÖ Completed {done}/{total} ({done/total:.0%})")

        return dict(
            display_date=display_date,
            company_id=company_id,
            response=text,
        )

async def process_all(df):
    """Build one prompt per row of ``df`` and run them all concurrently.

    For every row a prompt is rendered from ``row['content_json']`` and handed
    to ``fetch_response`` together with the row's ``company_id`` and
    ``display_date``. All calls are awaited with ``asyncio.gather``.

    Args:
        df: DataFrame with ``content_json``, ``company_id`` and
            ``display_date`` columns.

    Returns:
        The list of ``fetch_response`` results, in the same order as the rows
        of ``df``.
    """
    # Shared counter; every fetch_response call increments "done".
    progress = {"done": 0, "total": len(df)}
    tasks = []

    for _, row in df.iterrows():
        # content_json is interpolated with its default string formatting.
        # NOTE(review): presumably a list of JSON strings built earlier in the
        # notebook, so the model sees the Python list repr rather than a single
        # JSON document — confirm that is intended.
        prompt = f"""

You are a competitive intelligence analyst briefing C-suite executives.
Your job is to surface signals that could affect strategic decisions in the next 30-90 days.
Executives have 60 seconds‚Äîlead with what matters most.

## Content
Here is the content (including titles and sources):
{row['content_json']}

## Critical Grounding Rules
- **ONLY extract insights explicitly present in the content above**
- **ONLY use URLs provided in the content‚Äînever generate or infer URLs**
- If the content contains fewer than 3 meaningful signals, return only what exists
- If the content contains no actionable competitive signals, respond: "No significant competitive signals in today's update."
- Never fabricate, infer, or hallucinate sources, companies, or insights not explicitly stated

## Directions
- Extract up to THREE of the most critical competitive signals from this daily update
- Prioritize: product launches, pricing changes, strategic pivots, market moves
- Lead each bullet with company name, then **bold the key insight**
- The bolded insight must be a **short headline phrase**, not a clause or sentence
- Do NOT bold supporting detail, explanations, or qualifiers
- Keep it to one continuous sentence‚Äîno dashes or arrows
- The bolded insight should communicate the core "so what" at a glance
- Keep bullets to **10‚Äì15 words max** (excluding source link)
- **Consolidate related updates from the same company into one bullet**
- If updates are unrelated, use separate bullets
- **When referencing two companies together, use & not /** (e.g., Uber & DoorDash)
- Drop generic updates (UX improvements, minor ops) unless they signal strategy
- If possible provide a mix of insights from the various sources (job insights, ads, news articles, reddit threads)
- **Always attribute the insight to where it was found**

## Before responding, verify that:
- Every bullet references content explicitly provided above
- Every URL matches a URL from the input content
- There are no more than 3 bullets
- No extra lines exist outside those bullets

If any bullet references external information not in the content, delete it.

## Source Attribution Rules
- Append a source reference at the end of each bullet
- **Use ONLY URLs provided in the content above‚Äîdo not generate URLs**
- Render the source as a **Markdown link** with a short readable label
- If the source is an ad campaign, the hyperlink text should be the name of the channel followed by 'ad' (e.g., Facebook Ad)
- Do not include tracking parameters or long query strings in URLs

## Format
Company **2‚Äì5 word insight in bold** with supporting detail in same sentence [Source](URL)

## Good Examples
- NYC **tip law sticks with 10%** default mandate now in effect [NYC ruling](https://www.nyc.gov/site/dca/index.page)
- Square **sales hiring surge** with senior enterprise roles added nationwide [Job postings](https://block.com/careers)
- ServiceTitan **Insurance Queue live** with AmeriPro Roofing as first customer for roofing claims [LinkedIn Ad](https://www.linkedin.com/ad-library/detail/1122486033)

## Bad Examples
- NYC **tip law sticks with 10% default mandate now in effect** (too many bolded words)
- Square **ramping merchant acquisition through senior sales hires and ad spend** (bold is a clause)
- **DoorDash** expanding retail with Hibbett (bold should be insight, not company)

Provide output in markdown.

        """
        # Calling the coroutine function only creates the coroutine; nothing
        # is sent until gather() schedules it below.
        tasks.append(
            fetch_response(
                prompt,
                row["company_id"],
                row["display_date"],
                progress
            )
        )

    # gather() returns results in the order the awaitables were passed in.
    results = await asyncio.gather(*tasks)
    return results

# Top-level await: relies on the notebook kernel's already-running event loop.
results = await process_all(most_recent_data_feed)
# One row per (display_date, company_id) group with the model's response
# (None where the request failed).
summaries_df = pd.DataFrame(results)

‚úÖ Completed 1/15 (7%)
‚úÖ Completed 2/15 (13%)
‚úÖ Completed 3/15 (20%)
‚úÖ Completed 4/15 (27%)
‚úÖ Completed 5/15 (33%)
‚úÖ Completed 6/15 (40%)
‚úÖ Completed 7/15 (47%)
‚úÖ Completed 8/15 (53%)
‚úÖ Completed 9/15 (60%)
‚úÖ Completed 10/15 (67%)
‚úÖ Completed 11/15 (73%)
‚úÖ Completed 12/15 (80%)
‚úÖ Completed 13/15 (87%)
‚úÖ Completed 14/15 (93%)
‚úÖ Completed 15/15 (100%)


# Prep data to write back to Supabase

## Send to supabase

In [24]:
# Select the summaries that are safe to persist:
#   * company_id present and non-zero (0 is the placeholder used for content
#     whose competitor could not be resolved),
#   * a response was actually produced — a failed request leaves `response`
#     as None and must not be stored as if it were a finished summary.
if summaries_df.empty:
    # No groups were processed, so the frame has no columns to filter on.
    filtered_df = summaries_df
else:
    filtered_df = summaries_df[
        summaries_df["company_id"].notna()
        & (summaries_df["company_id"] != 0)
        & summaries_df["response"].notna()
    ]

rows = []

for _, row in filtered_df.iterrows():
    rows.append({
        # Cast to a plain int so the value is JSON-serialisable.
        "company_id": int(row["company_id"]),
        "date": row["display_date"],
        "summary": row["response"],
    })

skipped = len(summaries_df) - len(rows)
if skipped:
    print(f"Skipped {skipped} rows without a usable company_id or response")

if rows:
    response = (
        supabase
        .table("feed_summaries")
        .insert(rows)
        .execute()
    )
    print(f"Insert complete ‚Äî {len(rows)} rows inserted")
else:
    # Nothing to write; avoid sending an empty insert request.
    response = None
    print("No summaries to insert")

Insert complete ‚Äî 15 rows inserted


## Commented-out code for using the Gemini 2.5 Pro LLM

In [25]:
# import google.generativeai as genai

# genai.configure(api_key=google_api_key)

# # SWITCHED TO PRO
# MODEL_NAME = "gemini-2.5-pro"

# MAX_CONCURRENCY = 50

# semaphore = asyncio.Semaphore(MAX_CONCURRENCY)
# lock = asyncio.Lock()

# # 2. Initialize the model with JSON mode enabled
# model = genai.GenerativeModel(
#     model_name=MODEL_NAME,
#     generation_config={
#         "temperature": 0,
#         "response_mime_type": "text/plain"
#     }
# )

# async def fetch_response(prompt, headline, signal_id, progress):
#     async with semaphore:
#         text = None
#         try:
#             response = await model.generate_content_async(prompt)
#             text = response.text
             
#         except Exception as e:
#             text = None
#             print(f"‚ö†Ô∏è Error for id {headline}: {e}")

#         async with lock:
#             progress["done"] += 1
#             print(f"‚úÖ Completed {progress['done']}/{progress['total']}")

#         return {
#             "headline": headline,
#             "signal_id" : signal_id,
#             "response": text
#         }

# async def process_all(df):
#     total = len(df)
#     progress = {"done": 0, "total": total}
#     tasks = []

#     for _, row in df.iterrows():
#         prompt = f"""
#             Analyze the following content and produce a detailed competitive-intelligence extract written in clean, valid Markdown.
#             Don't say things like "we" or "our", this is for a client, you aren't writing this as if you are part of the team.
#             Use bullets for everything, do not number anything.
            
#             You are a competitive analyst extracting actionable intelligence for strategic decision-makers who need to understand:
#             1. What the competitor is doing
#             2. Why it matters to us
#             3. What we should watch or do about it
#             4. Make sure the insights are focused on the competitor(s) mentioned in the title and summary section
            
#             Your output must follow these formatting rules:
#             - Use ## for all major section headings
#             - Use standard markdown bullet points (- or *) for all lists
#             - Keep bullets SHORT - one clear point per bullet, ideally one sentence max
#             - Bold key phrases using **double asterisks** to enable skimming
#             - each heading should only have 3-5 key points

#             CITATION STRUCTURE - STRICT RULE
#             When citing specific facts, quotes, or claims, you MUST use this exact format:
            
#             [text](URL)
            
#             Do NOT use any other format as it will break the frontend of the app.
            
#             - Example: "Freshworks reports [15% revenue growth](https://...) during 2025 Q3.*"
            
#             REQUIRED STRUCTURE (in this exact order):

#             ## Overview
            
#             [2-3 bullet points the strategic implication, their vulnerabilities, and recommended competitive response]
            
#             ## What You Need to Know
            
#             [3-4 bullet points that captures the competitive situation, momentum, and key context]
            
#             ## The Threat to Watch

#             - Short, punchy bullets (1-2 sentences each)
#             - 3-5 key competitive threats or moves
#             - Focus on impact to your business
#             - Call out strategic bets, resource allocation, pricing/GTM tactics
#             - Note capability gaps or weaknesses
            
#             ## What to Monitor
            
#             - Short bulleted items - one specific signal per line
#             - 3-5 concrete, actionable monitoring points
#             - Each should be scannable at a glance
            
#             ADDITIONAL GUIDANCE:
#             - Include relevant financial metrics, growth rates, or market position data
#             - Note product/technology bets and positioning claims
#             - Highlight partnership or GTM initiatives
#             - Identify execution risks or organizational challenges
#             - Every bullet should be independently useful - no filler
#             - Source links should be linked to the actual text inline
#                 - Example: "Freshworks reports [15% revenue growth](https://...) during 2025 Q3.*"

#             CITATION FORMAT (STRICT ‚Äî WRAP THE CLAIM TEXT)
            
#             ‚úÖ Correct:
#             - Freshworks reports [15% revenue growth](https://...) during 2025 Q3.
#             - Oracle is hiring to scale [a global, personalized health ecosystem](https://...)
#             - The company launched [‚ÄúAutopilot for Finance‚Äù](https://...) for mid-market teams.
            
#             Aim for 250-350 words total. Optimize for speed-reading and scannability.
            
#             Now analyze this content:

#             {row['content_json']}
#             """
#         # print(prompt)
#         tasks.append(fetch_response(prompt, row["headline"], row["signal_id"], progress))

#     results = await asyncio.gather(*tasks)
#     return results

# signal_enrichment = await process_all(signal_content_grouped)
# signal_enrichment_df = pd.DataFrame(signal_enrichment)