In [1]:
import pandas as pd
import os
from openai import OpenAI
import json
from pydantic import BaseModel, Field
from typing import List, Optional
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

from dotenv import load_dotenv
load_dotenv()

True

# Helper functions

In [2]:
def chat_completion(client, prompt, response_format=None):
    messages = [
        {
            "role": "user",
            "content": prompt
        }
    ]
    
    kwargs = {
        "model": "gpt-4o", # deepseek/deepseek-r1:free
        "messages": messages,
    }
    
    if response_format:
        kwargs["response_format"] = response_format
        response = client.beta.chat.completions.parse(**kwargs)
    else:
        response = client.chat.completions.create(**kwargs)

    return response.choices[0]


In [41]:
def Job_Pool(func, *iterables, njobs=4):
    with ThreadPoolExecutor(max_workers=njobs) as executor:
        results = list(tqdm(executor.map(func, *iterables), total=len(iterables[0])))
    return results

# Read in Data

In [42]:
df = pd.read_csv("../data/data.csv")
df['Published Date'] = pd.to_datetime(df['Published Date'])

In [43]:
df['FullContent'] = df['Title'] + "\n" + df['Published Date'].apply(lambda x: x.strftime("%Y-%m-%d")) + "\n" + df['Content']

# Event Detection using LLM

In [44]:
# client = OpenAI(
#   base_url="https://openrouter.ai/api/v1",
#   api_key=os.getenv("OPENROUTER_API_KEY"),
# )

client = OpenAI()

In [45]:
class EventArgument(BaseModel):
    """Schema for event-related entities."""
    agent: Optional[str] = Field(None, description="Who caused the event?")
    object: Optional[str] = Field(None, description="What was affected?")
    location: Optional[str] = Field(None, description="Where did it happen?")
    time: Optional[str] = Field(None, description="When did it happen? Give the date in the format YYYY-MM-DD where possible.")
    counterpart: Optional[str] = Field(None, description="Who else was involved?")
    impact_summary: Optional[str] = Field(None, description="What is the significance or consequence of this event?")
    cause: Optional[str] = Field(None, description="What explicitly led to this event?")

class EventDependency(BaseModel):
    """Schema for event relationships."""
    event_id: int = Field(..., description="ID of the related event")
    relation_type: str = Field(..., description="Type of relationship between events")
    description: Optional[str] = Field(None, description="Description of how the events are related")

class Event(BaseModel):
    """Schema for detected events and their components."""
    event_id: int = Field(..., description="Unique identifier for the event")
    event_type: str = Field(..., description="The type of event detected (e.g., Economic Policy, Market Crash, Political Decision).")
    trigger: str = Field(..., description="The main word(s) triggering the event.")
    event_summary: str = Field(..., description="A structured, full-sentence summary of the event capturing its broader context.")
    arguments: EventArgument = Field(..., description="Structured entities related to the event.")
    dependencies: List[EventDependency] = Field(description="Relationships with other events")

class EventResponse(BaseModel):
    """Schema for a collection of detected events."""
    events: List[Event]

In [46]:
prompt = """
You are an advanced NLP system specializing in event detection, trigger extraction, summarization, and relationship analysis of news events.

Your task is to extract structured events from the given news article and output them in JSON format.

## Extraction Rules:
1. Identify key events (e.g., economic policy, political decisions, disasters, agreements, financial impact).
2. Extract the event trigger (a key phrase indicating the event).
3. Generate a complete event summary in one sentence, capturing the full context.
4. Extract event arguments:
   - Agent (Who caused the event?)
   - Object (What was affected?)
   - Location (Where did it happen?)
   - Time (When did it happen?)
   - Counterpart (Who else was involved?)
   - Impact Summary (Why is this event important?)
   - Cause (What explicitly led to this event?)

5. Identify relationships between events using these types:
   - TRIGGERED: Event A directly caused Event B
   - INFLUENCED: Event A indirectly impacted Event B
   - CONSEQUENCE_OF: Event A occurred as a direct consequence of Event B
   - RESPONSE_TO: Event A was a reaction to Event B
   - ESCALATED_FROM: Event A worsened an existing situation caused by Event B
   - RELATED_TO: Event A is contextually related to Event B without direct causation

### Example Input
"The Federal Reserve raised interest rates by 0.5%, causing the stock market to plunge. Investors reacted negatively, leading to a sell-off."

### Expected Output (Structured JSON)
{{
  "events": [
    {{
      "event_id": 1,
      "event_type": "Economic Policy",
      "trigger": "raised interest rates",
      "event_summary": "The Federal Reserve increased interest rates by 0.5%, leading to stock market volatility and investor uncertainty.",
      "arguments": {{
        "agent": "Federal Reserve",
        "object": "interest rates",
        "location": "United States",
        "time": "2025-02-01",
        "counterpart": "Stock Market",
        "impact_summary": "Investors reacted negatively, increasing uncertainty in financial markets.",
        "cause": "Federal Reserve's monetary policy decision"
      }},
      "dependencies": [
        {{
          "event_id": 2,
          "relation_type": "TRIGGERED",
          "description": "Interest rate hike directly caused market decline"
        }}
      ]
    }},
    {{
      "event_id": 2,
      "event_type": "Market Crash",
      "trigger": "plunged",
      "event_summary": "Investor fears over rising interest rates led to a sharp decline in stock prices, causing a significant sell-off.",
      "arguments": {{
        "agent": "Investors",
        "object": "stock market",
        "location": "United States",
        "time": "2025-02-02",
        "counterpart": null,
        "impact_summary": "Market losses wiped out $500 billion in value, impacting institutional and retail investors.",
        "cause": "Federal Reserve's interest rate increase"
      }},
      "dependencies": [
        {{
          "event_id": 1,
          "relation_type": "CONSEQUENCE_OF",
          "description": "Market decline was a direct consequence of interest rate hike"
        }}
      ]
    }}
  ]
}}

News article:
```
{article}
```

Take a deep breath and work on this step by step:
1. First, identify all distinct events in the article
2. For each event, extract its components (type, trigger, summary, arguments)
3. Finally, analyze how the events are related to each other
""".strip()



In [50]:
def get_event(article):
    try:
        response = chat_completion(client, prompt.format(article=article), response_format=EventResponse) 
    except Exception as e:
        print(e)
        return None
    
    try:
        json_data = json.loads(response.message.content)
    except Exception as e:
        print(e)
        json_data = None

    return json_data

In [51]:
# print(prompt.format(article=df['FullContent'].values[0]))

In [52]:
events = Job_Pool(get_event, df['FullContent'].values, njobs=15)


 46%|████▋     | 3551/7674 [59:18<54:38,  1.26it/s]  

the JSON object must be str, bytes or bytearray, not NoneType


100%|██████████| 7674/7674 [2:02:18<00:00,  1.05it/s]  


TypeError: 'NoneType' object is not subscriptable

In [61]:
events2 = [event['events'] if event is not None else [] for event in events]

In [62]:
df['events'] = events2

In [63]:
df.to_json('../data/events.json', orient='records')


In [60]:
df

Unnamed: 0,Title,Source,Author,Published Date,URL,Country,Content,FullContent,events
0,Fed officials warn of inflation risks from tar...,Straits Times,,2025-02-03 23:25:00+00:00,https://www.straitstimes.com/business/economy/...,Singapore,WASHINGTON - The Trump administration’s plan f...,Fed officials warn of inflation risks from tar...,"[{'event_id': 1, 'event_type': 'Economic Warni..."
1,While You Were Sleeping: 5 stories you might h...,Straits Times,,2025-02-03 22:35:49+00:00,https://www.straitstimes.com/world/while-you-w...,Singapore,"Trump, Sheinbaum reach deal to delay tariffs f...",While You Were Sleeping: 5 stories you might h...,"[{'event_id': 1, 'event_type': 'Policy Agreeme..."
2,"Slower growth, souring business sentiment: How...",Channel NewsAsia,Abigail Ng,2025-02-03 22:00:00+00:00,https://www.channelnewsasia.com/singapore/trum...,Singapore,Analysts say the impact would be cushioned by ...,"Slower growth, souring business sentiment: How...","[{'event_id': 1, 'event_type': 'Economic Polic..."
3,Malaysia's ECRL: A closer look at the US$11.2b...,Channel NewsAsia,Aqil Haziq Mahmud,2025-02-03 22:00:00+00:00,https://www.channelnewsasia.com/asia/malaysia-...,Singapore,In the second of a four-part series on the Eas...,Malaysia's ECRL: A closer look at the US$11.2b...,"[{'event_id': 1, 'event_type': 'Infrastructure..."
4,"S&P 500, Nasdaq, pare losses as Trump’s Mexico...",Straits Times,,2025-02-03 21:22:52+00:00,https://www.straitstimes.com/business/companie...,Singapore,NEW YORK - The major stock indexes closed lowe...,"S&P 500, Nasdaq, pare losses as Trump’s Mexico...","[{'event_id': 1, 'event_type': 'Political Deci..."
...,...,...,...,...,...,...,...,...,...
7669,Blinken lands in China on rare trip with hopes...,Channel NewsAsia,,2023-06-17 19:49:00+00:00,https://www.channelnewsasia.com/world/blinken-...,Singapore,BEIJING: US Secretary of State Antony Blinken ...,Blinken lands in China on rare trip with hopes...,"[{'event_id': 1, 'event_type': 'Diplomatic Vis..."
7670,"Blinken talks to Japanese, South Korean counte...",Straits Times,,2023-06-17 17:15:59+00:00,https://www.straitstimes.com/world/united-stat...,Singapore,WASHINGTON - US Secretary of State Antony Blin...,"Blinken talks to Japanese, South Korean counte...","[{'event_id': 1, 'event_type': 'Diplomatic Eng..."
7671,Blinken supports efforts toward 'mature' China...,Channel NewsAsia,,2023-06-17 05:52:35+00:00,https://www.channelnewsasia.com/asia/blinken-s...,Singapore,SEOUL: US Secretary of State Antony Blinken sa...,Blinken supports efforts toward 'mature' China...,"[{'event_id': 1, 'event_type': 'Diplomatic Eff..."
7672,Blinken says his trip to China aims at 'avoidi...,Straits Times,,2023-06-16 23:25:00+00:00,https://www.straitstimes.com/world/united-stat...,Singapore,WASHINGTON - US Secretary of State Antony Blin...,Blinken says his trip to China aims at 'avoidi...,"[{'event_id': 1, 'event_type': 'Diplomatic Vis..."
