In [55]:
import pandas as pd
import os
from openai import OpenAI
import json
from pydantic import BaseModel, Field
from typing import List, Optional

from dotenv import load_dotenv
load_dotenv()

True

In [30]:
def chat_completion(client, prompt, response_format=None, model="gpt-4o"):
    """Send ``prompt`` as a single user message and return the first choice.

    Args:
        client: Client object exposing ``chat.completions.create`` and
            ``beta.chat.completions.parse``.
        prompt: Text used as the ``content`` of the one user message.
        response_format: Optional structured-output spec. When truthy it is
            forwarded as ``response_format`` and the ``parse`` endpoint is
            used; otherwise the plain ``create`` endpoint is called.
        model: Model identifier sent with the request. Defaults to
            ``"gpt-4o"``; pass another id (e.g. ``"deepseek/deepseek-r1:free"``)
            to switch models without editing this function.

    Returns:
        ``response.choices[0]`` from whichever endpoint was called.
    """
    request = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
    }

    # Truthiness (not ``is not None``) is kept so existing callers that pass
    # an empty/falsey format still take the plain ``create`` path.
    if response_format:
        request["response_format"] = response_format
        response = client.beta.chat.completions.parse(**request)
    else:
        response = client.chat.completions.create(**request)

    return response.choices[0]


In [31]:
# Disabled alternative, kept for reference: the same client class pointed at
# the OpenRouter base URL and keyed by the OPENROUTER_API_KEY env variable.
# client = OpenAI(
#   base_url="https://openrouter.ai/api/v1",
#   api_key=os.getenv("OPENROUTER_API_KEY"),
# )

# Active client, constructed with no explicit arguments.
# NOTE(review): presumably picks up its credentials from the environment
# populated by load_dotenv() in the imports cell — confirm.
client = OpenAI()

In [53]:
# Load the article table and convert 'Published Date' to datetimes in a single
# expression, so re-running the cell always starts from the file on disk.
df = pd.read_csv("../data/data.csv").assign(
    **{'Published Date': lambda frame: pd.to_datetime(frame['Published Date'])}
)

In [54]:
# Text handed to the model: title, ISO publication date, and body, one per line.
# `.dt.strftime` is the vectorised formatter; unlike a per-row
# `x.strftime(...)` call it yields a missing value for NaT instead of raising.
df['FullContent'] = (
    df['Title']
    + "\n"
    + df['Published Date'].dt.strftime("%Y-%m-%d")
    + "\n"
    + df['Content']
)

In [63]:
class EventArgument(BaseModel):
    """Schema for event-related entities."""
    # Every slot is optional and defaults to None.
    # NOTE(review): the class docstring and the description strings may be
    # surfaced in the generated schema — confirm before rewording them.
    agent: Optional[str] = Field(
        default=None,
        description="Who caused the event?",
    )
    object: Optional[str] = Field(
        default=None,
        description="What was affected?",
    )
    location: Optional[str] = Field(
        default=None,
        description="Where did it happen?",
    )
    time: Optional[str] = Field(
        default=None,
        description="When did it happen? Give the date in the format YYYY-MM-DD where possible.",
    )
    counterpart: Optional[str] = Field(
        default=None,
        description="Who else was involved?",
    )
    impact_summary: Optional[str] = Field(
        default=None,
        description="What is the significance or consequence of this event?",
    )

class Event(BaseModel):
    """Schema for detected events and their components."""
    # All four fields are required: none of them declares a default.
    event_type: str = Field(
        description="The type of event detected (e.g., Economic Policy, Market Crash, Political Decision).",
    )
    trigger: str = Field(
        description="The main word(s) triggering the event.",
    )
    event_summary: str = Field(
        description="A structured, full-sentence summary of the event capturing its broader context.",
    )
    arguments: EventArgument = Field(
        description="Structured entities related to the event.",
    )

class EventResponse(BaseModel):
    """Schema for a collection of detected events."""
    # Top-level container: one required list holding every Event extracted
    # from a single article. This is the class passed as `response_format`
    # in the model-call cell.
    # NOTE(review): the docstring above may be emitted as the schema
    # description — confirm before rewording it.
    events: List[Event]


In [66]:
# Prompt template for structured event extraction.
# - Literal JSON braces are doubled ({{ }}) because the template is rendered
#   with str.format(article=...); only {article} is a real placeholder.
# - The example JSON fence is explicitly closed before "News article:" so the
#   article ends up inside its own fenced block rather than between two
#   mismatched fences.
prompt = """
You are an advanced NLP system specializing in event detection, trigger extraction, and summarization of news events.

Your task is to extract structured events from the given news article and output them in JSON format.

## Extraction Rules:
1. Identify key events (e.g., economic policy, political decisions, disasters, agreements, financial impact).  
2. Extract the event trigger (a key phrase indicating the event).  
3. Generate a complete event summary in one sentence, capturing the full context.  
4. Extract event arguments:
   - Agent (Who caused the event?)
   - Object (What was affected?)
   - Location (Where did it happen?)
   - Time (When did it happen?)
   - Counterpart (Who else was involved?)
   - Impact Summary (Why is this event important?)

### Example Input
"The Federal Reserve raised interest rates by 0.5%, causing the stock market to plunge. Investors reacted negatively, leading to a sell-off."

### Expected Output (Structured JSON)
```json
{{
  "events": [
    {{
      "event_type": "Economic Policy",
      "trigger": "raised interest rates",
      "event_summary": "The Federal Reserve increased interest rates by 0.5%, leading to stock market volatility and investor uncertainty.",
      "arguments": {{
        "agent": "Federal Reserve",
        "object": "interest rates",
        "location": "United States",
        "time": "2025-02-01",
        "counterpart": "Stock Market",
        "impact_summary": "Investors reacted negatively, increasing uncertainty in financial markets."
      }}
    }},
    {{
      "event_type": "Market Crash",
      "trigger": "plunged",
      "event_summary": "Investor fears over rising interest rates led to a sharp decline in stock prices, causing a significant sell-off.",
      "arguments": {{
        "agent": "Investors",
        "object": "stock market",
        "location": "United States",
        "time": "2025-02-02",
        "counterpart": null,
        "impact_summary": "Market losses wiped out $500 billion in value, impacting institutional and retail investors."
      }}
    }}
  ]
}}
```

News article:
```
{article}
```

Take a deep breath and work on this step by step.
""".strip()


In [67]:
# Preview the fully rendered prompt for the first article.
print(prompt.format(article=df['FullContent'].iloc[0]))

You are an advanced NLP system specializing in event detection, trigger extraction, and summarization of news events.

Your task is to extract structured events from the given news article and output them in JSON format.

## Extraction Rules:
1. Identify key events (e.g., economic policy, political decisions, disasters, agreements, financial impact).  
2. Extract the event trigger (a key phrase indicating the event).  
3. Generate a complete event summary in one sentence, capturing the full context.  
4. Extract event arguments:
   - Agent (Who caused the event?)
   - Object (What was affected?)
   - Location (Where did it happen?)
   - Time (When did it happen?)
   - Counterpart (Who else was involved?)
   - Impact Summary (Why is this event important?)

### Example Input
"The Federal Reserve raised interest rates by 0.5%, causing the stock market to plunge. Investors reacted negatively, leading to a sell-off."

### Expected Output (Structured JSON)
```json
{
  "events": [
    {
     

In [68]:
# Ask the model for structured events from the first article; passing
# EventResponse as the response format requests schema-shaped output.
response = chat_completion(
    client,
    prompt.format(article=df['FullContent'].iloc[0]),
    response_format=EventResponse,
)

In [70]:
# Decode the JSON text carried in the returned choice's message into plain
# dicts/lists so the notebook displays the extracted events.
json.loads(response.message.content)

{'events': [{'event_type': 'Economic Policy',
   'trigger': 'trade tariffs',
   'event_summary': 'The Trump administration announced a plan for trade tariffs on the three largest US trade partners, leading to potential inflation risks.',
   'arguments': {'agent': 'Trump administration',
    'object': 'trade tariffs',
    'location': 'United States',
    'time': '2025-02-01',
    'counterpart': 'Mexico, Canada, and China',
    'impact_summary': 'Potential inflation risks could impact monetary policy and economic growth.'}},
  {'event_type': 'International Trade Dispute',
   'trigger': 'retaliated with its own tariffs',
   'event_summary': 'Canada responded to the US tariffs by implementing its own range of tariffs on American products.',
   'arguments': {'agent': 'Canada',
    'object': 'US tariffs',
    'location': 'Canada',
    'time': '2025-02-03',
    'counterpart': 'United States',
    'impact_summary': 'Increased tension in trade relations and potential economic repercussions.'}},

In [48]:
# Show the raw text of the first article for comparison with the extraction.
print(df['FullContent'].iloc[0])

Fed officials warn of inflation risks from tariff surge
WASHINGTON - The Trump administration’s plan for trade tariffs come with inflation risks, two Federal Reserve officials warned on Feb 3, stopping short of saying how that affects their thinking on monetary policy in a climate of uncertainty.

“The kind of broad-based tariffs that were announced over the weekend, one would expect to have an impact on prices,” Federal Reserve Bank of Boston president Susan Collins said in an interview with CNBC, adding that “with broad-based tariffs, you actually would not only see increases in prices of final goods, but also a number of intermediate goods.”

Ms Collins, however, noted that there’s not a lot of experience on how mega tariffs impact the economy in the modern age, which makes it hard for the Fed to know exactly how things will play out. She noted it is possible that the Fed could even shrug off a one-time increase in inflation tied to the tariffs, although even that was uncertain.

Sp

In [73]:
# Load the previously saved event-detection output for comparison.
df2 = pd.read_parquet('../data/ed_output.parquet')

In [75]:
# Inspect the event-detection results stored for the first row.
df2['ed_results'].iloc[0]

array([{'events': array([{'offset': array([207, 211]), 'trigger': 'sale', 'type': 'transferownership'},
              {'offset': array([40, 46]), 'trigger': 'income', 'type': 'transfermoney'}],
             dtype=object), 'text': 'Apart from a divergence among different income groups, China’s consumption trend this year is polarised among sectors, as service consumption such as concerts, tourism and catering have been better than the sale of goods – which has been more suppressed by worsening income prospects, Zhang said'},
       {'events': array([{'offset': array([78, 86]), 'trigger': 'spending', 'type': 'transfermoney'}],
             dtype=object), 'text': 'Dan Wang, chief economist at Hang Seng Bank China, said the surge in consumer spending in tourism and entertainment – which are heavily driven by a pent-up need to get out and de-stress after COVID-19 lockdowns – is unlikely to be sustainable'},
       {'events': array([{'offset': array([75, 81]), 'trigger': 'income', 'type': 't