## News Feed Extraction & Classification

In [6]:
import sys
from pathlib import Path
from typing import Literal, Optional

import pandas as pd
from IPython.display import Markdown, display
from pydantic import BaseModel, Field

# The project-local `utils` package lives one directory up from this notebook,
# so the parent directory must be on sys.path before the imports below.
sys.path.append('..')
from utils.llm_client import LLMClient
from utils.prompts import render
from utils.router import pick_model

In [3]:
class CrisisEvent(BaseModel):
    """Validated record for one news-feed item.

    Instances are built from the JSON text returned by the LLM; any value
    outside the declared types or literals raises a pydantic validation
    error.
    """

    # Must be exactly one of the district names listed here.
    district: Literal[
        "Colombo", "Gampaha", "Kandy", "Kalutara", "Galle", "Matara",
        "Hambantota", "Jaffna", "Anuradhapura", "Polonnaruwa", "Badulla",
        "Monaragala", "Ratnapura", "Kegalle", "Nuwara Eliya", "Trincomalee",
        "Batticaloa", "Ampara", "Puttalam", "Kurunegala", "Mannar",
        "Mullaitivu", "Kilinochchi", "Vavuniya",
    ]
    # Flood level in meters (per the field name); None when not provided.
    flood_level_meters: Optional[float] = None
    # NOTE(review): "vicm_count" looks like a typo for "victim_count". The
    # name is kept because the prompt schema uses the same key and it becomes
    # a column in the exported report - rename both together if confirmed.
    vicm_count: int = Field(default=0)
    # Free-text description of the main need; required.
    main_need: str
    # Severity label; must be one of the three values below.
    status: Literal["Critical", "Warning", "Stable"]

In [7]:
# Load news feed: one item per line, trimmed, with blank lines discarded.
file_path = "../data/news_feed.txt"

with open(file_path, mode="r", encoding="utf-8") as feed:
    # filter(None, ...) drops lines that are empty once stripped.
    news_items = list(filter(None, (raw_line.strip() for raw_line in feed)))

print(f"Loaded {len(news_items)} news items")
# Preview the first three items as the cell output.
news_items[:3]

Loaded 30 news items


 'SOS: 5 people trapped on a roof in Ja-Ela (Gampaha). Water rising fast. Need boat immediately.',
 'Update: Kandy road cleared near Peradeniya. Traffic moving slowly. No victims reported.']

In [8]:
# Initialize LLM client
# `pick_model` and `LLMClient` are project helpers imported from `utils`; the
# model name returned by `pick_model` is handed to the client constructor.
# NOTE(review): presumably 'openai' selects the provider and 'general' a task
# profile - confirm against utils.router.
model = pick_model('openai', 'general')
client = LLMClient('openai', model)

# Schema for extraction
# Plain-text description of the JSON object the model should return. It is
# passed to the prompt template as `schema`; its keys and allowed values mirror
# the fields of the CrisisEvent model, so the two must be kept in sync.
# NOTE(review): "vicm_count" looks like a typo for "victim_count"; it matches
# the CrisisEvent field name, so rename both together if confirmed.
schema_str = """
{
  "district": "string (one of: Colombo, Gampaha, Kandy, Kalutara, Galle, Matara, Hambantota, Jaffna, Anuradhapura, Polonnaruwa, Badulla, Monaragala, Ratnapura, Kegalle, Nuwara Eliya, Trincomalee, Batticaloa, Ampara, Puttalam, Kurunegala, Mannar, Mullaitivu, Kilinochchi, Vavuniya)",
  "flood_level_meters": "number or null",
  "vicm_count": "integer (default 0)",
  "main_need": "string",
  "status": "string (one of: Critical, Warning, Stable)"
}
"""

In [None]:
# Process each news item
def _extract_json_object(raw_text):
    """Return the outermost ``{...}`` span of an LLM reply.

    The reply may wrap the JSON in a Markdown code fence (with or without a
    language tag) or surround it with prose. Slicing from the first ``{`` to
    the last ``}`` handles all of these and gives the same string as plain
    fence stripping whenever the reply is a clean JSON object.

    Args:
        raw_text: The reply text from the LLM.

    Returns:
        The text between the first ``{`` and the last ``}`` inclusive, or the
        stripped reply unchanged when no such span exists (validation will
        then report the problem).
    """
    cleaned = raw_text.strip()
    start = cleaned.find("{")
    end = cleaned.rfind("}")
    if start != -1 and end > start:
        return cleaned[start:end + 1]
    return cleaned


valid_events = []
# Items that could not be extracted/validated, kept for later inspection
# instead of only being printed.
failed_items = []

for idx, item in enumerate(news_items, start=1):
    try:
        # Extract JSON using LLM
        prompt_text, spec = render(
            'json_extract.v1',
            schema=schema_str,
            text=item
        )

        messages = [{'role': 'user', 'content': prompt_text}]
        response = client.chat(messages, temperature=spec.temperature, max_tokens=spec.max_tokens)

        # Clean JSON (remove markdown code fences / surrounding prose)
        json_str = _extract_json_object(response["text"])

        # Validate with Pydantic
        event = CrisisEvent.model_validate_json(json_str)
    except Exception as e:
        # Deliberately broad: one bad item (LLM/API error, malformed JSON,
        # schema violation) must not abort the whole batch.
        failed_items.append({"item": idx, "text": item, "error": str(e)})
        print(f"Item {idx}: Invalid - {e}")
    else:
        valid_events.append(event.model_dump())
        print(f"Item {idx}: Valid - {event.district}")

# Convert to DataFrame; explicit columns keep the header row even when no
# item validated.
df = pd.DataFrame(valid_events, columns=list(CrisisEvent.model_fields))

# Save to Excel
output_path = "../outputs/flood_report.xlsx"
# Create the output directory up front so a fresh checkout does not fail at
# the very end, after all LLM calls have already been made.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
df.to_excel(output_path, index=False)

print(f"Saved {len(valid_events)} valid events to {output_path}")

Item 1: Valid - Colombo
Item 2: Valid - Gampaha
Item 3: Valid - Kandy
Item 4: Valid - Kalutara
Item 5: Valid - Gampaha
Item 6: Valid - Colombo
Item 7: Valid - Matara
Item 8: Valid - Colombo
Item 9: Valid - Galle
Item 10: Valid - Gampaha
Item 11: Valid - Colombo
Item 12: Invalid - 2 validation errors for CrisisEvent
district
  Input should be 'Colombo', 'Gampaha', 'Kandy', 'Kalutara', 'Galle', 'Matara', 'Hambantota', 'Jaffna', 'Anuradhapura', 'Polonnaruwa', 'Badulla', 'Monaragala', 'Ratnapura', 'Kegalle', 'Nuwara Eliya', 'Trincomalee', 'Batticaloa', 'Ampara', 'Puttalam', 'Kurunegala', 'Mannar', 'Mullaitivu', 'Kilinochchi' or 'Vavuniya' [type=literal_error, input_value=None, input_type=NoneType]
    For further information visit https://errors.pydantic.dev/2.12/v/literal_error
status
    For further information visit https://errors.pydantic.dev/2.12/v/literal_error
Item 13: Valid - Kandy
Item 14: Valid - Gampaha
Item 15: Valid - Nuwara Eliya
Item 16: Valid - Gampaha
Item 17: Valid - Ratn