In [74]:
import os
import pandas as pd
from eventregistry import *
from psycopg2.extras import execute_values
from dotenv import load_dotenv
from utils import get_db_connection, log
from datetime import datetime
from psycopg2.extras import execute_values
load_dotenv()


True

In [75]:
# Read the Event Registry API key from the process environment; the lookup
# yields None when the variable is not set.
api_key = os.environ.get("EVENT_REGISTRY_API_KEY")

# Shared Event Registry client, reused by the query cells below.
er = EventRegistry(apiKey=api_key)


In [76]:
# Topic used both to resolve the Event Registry concept and to label each row.
EVENT_LABEL = "Protest"
# Upper bound on the number of articles pulled from the result iterator.
MAX_ARTICLES = 50

concept_uri = er.getConceptUri(EVENT_LABEL)

# English news and press releases mentioning "Caterpillar" that are tagged with
# the concept above, from 2024-12-01 up to today.
q = QueryArticlesIter(
    keywords="Caterpillar",
    conceptUri=concept_uri,
    lang="eng",
    dataType=["news", "pr"],
    dateStart="2024-12-01",
    dateEnd=datetime.today().strftime("%Y-%m-%d"),
)

# `maxItems` caps the iterator itself, so no manual counter/break is needed.
# The `or {}` guards cover a nested object that is present but null:
# `.get(key, {})` only falls back to `{}` when the key is missing entirely.
# NOTE(review): "location" was empty for every row in the recorded output;
# presumably it has to be requested explicitly via returnInfo - confirm.
articles = [
    {
        "date": article.get("date"),
        "source": (article.get("source") or {}).get("title", ""),
        "url": article.get("url", ""),
        "title": article.get("title", ""),
        "location": (article.get("location") or {}).get("label", ""),
        "event": EVENT_LABEL,
    }
    for article in q.execQuery(er, maxItems=MAX_ARTICLES)
]

df = pd.DataFrame(articles)
log(f"Fetched {len(df)} ESG articles.")
df.head()


[LOG] Fetched 29 ESG articles.


Unnamed: 0,date,source,url,title,location,event
0,2025-06-26,FuelCellsWorks,https://fuelcellsworks.com/2025/06/26/green-hy...,Caterpillar to Demonstrate Hydrogen CHP System...,,Protest
1,2025-06-04,Pekin Daily Times,https://www.pekintimes.com/story/news/local/20...,"From safety to productivity, here are new tech...",,Protest
2,2025-06-04,Indianapolis Star,https://eu.pjstar.com/story/news/local/2025/06...,"From safety to productivity, here are new tech...",,Protest
3,2025-06-19,Canadian Mining Journal,https://www.canadianminingjournal.com/news/cat...,Caterpillar's MINExpo 2024 booth design receiv...,,Protest
4,2025-06-23,El diario24,https://www.eldiario24.com/en/caterpillar-mari...,"Not electricity, not hydrogen -- Just a dual...",,Protest


In [78]:
def load_esg_to_db(df):
    """Replace the contents of the ``esg_events`` table with the rows of ``df``.

    The table is created if it does not exist, emptied, and refilled inside a
    single transaction, so a failed insert is rolled back and the previous
    rows stay in place instead of leaving an empty table behind.

    Args:
        df: pandas DataFrame with the columns ``date``, ``source``, ``url``,
            ``title``, ``location`` and ``event``. Column order is irrelevant
            and extra columns are ignored. Missing values (None/NaN/NaT) are
            written as SQL NULL.

    Returns:
        None. An empty ``df`` is logged and skipped without touching the
        database.

    Raises:
        KeyError: if ``df`` lacks one of the required columns.
        Exception: any database error is re-raised after the rollback.
    """
    if df.empty:
        log("⚠️ No ESG articles to insert.")
        return

    columns = ["date", "source", "url", "title", "location", "event"]

    # Select by name so the INSERT never depends on the DataFrame's column
    # order, and turn pandas missing-value markers into None (SQL NULL).
    frame = df[columns].astype(object)
    frame = frame.where(frame.notna(), None)
    values = list(frame.itertuples(index=False, name=None))

    insert_sql = """
                INSERT INTO esg_events (date, source, url, title, location, event)
                VALUES %s
            """

    with get_db_connection() as conn:
        try:
            with conn.cursor() as cur:
                cur.execute("""
                CREATE TABLE IF NOT EXISTS esg_events (
                    id SERIAL PRIMARY KEY,
                    date TIMESTAMP,
                    source TEXT,
                    url TEXT,
                    title TEXT,
                    location TEXT,
                    event TEXT
                );
            """)
                # No commit between TRUNCATE and INSERT: both must succeed or
                # neither takes effect.
                cur.execute("TRUNCATE TABLE esg_events;")
                execute_values(cur, insert_sql, values)
            conn.commit()
        except Exception:
            # Undo the TRUNCATE (and anything else pending) before re-raising.
            conn.rollback()
            raise

    # Logged only after the commit so the messages describe durable state.
    log("⚠️ Cleared old ESG articles.")
    log(f"✔ Inserted {len(df)} ESG articles into esg_events.")

# Persist the fetched articles. load_esg_to_db truncates esg_events before
# inserting, so every run replaces the table's previous contents.
load_esg_to_db(df)


[LOG] ⚠️ Cleared old ESG articles.
[LOG] ✔ Inserted 29 ESG articles into esg_events.
