In [58]:
from eventregistry import *
import json, os, sys
import pandas as pd
import numpy as np
from openai import OpenAI


In [59]:
# Both API keys are read from environment variables so that no secret is stored
# in the notebook. os.getenv returns None when a variable is not set.
api_key = os.getenv("NEWSAPI_API_KEY")
# NOTE(review): allowUseOfArchive=False presumably limits queries to recent
# content only — confirm against the Event Registry SDK documentation.
event_registry = EventRegistry(apiKey=api_key, allowUseOfArchive=False)
# Client object passed to openai_distinctive_events_list in later cells.
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [76]:
# Look up events matching the keyword query and print each one as JSON.
search_query = "Joe Biden"
event_query = QueryEventsIter(keywords=search_query)
# Collect every returned event so it can be inspected after the loop; the loop
# variable `event` stays bound to the last result, which later cells read.
event_list = []
# NOTE(review): the SDK documentation appears to list date / rel / size /
# socialScore as event sort options — confirm that "Recent" is accepted.
for event in event_query.execQuery(event_registry, sortBy="Recent", maxItems=1):
    print(json.dumps(event, indent=4))
    event_list.append(event)

# Make an empty result visible here instead of letting a later cell fail on a
# missing (or stale, from a previous run) `event` variable.
if not event_list:
    print(f"No events found for query {search_query!r}")

{
    "uri": "spa-3490090",
    "concepts": [
        {
            "uri": "http://en.wikipedia.org/wiki/United_Kingdom",
            "type": "loc",
            "score": 100,
            "label": {
                "eng": "United Kingdom"
            },
            "location": {
                "type": "country",
                "label": {
                    "eng": "United Kingdom"
                }
            }
        },
        {
            "uri": "http://en.wikipedia.org/wiki/Prime_Minister_of_the_United_Kingdom",
            "type": "wiki",
            "score": 88,
            "label": {
                "eng": "Prime Minister of the United Kingdom"
            }
        },
        {
            "uri": "http://en.wikipedia.org/wiki/Labour_Party_(UK)",
            "type": "org",
            "score": 85,
            "label": {
                "eng": "Labour Party (UK)"
            }
        },
        {
            "uri": "http://en.wikipedia.org/wiki/Rishi_Sunak",
            "typ

In [77]:
# Download up to 10 English articles for the event selected above and store
# them as a JSON file named after the language and the event URI.
event_uri = event["uri"]
lang = "eng"
q = QueryEventArticlesIter(eventUri=event_uri, lang=lang)
filename = "../data/processed/" + lang + "_" + event_uri + ".json"

# Run the query before touching the file on disk: if the request fails, any
# previously saved result is left intact.
articles = []
for article in q.execQuery(
    event_registry,
    sortBy="date",
    maxItems=10,
    returnInfo=ReturnInfo(sourceInfo=SourceInfoFlags(location=True)),
):
    articles.append(article)

# Create the target folder on first use so a fresh checkout does not fail here.
os.makedirs(os.path.dirname(filename), exist_ok=True)
# Mode "w" already truncates an existing file. The explicit UTF-8 encoding is
# needed because ensure_ascii=False writes non-ASCII characters unescaped and
# the platform default encoding may not be able to represent them.
with open(filename, "w", encoding="utf-8") as f:
    json.dump(articles, f, ensure_ascii=False, indent=4)

In [62]:
# Download up to 10 English articles for `event_uri` and store them as JSON.
# NOTE(review): `event_uri` is not assigned in this cell, so it must already be
# defined by another cell; this cell also appears to repeat the preceding
# article-fetch cell — consider keeping only one of them.
lang = "eng"
q = QueryEventArticlesIter(eventUri=event_uri, lang=lang)
filename = "../data/processed/" + lang + "_" + event_uri + ".json"

# Run the query before touching the file on disk: if the request fails, any
# previously saved result is left intact.
articles = []
for article in q.execQuery(
    event_registry,
    sortBy="date",
    maxItems=10,
    returnInfo=ReturnInfo(sourceInfo=SourceInfoFlags(location=True)),
):
    articles.append(article)

# Create the target folder on first use so a fresh checkout does not fail here.
os.makedirs(os.path.dirname(filename), exist_ok=True)
# Mode "w" already truncates an existing file. The explicit UTF-8 encoding is
# needed because ensure_ascii=False writes non-ASCII characters unescaped and
# the platform default encoding may not be able to represent them.
with open(filename, "w", encoding="utf-8") as f:
    json.dump(articles, f, ensure_ascii=False, indent=4)

In [63]:
def openai_distinctive_events_list(
    article,
    openai_client,
    model="gpt-4o",
):
    """Ask a chat model for the chronological list of events in an article.

    Args:
        article: Mapping with a "body" entry holding the article text.
        openai_client: Client object exposing ``chat.completions.create``.
        model: Name of the chat model to query.

    Returns:
        The message content of the first completion choice, or the string
        "Sorry, error from GPT." when the API call raises.

    Raises:
        KeyError: If ``article`` is a dict without a "body" entry; the prompt is
            built before the guarded API call.
    """
    # Adjacent string literals are joined by the parser, so the instruction
    # stays one sentence-per-line in the source without the runs of indentation
    # spaces that backslash continuations inside a single literal would embed.
    instructions = (
        "You are an expert at summarizing the news. "
        "Read the following news article and translate it into English if necessary. "
        "Create a list of distinctive events described in the article, "
        "arranged in chronological order. "
        "The first event listed should be the earliest event mentioned in the article, "
        "followed by the subsequent events in the order they occurred: "
    )
    prompt_complete = instructions + article["body"]
    messages = [{"role": "user", "content": prompt_complete}]
    try:
        response = openai_client.chat.completions.create(
            model=model, messages=messages, temperature=0
        )
        return response.choices[0].message.content
    except Exception as e:
        # Deliberate best-effort: callers store whatever string is returned, so
        # a failed request is reported and replaced by a sentinel text instead
        # of aborting the whole batch.
        print(f"Error: {e}")
        return "Sorry, error from GPT."

In [67]:
# Add a model-generated "distinctive_events" entry to every stored article of
# the current `lang` / `event_uri` (both set by earlier cells) and save the
# enriched list next to the source file.
src_filename = "../data/processed/" + lang + "_" + event_uri + ".json"
# Article text may contain non-ASCII characters, so decode explicitly as UTF-8
# rather than relying on the platform default encoding.
with open(src_filename, "r", encoding="utf-8") as f:
    data = f.read()
src_articles = json.loads(data)

# The source file is already closed here; it does not need to stay open while
# the (slow) API calls run. Each article dict is updated in place.
des_articles = []
for src_art in src_articles:
    src_art["distinctive_events"] = openai_distinctive_events_list(
        src_art, openai_client, model="gpt-4o"
    )
    des_articles.append(src_art)

des_filename = src_filename + "_distinctive_events.json"
# Mode "w" truncates an existing file, so no explicit removal is needed.
# ensure_ascii=False keeps non-English text readable, matching how the article
# files themselves are written.
with open(des_filename, "w", encoding="utf-8") as f:
    json.dump(des_articles, f, ensure_ascii=False, indent=4)

In [69]:
# Add a model-generated "distinctive_events" entry to every stored Chinese
# ("zho") article of `event_uri` and save the enriched list next to the source.
# NOTE(review): no visible cell writes the "zho" article file — presumably it
# was produced by an earlier run with lang = "zho"; confirm before Run All.
lang = "zho"
src_filename = "../data/processed/" + lang + "_" + event_uri + ".json"
# The text is non-ASCII, so decode explicitly as UTF-8 rather than relying on
# the platform default encoding.
with open(src_filename, "r", encoding="utf-8") as f:
    data = f.read()
src_articles = json.loads(data)

# The source file is already closed here; it does not need to stay open while
# the (slow) API calls run. Each article dict is updated in place.
des_articles = []
for src_art in src_articles:
    src_art["distinctive_events"] = openai_distinctive_events_list(
        src_art, openai_client, model="gpt-4o"
    )
    des_articles.append(src_art)

des_filename = src_filename + "_distinctive_events.json"
# Mode "w" truncates an existing file, so no explicit removal is needed.
# ensure_ascii=False keeps the Chinese text readable instead of \u escapes.
with open(des_filename, "w", encoding="utf-8") as f:
    json.dump(des_articles, f, ensure_ascii=False, indent=4)

In [None]:
# Add a model-generated "distinctive_events" entry to every stored Chinese
# ("zho") article of `event_uri` and save the enriched list next to the source.
# NOTE(review): this cell appears to repeat the preceding "zho" cell and has
# never been executed; running both issues the same API requests twice —
# consider removing one of them.
lang = "zho"
src_filename = "../data/processed/" + lang + "_" + event_uri + ".json"
# The text is non-ASCII, so decode explicitly as UTF-8 rather than relying on
# the platform default encoding.
with open(src_filename, "r", encoding="utf-8") as f:
    data = f.read()
src_articles = json.loads(data)

# The source file is already closed here; it does not need to stay open while
# the (slow) API calls run. Each article dict is updated in place.
des_articles = []
for src_art in src_articles:
    src_art["distinctive_events"] = openai_distinctive_events_list(
        src_art, openai_client, model="gpt-4o"
    )
    des_articles.append(src_art)

des_filename = src_filename + "_distinctive_events.json"
# Mode "w" truncates an existing file, so no explicit removal is needed.
# ensure_ascii=False keeps the Chinese text readable instead of \u escapes.
with open(des_filename, "w", encoding="utf-8") as f:
    json.dump(des_articles, f, ensure_ascii=False, indent=4)

In [70]:
# Add a model-generated "distinctive_events" entry to every stored Hindi
# ("hin") article and save the enriched list next to the source file.
# Unlike the other cells, the event id "hin-359877" is hard-coded here rather
# than taken from `event_uri`.
lang = "hin"
src_filename = "../data/processed/" + lang + "_" + "hin-359877" + ".json"
# The text is non-ASCII, so decode explicitly as UTF-8 rather than relying on
# the platform default encoding.
with open(src_filename, "r", encoding="utf-8") as f:
    data = f.read()
src_articles = json.loads(data)

# The source file is already closed here; it does not need to stay open while
# the (slow) API calls run. Each article dict is updated in place.
des_articles = []
for src_art in src_articles:
    src_art["distinctive_events"] = openai_distinctive_events_list(
        src_art, openai_client, model="gpt-4o"
    )
    des_articles.append(src_art)

des_filename = src_filename + "_distinctive_events.json"
# Mode "w" truncates an existing file, so no explicit removal is needed.
# ensure_ascii=False keeps the Hindi text readable instead of \u escapes.
with open(des_filename, "w", encoding="utf-8") as f:
    json.dump(des_articles, f, ensure_ascii=False, indent=4)