In [2]:
from eventregistry import *
import json, os, sys

from dotenv import load_dotenv
import os
from eventregistry import EventRegistry


# Load key/value pairs from a local .env file into the process environment.
load_dotenv()
# os.getenv returns None when EVENT_REGISTRY_API_KEY is not set.
api_key = os.getenv("EVENT_REGISTRY_API_KEY")
# Event Registry client used by the article query cell; archive access is disabled.
er = EventRegistry(apiKey=api_key, allowUseOfArchive=False)

In [4]:
# Fetch up to 100 de-duplicated Chinese-language articles whose body contains
# the keyword below, sorted by date, and save them as "<conceptUri>_<lang>.json".
conceptUri = "Russo_Ukrainian_War"  # only used to build the output file name
lang = "zho"
query = {
    "$query": {
        # Alternative kept for reference: match on the Wikipedia concept
        # instead of a literal keyword.
        # "conceptUri": "https://en.wikipedia.org/wiki/Russo-Ukrainian_War",
        "keyword": "俄乌战争",
        "keywordLoc": "body",
        "lang": lang,
    },
    "$filter": {"isDuplicate": "skipDuplicates"},
}

q = QueryArticlesIter.initWithComplexQuery(query)

filename = conceptUri + "_" + lang + ".json"

# Drain the result iterator BEFORE touching the output file, so that a failed
# or interrupted request leaves any previously saved results in place.
articles = list(
    q.execQuery(
        er,
        sortBy="date",
        maxItems=100,
        returnInfo=ReturnInfo(
            sourceInfo=SourceInfoFlags(location=True),
        ),
    )
)

# Mode "w" truncates an existing file, so no separate delete step is needed.
# The encoding is explicit because ensure_ascii=False writes raw Chinese text,
# which fails under non-UTF-8 locale defaults.
with open(filename, "w", encoding="utf-8") as f:
    json.dump(articles, f, ensure_ascii=False, indent=4)

In [4]:
# Combine two JSON files into one new JSON file.
import json


def combine_json(file1, file2, output):
    """Concatenate the contents of two JSON files and write them to a third.

    Args:
        file1: Path of the first JSON file.
        file2: Path of the second JSON file.
        output: Path of the file to create (overwritten if it exists).

    Returns:
        The combined data, i.e. ``data1 + data2``. Both files are expected to
        hold values that support ``+`` with each other (JSON arrays in this
        notebook).
    """
    # JSON is read and written as UTF-8 explicitly so the result does not
    # depend on the platform's locale encoding.
    with open(file1, encoding="utf-8") as f:
        data1 = json.load(f)
    with open(file2, encoding="utf-8") as f:
        data2 = json.load(f)
    data = data1 + data2
    with open(output, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII text readable instead of \uXXXX
        # escapes, matching how the other cells in this notebook write JSON.
        json.dump(data, f, ensure_ascii=False, indent=4)
    return data


# Merge the `eng` and `zho` article files for the South China Sea topic into
# one combined file. The returned list is the last expression of the cell, so
# the notebook displays it as the cell output.
combine_json(
    "25_eng_Territorial_disputes_in_the_South_China_Sea.json",
    "25_zho_Territorial_disputes_in_the_South_China_Sea.json",
    "25_combined_Territorial_disputes_in_the_South_China_Sea.json",
)

[{'uri': '8174691294',
  'lang': 'eng',
  'isDuplicate': False,
  'date': '2024-06-12',
  'time': '17:58:18',
  'dateTime': '2024-06-12T17:58:18Z',
  'dateTimePub': '2024-06-12T17:25:09Z',
  'dataType': 'news',
  'sim': 0.9529411792755127,
  'url': 'https://gulftoday.ae:443/opinion/2024/06/12/vietnam-eyes-greener-power-seeks-energy-savings',
  'title': 'Vietnam eyes greener power, seeks energy savings',
  'body': 'Pham Van Coung, a director of Pho Noi Power Station, works at the Pho Noi Power Station, a state utility owned by Electricity of Vietnam, in Hung Yen province, Vietnam, on June 4, 2024. Reuters\n\nLights are off and air conditioning is down at the headquarters of Vietnam\'s state-run electricity provider EVN as the country\'s top power utility tries to "lead by example" to avoid a repeat of last year\'s crippling blackouts, an official tells visitors. But many businesses around Vietnam\'s capital Hanoi appear to be ignoring the call to conserve power, keeping decorative but o

In [5]:
from dotenv import load_dotenv
from openai import OpenAI
import os

# Reload the .env file; override=True lets values from the file replace
# variables that are already set in the process environment.
load_dotenv(override=True)
# OpenAI client passed to the QnA helpers; os.getenv returns None when
# OPENAI_API_KEY is not set.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [6]:
def QnA(
    art,
    client,
    model,
    QnA_prompt,
    QnA_answer,
):
    """Ask a chat model one multiple-choice question about a single article.

    The prompt is the question, the article title and the article body on
    separate lines, followed by a line that introduces the answer options.

    Args:
        art: Mapping with string values under "title" and "body".
        client: Object exposing ``chat.completions.create(...)``.
        model: Model name forwarded to the completion call.
        QnA_prompt: The question text.
        QnA_answer: The answer options, appended right after the intro line.

    Returns:
        The content of the first choice in the response, or the fixed string
        "Sorry, error from GPT." when the request or response handling raises.
    """
    options_line = "Please choose from the options below:" + QnA_answer
    full_prompt = "\n".join([QnA_prompt, art["title"], art["body"], options_line])
    chat = [{"role": "user", "content": full_prompt}]

    try:
        reply = client.chat.completions.create(
            model=model, messages=chat, temperature=0
        )
        return reply.choices[0].message.content
    except Exception as err:  # best effort: report the failure and carry on
        print(f"Error: {err}")
        return "Sorry, error from GPT."

In [7]:
import json


def get_LLM_QnA(
    src_filename,
    client,
    model="gpt-4o",
    QnA_prompt="Which of the following is correct tone that this article discuss about Raisi's reputation:",
    QnA_answer="A) Positive\nB) Neutral\nC) Negative\nD) Not applicable\n",
):
    """Ask the model one question per article and save the annotated articles.

    Every article loaded from ``src_filename`` gets three extra keys:
    "Question", "Answers" and "LLM_answer" (the value returned by ``QnA``).
    The annotated list is written next to the source file under the same
    base name prefixed with "QnA_", replacing any existing file.

    Args:
        src_filename: Path of a JSON file holding a list of article dicts.
        client: Chat client forwarded to ``QnA``.
        model: Model name forwarded to ``QnA``.
        QnA_prompt: Question text stored on each article and sent to the model.
        QnA_answer: Answer options stored on each article and sent to the model.

    Returns:
        None. The result is only written to disk.
    """
    # Read everything up front so the file handle is not held open while the
    # (slow) model calls run.
    with open(src_filename, "r", encoding="utf-8") as f:
        src_articles = json.load(f)

    des_articles = []
    for src_art in src_articles:
        src_art["Question"] = QnA_prompt
        src_art["Answers"] = QnA_answer
        src_art["LLM_answer"] = QnA(
            src_art,
            client,
            model=model,
            QnA_prompt=QnA_prompt,
            QnA_answer=QnA_answer,
        )
        des_articles.append(src_art)

    # Prefix only the file name, not the whole path, so a source such as
    # "data/x.json" is written to "data/QnA_x.json".
    src_dir, src_name = os.path.split(src_filename)
    des_filename = os.path.join(src_dir, "QnA_" + src_name)

    # Mode "w" already replaces an existing file; UTF-8 is explicit because
    # ensure_ascii=False emits raw non-ASCII characters.
    with open(des_filename, "w", encoding="utf-8") as f:
        json.dump(des_articles, f, ensure_ascii=False, indent=4)

In [9]:
import re


def extract_answer(qna_text):
    """Return the first answer letter (A-E) that is followed by ")".

    The letter may appear anywhere in the text, but it must not be the tail
    of a longer word: the "A)" in "(NASA)" is not treated as an answer,
    while "A)", "(A)" and "answer is A)" are.

    Args:
        qna_text: The model's reply. Non-string values (e.g. None from a JSON
            null) are accepted and yield None.

    Returns:
        The matched letter as a one-character string, or None if no answer
        choice is found.
    """
    if not isinstance(qna_text, str):
        return None
    # The lookbehind rejects a letter that merely ends an ASCII word. It is
    # restricted to ASCII letters on purpose so that an answer directly after
    # CJK text (e.g. "答案是C)") still matches.
    match = re.search(r"(?<![A-Za-z])([A-E])\)", qna_text)
    return match.group(1) if match else None


def extract_answer_from_json(src_filename):
    """Print the answer letter extracted from every record of a QnA JSON file.

    For each record, the "LLM_answer" text is passed to ``extract_answer``.
    When a letter is found it is printed together with the record's "lang"
    value ("unknown" if the key is absent); otherwise a "No answer extracted"
    line is printed.

    Args:
        src_filename: Path of a JSON file holding a list of record dicts.

    Returns:
        None. Results are printed only.
    """
    # Explicit UTF-8: the QnA files contain raw non-ASCII text.
    with open(src_filename, "r", encoding="utf-8") as file:
        data = json.load(file)

    for record in data:
        # A missing key and a JSON null are both treated as an empty reply.
        LLM_answer = record.get("LLM_answer") or ""
        answer = extract_answer(LLM_answer)
        if answer:
            print(f"Extracted answer: {answer}, lang: {record.get('lang', 'unknown')}")
        else:
            print("No answer extracted")

In [10]:
def count_num_of_answers(src_filename):
    """Print how often each answer letter (A-E) occurs in a QnA JSON file.

    Each record's "LLM_answer" text is passed to ``extract_answer``; records
    for which no letter is found are not counted. The tally is printed as a
    dict with one entry per letter, including letters with a count of zero.

    Args:
        src_filename: Path of a JSON file holding a list of record dicts.

    Returns:
        None. The tally is printed only.
    """
    # Explicit UTF-8: the QnA files contain raw non-ASCII text.
    with open(src_filename, "r", encoding="utf-8") as file:
        data = json.load(file)

    # Pre-seed every option so unused letters still show up with a zero.
    answers = dict.fromkeys("ABCDE", 0)
    for record in data:
        # A missing key and a JSON null are both treated as an empty reply.
        answer = extract_answer(record.get("LLM_answer") or "")
        if answer:
            answers[answer] += 1

    print(answers)

In [11]:
def copy_LLM_QnA(
    src_filename,
    des_filename,
    N=10,
):
    """Append question/answer records from a QnA file onto a destination file.

    Records are paired by position: the n-th entry of the source file is
    attached to the n-th entry of the destination file (assumes both files
    list the same articles in the same order; verify against how they were
    produced). Each appended record holds "Question", "Answers", "LLM_answer"
    and "post_LLM_answer", the letter found by ``extract_answer`` or the
    string "NaN" when none is found. The destination file is rewritten in
    place.

    Args:
        src_filename: Path of the JSON file produced by ``get_LLM_QnA``.
        des_filename: Path of the JSON file to update.
        N: Maximum number of leading entries to copy. Values larger than the
            shorter of the two files are clamped instead of raising
            IndexError; None copies every pairable entry.

    Returns:
        None. The result is only written to disk.
    """
    with open(des_filename, "r", encoding="utf-8") as f:
        des_articles = json.load(f)
    with open(src_filename, "r", encoding="utf-8") as f:
        src_articles = json.load(f)

    # Never index past the end of either list.
    available = min(len(src_articles), len(des_articles))
    count = available if N is None else max(0, min(N, available))

    for src_art, des_art in zip(src_articles[:count], des_articles[:count]):
        llm_answer = src_art["LLM_answer"]
        # Reuse the shared extractor rather than a private copy of its regex;
        # a JSON null reply is treated as empty text.
        letter = extract_answer(llm_answer or "")
        qna = {
            "Question": src_art["Question"],
            "Answers": src_art["Answers"],
            "LLM_answer": llm_answer,
            "post_LLM_answer": letter if letter else "NaN",
        }
        # Start the "QnA" list on first use, otherwise extend the existing one.
        des_art.setdefault("QnA", []).append(qna)

    # Overwrite the destination file with the updated articles.
    with open(des_filename, "w", encoding="utf-8") as f:
        json.dump(des_articles, f, ensure_ascii=False, indent=4)

## Test different questions for article topical alignment

In [76]:
# Ask the same topical-alignment question about every article in both the
# `eng` and the `zho` article files.
question = "Which aspect does the article emphasize:"
answer = (
    "A) Diplomatic meetings and agreements between countries "
    "B) Direct encounters and clashes between naval forces "
    "C) Statements and perspectives from political leaders "
    "D) Joint military exercises and defense partnerships "
    "E) None of the above"
)
for article_file in (
    "25_eng_Territorial_disputes_in_the_South_China_Sea.json",
    "25_zho_Territorial_disputes_in_the_South_China_Sea.json",
):
    get_LLM_QnA(
        article_file,
        client,
        model="gpt-4o",
        QnA_prompt=question,
        QnA_answer=answer,
    )

In [77]:
# Print the per-letter answer tally for each QnA result file, one dict per file.
for qna_file in (
    "QnA_25_eng_Territorial_disputes_in_the_South_China_Sea.json",
    "QnA_25_zho_Territorial_disputes_in_the_South_China_Sea.json",
):
    count_num_of_answers(qna_file)

{'A': 1, 'B': 3, 'C': 5, 'D': 1, 'E': 15}
{'A': 4, 'B': 3, 'C': 6, 'D': 0, 'E': 12}


In [78]:
# Attach the first 25 question/answer records of each QnA file to the
# corresponding Alignment file.
for qna_file, alignment_file in (
    (
        "QnA_25_eng_Territorial_disputes_in_the_South_China_Sea.json",
        "Alignment_25_eng_Territorial_disputes_in_the_South_China_Sea.json",
    ),
    (
        "QnA_25_zho_Territorial_disputes_in_the_South_China_Sea.json",
        "Alignment_25_zho_Territorial_disputes_in_the_South_China_Sea.json",
    ),
):
    copy_LLM_QnA(
        src_filename=qna_file,
        des_filename=alignment_file,
        N=25,
    )

## Test question for article interest

In [None]:
# Ask which dispute-resolution approach each article favours.
question = "What approach does the article prefer for resolving the territorial disputes in the South China Sea:"
# Adjacent literals are joined by the parser into one single-line string; the
# trailing spaces inside each piece are part of the text sent to the model.
answer = (
    "A) Bilateral negotiations "
    "B) Multilateral talks  "
    "C) International law  "
    "D) Economic cooperation and maintaining stability"
)
get_LLM_QnA(
    "Territorial_disputes_in_the_South_China_Sea.json",
    client,
    QnA_answer=answer,
    QnA_prompt=question,
    model="gpt-4o",
)

In [None]:
# Print, for every record in the QnA file, the answer letter extracted from
# the model's reply (or a "No answer extracted" line).
extract_answer_from_json("QnA_Territorial_disputes_in_the_South_China_Sea.json")

In [None]:
# Attach the first 10 question/answer records from the QnA file to the
# entries at the same positions in the Interest file, which is rewritten in place.
copy_LLM_QnA(
    src_filename="QnA_Territorial_disputes_in_the_South_China_Sea.json",
    des_filename="Interest_Territorial_disputes_in_the_South_China_Sea.json",
    N=10,
)