In [1]:
from eventregistry import *
import json, os, sys
import os
from eventregistry import EventRegistry



In [2]:
# The Event Registry key is read from the environment so it is never stored in the notebook.
api_key = os.environ.get("NEWSAPI_API_KEY")
# Client used by every article query below; archive access is switched off.
er = EventRegistry(
    apiKey=api_key,
    allowUseOfArchive=False,
)

### English articles from India

In [5]:
# Fetch up to 20 English-language articles (sorted by date) whose body mentions
# "india", "election" and "2024" and whose source is located in India, then
# save them to "<lang>.json".
lang = "eng"
query = {
    "$query": {
        "$and": [
            {"keyword": "india", "keywordLoc": "body"},
            {"keyword": "election", "keywordLoc": "body"},
            {"keyword": "2024", "keywordLoc": "body"},
            {"sourceLocationUri": "http://en.wikipedia.org/wiki/India"},
            {"lang": lang},
        ]
    },
    "$filter": {"forceMaxDataTimeWindow": "31"},
}
q = QueryArticlesIter.initWithComplexQuery(query)
filename = lang + ".json"

articles = list(
    q.execQuery(
        er,
        sortBy="date",
        maxItems=20,
        returnInfo=ReturnInfo(
            sourceInfo=SourceInfoFlags(location=True),
        ),
    )
)

# Mode "w" already truncates an existing file, so no delete is needed first.
# ensure_ascii=False writes raw non-ASCII characters, so the encoding is pinned
# to UTF-8 instead of depending on the platform default.
with open(filename, "w", encoding="utf-8") as f:
    json.dump(articles, f, ensure_ascii=False, indent=4)

In [6]:
# Keep only the body text of each fetched article, wrapped as {"article": <body>},
# and print the resulting list (nothing is written to disk in this cell).
import json

articles_body = [{"article": article["body"]} for article in articles]
print(articles_body)

[{'article': "According to the , the counting of votes will commence at 8 AM, on Tuesday, June 4. This will cover not only the Lok Sabha elections but also the state legislative assemblies of Andhra Pradesh and Odisha, along with various bye-elections.\n\nFor the latest updates on the Lok Sabha election results, you can visit the official Election Commission of India (ECI) website: https://results.eci.gov.in/. These updates will also be accessible via the Voter Helpline App, compatible with both iOS and Android devices.\n\nTo experience comprehensive coverage, constituency wise updates and debates, the election results will be live-streamed on various news channels, YouTube channels, and social media accounts of major news outlets. Enthusiasts of large screen experience in Maharashtra, including Mumbai, can watch the 2024 Lok Sabha election results live in theatres as well as MovieMax will showcase the event titled 'Election Results 2024' starting at 9 AM.\n\nUnder Rule 54 A of the Con

In [7]:
from openai import OpenAI

# OpenAI client shared by all QnA calls; the key comes from the environment.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [72]:
def QnA(
    art,
    client,
    model="gpt-4o",
    QnA_prompt="I have 20 news articles, and I need you to create multiple-choice questions with categorical answers to identify narrative differences among the provided articles. The questions should be generic to all articles and shouldn't have answer related to one specific article. Here are the articles:",
    QnA_answer = " "
):
    """Send a single user prompt to a chat model and return the reply text.

    The prompt is ``QnA_prompt`` followed by ``QnA_answer``, a newline and ``art``.

    Args:
        art: Article text (a string) appended at the end of the prompt.
        client: Object exposing ``chat.completions.create``.
        model: Model name forwarded to the completion call.
        QnA_prompt: Instruction or question placed at the start of the prompt.
        QnA_answer: Answer options placed directly after the question.

    Returns:
        The message content of the first returned choice, or the string
        "Sorry, error from GPT." when the request raises.
    """
    # Built outside the try block on purpose: a non-string argument still raises.
    user_message = {
        "role": "user",
        "content": "".join([QnA_prompt, QnA_answer, "\n", art]),
    }
    try:
        completion = client.chat.completions.create(
            model=model, messages=[user_message], temperature=0
        )
        return completion.choices[0].message.content
    except Exception as err:  # best effort: report the failure and return a sentinel
        print(f"Error: {err}")
        return "Sorry, error from GPT."

In [13]:
# Send the stringified list of article bodies with QnA's default prompt.
response = QnA(art=str(articles_body), client=client)

In [14]:
# Show the text returned by the QnA call above.
print(response)

Sure, here are some multiple-choice questions with categorical answers to identify narrative differences among the provided articles:

### Question 1: What is the primary focus of the article?
A. Election results and political analysis  
B. Real estate and property listings  
C. Tourism and travel destinations  
D. Economic and market updates  

### Question 2: Which region or country is predominantly discussed in the article?
A. India  
B. United States  
C. Europe  
D. Global  

### Question 3: What type of event or topic is central to the article?
A. Political elections  
B. Real estate transactions  
C. Tourism and travel  
D. Economic and market trends  

### Question 4: What is the tone of the article?
A. Informative and factual  
B. Analytical and critical  
C. Promotional and enthusiastic  
D. Reflective and opinionated  

### Question 5: Which sector is primarily highlighted in the article?
A. Politics and governance  
B. Real estate and housing  
C. Tourism and hospitality  


### Testing on one question

In [70]:
import json

def get_LLM_QnA(
    src_filename,
    client,
    model="gpt-4o",
    QnA_prompt="Which of the following is correct tone that this article discuss about India elections:",
    QnA_answer="A) Positive\nB) Neutral\nC) Negative\nD) Not applicable\n",
    des_prefix="QnA_",
):
    """Ask the model one multiple-choice question about every article in a JSON file.

    Each record of ``src_filename`` receives "Question", "Answers" and
    "LLM_answer" keys. The annotated list is written next to the source file
    under the name ``des_prefix + <source file name>``.

    Note that the whole record, including the "Question" and "Answers" keys
    just set, is stringified and sent to ``QnA`` as the article text.

    Args:
        src_filename: Path of a JSON file holding a list of article dicts.
        client: Chat client handed through to ``QnA``.
        model: Model name handed through to ``QnA``.
        QnA_prompt: The question to ask.
        QnA_answer: The answer options offered to the model.
        des_prefix: Prefix put in front of the output file name.
    """
    # Load everything up front so the source file is closed before the slow LLM calls.
    with open(src_filename, "r", encoding="utf-8") as f:
        src_articles = json.load(f)

    des_articles = []
    for src_art in src_articles:
        src_art["Question"] = QnA_prompt
        src_art["Answers"] = QnA_answer
        src_art["LLM_answer"] = QnA(
            str(src_art),
            client,
            model=model,
            QnA_prompt=QnA_prompt,
            QnA_answer=QnA_answer,
        )
        des_articles.append(src_art)

    # Prefix only the file name, so "data/eng.json" maps to "data/QnA_eng.json"
    # and not to the non-existent directory "QnA_data/".
    src_dir, src_name = os.path.split(src_filename)
    des_filename = os.path.join(src_dir, des_prefix + src_name)

    # "w" truncates an existing file; UTF-8 is explicit because ensure_ascii=False
    # writes raw non-ASCII characters.
    with open(des_filename, "w", encoding="utf-8") as f:
        json.dump(des_articles, f, ensure_ascii=False, indent=4)

In [71]:
# Topic question put to the model for every article in eng.json.
question = "What type of event or topic is central to the article?:"
# The four options form one space-separated line.
answer = (
    "A) Political elections "
    "B) Real estate transactions "
    "C) Tourism and travel "
    "D) Economic and market trends"
)
get_LLM_QnA("eng.json", client, model="gpt-4o", QnA_prompt=question, QnA_answer=answer)

In [39]:
import re

def extract_answer(qna_text):
    """Return the first multiple-choice letter (A-D) found in an LLM reply.

    A choice is a standalone capital letter A, B, C or D immediately followed
    by ")", for example "B) Neutral". The search covers the whole text, not
    only its beginning.

    Args:
        qna_text: Reply text from the model; may be empty or None.

    Returns:
        The matched letter, or None when the text is empty or holds no choice.
    """
    if not qna_text:
        return None
    # \b stops the tail of a longer word from counting as a choice:
    # "(INDIA)" or "(NDA)" must not be read as option A.
    match = re.search(r"\b([A-D])\)", qna_text)
    return match.group(1) if match else None


def extract_answer_from_json(src_filename):
    """Print the answer letter extracted from every record of a QnA JSON file.

    For each record the "LLM_answer" text is passed to ``extract_answer``. One
    line is printed per record: the letter together with the record's "lang"
    value, or "No answer extracted".

    Args:
        src_filename: Path of a JSON file holding a list of record dicts.
    """
    # The files are written with ensure_ascii=False, so read them as UTF-8.
    with open(src_filename, "r", encoding="utf-8") as file:
        data = json.load(file)

    for record in data:
        # A missing or null "LLM_answer" is treated as an empty reply.
        answer = extract_answer(record.get("LLM_answer") or "")
        if answer:
            # A record without "lang" is reported as "unknown" instead of raising.
            print(f"Extracted answer: {answer}, lang: {record.get('lang', 'unknown')}")
        else:
            print("No answer extracted")

In [40]:
# Print the extracted option letter for each record stored in QnA_eng.json.
extract_answer_from_json("QnA_eng.json")

Extracted answer: A, lang: eng
Extracted answer: A, lang: eng
Extracted answer: A, lang: eng
Extracted answer: B, lang: eng
Extracted answer: A, lang: eng
Extracted answer: A, lang: eng
Extracted answer: A, lang: eng
Extracted answer: A, lang: eng
Extracted answer: A, lang: eng
Extracted answer: A, lang: eng
Extracted answer: A, lang: eng
Extracted answer: A, lang: eng
Extracted answer: A, lang: eng
Extracted answer: A, lang: eng
Extracted answer: A, lang: eng
Extracted answer: A, lang: eng
Extracted answer: A, lang: eng
Extracted answer: D, lang: eng
Extracted answer: A, lang: eng
Extracted answer: C, lang: eng


### Tone of the article test

In [41]:
import json

def get_LLM_QnA(
    src_filename,
    client,
    model="gpt-4o",
    QnA_prompt="Which of the following is correct tone that this article discuss about India elections:",
    QnA_answer="A) Positive\nB) Neutral\nC) Negative\nD) Not applicable\n",
    des_prefix="Interest_",
):
    """Ask the model one multiple-choice question about every article in a JSON file.

    Each record of ``src_filename`` receives "Question", "Answers" and
    "LLM_answer" keys. The annotated list is written next to the source file
    under the name ``des_prefix + <source file name>``.

    Note that the whole record, including the "Question" and "Answers" keys
    just set, is stringified and sent to ``QnA`` as the article text.

    Args:
        src_filename: Path of a JSON file holding a list of article dicts.
        client: Chat client handed through to ``QnA``.
        model: Model name handed through to ``QnA``.
        QnA_prompt: The question to ask.
        QnA_answer: The answer options offered to the model.
        des_prefix: Prefix put in front of the output file name.
    """
    # Load everything up front so the source file is closed before the slow LLM calls.
    with open(src_filename, "r", encoding="utf-8") as f:
        src_articles = json.load(f)

    des_articles = []
    for src_art in src_articles:
        src_art["Question"] = QnA_prompt
        src_art["Answers"] = QnA_answer
        src_art["LLM_answer"] = QnA(
            str(src_art),
            client,
            model=model,
            QnA_prompt=QnA_prompt,
            QnA_answer=QnA_answer,
        )
        des_articles.append(src_art)

    # Prefix only the file name, so "data/eng.json" maps to "data/Interest_eng.json"
    # and not to the non-existent directory "Interest_data/".
    src_dir, src_name = os.path.split(src_filename)
    des_filename = os.path.join(src_dir, des_prefix + src_name)

    # "w" truncates an existing file; UTF-8 is explicit because ensure_ascii=False
    # writes raw non-ASCII characters.
    with open(des_filename, "w", encoding="utf-8") as f:
        json.dump(des_articles, f, ensure_ascii=False, indent=4)

In [42]:
# Run get_LLM_QnA over eng.json with its default question and answer options.
get_LLM_QnA("eng.json", client)

In [43]:
# Print the extracted option letter for each record stored in Interest_eng.json.
extract_answer_from_json("Interest_eng.json")

Extracted answer: B, lang: eng
Extracted answer: C, lang: eng
Extracted answer: C, lang: eng
Extracted answer: D, lang: eng
Extracted answer: B, lang: eng
Extracted answer: A, lang: eng
Extracted answer: B, lang: eng
Extracted answer: C, lang: eng
Extracted answer: D, lang: eng
Extracted answer: A, lang: eng
Extracted answer: C, lang: eng
Extracted answer: C, lang: eng
Extracted answer: B, lang: eng
Extracted answer: B, lang: eng
Extracted answer: B, lang: eng
Extracted answer: B, lang: eng
Extracted answer: B, lang: eng
Extracted answer: D, lang: eng
Extracted answer: B, lang: eng
Extracted answer: A, lang: eng


In [44]:
def copy_LLM_QnA(
    src_filename,
    des_filename,
    N=10,
):
    """Append the question/answer fields of the first N source records to the destination file.

    Records are paired by position. For each pair, the source record's
    "Question", "Answers" and "LLM_answer" are collected into a dict together
    with "post_LLM_answer" (the extracted option letter, or "NaN" when none is
    found). That dict is appended to the destination record's "QnA" list, and
    the destination file is rewritten in place.

    Args:
        src_filename: JSON file whose records carry "Question", "Answers" and "LLM_answer".
        des_filename: JSON file whose records receive the "QnA" entries.
        N: Maximum number of leading records to copy; clamped to the shorter file.
    """
    with open(des_filename, "r", encoding="utf-8") as f:
        des_articles = json.load(f)
    with open(src_filename, "r", encoding="utf-8") as f:
        src_articles = json.load(f)

    # Never index past the end of either file, whatever N the caller passes.
    count = max(0, min(N, len(src_articles), len(des_articles)))
    for src_art, des_art in zip(src_articles[:count], des_articles[:count]):
        llm_answer = src_art["LLM_answer"]
        # \b keeps the tail of a longer word such as "(INDIA)" from matching as option A.
        match = re.search(r"\b([A-D])\)", llm_answer)
        qna = {
            "Question": src_art["Question"],
            "Answers": src_art["Answers"],
            "LLM_answer": llm_answer,
            "post_LLM_answer": match.group(1) if match else "NaN",
        }
        des_art.setdefault("QnA", []).append(qna)

    # Overwrite the destination with the merged records.
    with open(des_filename, "w", encoding="utf-8") as f:
        json.dump(des_articles, f, ensure_ascii=False, indent=4)

In [45]:
# Append the question/answer fields of the first 20 QnA_eng.json records to the
# "QnA" list of the matching records in Interest_eng.json.
copy_LLM_QnA(
    src_filename="QnA_eng.json",
    des_filename="Interest_eng.json",
    N=20,
)

### Hindi articles in India

In [46]:
# Fetch up to 20 Hindi-language articles (sorted by date) whose body mentions
# "india", "election" and "2024" and whose source is located in India, then
# save them to "<lang>.json".
lang = "hin"
query = {
    "$query": {
        "$and": [
            {"keyword": "india", "keywordLoc": "body"},
            {"keyword": "election", "keywordLoc": "body"},
            {"keyword": "2024", "keywordLoc": "body"},
            {"sourceLocationUri": "http://en.wikipedia.org/wiki/India"},
            {"lang": lang},
        ]
    },
    "$filter": {"forceMaxDataTimeWindow": "31"},
}
q = QueryArticlesIter.initWithComplexQuery(query)
filename = lang + ".json"

articles = list(
    q.execQuery(
        er,
        sortBy="date",
        maxItems=20,
        returnInfo=ReturnInfo(
            sourceInfo=SourceInfoFlags(location=True),
        ),
    )
)

# Mode "w" already truncates an existing file, so no delete is needed first.
# The article text is written raw (ensure_ascii=False), so the encoding is
# pinned to UTF-8 instead of depending on the platform default.
with open(filename, "w", encoding="utf-8") as f:
    json.dump(articles, f, ensure_ascii=False, indent=4)

In [47]:
# Keep only the body text of each fetched article, wrapped as {"article": <body>},
# and print the resulting list (nothing is written to disk in this cell).
import json

articles_body = [{"article": article["body"]} for article in articles]
print(articles_body)

[{'article': "Exit Poll 2024 Result, Cvoter, Today Chanakya, CNX India Tv, Axis MY India, Times Now Navbharat ETG, Watch Online: चुनाव आयोग (Election Commission) ने कहा कि अंतिम चरण का मतदान संपन्न होने के आधे घंटे बाद एग्जिट पोल्स दिखाए जा सकेंगे। हर बात की तरह इस बार भी टाइम्स नाउ ने ईटीजी के साथ मिलकर एग्जिट पोल किया है। टाइम्स नाउ के एग्जिट पोल सबसे सटीक रहे हैं।\n\nTamil Nadu Exit Poll, Exit Poll 2024 Result, C-voter, Today Chanakya, CNX India Tv, Axis MY India, Times Now Navbharat ETG, Watch Online: सातवें एवं अतिम चरण में 57 सीटों पर वोटिंग समाप्त होने के साथ ही लोकसभा चुनाव का मतदान समाप्त हो गया है। इसके साथ ही देश की सभी 543 सीटों के लिए वोटिंग पूरी हो गई है। इन सभी सीटों के चुनाव नतीजे चार जून को आएंगे लेकिन एजेंसियों के एग्जिट पोल्स शाम साढ़े छह बजे के बाद आने शुरू हो गए हैं। जो एजेंसियां अपने एग्जिट पोल दे रही हैं उनमें VMR, C Voter, Today's Chanakya, Axis My India प्रमुख रूप से शामिल हैं। 2019 के एग्जिट पोल्स की अगर बात करें तो इन सभी एजेंसियों ने भाजपा और एनडीए की जीत के

### Testing on one question

In [48]:
import json

def get_LLM_QnA(
    src_filename,
    client,
    model="gpt-4o",
    QnA_prompt="Which of the following is correct tone that this article discuss about India elections:",
    QnA_answer="A) Positive\nB) Neutral\nC) Negative\nD) Not applicable\n",
    des_prefix="QnA_",
):
    """Ask the model one multiple-choice question about every article in a JSON file.

    Each record of ``src_filename`` receives "Question", "Answers" and
    "LLM_answer" keys. The annotated list is written next to the source file
    under the name ``des_prefix + <source file name>``.

    Note that the whole record, including the "Question" and "Answers" keys
    just set, is stringified and sent to ``QnA`` as the article text.

    Args:
        src_filename: Path of a JSON file holding a list of article dicts.
        client: Chat client handed through to ``QnA``.
        model: Model name handed through to ``QnA``.
        QnA_prompt: The question to ask.
        QnA_answer: The answer options offered to the model.
        des_prefix: Prefix put in front of the output file name.
    """
    # Load everything up front so the source file is closed before the slow LLM calls.
    with open(src_filename, "r", encoding="utf-8") as f:
        src_articles = json.load(f)

    des_articles = []
    for src_art in src_articles:
        src_art["Question"] = QnA_prompt
        src_art["Answers"] = QnA_answer
        src_art["LLM_answer"] = QnA(
            str(src_art),
            client,
            model=model,
            QnA_prompt=QnA_prompt,
            QnA_answer=QnA_answer,
        )
        des_articles.append(src_art)

    # Prefix only the file name, so "data/hin.json" maps to "data/QnA_hin.json"
    # and not to the non-existent directory "QnA_data/".
    src_dir, src_name = os.path.split(src_filename)
    des_filename = os.path.join(src_dir, des_prefix + src_name)

    # "w" truncates an existing file; UTF-8 is explicit because ensure_ascii=False
    # writes raw non-ASCII characters.
    with open(des_filename, "w", encoding="utf-8") as f:
        json.dump(des_articles, f, ensure_ascii=False, indent=4)

In [49]:
# Topic question put to the model for every article in hin.json.
question = "What type of event or topic is central to the article?:"
# The four options form one space-separated line.
answer = (
    "A) Political elections "
    "B) Real estate transactions "
    "C) Tourism and travel "
    "D) Economic and market trends"
)
get_LLM_QnA("hin.json", client, model="gpt-4o", QnA_prompt=question, QnA_answer=answer)

In [50]:
# Print the extracted option letter for each record stored in QnA_hin.json.
extract_answer_from_json("QnA_hin.json")

Extracted answer: A, lang: hin
Extracted answer: A, lang: hin
Extracted answer: A, lang: hin
Extracted answer: A, lang: hin
Extracted answer: A, lang: hin
Extracted answer: A, lang: hin
Extracted answer: A, lang: hin
Extracted answer: D, lang: hin
Extracted answer: A, lang: hin
Extracted answer: A, lang: hin
Extracted answer: A, lang: hin
Extracted answer: A, lang: hin
Extracted answer: A, lang: hin
Extracted answer: A, lang: hin
Extracted answer: A, lang: hin
Extracted answer: D, lang: hin
Extracted answer: A, lang: hin
Extracted answer: A, lang: hin
Extracted answer: A, lang: hin
Extracted answer: A, lang: hin


### Tone of the article test

In [51]:
import json

def get_LLM_QnA(
    src_filename,
    client,
    model="gpt-4o",
    QnA_prompt="Which of the following is correct tone that this article discuss about India elections:",
    QnA_answer="A) Positive\nB) Neutral\nC) Negative\nD) Not applicable\n",
    des_prefix="Interest_",
):
    """Ask the model one multiple-choice question about every article in a JSON file.

    Each record of ``src_filename`` receives "Question", "Answers" and
    "LLM_answer" keys. The annotated list is written next to the source file
    under the name ``des_prefix + <source file name>``.

    Note that the whole record, including the "Question" and "Answers" keys
    just set, is stringified and sent to ``QnA`` as the article text.

    Args:
        src_filename: Path of a JSON file holding a list of article dicts.
        client: Chat client handed through to ``QnA``.
        model: Model name handed through to ``QnA``.
        QnA_prompt: The question to ask.
        QnA_answer: The answer options offered to the model.
        des_prefix: Prefix put in front of the output file name.
    """
    # Load everything up front so the source file is closed before the slow LLM calls.
    with open(src_filename, "r", encoding="utf-8") as f:
        src_articles = json.load(f)

    des_articles = []
    for src_art in src_articles:
        src_art["Question"] = QnA_prompt
        src_art["Answers"] = QnA_answer
        src_art["LLM_answer"] = QnA(
            str(src_art),
            client,
            model=model,
            QnA_prompt=QnA_prompt,
            QnA_answer=QnA_answer,
        )
        des_articles.append(src_art)

    # Prefix only the file name, so "data/hin.json" maps to "data/Interest_hin.json"
    # and not to the non-existent directory "Interest_data/".
    src_dir, src_name = os.path.split(src_filename)
    des_filename = os.path.join(src_dir, des_prefix + src_name)

    # "w" truncates an existing file; UTF-8 is explicit because ensure_ascii=False
    # writes raw non-ASCII characters.
    with open(des_filename, "w", encoding="utf-8") as f:
        json.dump(des_articles, f, ensure_ascii=False, indent=4)

In [52]:
# Run get_LLM_QnA over hin.json with its default question and answer options.
get_LLM_QnA("hin.json", client)

In [53]:
# Print the extracted option letter for each record stored in Interest_hin.json.
extract_answer_from_json("Interest_hin.json")

Extracted answer: B, lang: hin
Extracted answer: B, lang: hin
Extracted answer: B, lang: hin
Extracted answer: B, lang: hin
Extracted answer: B, lang: hin
Extracted answer: B, lang: hin
Extracted answer: B, lang: hin
Extracted answer: D, lang: hin
Extracted answer: B, lang: hin
Extracted answer: A, lang: hin
Extracted answer: A, lang: hin
Extracted answer: B, lang: hin
Extracted answer: C, lang: hin
Extracted answer: B, lang: hin
Extracted answer: B, lang: hin
Extracted answer: A, lang: hin
Extracted answer: B, lang: hin
Extracted answer: B, lang: hin
Extracted answer: B, lang: hin
Extracted answer: C, lang: hin


In [54]:
# Append the question/answer fields of the first 20 QnA_hin.json records to the
# "QnA" list of the matching records in Interest_hin.json.
copy_LLM_QnA(
    src_filename="QnA_hin.json",
    des_filename="Interest_hin.json",
    N=20,
)

### Testing on different questions

In [73]:
def _same_articles(src_articles, existing):
    """Return True when `existing` has one dict per source article, in order, with the same "body"."""
    if not isinstance(existing, list) or len(existing) != len(src_articles):
        return False
    return all(
        isinstance(old, dict) and old.get("body") == new.get("body")
        for old, new in zip(existing, src_articles)
    )


def get_LLM_QnA(
    src_filename,
    client,
    model="gpt-4o",
    QnA_prompt="Which of the following is the correct tone that this article discusses about India elections:",
    QnA_answer="A) Positive\nB) Neutral\nC) Negative\nD) Not applicable\n",
):
    """Ask one question about every article and accumulate the answers in the output file.

    For each article in ``src_filename`` the model is queried through ``QnA``.
    A dict with "Question", "Answers", "LLM_answer" and "post_LLM_answer" (the
    extracted option letter, or "NaN") is appended to that article's "QnA"
    list. The result is written next to the source file as
    "QnA_<source file name>".

    When that output file already exists and describes the same articles (same
    count and same "body" per position), the new entries are added to it, so
    repeated calls with different questions build up one list per article. An
    output file that does not match the source is replaced.

    Args:
        src_filename: Path of a JSON file holding a list of article dicts.
        client: Chat client handed through to ``QnA``.
        model: Model name handed through to ``QnA``.
        QnA_prompt: The question to ask.
        QnA_answer: The answer options offered to the model.
    """
    with open(src_filename, "r", encoding="utf-8") as f:
        src_articles = json.load(f)

    # Prefix only the file name so a source path with a directory keeps working.
    src_dir, src_name = os.path.split(src_filename)
    des_filename = os.path.join(src_dir, "QnA_" + src_name)

    # Reuse earlier results when they belong to the same articles; otherwise
    # start from a copy of the source records.
    des_articles = None
    if os.path.exists(des_filename):
        with open(des_filename, "r", encoding="utf-8") as f:
            existing = json.load(f)
        if _same_articles(src_articles, existing):
            des_articles = existing
    if des_articles is None:
        des_articles = [dict(src_art) for src_art in src_articles]

    for src_art, des_art in zip(src_articles, des_articles):
        # The untouched source record is sent, so answers to earlier questions
        # never leak into the prompt.
        llm_answer = QnA(
            str(src_art),
            client,
            model=model,
            QnA_prompt=QnA_prompt,
            QnA_answer=QnA_answer,
        )
        # \b keeps the tail of a longer word such as "(INDIA)" from matching as option A.
        match = re.search(r"\b([A-D])\)", llm_answer)
        des_art.setdefault("QnA", []).append(
            {
                "Question": QnA_prompt,
                "Answers": QnA_answer,
                "LLM_answer": llm_answer,
                "post_LLM_answer": match.group(1) if match else "NaN",
            }
        )

    with open(des_filename, "w", encoding="utf-8") as f:
        json.dump(des_articles, f, ensure_ascii=False, indent=4)

In [74]:
question = "What is the primary focus of the article?"
# One option per line, in the same format as get_LLM_QnA's default answers.
# The previous backslash continuations glued the options together
# ("...political analysisB) Real estate...").
answer = (
    "A) Election results and political analysis\n"
    "B) Real estate and property listings\n"
    "C) Tourism and travel destinations\n"
    "D) Economic and market updates\n"
)
get_LLM_QnA(
    "eng.json",
    client,
    model="gpt-4o",
    QnA_prompt=question,
    QnA_answer=answer,
)

In [75]:
question = "Which region or country is predominantly discussed in the article?"
# One option per line, in the same format as get_LLM_QnA's default answers.
# The previous backslash continuations glued the options together
# ("A) IndiaB) United States").
answer = (
    "A) India\n"
    "B) United States\n"
    "C) Europe\n"
    "D) Global\n"
)
get_LLM_QnA(
    "eng.json",
    client,
    model="gpt-4o",
    QnA_prompt=question,
    QnA_answer=answer,
)

### More categorical objective questions

In [77]:
# Fetch up to 20 English-language articles (sorted by date) published between
# dateStart and dateEnd whose body mentions "india", "election" and "2024" and
# whose source is located in India, then save them to "<lang><dateEnd>.json".
lang = "eng"
dateStart = "2024-05-28"
dateEnd = "2024-06-04"

# The query is built from the variables above, so changing them changes both
# the request and the output file name together.
query = {
    "$query": {
        "$and": [
            {"keyword": "india", "keywordLoc": "body"},
            {"keyword": "election", "keywordLoc": "body"},
            {"keyword": "2024", "keywordLoc": "body"},
            {"sourceLocationUri": "http://en.wikipedia.org/wiki/India"},
            {
                "dateStart": dateStart,
                "dateEnd": dateEnd,
                "lang": lang,
            },
        ]
    }
}
q = QueryArticlesIter.initWithComplexQuery(query)
filename = lang + dateEnd + ".json"

articles = list(
    q.execQuery(
        er,
        sortBy="date",
        maxItems=20,
        returnInfo=ReturnInfo(
            sourceInfo=SourceInfoFlags(location=True),
        ),
    )
)

# Mode "w" already truncates an existing file, so no delete is needed first.
# ensure_ascii=False writes raw non-ASCII characters, so the encoding is pinned
# to UTF-8 instead of depending on the platform default.
with open(filename, "w", encoding="utf-8") as f:
    json.dump(articles, f, ensure_ascii=False, indent=4)

In [78]:
# Keep only the body text of each fetched article, wrapped as {"article": <body>},
# and print the resulting list (nothing is written to disk in this cell).
import json

articles_body = [{"article": article["body"]} for article in articles]
print(articles_body)

[{'article': 'In the 2024 Lok Sabha elections, Uttar Pradesh became a political thriller with Rahul Gandhi and Akhilesh Yadav challenging the BJP, resulting in a reduced seat share for the ruling party. Priyanka Gandhi\'s intense campaign played a crucial role in Congress\'s victories in Amethi and Rae Bareli. Her direct and personal approach resonated with voters, particularly women. She also successfully brokered a seat-sharing deal between Congress and the Samajwadi Party. This election marked a significant turnaround for Congress in Uttar Pradesh.Uttar Pradesh has emerged as a gripping political thriller on the day of the Lok Sabha election results in 2024. Rahul Gandhi and Akhilesh Yadav, the \'UP ke ladke\', posed a formidable challenge to the Bharatiya Janata Party (BJP), with the INDIA bloc cutting into BJP\'s seat share. However, the true victor of Uttar Pradesh in this election is a leader whose efforts have finally borne fruit.\n\nThe twin power:\n\nThe Congress clinched vic

In [79]:
def QnA(
    art,
    client,
    model="gpt-4o",
    QnA_prompt="I have 20 news articles, and I need you to create multiple-choice objective questions with categorical answers to identify narrative differences among the provided articles. The questions should be generic to all articles and shouldn't have answer related to one specific article. The end goal is to match the articles which are talking abouut the same thing. Here are the articles:",
    QnA_answer = " "
):
    """Send a single user prompt to a chat model and return the reply text.

    The prompt is ``QnA_prompt`` followed by ``QnA_answer``, a newline and ``art``.

    Args:
        art: Article text (a string) appended at the end of the prompt.
        client: Object exposing ``chat.completions.create``.
        model: Model name forwarded to the completion call.
        QnA_prompt: Instruction or question placed at the start of the prompt.
        QnA_answer: Answer options placed directly after the question.

    Returns:
        The message content of the first returned choice, or the string
        "Sorry, error from GPT." when the request raises.
    """
    # Built outside the try block on purpose: a non-string argument still raises.
    user_message = {
        "role": "user",
        "content": "".join([QnA_prompt, QnA_answer, "\n", art]),
    }
    try:
        completion = client.chat.completions.create(
            model=model, messages=[user_message], temperature=0
        )
        return completion.choices[0].message.content
    except Exception as err:  # best effort: report the failure and return a sentinel
        print(f"Error: {err}")
        return "Sorry, error from GPT."

In [80]:
# Send the stringified list of article bodies with QnA's default prompt.
response = QnA(art=str(articles_body), client=client)

In [81]:
# Show the text returned by the QnA call above.
print(response)

Sure, here are some multiple-choice objective questions with categorical answers to help identify narrative differences among the provided articles:

### Question 1: What is the primary focus of the article?
A. Political alliances and election results
B. Sports and celebrity involvement in politics
C. Economic policies and development
D. Social issues and public welfare

### Question 2: Which region or state is prominently mentioned in the article?
A. Uttar Pradesh
B. West Bengal
C. Andhra Pradesh
D. Assam

### Question 3: What is the main event discussed in the article?
A. A political leader's campaign strategy
B. Election results and seat distribution
C. A sports event or celebrity's involvement
D. A social or economic policy announcement

### Question 4: Which political party is frequently mentioned in the article?
A. Bharatiya Janata Party (BJP)
B. Indian National Congress (INC)
C. Trinamool Congress (TMC)
D. Samajwadi Party (SP)

### Question 5: What is the tone of the article?
A.