In [2]:
from dotenv import load_dotenv
from openai import OpenAI
import os

# Read variables from a .env file into the process environment.
# NOTE(review): override=True presumably lets .env values replace variables that
# are already set in the environment — confirm against the python-dotenv docs.
load_dotenv(override=True)
# Build the API client that later cells pass to the QnA helpers; the key is read
# from the OPENAI_API_KEY environment variable (os.getenv yields None if unset).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [3]:
def QnA(
    art,
    client,
    model,
    QnA_prompt,
    QnA_answer,
):
    """Ask a chat model one multiple-choice question about a single article.

    The prompt is the question, the article title, the article body, a fixed
    instruction line and the answer options, each on its own line.

    Args:
        art: Mapping with string values under the keys "title" and "body".
        client: Object exposing ``chat.completions.create(model=..., messages=...,
            temperature=...)``.
        model: Model identifier forwarded unchanged to the client.
        QnA_prompt: Question text placed at the top of the prompt.
        QnA_answer: Answer options placed after the instruction line.

    Returns:
        The message content of the first choice in the response, or the fixed
        string "Sorry, error from GPT." if the request or reading the response
        raises an exception (the error is printed, not re-raised).

    Raises:
        KeyError: If ``art`` lacks "title" or "body" (raised before the request).
    """
    # Join every section with a newline so the first answer option starts on its
    # own line instead of being glued to the colon of the instruction line.
    prompt_complete = "\n".join(
        [
            QnA_prompt,
            art["title"],
            art["body"],
            "Please choose from the options below:",
            QnA_answer,
        ]
    )
    messages = [{"role": "user", "content": prompt_complete}]
    try:
        # temperature=0 is passed on every request.
        response = client.chat.completions.create(
            model=model, messages=messages, temperature=0
        )
        return response.choices[0].message.content
    except Exception as e:  # best effort: one failed article must not stop a batch
        print(f"Error: {e}")
        return "Sorry, error from GPT."

In [4]:
import json


def get_LLM_QnA(
    src_filename,
    client,
    model="gpt-4o",
    QnA_prompt="Which of the following is correct tone that this article discuss about Raisi's reputation:",
    QnA_answer="A) Positive\nB) Neutral\nC) Negative\nD) Not applicable\n",
):
    """Ask one question about every article in a JSON file and save the answers.

    Each article record is annotated with three keys: "Question" (the prompt),
    "Answers" (the answer options) and "LLM_answer" (whatever ``QnA`` returns for
    that article). The annotated list is written next to the source file under
    the same name prefixed with "QnA_", replacing any previous file of that name.

    Args:
        src_filename: Path of a JSON file holding a list of article records.
        client: Client object forwarded to ``QnA``.
        model: Model identifier forwarded to ``QnA``.
        QnA_prompt: Question text forwarded to ``QnA`` and stored per article.
        QnA_answer: Answer options forwarded to ``QnA`` and stored per article.

    Returns:
        None. The result is the file written to disk.
    """
    # Explicit UTF-8: the output is written with ensure_ascii=False, so
    # non-ASCII article text must not depend on the platform's default encoding.
    with open(src_filename, "r", encoding="utf-8") as f:
        src_articles = json.load(f)

    # The source file is already closed here, so it is not held open while the
    # (potentially slow) model requests run.
    for src_art in src_articles:
        src_art["Question"] = QnA_prompt
        src_art["Answers"] = QnA_answer
        src_art["LLM_answer"] = QnA(
            src_art,
            client,
            model=model,
            QnA_prompt=QnA_prompt,
            QnA_answer=QnA_answer,
        )

    # Prefix only the file name, not the whole path, so a source such as
    # "data/x.json" maps to "data/QnA_x.json". A bare file name gives the same
    # result as plain string concatenation.
    src_dir, src_name = os.path.split(src_filename)
    des_filename = os.path.join(src_dir, "QnA_" + src_name)

    # Mode "w" truncates an existing file, so no separate delete step is needed.
    with open(des_filename, "w", encoding="utf-8") as f:
        json.dump(src_articles, f, ensure_ascii=False, indent=4)

In [11]:
import re


def extract_answer(qna_text):
    """Return the first answer letter (A-D) that appears as ``X)`` in the text.

    The letter must start a word, so the closing parenthesis of an acronym such
    as "(via NASA)" is not mistaken for option "A)". The option may appear
    anywhere in the text, not only at the beginning.

    Args:
        qna_text: Model answer text. Non-string values (e.g. a JSON null) are
            treated as containing no answer.

    Returns:
        One of "A", "B", "C", "D", or None when no option marker is found.
    """
    if not isinstance(qna_text, str):
        return None
    # \b requires a word boundary before the letter: "B) No" and "is B)" match,
    # while the "A)" inside "NASA)" does not.
    match = re.search(r"\b([A-D])\)", qna_text)
    return match.group(1) if match else None


def extract_answer_from_json(src_filename):
    """Print the answer letter extracted from every record of a JSON results file.

    For each record, the value under "LLM_answer" is passed to ``extract_answer``.
    One line is printed per record: "Extracted answer: <letter>" when a letter is
    found, otherwise "No answer extracted".

    Args:
        src_filename: Path of a JSON file holding a list of records (dicts).

    Returns:
        None. The results are only printed.
    """
    # Explicit UTF-8 so files containing non-ASCII text load the same way on
    # every platform instead of depending on the locale's default encoding.
    with open(src_filename, "r", encoding="utf-8") as file:
        data = json.load(file)

    for record in data:
        # A missing key and an explicit null are both treated as an empty answer.
        LLM_answer = record.get("LLM_answer") or ""
        answer = extract_answer(LLM_answer)
        if answer:
            print(f"Extracted answer: {answer}")
        else:
            print("No answer extracted")

# Repeat the steps below for each language file to verify the question and the LLM's answers

In [41]:
# Question and answer options for this run. Both names are reused further down
# when the pair is appended to QnA.csv.
question = "Does the article talk about people express joy over the death of Raisi"
answers = "A) Yes B) No"
# Ask the question about every article in the source file; get_LLM_QnA writes
# the annotated records to a file named "QnA_" + the source file name.
# NOTE(review): only the "hin" file is processed here — the "eng" and "zho"
# result files read below were presumably produced by re-running this cell with
# a different file name; confirm before relying on Restart & Run All.
get_LLM_QnA(
    "2024-05-19_hin.json",
    client,
    model="gpt-4o",
    QnA_prompt=question,
    QnA_answer=answers,
)

In [40]:
# Print the option letter extracted from each record of the "eng" results file.
extract_answer_from_json("QnA_2024-05-19_eng.json")

Extracted answer: A
Extracted answer: A
Extracted answer: A
Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B


In [38]:
# Print the option letter extracted from each record of the "zho" results file.
extract_answer_from_json("QnA_2024-05-19_zho.json")

Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B


In [42]:
# Print the option letter extracted from each record of the "hin" results file.
extract_answer_from_json("QnA_2024-05-19_hin.json")

Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B
Extracted answer: B


# Save the question and its answer options

In [43]:
import csv

# The current question and its answer options, appended as one new CSV row.
new_row = [question, answers]

# Name of the CSV file that accumulates the questions.
filename = "QnA.csv"

# Append mode keeps the rows written by earlier runs.
# newline="" is what the csv module requires for files handed to csv.writer:
# without it, platforms that translate "\n" on write add an extra "\r", and
# newlines embedded in a quoted field are not preserved correctly.
# Explicit UTF-8 keeps the file independent of the platform's default encoding.
with open(filename, "a", newline="", encoding="utf-8") as csvfile:
    # creating a csv writer object
    csvwriter = csv.writer(csvfile)

    # writing the data row
    csvwriter.writerow(new_row)

In [15]:
def copy_LLM_QnA(
    src_filename,
    des_filename,
    N=10,
):
    """Append the stored question/answer of the first N records to another file.

    Record ``n`` of the source file is paired with record ``n`` of the
    destination file by position. For each pair, a dict with the keys
    "Question", "Answers", "LLM_answer" and "post_LLM_answer" is appended to the
    destination record's "QnA" list (created if absent), and the destination
    file is rewritten. Every call appends again, so repeating a call duplicates
    the entry.

    "post_LLM_answer" is the first option letter (A-D) found as ``X)`` at a word
    start in "LLM_answer", or the string "NaN" when none is found.

    Args:
        src_filename: JSON file with a list of records that contain "Question",
            "Answers" and "LLM_answer".
        des_filename: JSON file with a list of records to extend; overwritten.
        N: Maximum number of leading records to copy. If either file holds
            fewer records, only the available pairs are processed.

    Returns:
        None. The result is the rewritten destination file.
    """
    # Explicit UTF-8: the file is rewritten with ensure_ascii=False, so it must
    # not depend on the platform's default encoding.
    with open(des_filename, "r", encoding="utf-8") as f:
        des_articles = json.load(f)

    with open(src_filename, "r", encoding="utf-8") as f:
        src_articles = json.load(f)

    # Clamp to what both files actually contain instead of failing with an
    # IndexError; max(0, ...) keeps a negative N equivalent to "copy nothing".
    count = max(0, min(N, len(src_articles), len(des_articles)))
    if count < N:
        print(f"Only {count} of the requested {N} records are available.")

    for src_art, des_art in zip(src_articles[:count], des_articles[:count]):
        llm_answer = src_art["LLM_answer"]
        # \b keeps the ")" after an acronym such as "NASA)" from matching as "A)".
        # A non-string answer (e.g. JSON null) counts as "no answer".
        match = (
            re.search(r"\b([A-D])\)", llm_answer)
            if isinstance(llm_answer, str)
            else None
        )
        qna = {
            "Question": src_art["Question"],
            "Answers": src_art["Answers"],
            "LLM_answer": llm_answer,
            "post_LLM_answer": match.group(1) if match else "NaN",
        }
        des_art.setdefault("QnA", []).append(qna)

    # overwrite the file
    with open(des_filename, "w", encoding="utf-8") as f:
        json.dump(des_articles, f, ensure_ascii=False, indent=4)

In [44]:
# (answers file, destination file) pairs, processed in this order.
file_pairs = [
    ("QnA_2024-05-19_hin.json", "LLM_2024-05-19_hin.json"),
    ("QnA_2024-05-19_eng.json", "LLM_2024-05-19_eng.json"),
    ("QnA_2024-05-19_zho.json", "LLM_2024-05-19_zho.json"),
]

# Copy the stored question/answer of the first 10 records into each destination.
for qna_path, llm_path in file_pairs:
    copy_LLM_QnA(src_filename=qna_path, des_filename=llm_path, N=10)