In [10]:
import json, os
from openai import OpenAI

# Input corpus, and the file the answered articles are written to later on.
source_filename = "summary_eng_hin.json"
des_filename = f"answers_{source_filename}"

# Articles to be answered (parsed JSON content of source_filename).
with open(source_filename, encoding='utf-8') as f:
    articles = json.load(f)

# Question set that accompanies every article in the prompt.
with open('narrative_questions.json', encoding='utf-8') as f:
    questions = json.load(f)

# API client. The key is taken from the OPENAI_API_KEY environment variable;
# passing it explicitly mirrors the client's default and could be omitted.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


In [11]:
def get_answers(article, questions, model):
    """Ask a chat model to answer the multiple-choice questions for one article.

    A single user message is built from the fixed instructions, the article
    body and ``questions``, and sent through the module-level ``client`` with
    ``temperature=0``.

    Args:
        article: Mapping read with ``.get('title')`` and ``.get('body')``.
        questions: Question set. It is interpolated into the prompt with an
            f-string, so its ``str()`` form is what the model receives.
        model: Model name forwarded to ``client.chat.completions.create``.

    Returns:
        The object returned by ``client.chat.completions.create``.

    Raises:
        Exception: Whatever the API call raises. The error is printed and then
            re-raised. Previously it was swallowed, and the function then
            failed on ``return response`` with an ``UnboundLocalError`` that
            hid the real cause.
    """
    article_title = article.get('title')
    article_content = article.get('body')
    print(f"Proceeding article {article_title}")

    # NOTE: the backslash continuations sit inside the string literal, so the
    # leading indentation of each continued line is part of the prompt text.
    # The literal is kept exactly as it was so the prompt does not change.
    pre = "You are an AI trained to understand articles and generate concise answers to multiple-choice questions based on the content. \
        Please read the following article carefully. After reading, answer ALL the questions listed below. \
            Your answers must be in capital letters and formatted as a single string, where each question number is followed by its corresponding answer letter. \
                Separate each question-answer pair with a semicolon. \
                    Example format: 1A;2B;3C;4D;... \n\n"

    prompt = pre + f"Article Content:\n{article_content}\n\n Questions: {questions}\n"
    try:
        return client.chat.completions.create(
            model=model,
            messages=[
                {"role": "user", "content": prompt}
            ],
            temperature=0
        )
    except Exception as e:
        # Keep the original log line, but let the caller see the real failure.
        print(f"Error: {e}")
        raise

In [12]:
def convert_string_to_dict(answer_string):
    """Parse a reply such as ``"1A;2B;3C;44D"`` into ``{'1': 'A', ..., '44': 'D'}``.

    Each ``;``-separated item is split into its last character (the answer
    letter) and everything before it (the question key).

    Tolerated deviations from the requested format:
      * ``None`` or an empty string yields ``{}``.
      * Whitespace around items (including a trailing newline) is ignored.
      * Empty items (``"1A;;2B"``, trailing ``";"``) are skipped instead of
        raising ``IndexError``.
      * A ``Q``/``q`` prefix directly in front of a question number
        (``"Q1C"``) is dropped, so the key is ``'1'`` rather than ``'Q1'``.

    Any other item is still split the same way, so a bare ``"C"`` keeps
    mapping to ``{'': 'C'}``.

    Args:
        answer_string: Raw model reply, or ``None``.

    Returns:
        dict mapping question key (str) to answer letter (str). If a key
        appears more than once, the last occurrence wins.
    """
    if not answer_string:
        return {}

    parsed = {}
    for raw_item in answer_string.split(';'):
        item = raw_item.strip()
        if not item:
            continue
        key, letter = item[:-1].strip(), item[-1]
        # Normalise "Q12" -> "12"; leave any other key untouched.
        if key[:1] in ('Q', 'q') and key[1:].isdigit():
            key = key[1:]
        parsed[key] = letter
    return parsed

# Ask the model about every article and collect the annotated articles.
results = []
failed_titles = []  # titles of articles for which no reply could be obtained
iteration = 0
max_iteration = len(articles)

for iteration, article in enumerate(articles, start=1):
    try:
        answers = get_answers(article, questions, model="gpt-4o")
        # Guard against a reply without text content so parsing gets a string.
        pure_answers = answers.choices[0].message.content or ""
    except Exception as e:
        # One failed request must not discard the answers gathered so far:
        # report it, skip this article and carry on with the next one.
        failed_titles.append(article.get('title'))
        print(f"Skipping article, no answer obtained: {e}")
        print(f"Processed {iteration} out of {max_iteration}")
        continue

    print(pure_answers)

    # convert string to dictionary 1A;2B;3C;44D to {'1': 'A', '2': 'B', '3': 'C', '44': 'D'}
    processed_answers = convert_string_to_dict(pure_answers)
    article['narrative_answers'] = processed_answers

    results.append(article)
    print(f"Processed {iteration} out of {max_iteration}")

if failed_titles:
    print(f"{len(failed_titles)} article(s) were skipped: {failed_titles}")

# Save results to a JSON file
with open(des_filename, 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=4)

Proceeding article European Commission Competition Report Spotlights Potential Areas of Focus for Next Commissioner
1C;2C;3C;4C;5C;6C;7C;8C;9C;10C
Processed 1 out of 200
Proceeding article Social media posts claim Ukrainian first lady Olena Zelenska bought a $4 million sports car from luxury dealer Bugatti in June while visiting Paris with President Volodymyr Zelenskyy. The automaker said that's not true.
1C;2C;3C;4C;5C;6C;7C;8C;9C;10C
Processed 2 out of 200
Proceeding article Zelenskyy: If Trump Knows How to Finish War, 'He Should Tell Us Today'
1C;2C;3C;4C;5C;6C;7C;8C;9C;10C
Processed 3 out of 200
Proceeding article Zelensky says he's ready to meet with Trump  --  as he decries weapons...
1A;2A;3A;4A;5C;6C;7C;8A;9B;10B
Processed 4 out of 200
Proceeding article Rocky Mountain Power proposes 30.5% electricity rate increase, points to fuel prices as culprit
Q1C;Q2C;Q3C;Q4C;Q5C;Q6C;Q7C;Q8C;Q9C;Q10C
Processed 5 out of 200
Proceeding article Aiming to lower gas prices, feds boost supply in

In [15]:
# Define the input and output file paths
input_file_path = 'answers_summary_eng_hin.json'
output_file_path = 'filtered_output.json'

# Load JSON data from the input file.
# The answers file is written as UTF-8 with ensure_ascii=False, so it must be
# read back as UTF-8 instead of relying on the platform's default encoding.
with open(input_file_path, 'r', encoding='utf-8') as input_file:
    data = json.load(input_file)

# Answer sets that are dropped: every question answered "C", in each of the
# key formats present in the data ("1".."10", "Q1".."Q10"), plus the
# single-entry {"": "C"} form.
question_numbers = range(1, 11)
all_c_answer_sets = [
    {str(number): "C" for number in question_numbers},
    {"": "C"},
    {f"Q{number}": "C" for number in question_numbers},
]

# Filter articles based on narrative_answers criteria.
# `not in` compares by equality, so an article is kept unless its answers are
# exactly equal to one of the sets above (a missing key gives None -> kept).
filtered_data = [
    article for article in data
    if article.get("narrative_answers") not in all_c_answer_sets
]

# Dump the filtered data to the output file.
# ensure_ascii stays at its default, so the output contains ASCII only.
with open(output_file_path, 'w', encoding='utf-8') as output_file:
    json.dump(filtered_data, output_file, indent=4)

print(f"Filtered data has been written to {output_file_path}")



Filtered data has been written to filtered_output.json
