In [7]:
import json, os
from openai import OpenAI

# Input: the article file that will be annotated with answers.
source_data_path = "data/"
filename = "summary_俄乌战争_zho.json"
source_filename = f"{source_data_path}{filename}"

# Output: same file name, prefixed with "answers_".
des_data_path = "data/"
des_filename = f"{des_data_path}answers_{filename}"

# Load articles
with open(source_filename, mode='r', encoding='utf-8') as articles_file:
    articles = json.load(articles_file)

# Load questions (read from the destination data directory)
with open(f"{des_data_path}formated_output_questions.json", mode='r', encoding='utf-8') as questions_file:
    questions = json.load(questions_file)

# Set API key: taken from the OPENAI_API_KEY environment variable.
# Passing it explicitly is optional, since the SDK reads the same variable by default.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


In [8]:
def get_answers(article, questions, model):
    """Ask a chat model to answer every multiple-choice question about one article.

    Args:
        article: Mapping read with ``.get('title')`` and ``.get('body')``;
            a missing key yields ``None`` in the log line / prompt.
        questions: Questions object; it is interpolated into the prompt with
            plain string formatting.
        model: Model name forwarded to ``client.chat.completions.create``.

    Returns:
        The response object returned by ``client.chat.completions.create``.

    Raises:
        Exception: Any error raised by the API call is logged and re-raised.
    """
    article_title = article.get('title')
    article_content = article.get('body')
    print(f"Proceeding article {article_title}")

    # NOTE: the backslash continuations keep each following line's leading
    # indentation inside the string. The literal is deliberately left untouched
    # so the prompt sent to the model stays exactly the same.
    pre = "You are an AI trained to understand articles and generate concise answers to multiple-choice questions based on the content. \
        Please read the following article carefully. After reading, answer ALL the questions listed below. \
            Your answers must be in capital letters and formatted as a single string, where each question number is followed by its corresponding answer letter. \
                Separate each question-answer pair with a semicolon. \
                    Example format: 1A;2B;3C;4D;... \n\n"

    prompt = pre + f"Article Content:\n{article_content}\n\n Questions: {questions}\n"
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "user", "content": prompt}
            ],
            temperature=0
        )
    except Exception as e:
        # Log and propagate. Previously execution fell through to
        # `return response` with `response` unbound, so the real failure was
        # hidden behind an UnboundLocalError.
        print(f"Error: {e}")
        raise
    return response

In [9]:
def convert_string_to_dict(answer_string):
    """Parse a reply such as ``"1A;2B;44D"`` into ``{'1': 'A', '2': 'B', '44': 'D'}``.

    Each ``;``-separated item is split into its last character (the answer
    letter) and everything before it (the question number).

    Args:
        answer_string: Semicolon-separated question/answer pairs. ``None`` and
            the empty string are both treated as "no answers".

    Returns:
        dict mapping question-number strings to single-character answers.
        Surrounding whitespace/newlines and empty items (e.g. from a trailing
        or doubled ``;``) are ignored.
    """
    if not answer_string:
        return {}

    parsed = {}
    for item in answer_string.split(';'):
        # Model output often carries a trailing newline or a space after ';'.
        item = item.strip()
        if not item:
            # Empty segment: trailing ';', ';;' or whitespace only.
            continue
        parsed[item[:-1].strip()] = item[-1]
    return parsed

# Query the model once per article and collect the annotated articles.
results = []
iteration = 0
max_iteration = len(articles)

for article in articles:
    answers = get_answers(article, questions, model="gpt-4-turbo")

    # Raw text of the first choice, e.g. "1A;2B;3C;44D".
    pure_answers = answers.choices[0].message.content
    print(pure_answers)

    # "1A;2B;3C;44D" -> {'1': 'A', '2': 'B', '3': 'C', '44': 'D'}
    processed_answers = convert_string_to_dict(pure_answers)
    article['answers'] = processed_answers  # note: annotates the article dict in place
    results.append(article)

    # The progress counter is simply the number of articles stored so far.
    iteration = len(results)
    print(f"Processed {iteration} out of {max_iteration}")


# Save results to a JSON file
with open(des_filename, mode='w', encoding='utf-8') as out_file:
    json.dump(results, out_file, ensure_ascii=False, indent=4)

Proceeding article 对抗民主西方 中俄领导人上合组织峰会再度会面
1B;2B;3B;4B;5E;6A;7A;8B;9E;10E;11E;12B;13B;14B;15E;16B;17B;18E;19B;20A;21B;22E;23C;24A;25B;26A;27D;28E;29B;30E;31B;32E;33C;34C;35E;36C;37B;38B;39C;40E
Processed 1 out of 100
Proceeding article 新混战即将打响！朝鲜出兵乌克兰？泽连斯基紧急电联中国寻求帮助_手机网易网
1B;2B;3A;4B;5E;6A;7A;8A;9E;10A;11B;12B;13C;14A;15E;16B;17A;18E;19E;20B;21B;22D;23B;24B;25B;26A;27C;28B;29B;30D;31A;32A;33A;34C;35A;36C;37A;38E;39A;40A
Processed 2 out of 100
Proceeding article 芬兰总统：习近平一个电话就能结束俄乌战争，中国拒绝了该建议
1B;2B;3A;4B;5E;6A;7A;8B;9A;10B;11E;12B;13C;14B;15B;16B;17A;18E;19E;20A;21B;22E;23B;24A;25B;26A;27C;28C;29B;30E;31A;32B;33C;34C;35A;36C;37B;38A;39A;40A
Processed 3 out of 100
Proceeding article 泽连斯基回应特朗普的和谈方案，称乌克兰必须知道细节，早做准备_手机网易网
1B;2B;3A;4B;5E;6A;7A;8B;9E;10E;11B;12B;13D;14E;15E;16B;17E;18E;19E;20E;21E;22E;23C;24A;25B;26A;27B;28B;29B;30E;31A;32A;33C;34C;35E;36C;37E;38A;39E;40A
Processed 4 out of 100
Proceeding article 俄军打出一个下马威，一次摧毁5架苏-27！F-16的神话会破灭吗？_手机网易网
1A;2A;3A;4A;5E;6E;7A;8A;9A;10B;11D;12A;13C