In [1]:
#!pip install PyMuPDF requests

In [2]:
import sys
from dotenv import load_dotenv, find_dotenv

sys.path.append("../aisuite")

In [3]:
import aisuite as ai
import json

In [4]:
import os


def configure_environment(additional_env_vars=None):
    """
    Load environment variables from .env file and apply any additional variables.

    The .env file is loaded first; the entries of ``additional_env_vars`` are
    written to ``os.environ`` afterwards, so they overwrite any value that is
    already set for the same key.

    :param additional_env_vars: A dictionary of additional environment variables to apply.
        Values that are not strings are converted with ``str()``; entries whose
        value is ``None`` are skipped. ``None`` or an empty dictionary applies nothing.
    """
    # Load from .env file if available
    load_dotenv(find_dotenv())

    # Apply additional environment variables
    for key, value in (additional_env_vars or {}).items():
        if value is None:
            # os.environ cannot store None; treat it as "not provided".
            continue
        # os.environ only accepts str values, so coerce ints, bools, paths, etc.
        os.environ[key] = str(value)


# Extra API keys / credentials to inject on top of the .env file (empty here)
additional_keys = dict()

# Load the .env file, then apply the extra variables defined above
configure_environment(additional_keys)

In [5]:
# Load the speeches from JSON. The file contains Chinese text, so read it
# explicitly as UTF-8 rather than relying on the platform's default encoding.
with open("../data/speeches.json", "r", encoding="utf-8") as f:
    speeches = json.load(f)
# Preview only the first record instead of dumping the whole corpus into the output
speeches[:1]

[{'country': 'Nigeria',
  '国家': '尼日利亚联邦共和国',
  '发言人': '穆罕马杜·布哈里先生',
  'statement_from_president': '代理主席（以法语发言）：大会现在听取尼日利亚联邦共和国总统的讲话。',
  'speech': '尼日利亚联邦共和国总统穆罕马杜·布哈里先生在陪同下进入大会堂。代理主席（以法语发言）：我荣幸地代表大会欢迎尼日利亚联邦共和国总统穆罕马杜· 布哈里先生阁下来到联合国，并请他向大会讲话。布哈里总统（以英语发言）：我代表尼日利亚政府和人民，祝贺主席当之无愧地当选并领导大会第七十七届会议，并向他保证，尼日利亚代表团将在他任职期间给予全力支持与合作。我赞扬他的前任阿卜杜拉· 沙希德先生阁下，在他的领导下，大会在这个充满挑战的时期取得了许多显著成就。我还要祝贺秘书长安东尼奥· 古特雷斯先生为促进和平、安全与发展所作的不懈努力，这与他的崇高职务非常相称。我原本在1984年有机会第一次在大会发言，当时我是尼日利亚联邦共和国的军事国家元首。大约31年后的2016年，我非常荣幸地作为我国民选总统亲自在大会讲话（见A/71/PV.4A）。在我第二个也是最后一个四年任期即将结束之际，我注意到尼日利亚、非洲和世界发生了多少变化，而一些挑战如何继续存在。我们现在受到一些持久的全球挑战和新的全球挑战的更严峻考验，其中最主要的是非国家行为体日益助长的冲突、小武器和轻武器的扩散、恐怖主义、暴力极端主义、恶意使用技术、气候变化、非正常移民和提高生活水平机会的差距。尽管国际环境充满挑战，但联合国已经证明，若汇集各成员的意志来开展积极的集体行动，它就能很强大。这个非凡的机构的指导原则是促进和平与安全、发展和人权。最近发生了一系列挑战这些原则的事件，最新一例是乌克兰冲突，它已造成了整整一代人从未遇到过的紧张局势。这种冲突将对我们所有人产生不利后果，阻碍我们共同努力解决其他地方的冲突，特别是在非洲、中东和亚洲。事实上，当前乌克兰的战争导致更难解决大会每年审议联合国A/77/PV.6大\u2003会第七十七届会议第六次全体会议2022年9月21日星期三上午9时举行纽约正式纪录主席:克勒希先生.................................................................

In [6]:
# Load the multiple-choice questions from a JSON file. Read explicitly as UTF-8
# so the result does not depend on the platform's default encoding (the answers
# file written later in this notebook is UTF-8 as well).
with open("../data/processed/narrative_questions.json", "r", encoding="utf-8") as f:
    mcqs = json.load(f)

In [7]:
# Create the aisuite client; the answer-generation loop sends its
# chat-completion requests through it.
# NOTE(review): presumably picks up provider credentials from the environment
# variables loaded by configure_environment — confirm.
client = ai.Client()

In [None]:
def _parse_answers(raw_reply):
    """
    Decode the model's reply into Python data.

    The reply is first parsed as-is (after trimming surrounding whitespace). If
    that fails, e.g. because the JSON is wrapped in a Markdown code fence or
    surrounded by prose, the outermost ``{...}`` span is parsed instead.

    :param raw_reply: The reply text returned by the model (may be None or empty).
    :return: The decoded JSON value.
    :raises json.JSONDecodeError: If no valid JSON can be recovered from the reply.
    """
    text = (raw_reply or "").strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        first_brace, last_brace = text.find("{"), text.rfind("}")
        if first_brace == -1 or last_brace < first_brace:
            # Nothing that looks like a JSON object: surface the original error.
            raise
        return json.loads(text[first_brace : last_brace + 1])


openai = "openai:gpt-4.1-mini"
# The question list is identical for every speech, so serialize it only once.
questions_json = json.dumps(mcqs, indent=2)

# NOTE(review): the prompt asks for a "JSON array" while its example is a JSON
# object; the wording is left untouched so results stay comparable with earlier runs.
for speech in speeches:
    # Extract the text content from the speech
    text_data = speech["speech"]
    # Add the speech text to the messages
    messages = [
        {
            "role": "system",
            "content": "You are expert analyzing UN General Assembly speeches and answer MCQs by country.",
        },
        {
            "role": "user",
            "content": f"""
I will provide speech delivered by representatives from various countries at the United Nations General Assembly.

Your task is to:
1. Read the entire provided speech document. 
2. Answer the following multiple-choice questions based solely on the text content identified as belonging to that specific country's speech section within the document.
3. Format your response as a valid JSON array like this:
{{"Q1": "A","Q2": "A","Q3": "C","Q4": "A","Q5": "A","Q6": "A"}}

Only return the JSON — no prose or commentary.

--- Questions ---
{questions_json}

--- Speech ---
{text_data}

""",
        },
    ]
    response = client.chat.completions.create(model=openai, messages=messages)
    json_response = response.choices[0].message.content
    print(json_response)

    # Parse the JSON response string into a Python dictionary
    try:
        # Tolerates markdown fences, surrounding whitespace and stray prose
        speech["answers"] = _parse_answers(json_response)
    except json.JSONDecodeError:
        print(f"Failed to parse JSON: {json_response}")
        # Fallback in case of parsing error
        speech["answers"] = {"error": "Failed to parse response"}

# Persist every speech together with its answers; ensure_ascii=False keeps the
# non-ASCII text readable in the output file.
with open("../data/processed/answers.json", "w", encoding="utf-8") as f:
    json.dump(speeches, f, ensure_ascii=False, indent=4)
# load the speeches with answers

{"Q1": "C","Q2": "C","Q3": "C","Q4": "A","Q5": "A","Q6": "A"}
{"Q1": "A","Q2": "C","Q3": "A","Q4": "A","Q5": "C","Q6": "A"}
{"Q1": "C", "Q2": "C", "Q3": "C", "Q4": "C", "Q5": "C", "Q6": "A"}
{"Q1": "C", "Q2": "C", "Q3": "C", "Q4": "C", "Q5": "A", "Q6": "A"}
{"Q1": "C","Q2": "C","Q3": "C","Q4": "C","Q5": "C","Q6": "A"}
{"Q1": "C","Q2": "C","Q3": "C","Q4": "C","Q5": "C","Q6": "A"}
{"Q1":"C","Q2":"C","Q3":"C","Q4":"C","Q5":"C","Q6":"C"}
{"Q1": "A","Q2": "C","Q3": "C","Q4": "C","Q5": "C","Q6": "C"}
{"Q1": "C","Q2": "C","Q3": "C","Q4": "C","Q5": "C","Q6": "C"}
{"Q1": "C","Q2": "C","Q3": "C","Q4": "C","Q5": "C","Q6": "A"}
{"Q1": "C","Q2": "C","Q3": "C","Q4": "C","Q5": "A","Q6": "A"}
{"Q1": "C","Q2": "C","Q3": "C","Q4": "C","Q5": "A","Q6": "A"}
{"Q1": "C","Q2": "C","Q3": "C","Q4": "C","Q5": "C","Q6": "C"}
{"Q1": "C","Q2": "C","Q3": "C","Q4": "C","Q5": "C","Q6": "C"}
{"Q1": "C","Q2": "C","Q3": "C","Q4": "C","Q5": "C","Q6": "C"}
{"Q1": "C","Q2": "C","Q3": "C","Q4": "C","Q5": "C","Q6": "A"}
{"Q1