In [1]:
from openai import OpenAI
import json
import os
from collections import defaultdict
import re

In [2]:
# Shared OpenAI client used by every request in this notebook; the API key is
# read from the OPENAI_API_KEY environment variable (None if it is not set).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Narratives of the countries of interest

In [10]:
def generate_narrative(model, speech_text, country_name):
    """Ask the chat model for a country's narrative on the Iraq War.

    Args:
        model: Name of the chat-completion model to call.
        speech_text: Text of the country's speeches, embedded at the top of
            the prompt.
        country_name: Country name or code, interpolated into the prompt.

    Returns:
        The raw response object returned by
        ``client.chat.completions.create``; callers read the text from
        ``response.choices[0].message.content``.

    Raises:
        Exception: Any error raised by the API call is printed and then
            re-raised unchanged. Previously the handler fell through to
            ``return response`` with ``response`` unbound, which replaced the
            real failure with an ``UnboundLocalError``.
    """
    # Every backslash below must be the LAST character on its line so that it
    # acts as a line continuation; a backslash followed by a space is an
    # invalid escape sequence and would leak a literal backslash into the prompt.
    narrative_prompt = f"""
    \"\"\" {speech_text}
    You are the best at reading UN speeches and giving the narrative of the country on a certain topic.\
    Above is the speech from {country_name} from the year 2002 to 2007. Give me the narrative of the speaker country \
    on the Iraq War. Make sure you cover the sentiment, the key points, and the reasoning of the speaker country. \
    Later I will be using this text to prepare MCQs to find narrative differences between different countries.\"\"\"
    """
    try:
        return client.chat.completions.create(
            model=model,
            messages=[
                {"role": "user", "content": narrative_prompt}
            ],
            temperature=0  # deterministic output so reruns are comparable
        )
    except Exception as e:
        print(f"Error: {e}")
        # Propagate the real failure instead of masking it.
        raise

In [11]:
def process_speeches(json_file, model="gpt-4o-mini"):
    """Generate one Iraq-War narrative per country from a JSON speech corpus.

    The file is expected to hold a list of objects read via the keys
    ``country``, ``year`` and ``text``. Speeches from 2002 through 2007
    (inclusive) are concatenated per country and sent to
    ``generate_narrative``.

    Args:
        json_file: Path to the JSON file containing the speeches.
        model: Chat model name forwarded to ``generate_narrative``.

    Returns:
        A list of ``{"country": ..., "Narrative": ...}`` dicts, one per
        country for which a narrative was obtained. Countries whose request
        fails are reported on stdout and skipped, so one failure does not
        discard the narratives already generated.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(json_file, 'r', encoding='utf-8') as file:
        speeches = json.load(file)

    # Group speeches by country
    grouped_speeches = defaultdict(str)

    for speech in speeches:
        country = speech.get('country')
        speech_text = speech.get('text')

        # Records without a country or text cannot contribute to a narrative
        # (a missing text would otherwise be appended as the literal "None").
        if not country or not speech_text:
            continue

        # A missing or non-numeric year would make the range comparison raise.
        try:
            year = int(speech.get('year'))
        except (TypeError, ValueError):
            continue

        if 2002 <= year <= 2007:
            grouped_speeches[country] += f" {speech_text}"

    narratives = []

    for country_name, combined_speech_text in grouped_speeches.items():
        try:
            narrative = generate_narrative(model, combined_speech_text, country_name)
            pure_narrative = narrative.choices[0].message.content
        except Exception as e:
            # Keep the results gathered so far; report which country was lost.
            print(f"Skipping {country_name}: {e}")
            continue

        narratives.append({
            "country": country_name,
            "Narrative": pure_narrative
        })
        print(f"Country: {country_name} Narrative: {pure_narrative}\n")

    return narratives


In [12]:
def _section_body(narrative, title):
    """Return the stripped text under the markdown heading `title`, or None if absent."""
    pattern = (
        r'^#{1,6}[ \t]*' + re.escape(title) + r'[ \t]*:?[ \t]*\n'
        r'(.*?)(?=^#{1,6}[ \t]|\Z)'
    )
    match = re.search(pattern, narrative, re.DOTALL | re.MULTILINE | re.IGNORECASE)
    return match.group(1).strip() if match else None


def transform_narrative(narrative):
    """Split a markdown narrative into sentiment, key points and reasoning.

    Section headings are matched at any markdown level (``#`` to ``######``),
    not only ``####``, because the model does not use a fixed heading depth.

    Args:
        narrative: Markdown text produced by the model. ``None`` or an empty
            string yields empty fields.

    Returns:
        A dict with keys ``"sentiment"`` (str), ``"key points"`` (list of
        ``"Title: text"`` strings) and ``"reasoning"`` (str). A missing
        section gives ``""`` or ``[]``.
    """
    if not narrative:
        return {"sentiment": "", "key points": [], "reasoning": ""}

    # Extract sentiment
    sentiment = _section_body(narrative, "Sentiment") or ""

    # Extract key points. Prefer the "Key Points" section so bold bullets
    # from other sections are not mixed in; fall back to the whole text when
    # that heading is absent.
    key_section = _section_body(narrative, "Key Points")
    search_text = key_section if key_section is not None else narrative
    # An item ends at the next numbered item (one to three digits, so "10."
    # also terminates item 9), at the next heading, or at the end of the text.
    key_points_match = re.findall(
        r'\*\*([^\*]+)\*\*\s*:\s*(.+?)(?=\n\d{1,3}\.|\n#{1,6}[ \t]|\Z)',
        search_text,
        re.DOTALL,
    )
    key_points = [f"{title.strip()}: {text.strip()}" for title, text in key_points_match]

    # Extract reasoning
    reasoning = _section_body(narrative, "Reasoning") or ""

    return {
        "sentiment": sentiment,
        "key points": key_points,
        "reasoning": reasoning
    }

In [13]:
# Build one narrative per country from the processed UN speech corpus.
# NOTE: process_speeches calls generate_narrative once per country, so this
# cell issues one chat-completion request for every country in the file.
json_file = '../data/processed/UN_data.json'
narratives = process_speeches(json_file)

Country: PAN Narrative: The narrative of Panama regarding the Iraq War, as articulated in the speeches from 2002 to 2007, reflects a complex interplay of support for international cooperation, a commitment to peace, and a call for reform within the United Nations. Here are the key points, sentiments, and reasoning expressed by the speaker country:

### Sentiment:
1. **Support for Multilateralism**: Panama emphasizes the importance of multilateral efforts and the role of the United Nations in addressing global conflicts, including the situation in Iraq.
2. **Condemnation of Violence**: The speeches express a strong condemnation of violence and terrorism, highlighting the need for peaceful resolutions to conflicts.
3. **Concern for Human Rights**: There is a consistent concern for human rights and the humanitarian impact of military actions, reflecting a desire for solutions that prioritize the well-being of civilians.

### Key Points:
1. **Call for Dialogue and Cooperation**: Panama adv

In [14]:
# Parse every raw narrative into its structured form
# (sentiment / key points / reasoning), keeping the country label.
transformed_data = []
for entry in narratives:
    country_name, narrative = entry['country'], entry['Narrative']
    transformed_narrative = transform_narrative(narrative)
    transformed_data.append(
        {"country": country_name, "Narrative": transformed_narrative}
    )

# Serialise the structured narratives and write them to disk as JSON.
output_file = '../data/processed/narrative_more_Q.json'
output_json = json.dumps(transformed_data, indent=2)
with open(output_file, 'w') as file:
    file.write(output_json)

# Generating questions from the narratives

In [21]:
def generate_questions(Narrative_json_text, model="gpt-4o-mini"):
    """Ask the chat model for MCQs that separate countries by narrative.

    Args:
        Narrative_json_text: JSON text describing the country narratives; it
            is placed at the top of the prompt.
        model: Name of the chat-completion model to call.

    Returns:
        The model's reply text. If the request fails, the error is printed
        and the sentinel string ``"Sorry, error from GPT."`` is returned
        instead of raising.
    """
    # Every backslash below must be the LAST character on its line so that it
    # acts as a line continuation; a backslash followed by a space is an
    # invalid escape sequence and would leak a literal backslash into the prompt.
    prompt = f""" {Narrative_json_text}
    Above are the narrative of the various countries about Iraq war, I want the design set of MCQ questions to differentiate\
    the country speeches by their narrative. Give the questions in the following format. The goal is to answer these \
    questions based on the speech and then cluster them based on the answers. The answers of the similar narrative countries \
    should have similar answers and the ones with different narrative should have different answer. Make sure there are only \
    2 options + 1 option being not relevant. Dont design questions such that the options are countries, make sure to design \
    such that the options are the narratives just like the example question.
    Example:  
    1. "Does the speech emphasize the importance of the United Nations' role in legitimizing military interventions?",
    "A": "Yes, the speech strongly supports the need for UN authorization before military action.",
    "B": "No, the speech justifies military action regardless of UN authorization.",
    "C": "The speech does not address this issue."
    """
    messages = [{"role": "user", "content": prompt}]
    try:
        response = client.chat.completions.create(
            model=model, messages=messages, temperature=0
        )

        content = response.choices[0].message.content
        return content
    except Exception as e:  # if the model fails to return a response
        print(f"Error: {e}")
        return "Sorry, error from GPT."

In [22]:
# Generate questions batch by batch: each request covers up to 50 narratives.
batch_size = 50
questions = []
for i in range(0, len(narratives), batch_size):
    batch = narratives[i:i + batch_size]
    narrative_json_text = json.dumps(batch, indent=2)
    batch_questions = generate_questions(narrative_json_text)
    questions.append(batch_questions)

# Save the raw model replies (one string per batch) as a JSON list.
output_questions_file = '../data/processed/extra_questions.json'
output_questions = json.dumps(questions, indent=2)
with open(output_questions_file, 'w') as file:
    file.write(output_questions)

In [26]:
def transform_questions(questions):
    """Parse raw model replies into a dict of numbered multiple-choice questions.

    Each reply is expected to contain entries shaped like::

        1. "Question text?",
        "A": "first option",
        "B": "second option",
        "C": "third option"

    Args:
        questions: Iterable of reply strings (one per batch).

    Returns:
        A dict mapping ``"Q1"``, ``"Q2"``, ... (numbered continuously across
        all replies) to ``{"question": str, "choices": {"A": ..., ...}}``.
        Replies containing no recognisable question contribute nothing.
    """
    transformed = {}
    question_counter = 1  # Initialize a counter for unique question numbering

    # Define regex patterns for extracting question and choices
    question_pattern = r'\d+\.\s*"(.*?)"'
    choice_pattern = r'"([A-C])":\s*"(.*?)"'

    for question_block in questions:
        # re.split keeps the captured question text in its result, giving
        # [preamble, q1, choices1, q2, choices2, ...]. Odd indices are the
        # questions and the following even indices are their choices. Pairing
        # parts[1:] with the question list shifts every question onto the
        # wrong choices.
        parts = re.split(question_pattern, question_block)

        # Loop over each question and its associated choices
        for question_text, choices_text in zip(parts[1::2], parts[2::2]):
            # Format the question key with a unique counter
            question_key = f"Q{question_counter}"
            transformed[question_key] = {
                "question": question_text.strip(),
                "choices": {
                    choice: choice_text.strip()
                    for choice, choice_text in re.findall(choice_pattern, choices_text)
                },
            }

            # Increment question counter for unique keys
            question_counter += 1

    return transformed

In [27]:
# Parse the raw model replies into structured questions and save them as JSON.
transformed_questions = transform_questions(questions)
output_questions_path = '../data/processed/narrative_extra_questions.json'
with open(output_questions_path, 'w') as f:
    json.dump(transformed_questions, f, indent=4)

# Report the path that was actually written so the log matches the file on disk.
print(f"Transformed questions saved to '{output_questions_path}'")

Transformed questions saved to 'transformed_questions.json'
