# CS 263 HW Notebook

This notebook comprises of python scripts to extract chatgpt output and instructions for checking the format of your files for the submission of the homework.

## Extract ChatGPT Output

In [121]:

def argument_extractor(chatgpt_output, argument_roles=None):
    """
    Extract arguments corresponding to the argument roles from the chatgpt_output.
    If argument roles are provided, this will extract only those arguments.
    Else it will extract all possible arguments from the output.
    """
    print("Inside argument extractor")
    print("chat outputs: " ,chatgpt_output)
    print("Argument roles: ", argument_roles)
    arguments = {}
    print(argument_roles)
    if argument_roles is not None:
        for role in argument_roles:
            pattern = rf"\b{role}:\s*([^\n]+)"
            match = re.search(pattern, chatgpt_output, re.IGNORECASE)
            if match:
                arguments[role] = match.group(1).strip()
            else:
                arguments[role] = None
    return arguments


## File-check for formatting

In [150]:
import json

EVENT_NAMES = ["infect", "spread", "symptom", "cure", "prevent", "control", "death"]
ONTOLOGY_FIELD_NAMES = {"event_name": str, "argument_role": str, "role_description": str, "example_sentence": str}
DATA_ANNOTATION_FIELD_NAMES = {"input_text": str, "event_name": str, "event_trigger": str, "arguments": dict}
PREDICTION_FIELD_NAMES = {"input_text": str, "prompt": str, "output_text": str, "extracted_arguments": dict}
BREAKGPT_FIELD_NAMES = {"input_text": str, "event_name": str, "event_trigger": str, "prompt": str, "output_text": str, "extracted_arguments": dict, "expected_arguments": dict}

def ontology_check(filename):
  data = None
  with open(filename, 'r') as f:
    try:
      data = json.load(f)
    except:
      print ("ERROR: File is not a json file. Use json.dump to create your file")
      return

  for i, dt in enumerate(data):
    for field_name in dt.keys():
      if field_name not in ONTOLOGY_FIELD_NAMES:
        print ("ERROR: Line %d: field name %s is incorrect. It should be within %s" % (i+1, field_name, str(ONTOLOGY_FIELD_NAMES.keys())))
        return

    if dt["event_name"] not in EVENT_NAMES:
      print ("ERROR: line %d has unknown event name %s. Please check." % (i+1, dt["event_name"]))
      return

  print ("PASSED: The format of the file %s looks correct!" % filename)
  return

def json_check(filename, required_field_names, is_logs=0):
  data = None
  with open(filename, 'r') as f:
    try:
      data = json.load(f)
    except:
      print ("ERROR: File is not a json file. Use json.dump to create your file")
      return

  for i, dt in enumerate(data):
    for field_name in dt.keys():
      if field_name not in required_field_names.keys():
        print ("ERROR: Line %d: field name %s is incorrect. It should be within %s" % (i+1, field_name, str(required_field_names.keys())))
        return

    for var, typ in required_field_names.items():
      if not isinstance(dt[var], typ):
        print ("ERROR: Line %d: dt['%s'] is not a %s" % (i+1, var, str(typ)))
        return

    if "arguments" in required_field_names and "input_text" in required_field_names:
      for role, arg in dt["arguments"].items():
        if isinstance(arg, str) and arg not in dt["input_text"]:
          print ("ERROR: Line %d: argument '%s' not in the input text. Make sure your argument is in the input text" % (i+1, arg))
          return
        elif isinstance(arg, list):
          for a in arg:
            assert isinstance(a, str)
            if a not in dt["input_text"]:
              print ("ERROR: Line %d: argument '%s' not in the input text. Make sure your argument is in the input text" % (i+1, a))
              return

    if "expected_arguments" in required_field_names and "input_text" in required_field_names:
      for role, arg in dt["expected_arguments"].items():
        if isinstance(arg, str) and arg not in dt["input_text"]:
          print ("ERROR: Line %d: expected argument '%s' not in the input text. Make sure your argument is in the input text" % (i+1, arg))
          return
        elif isinstance(arg, list):
          for a in arg:
            assert isinstance(a, str)
            if a not in dt["input_text"]:
              print ("ERROR: Line %d: argument '%s' not in the input text. Make sure your argument is in the input text" % (i+1, a))
              return

    if "extracted_arguments" in required_field_names and "output_text" in required_field_names and not is_logs:
      if argument_extractor(dt["output_text"]) != dt["extracted_arguments"]:
        print ("ERROR: Line %d: extracted arguments is inconsistent with chatgpt output based on script" % (i+1))
        print("Expected arguments: ", argument_extractor(dt["output_text"]))
        print("Extracted arguments: ", dt["extracted_arguments"])
        return

  print ("PASSED: The format of the file %s looks correct!" % filename)
  return

def check_all_file_format():
  ontology_check("ontology.json")
  json_check("in_context-annotated.json", DATA_ANNOTATION_FIELD_NAMES)
  json_check("eval_data-annotated.json", DATA_ANNOTATION_FIELD_NAMES)
  json_check("logs.json", PREDICTION_FIELD_NAMES, is_logs=1)
  json_check("break-gpt.json", BREAKGPT_FIELD_NAMES)
  json_check("pred.json", PREDICTION_FIELD_NAMES)


In [151]:
check_all_file_format()

PASSED: The format of the file ontology.json looks correct!
PASSED: The format of the file in_context-annotated.json looks correct!
PASSED: The format of the file eval_data-annotated.json looks correct!
PASSED: The format of the file logs.json looks correct!


TypeError: argument_extractor() missing 1 required positional argument: 'argument_roles'

In [23]:
import os
import openai
from openai import OpenAI

api_key = "insert-key-here"
openai.api_key = api_key
client = OpenAI(api_key = api_key)

def sentiment_analysis(prompt):
    response = client.completions.create(
        model="gpt-3.5-turbo-instruct",  # Use an appropriate model for sentiment analysis
        prompt=f"Analyze the sentiment of the following text and respond with only one word: positive, neutral, or negative.\n\nText: {prompt}\nSentiment:",
        temperature=0,  # Set temperature to 0 for deterministic output
        max_tokens=1,   # Only need one token for the sentiment word
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response.choices[0].text.strip()

# Example usage:
text = "I love this product, it works great!"
result = sentiment_analysis(text)
print(result)  # Should print "positive"

positive


In [60]:

def load_ontology(file_name):
    """
    Load the ontology from a JSON file.
    """
    with open(file_name, 'r') as f:
        ontology = json.load(f)
    return ontology

def merge_example_sentences(event_name, argument_roles, example_sentences, ontology):
    """
    Merge example sentences from the ontology with the input example sentences.
    """
    for role in argument_roles:
        for entry in ontology:
            if entry["event_name"] == event_name and entry["argument_role"] == role:
                ontology_examples = entry.get("example_sentence", [])
                if role in example_sentences:
                    example_sentences[role].extend(ontology_examples)
                else:
                    example_sentences[role] = ontology_examples
    return example_sentences

def generate_prompt(input_text, event_name, argument_roles, example_sentences, ontology):
    """
    Generate a prompt for ChatGPT to extract arguments from the input text, including the entire ontology.
    """
    prompt = (
        f"You are an advanced information extraction system. Your task is to identify and extract specific arguments "
        f"related to a specified event in a given sentence. The event, trigger word, and argument roles are provided below, along with examples "
        f"to guide you in accurately extracting the relevant information.\n\n"
        f"Event: '{event_name}'\n"
        f"Sentence: '{input_text}'\n\n"
        f"Please extract the event trigger and the following arguments from the sentence:\n"
    )

    for role in argument_roles:
        examples = example_sentences.get(role, [])
        example_list = '; '.join(examples)
        prompt += f"- {role} (Examples: {example_list})\n"

    prompt += (
        "\nYour response should list the event trigger followed by each argument role and the exact extracted text from the sentence, "
        "maintaining the original wording and case. Format your response as shown below:\n"
        "\nEvent Trigger: <event trigger>\n"
        "Role1: <extracted text>\n"
        "Role2: <extracted text>\n"
        "...\n"
        "Here is the ontology for reference:\n"
    )

    # Adding the entire ontology as context
    ontology_context = json.dumps(ontology, indent=2)
    prompt += f"\nOntology:\n{ontology_context}\n"

    return prompt

def call_chatgpt(prompt):
    """
    Call ChatGPT API with the generated prompt and return the response.
    """
    response = openai.completions.create(
        model="gpt-3.5-turbo-instruct",
        prompt=prompt,
        temperature=0,
        max_tokens=400,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response.choices[0].text.strip()

def argument_extractor(chatgpt_output, argument_roles=None):
    """
    Extract arguments corresponding to the argument roles from the chatgpt_output.
    If argument roles are provided, this will extract only those arguments.
    Else it will extract all possible arguments from the output.
    """
    arguments = {}
    event_trigger_pattern = r"\bEvent Trigger:\s*([^\n]+)"
    event_trigger_match = re.search(event_trigger_pattern, chatgpt_output, re.IGNORECASE)
    if event_trigger_match:
        arguments["Event Trigger"] = event_trigger_match.group(1).strip()
    
    for role in argument_roles:
        pattern = rf"\b{role}:\s*([^\n]+)"
        match = re.search(pattern, chatgpt_output, re.IGNORECASE)
        if match:
            arguments[role] = match.group(1).strip()
        else:
            arguments[role] = None
    return arguments

def log_results(logs_file, input_text, event_name, event_trigger, extracted_arguments):
    """
    Log the input text, event name, event trigger, and extracted arguments into logs.json.
    """
    log_entry = {
        "input_text": input_text,
        "event_name": event_name,
        "event_trigger": event_trigger,
        "arguments": extracted_arguments
    }
    with open(logs_file, 'a') as f:
        json.dump(log_entry, f, indent=2)
        f.write('\n')

def process_data(data_file, ontology_file, logs_file):
    ontology = load_ontology(ontology_file)
    
    with open(data_file, newline='') as f:
        reader = csv.DictReader(f)
        data = list(reader)

    for entry in data:
        input_text = entry["Tweet"]
        event_name = entry["Event"]
        argument_roles = ["Trigger Word"]
        example_sentences = {"Trigger Word": entry["Trigger Word"].split('|') if entry["Trigger Word"] else []}
        
        example_sentences = merge_example_sentences(event_name, argument_roles, example_sentences, ontology)
        
        prompt = generate_prompt(input_text, event_name, argument_roles, example_sentences, ontology)
        output_text = call_chatgpt(prompt)
        extracted_arguments = argument_extractor(output_text, argument_roles)
        event_trigger = extracted_arguments.pop("Event Trigger", None)
        
        log_results(logs_file, input_text, event_name, event_trigger, extracted_arguments)

# Example usage for in_context.csv:
ontology_file = "ontology.json"
logs_file = "logs.json"
in_context_file = "in_context.csv"
process_data(in_context_file, ontology_file, logs_file)

# Use the best prompt on eval_data.json


In [70]:
process_data("eval_data-annotated.json", ontology_file, "pred.json")

KeyError: 'Tweet'

In [62]:
#instead of using the in_context.csv file, we are using in_context-annotated.json file. However, we do not add the annotated argument roles of the example sentences

import json
import re
import openai

# Set up the OpenAI API key
openai.api_key = "insert-key-here"
api_key = openai.api_key

def load_ontology(file_name):
    """
    Load the ontology from a JSON file.
    """
    with open(file_name, 'r') as f:
        ontology = json.load(f)
    return ontology

def merge_example_sentences(event_name, argument_roles, example_sentences, ontology):
    """
    Merge example sentences from the ontology with the input example sentences.
    """
    for role in argument_roles:
        for entry in ontology:
            if entry["event_name"] == event_name and entry["argument_role"] == role:
                ontology_examples = entry.get("example_sentence", [])
                if role in example_sentences:
                    example_sentences[role].extend(ontology_examples)
                else:
                    example_sentences[role] = ontology_examples
    return example_sentences

def generate_prompt(input_text, event_name, argument_roles, example_sentences, ontology):
    """
    Generate a prompt for ChatGPT to extract arguments from the input text, including the entire ontology.
    """
    prompt = (
        f"You are an advanced information extraction system. Your task is to identify and extract specific arguments "
        f"related to a specified event in a given sentence. The event and argument roles are provided below, along with examples "
        f"to guide you in accurately extracting the relevant information.\n\n"
        f"Event: '{event_name}'\n"
        f"Sentence: '{input_text}'\n\n"
        f"Please extract the following arguments from the sentence:\n"
    )

    for role in argument_roles:
        examples = example_sentences.get(role, [])
        example_list = '; '.join(examples)
        prompt += f"- {role} (Examples: {example_list})\n"
    
    # Adding the entire ontology as context
    ontology_context = json.dumps(ontology, indent=2)
    prompt += f"\nOntology:\n{ontology_context}\n"
    
    prompt += (
        "\nYour response should list the event name that is found in the sentence, each argument role followed by the exact extracted text from the sentence, "
        "maintaining the original wording and case. Format your response as shown below:\n"
        "\nEvent Name: <event name>\n"
        "\nRole1: Extracted text\n"
        "Role2: Extracted text\n"
        "...\n"
    )
    return prompt



def log_results(logs_file, input_text, prompt, output_text, extracted_arguments):
    """
    Log the input text, prompt, output text, and extracted arguments into logs.json.
    """
    log_entry = {
        "input_text": input_text,
        "prompt": prompt,
        "output_text": output_text,
        "extracted_arguments": extracted_arguments
    }
    with open(logs_file, 'a') as f:
        json.dump(log_entry, f)
        f.write('\n')

def process_data(data_file, ontology_file, logs_file):
    ontology = load_ontology(ontology_file)
    
    with open(data_file, 'r') as f:
        data = json.load(f)
        
    for entry in data:
        input_text = entry["input_text"]
        event_name = entry["event_name"]
        argument_roles = entry["arguments"].keys()
        example_sentences = entry.get("example_sentence", {})
        
        example_sentences = merge_example_sentences(event_name, argument_roles, example_sentences, ontology)
        
        prompt = generate_prompt(input_text, event_name, argument_roles, example_sentences, ontology)
        output_text = call_chatgpt(prompt)
        extracted_arguments = argument_extractor(output_text, argument_roles)
        
        log_results(logs_file, input_text, prompt, output_text, extracted_arguments)

# Example usage for in_context.json:
ontology_file = "ontology.json"
logs_file = "logs.json"
process_data("in_context-annotated.json", ontology_file, logs_file)


In [69]:
import json
import re
import csv
import openai

# Set up the OpenAI API key
openai.api_key = "insert-key-here"
api_key = openai.api_key

def load_ontology(file_name):
    """
    Load the ontology from a JSON file.
    """
    with open(file_name, 'r') as f:
        ontology = json.load(f)
    return ontology

def merge_example_sentences(event_name, argument_roles, example_sentences, ontology):
    """
    Merge example sentences from the ontology with the input example sentences.
    """
    for role in argument_roles:
        for entry in ontology:
            if entry["event_name"] == event_name and entry["argument_role"] == role:
                ontology_examples = entry.get("example_sentence", [])
                if role in example_sentences:
                    example_sentences[role].extend(ontology_examples)
                else:
                    example_sentences[role] = ontology_examples
    return example_sentences

def generate_prompt(input_text, event_name, argument_roles, example_sentences, ontology):
    """
    Generate a prompt for ChatGPT to extract arguments from the input text, including the entire ontology.
    """
    prompt = (
        f"You are an advanced information extraction system. Your task is to identify and extract specific arguments "
        f"related to a specified event in a given sentence. The event, trigger word, and argument roles are provided below, along with examples "
        f"to guide you in accurately extracting the relevant information.\n\n"
        f"Event: '{event_name}'\n"
        f"Sentence: '{input_text}'\n\n"
        f"Please extract the event trigger and the following arguments from the sentence:\n"
    )

    for role in argument_roles:
        examples = example_sentences.get(role, [])
        example_list = '; '.join(examples)
        prompt += f"- {role} (Examples: {example_list})\n"

    prompt += (
        "\nYour response should list the event trigger followed by each argument role and the exact extracted text from the sentence, "
        "maintaining the original wording and case. Format your response as shown below:\n"
        "\nEvent Trigger: <event trigger>\n"
        "Role1: <extracted text>\n"
        "Role2: <extracted text>\n"
        "...\n"
        "Here is the ontology for reference:\n"
    )

    # Adding the entire ontology as context
    ontology_context = json.dumps(ontology, indent=2)
    prompt += f"\nOntology:\n{ontology_context}\n"

    return prompt

def call_chatgpt(prompt):
    """
    Call ChatGPT API with the generated prompt and return the response.
    """
    response = openai.completions.create(
        model="gpt-3.5-turbo-instruct",
        prompt=prompt,
        temperature=0,
        max_tokens=400,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response.choices[0].text.strip()

def argument_extractor(chatgpt_output, argument_roles):
    """
    Extract arguments corresponding to the argument roles from the chatgpt_output.
    """
    arguments = {}
    event_trigger_pattern = r"\bEvent Trigger:\s*([^\n]+)"
    event_trigger_match = re.search(event_trigger_pattern, chatgpt_output, re.IGNORECASE)
    if event_trigger_match:
        arguments["Event Trigger"] = event_trigger_match.group(1).strip()
    
    for role in argument_roles:
        pattern = rf"\b{role}:\s*([^\n]+)"
        match = re.search(pattern, chatgpt_output, re.IGNORECASE)
        if match:
            arguments[role] = match.group(1).strip()
    
    return arguments

def log_results(logs_file, input_text, prompt, output_text, extracted_arguments):
    """
    Log the input text, prompt, output text, and extracted arguments into logs.json.
    """
    log_entry = {
        "input_text": input_text,
        "prompt": prompt,
        "output_text": output_text,
        "extracted_arguments": extracted_arguments
    }
    with open(logs_file, 'a') as f:
        json.dump(log_entry, f, indent=2)
        f.write('\n')

def process_data(data_file, ontology_file, logs_file):
    ontology = load_ontology(ontology_file)
    
    with open(data_file, newline='') as f:
        reader = csv.DictReader(f)
        data = list(reader)

    for entry in data:
        input_text = entry["Tweet"]
        event_name = entry["Event"]
        argument_roles = [role["argument_role"] for role in ontology if role["event_name"] == event_name]
        example_sentences = {role: entry["Trigger Word"].split('|') if entry["Trigger Word"] else [] for role in argument_roles}
        
        example_sentences = merge_example_sentences(event_name, argument_roles, example_sentences, ontology)
        
        prompt = generate_prompt(input_text, event_name, argument_roles, example_sentences, ontology)
        output_text = call_chatgpt(prompt)
        extracted_arguments = argument_extractor(output_text, argument_roles)
        event_trigger = extracted_arguments.pop("Event Trigger", None)
        
        log_results(logs_file, input_text, prompt, output_text, extracted_arguments)

def process_eval_data(data_file, ontology_file, output_file):
    ontology = load_ontology(ontology_file)
    
    with open(data_file, newline='') as f:
        reader = csv.DictReader(f)
        data = list(reader)

    results = []

    for entry in data:
        input_text = entry["Tweet"]
        event_name = entry["Event"]
        argument_roles = [role["argument_role"] for role in ontology if role["event_name"] == event_name]
        example_sentences = {role: entry["Trigger Word"].split('|') if entry["Trigger Word"] else [] for role in argument_roles}
        
        example_sentences = merge_example_sentences(event_name, argument_roles, example_sentences, ontology)
        
        prompt = generate_prompt(input_text, event_name, argument_roles, example_sentences, ontology)
        output_text = call_chatgpt(prompt)
        extracted_arguments = argument_extractor(output_text, argument_roles)
        event_trigger = extracted_arguments.pop("Event Trigger", None)
        
        result = {
            "input_text": input_text,
            "prompt": prompt,
            "output_text": output_text,
            "extracted_arguments": extracted_arguments
        }
        results.append(result)
    
    with open(output_file, 'w') as f:
        json.dump(results, f, indent=2)

# Example usage for in_context.csv:
ontology_file = "ontology.json"
logs_file = "logs.json"
in_context_file = "in_context.csv"
process_data(in_context_file, ontology_file, logs_file)

# Use the best prompt on eval_data.csv to create pred.json
eval_data_file = "eval_data.csv"
pred_output_file = "pred.json"
process_eval_data(eval_data_file, ontology_file, pred_output_file)


In [129]:
#breaking chatgpt
def generate_prompt_1(input_text, event_name, argument_roles, example_sentences, ontology):
    """
    Generate a prompt for ChatGPT to extract arguments from the input text, including the entire ontology.
    """
    prompt = (
        f"You are an advanced information extraction system. Your task is to identify and extract specific arguments "
        f"related to a specified event in a given sentence. The event, trigger word, and argument roles are provided below, along with examples "
        f"to guide you in accurately extracting the relevant information.\n\n"
        f"Event: '{event_name}'\n"
        f"Sentence: '{input_text}'\n\n"
        f"Please extract the event trigger and the following arguments from the sentence. Note that some roles might not exist and some might be wrongly labeled:\n"
    )

    for role in argument_roles:
        examples = example_sentences.get(role, [])
        example_list = '; '.join(examples)
        prompt += f"- {role} (Examples: {example_list})\n"

    prompt += (
        "\nYour response should list the event trigger followed by each argument role and the exact extracted text from the sentence, "
        "maintaining the original wording and case. Format your response as shown below:\n"
        "\nEvent Trigger: <event trigger>\n"
        "Role1: <extracted text>\n"
        "Role2: <extracted text>\n"
        "...\n"
        "Here is the ontology for reference:\n"
    )

    # Adding the entire ontology as context
    ontology_context = json.dumps(ontology, indent=2)
    prompt += f"\nOntology:\n{ontology_context}\n"

    return prompt


In [128]:
import json
import re
import csv
import openai

# Set up the OpenAI API key
openai.api_key = "insert-key-here"
api_key = openai.api_key

def load_ontology(file_name):
    """
    Load the ontology from a JSON file.
    """
    with open(file_name, 'r') as f:
        ontology = json.load(f)
    return ontology

def merge_example_sentences(event_name, argument_roles, example_sentences, ontology):
    """
    Merge example sentences from the ontology with the input example sentences.
    """
    for role in argument_roles:
        for entry in ontology:
            if entry["event_name"] == event_name and entry["argument_role"] == role:
                ontology_examples = entry.get("example_sentence", [])
                if role in example_sentences:
                    example_sentences[role].extend(ontology_examples)
                else:
                    example_sentences[role] = ontology_examples
    return example_sentences


def call_chatgpt(prompt):
    """
    Call ChatGPT API with the generated prompt and return the response.
    """
    response = openai.completions.create(
        model="gpt-3.5-turbo-instruct",
        prompt=prompt,
        temperature=0,
        max_tokens=400,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response.choices[0].text.strip()

def argument_extractor(chatgpt_output, argument_roles):
    """
    Extract arguments corresponding to the argument roles from the chatgpt_output.
    """
    arguments = {}
    event_trigger_pattern = r"\bEvent Trigger:\s*([^\n]+)"
    event_trigger_match = re.search(event_trigger_pattern, chatgpt_output, re.IGNORECASE)
    if event_trigger_match:
        arguments["Event Trigger"] = event_trigger_match.group(1).strip()
    
    for role in argument_roles:
        pattern = rf"\b{role}:\s*([^\n]+)"
        match = re.search(pattern, chatgpt_output, re.IGNORECASE)
        if match:
            arguments[role] = match.group(1).strip()
    
    return arguments

def log_results(logs_file, input_text, prompt, output_text, extracted_arguments):
    """
    Log the input text, prompt, output text, and extracted arguments into logs.json.
    """
    log_entry = {
        "input_text": input_text,
        "prompt": prompt,
        "output_text": output_text,
        "extracted_arguments": extracted_arguments
    }
    with open(logs_file, 'a') as f:
        json.dump(log_entry, f, indent=2)
        f.write('\n')

def process_data(data_file, ontology_file, logs_file):
    ontology = load_ontology(ontology_file)
    
    with open(data_file, newline='') as f:
        reader = csv.DictReader(f)
        data = list(reader)

    for entry in data:
        input_text = entry["Tweet"]
        event_name = entry["Event"]
        argument_roles = [role["argument_role"] for role in ontology if role["event_name"] == event_name]
        example_sentences = {role: entry["Trigger Word"].split('|') if entry["Trigger Word"] else [] for role in argument_roles}
        
        example_sentences = merge_example_sentences(event_name, argument_roles, example_sentences, ontology)
        
        prompt = generate_prompt(input_text, event_name, argument_roles, example_sentences, ontology)
        output_text = call_chatgpt(prompt)
        extracted_arguments = argument_extractor(output_text, argument_roles)
        event_trigger = extracted_arguments.pop("Event Trigger", None)
        
        log_results(logs_file, input_text, prompt, output_text, extracted_arguments)

def process_eval_data(data_file, ontology_file, output_file):
    ontology = load_ontology(ontology_file)
    
    with open(data_file, newline='') as f:
        reader = csv.DictReader(f)
        data = list(reader)

    results = []

    for entry in data:
        input_text = entry["Tweet"]
        event_name = entry["Event"]
        argument_roles = [role["argument_role"] for role in ontology if role["event_name"] == event_name]
        example_sentences = {role: entry["Trigger Word"].split('|') if entry["Trigger Word"] else [] for role in argument_roles}
        
        example_sentences = merge_example_sentences(event_name, argument_roles, example_sentences, ontology)
        
        prompt = generate_prompt(input_text, event_name, argument_roles, example_sentences, ontology)
        output_text = call_chatgpt(prompt)
        extracted_arguments = argument_extractor(output_text, argument_roles)
        event_trigger = extracted_arguments.pop("Event Trigger", None)
        
        result = {
            "input_text": input_text,
            "prompt": prompt,
            "output_text": output_text,
            "extracted_arguments": extracted_arguments
        }
        results.append(result)
    
    with open(output_file, 'w') as f:
        json.dump(results, f, indent=2)

# Example usage for in_context.csv:
ontology_file = "ontology.json"
logs_file = "logs2.json"
in_context_file = "in_context.csv"
process_data(in_context_file, ontology_file, logs_file)

# Use the best prompt on eval_data.csv to create pred.json
eval_data_file = "eval_data.csv"
pred_output_file = "pred2.json"
process_eval_data(eval_data_file, ontology_file, pred_output_file)


In [130]:
def generate_prompt_2(input_text, event_name, argument_roles, example_sentences, ontology):
    """
    Generate a prompt for ChatGPT to extract arguments from the input text, including the entire ontology.
    """
    prompt = (
        f"You are an advanced information extraction system. Your task is to identify and extract specific arguments "
        f"related to a specified event in a given sentence. The event, trigger word, and argument roles are provided below, along with examples "
        f"to guide you in accurately extracting the relevant information. However, make sure to only use lowercase letters for the roles, "
        f"and if any role is a duplicate, provide both instances but only if they are different in wording.\n\n"
        f"Event: '{event_name}'\n"
        f"Sentence: '{input_text}'\n\n"
        f"Please extract the event trigger and the following arguments from the sentence. Note that some roles might not exist and some might be wrongly labeled:\n"
    )

    for role in argument_roles:
        examples = example_sentences.get(role, [])
        example_list = '; '.join(examples)
        prompt += f"- {role.lower()} (Examples: {example_list})\n"

    prompt += (
        "\nYour response should list the event trigger followed by each argument role and the exact extracted text from the sentence, "
        "maintaining the original wording and case, but roles in lowercase. Format your response as shown below:\n"
        "\nEvent Trigger: <event trigger>\n"
        "role1: <extracted text>\n"
        "role2: <extracted text>\n"
        "...\n"
        "Here is the ontology for reference:\n"
    )

    # Adding the entire ontology as context
    ontology_context = json.dumps(ontology, indent=2)
    prompt += f"\nOntology:\n{ontology_context}\n"

    return prompt


In [131]:
def generate_prompt_3(input_text, event_name, argument_roles, example_sentences, ontology):
    """
    Generate a prompt for ChatGPT to extract arguments from the input text, including the entire ontology.
    """
    prompt = (
        f"You are an advanced information extraction system. Your task is to identify and extract specific arguments "
        f"related to a specified event in a given sentence. The event, trigger word, and argument roles are provided below, along with examples "
        f"to guide you in accurately extracting the relevant information. Note that you should ignore the examples if they conflict with your understanding.\n\n"
        f"Event: '{event_name}'\n"
        f"Sentence: '{input_text}'\n\n"
        f"Please extract the event trigger and the following arguments from the sentence:\n"
    )

    for role in argument_roles:
        examples = example_sentences.get(role, [])
        example_list = '; '.join(examples)
        prompt += f"- {role} (Examples: {example_list})\n"

    prompt += (
        "\nYour response should list the event trigger followed by each argument role and the exact extracted text from the sentence, "
        "maintaining the original wording and case. Format your response as shown below:\n"
        "\nEvent Trigger: <event trigger>\n"
        "Role1: <extracted text>\n"
        "Role2: <extracted text>\n"
        "...\n"
        "Here is the ontology for reference, but ignore it if you think it conflicts with your understanding:\n"
    )

    # Adding the entire ontology as context
    ontology_context = json.dumps(ontology, indent=2)
    prompt += f"\nOntology:\n{ontology_context}\n"

    return prompt


In [132]:
def generate_prompt_4(input_text, event_name, argument_roles, example_sentences, ontology):
    """
    Generate a prompt for ChatGPT to extract arguments from the input text, including the entire ontology.
    """
    prompt = (
        f"You are an advanced information extraction system. Your task is to identify and extract specific arguments "
        f"related to a specified event in a given sentence. The event, trigger word, and argument roles are provided below, along with examples "
        f"to guide you in accurately extracting the relevant information. Remember to consider all details, no matter how redundant they may seem.\n\n"
        f"Event: '{event_name}'\n"
        f"Sentence: '{input_text}'\n\n"
        f"Please extract the event trigger and the following arguments from the sentence. Some information might be repeated or redundant:\n"
    )

    for role in argument_roles:
        examples = example_sentences.get(role, [])
        example_list = '; '.join(examples)
        prompt += f"- {role} (Examples: {example_list})\n"

    prompt += (
        "\nYour response should list the event trigger followed by each argument role and the exact extracted text from the sentence, "
        "maintaining the original wording and case. Format your response as shown below:\n"
        "\nEvent Trigger: <event trigger>\n"
        "Role1: <extracted text>\n"
        "Role2: <extracted text>\n"
        "...\n"
        "Here is the ontology for reference, which might contain redundant information:\n"
    )

    # Adding the entire ontology as context
    ontology_context = json.dumps(ontology, indent=2)
    prompt += f"\nOntology:\n{ontology_context}\n"

    return prompt


In [133]:
def generate_prompt_5(input_text, event_name, argument_roles, example_sentences, ontology):
    """
    Generate a prompt for ChatGPT to extract arguments from the input text, including the entire ontology.
    """
    prompt = (
        f"You are an advanced information extraction system. Your task is to identify and extract specific arguments "
        f"related to a specified event in a given sentence. The event, trigger word, and argument roles are provided below, along with examples "
        f"to guide you in accurately extracting the relevant information. Use bullet points, numbers, and roman numerals interchangeably.\n\n"
        f"Event: '{event_name}'\n"
        f"Sentence: '{input_text}'\n\n"
        f"Please extract the event trigger and the following arguments from the sentence:\n"
    )

    for i, role in enumerate(argument_roles):
        examples = example_sentences.get(role, [])
        example_list = '; '.join(examples)
        if i % 3 == 0:
            prompt += f"- {role} (Examples: {example_list})\n"
        elif i % 3 == 1:
            prompt += f"{i + 1}. {role} (Examples: {example_list})\n"
        else:
            prompt += f"{i + 1}. {role} (Examples: {example_list})\n"

    prompt += (
        "\nYour response should list the event trigger followed by each argument role and the exact extracted text from the sentence, "
        "maintaining the original wording and case. Format your response using bullet points, numbers, and roman numerals interchangeably as shown below:\n"
        "\nEvent Trigger: <event trigger>\n"
        "1. Role1: <extracted text>\n"
        "ii. Role2: <extracted text>\n"
        "- Role3: <extracted text>\n"
        "...\n"
        "Here is the ontology for reference:\n"
    )

    # Adding the entire ontology as context
    ontology_context = json.dumps(ontology, indent=2)
    prompt += f"\nOntology:\n{ontology_context}\n"

    return prompt


In [134]:
def generate_prompt_6(input_text, event_name, argument_roles, example_sentences, ontology):
    """
    Generate a prompt for ChatGPT to extract arguments from the input text, including the entire ontology.
    """
    prompt = (
        f"You are an advanced information extraction system. Your task is to identify and extract specific arguments "
        f"related to a specified event in a given sentence. The event, trigger word, and argument roles are provided below, along with examples "
        f"to guide you in accurately extracting the relevant information. Some roles might be related to different events.\n\n"
        f"Event: '{event_name}'\n"
        f"Sentence: '{input_text}'\n\n"
        f"Please extract the event trigger and the following arguments from the sentence:\n"
    )

    for role in argument_roles:
        examples = example_sentences.get(role, [])
        example_list = '; '.join(examples)
        prompt += f"- {role} (Examples: {example_list})\n"

    prompt += (
        "\nYour response should list the event trigger followed by each argument role and the exact extracted text from the sentence, "
        "maintaining the original wording and case. Note that some roles might be related to different events. Format your response as shown below:\n"
        "\nEvent Trigger: <event trigger>\n"
        "Role1: <extracted text>\n"
        "Role2: <extracted text>\n"
        "...\n"
        "Here is the ontology for reference, which might contain roles from different events:\n"
    )

    # Adding the entire ontology as context
    ontology_context = json.dumps(ontology, indent=2)
    prompt += f"\nOntology:\n{ontology_context}\n"

    return prompt


In [142]:
def generate_prompt_7(input_text, event_name, argument_roles, example_sentences, ontology):
    """
    Generate a prompt for ChatGPT to extract arguments from the input text, including the entire ontology.
    """
    prompt = (
        f"You are an advanced information extraction system. Your task is to identify and extract specific arguments "
        f"related to a specified event in a given sentence. The event, trigger word, and argument roles are provided below, along with examples "
        f"to guide you in accurately extracting the relevant information. Use case-insensitive extraction but maintain the original case. Also, provide a summary of the sentence.\n\n"
        f"Event: '{event_name}'\n"
        f"Sentence: '{input_text}'\n\n"
        f"Please extract the event trigger and the following arguments from the sentence:\n"
    )

    for role in argument_roles:
        examples = example_sentences.get(role, [])
        example_list = '; '.join(examples)
        prompt += f"- {role} (Examples: {example_list}, irrelevant example)\n"

    prompt += (
        "\nYour response should list the event trigger followed by each argument role and the exact extracted text from the sentence, "
        "maintaining the original wording and case. Use case-insensitive extraction but maintain the original case. Provide a summary of the sentence as well. "
        "Format your response as shown below:\n"
        "\nEvent Trigger: <event trigger>\n"
        "Role1: <extracted text>\n"
        "Role2: <extracted text>\n"
        "...\n"
        "Summary: <summary>\n"
        "Here is the ontology for reference:\n"
    )

    # Adding the entire ontology as context
    ontology_context = json.dumps(ontology, indent=2)
    prompt += f"\nOntology:\n{ontology_context}\n"

    return prompt


In [143]:
def generate_prompt_8(input_text, event_name, argument_roles, example_sentences, ontology):
    """
    Generate a prompt for ChatGPT to extract arguments from the input text, including the entire ontology.
    """
    prompt = (
        f"You are an advanced information extraction system. Your task is to identify and extract specific arguments "
        f"related to a specified event in a given sentence. The event, trigger word, and argument roles are provided below, along with examples "
        f"to guide you in accurately extracting the relevant information. Use numbers and words interchangeably for roles, and mix roles randomly.\n\n"
        f"Event: '{event_name}'\n"
        f"Sentence: '{input_text}'\n\n"
        f"Please extract the event trigger and the following arguments from the sentence. Ensure to include any additional details you find important:\n"
    )

    for i, role in enumerate(argument_roles):
        examples = example_sentences.get(role, [])
        example_list = '; '.join(examples)
        if i % 2 == 0:
            prompt += f"- {role} (Examples: {example_list})\n"
        else:
            prompt += f"- Role {i + 1} (Examples: {example_list}, unrelated example)\n"

    prompt += (
        "\nYour response should list the event trigger followed by each argument role and the exact extracted text from the sentence, "
        "maintaining the original wording and case. Use numbers and words interchangeably for roles and mix them randomly. Include any additional details you find important. Format your response as shown below:\n"
        "\nEvent Trigger: <event trigger>\n"
        "Role 1: <extracted text>\n"
        "Role2: <unrelated text>\n"
        "...\n"
        "Here is the ontology for reference:\n"
    )

    # Adding the entire ontology as context
    ontology_context = json.dumps(ontology, indent=2)
    prompt += f"\nOntology:\n{ontology_context}\n"

    return prompt


In [137]:
def generate_prompt_9(input_text, event_name, argument_roles, example_sentences, ontology):
    """
    Generate a prompt for ChatGPT to extract arguments from the input text, including the entire ontology.
    """
    prompt = (
        f"You are an advanced information extraction system. Your task is to identify and extract specific arguments "
        f"related to a specified event in a given sentence. The event, trigger word, and argument roles are provided below, along with examples "
        f"to guide you in accurately extracting the relevant information. Note that some examples might be unrelated, and you should include all examples "
        f"even if they are contradictory.\n\n"
        f"Event: '{event_name}'\n"
        f"Sentence: '{input_text}'\n\n"
        f"Please extract the event trigger and the following arguments from the sentence, and ensure to also extract any non-existing arguments:\n"
    )

    for role in argument_roles:
        examples = example_sentences.get(role, [])
        example_list = '; '.join(examples)
        prompt += f"- {role} (Examples: {example_list}; random example; unrelated example; contradictory example)\n"

    prompt += (
        "\nYour response should list the event trigger followed by each argument role and the exact extracted text from the sentence, "
        "maintaining the original wording and case. Some examples might be unrelated, and you should also consider extracting non-existing arguments. "
        "Format your response as shown below:\n"
        "\nEvent Trigger: <event trigger>\n"
        "Role1: <extracted text or 'not present'>\n"
        "Role2: <extracted text or 'not present'>\n"
        "...\n"
        "Here is the ontology for reference:\n"
    )

    # Adding the entire ontology as context
    ontology_context = json.dumps(ontology, indent=2)
    prompt += f"\nOntology:\n{ontology_context}\n"

    return prompt


In [144]:
def generate_prompt_10(input_text, event_name, argument_roles, example_sentences, ontology):
    """
    Generate a prompt for ChatGPT to extract arguments from the input text, including the entire ontology.
    """
    prompt = (
        f"You are an advanced information extraction system. Your task is to identify and extract specific arguments "
        f"related to a specified event in a given sentence. The event, trigger word, and argument roles are provided below, along with examples "
        f"to guide you in accurately extracting the relevant information. Note that some roles may have dual meanings, and you should account for both. "
        f"However, ensure you avoid any ambiguity in your responses. Ensure the dual meanings are both considered but not in a way that confuses the output.\n\n"
        f"Event: '{event_name}'\n"
        f"Sentence: '{input_text}'\n\n"
        f"Please extract the event trigger and the following arguments from the sentence, but ensure no event trigger is extracted if none exists:\n"
    )

    for role in argument_roles:
        examples = example_sentences.get(role, [])
        example_list = '; '.join(examples)
        prompt += f"- {role} (Examples: {example_list})\n"

    prompt += (
        "\nYour response should list the event trigger followed by each argument role and the exact extracted text from the sentence, "
        "maintaining the original wording and case. Some roles may have dual meanings, and you should account for both, but without causing confusion. "
        "If a role is not present, indicate this explicitly. Format your response as shown below:\n"
        "\nEvent Trigger: <event trigger>\n"
        "Role1: <extracted text or 'not present'>\n"
        "Role2: <extracted text or 'not present'>\n"
        "...\n"
        "Here is the ontology for reference:\n"
    )

    # Adding the entire ontology as context
    ontology_context = json.dumps(ontology, indent=2)
    prompt += f"\nOntology:\n{ontology_context}\n"

    return prompt


In [146]:

def load_ontology(file_name):
    """
    Load the ontology from a JSON file.
    """
    with open(file_name, 'r') as f:
        ontology = json.load(f)
    return ontology

def merge_example_sentences(event_name, argument_roles, example_sentences, ontology):
    """
    Merge example sentences from the ontology with the input example sentences.
    """
    for role in argument_roles:
        for entry in ontology:
            if entry["event_name"] == event_name and entry["argument_role"] == role:
                ontology_examples = entry.get("example_sentence", [])
                if role in example_sentences:
                    example_sentences[role].extend(ontology_examples)
                else:
                    example_sentences[role] = ontology_examples
    return example_sentences


def call_chatgpt(prompt):
    """
    Call ChatGPT API with the generated prompt and return the response.
    """
    response = openai.completions.create(
        model="gpt-3.5-turbo-instruct",
        prompt=prompt,
        temperature=0,
        max_tokens=400,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response.choices[0].text.strip()

def argument_extractor(chatgpt_output, argument_roles):
    """
    Extract arguments corresponding to the argument roles from the chatgpt_output.
    """
    arguments = {}
    for role in argument_roles:
        pattern = rf"\b{role}:\s*([^\n]+)"
        match = re.search(pattern, chatgpt_output, re.IGNORECASE)
        if match:
            arguments[role] = match.group(1).strip()
        else:
            arguments[role] = None
    return arguments

def log_results(logs_file, input_text, prompt, output_text, extracted_arguments, expected_arguments, event_name, event_trigger):
    """
    Log the input text, prompt, output text, and extracted arguments into break-gpt.json.
    """
    log_entry = {
        "input_text": input_text,
        "event_name": event_name,
        "event_trigger": event_trigger,
        "prompt": prompt,
        "output_text": output_text,
        "extracted_arguments": extracted_arguments,
        "expected_arguments": expected_arguments
    }
    with open(logs_file, 'a') as f:
        json.dump(log_entry, f, indent=2)
        f.write('\n')

def process_data_with_variants(data_file, ontology_file, logs_file):
    ontology = load_ontology(ontology_file)
    prompt_variants = [generate_prompt_1, generate_prompt_2, generate_prompt_3, generate_prompt_4, generate_prompt_5, generate_prompt_6, generate_prompt_7, generate_prompt_8, generate_prompt_9, generate_prompt_10]

    with open(data_file, 'r') as f:
        data = json.load(f)

    for entry in data:
        input_text = entry["input_text"]
        event_name = entry["event_name"]
        event_trigger = entry["event_trigger"]
        argument_roles = entry["arguments"].keys()
        example_sentences = entry.get("example_sentence", {})
        expected_arguments = entry["arguments"]

        example_sentences = merge_example_sentences(event_name, argument_roles, example_sentences, ontology)
        
        for generate_prompt in prompt_variants:
            prompt = generate_prompt(input_text, event_name, argument_roles, example_sentences, ontology)
            output_text = call_chatgpt(prompt)
            extracted_arguments = argument_extractor(output_text, argument_roles)
            
            if extracted_arguments != expected_arguments:
                log_results(logs_file, input_text, prompt, output_text, extracted_arguments, expected_arguments, event_name, event_trigger)

# Example usage for in_context-annotated.json:
ontology_file = "ontology.json"
logs_file = "break-gpt.json"
process_data_with_variants("in_context-annotated.json", ontology_file, logs_file)
