# Generate prompts from raw stimuli and instructions for different measurements
Author: Hening Wang
Date last modified: 30.08.2023

TODO: Add some descriptions about the document


In [9]:
# Import relevant modules
import pandas as pd
import numpy as np


## Encoding and pre-processing relevant information from raw stimuli

TODO: Add descriptions of the contents of each file in raw_stimuli

In [10]:
# Load the answer options (tab-separated; column names are supplied here)
content_options = pd.read_csv(
    'raw_stimuli/MaximsOptions.txt',
    sep='\t',
    names=['item_number', 'option_number', 'option_text'],
)

# Collect the option texts of each item into one list per item,
# visiting the items in the order they first appear in the file
options = [
    content_options.loc[content_options.item_number == item_id, 'option_text'].tolist()
    for item_id in content_options.item_number.unique()
]

# Load the answer key (tab-separated; column names are supplied here)
key_options = pd.read_csv(
    'raw_stimuli/MaximsKeySimple.txt',
    sep='\t',
    names=["phenomenon", 'item_number', "option_number", 'content'],
)

# Keep one (item_number, option_number) pair for every key row marked
# 'Correct', arranged in ascending order of item_number
correct_rows = key_options[key_options.content == 'Correct']
keys_items_pairs = sorted(
    zip(correct_rows.item_number, correct_rows.option_number),
    key=lambda pair: pair[0],
)

# Load the scenario texts (tab-separated; column names are supplied here)
scenarios = pd.read_csv(
    'raw_stimuli/MaximsScenarios.txt',
    sep='\t',
    names=['item_number', 'scenario'],
)

# Look up the text of the keyed option for every (item, option) pair
true_answer = []
for item_id, option_id in keys_items_pairs:
    is_keyed_option = (
        (content_options['item_number'] == item_id)
        & (content_options['option_number'] == option_id)
    )
    true_answer.extend(content_options.loc[is_keyed_option, 'option_text'].tolist())

print(true_answer)

['He disliked the talk that his boss gave but does not want to criticize his boss.  ', "Alex thinks that Richard's painting is mediocre.", 'She does not want to discuss the topic that Leslie has raised.', "He pretends that he did not understand his wife's comment because he wants to watch the game.", "Mark's opinion of Jane as a financial manager is very low.", "Lucy's mom does not know where exactly Vincent lives.", 'The food at the wedding was mediocre.', 'Mary is irritated with Ken.', 'Cindy does not know what type of shoes she wants to buy.', 'He does not want to talk about school with his mom.', "Mark's wife has little experience with cooking.", "Marcie thinks that her husband's expectations about breakfast are too high.", 'Paul thinks that the actress cannot sing.', 'Melissa thinks that statistical information is useless in this debate.', 'He thinks that Rosy has no reason to believe that the strangers are nice.', 'Freddie will never give Berta any money.', 'Katy doubts that Mr. 

## Generate prompts for Rating task

TODO: 
1. What is a rating task
2. We use four different prompts for the rating task, one per scale: plausible, appropriate, possible, likely.

In [15]:
# Rating scales: one instruction file is read and one prompt CSV is written per scale
scales = ["plausible", "appropriate", "possible", "likely"]

for scale in scales:
    # Load the instruction text that belongs to the current scale
    file_path_instructions_rating = f'prompt_rating/MaximsInstructions_Rating_{scale}.txt'
    with open(file_path_instructions_rating, 'r') as handle:
        instructions_rating = handle.read()

    # Build one prompt per item: instructions, then the scenario, then the keyed answer
    new_prompts = [
        "".join((
            instructions_rating,
            " Scenario: ",
            scenarios.scenario[position],
            " Explanation: ",
            answer,
        ))
        for position, answer in enumerate(true_answer)
    ]
    print(new_prompts)

    # Gather the prompts together with their metadata, one row per item
    dict_prompts_rating = {
        'item_number': scenarios.item_number,
        'prompt': new_prompts,
        'true_answer_key': [option_number for _, option_number in keys_items_pairs],
        'true_answer': true_answer,
        'scenarios': scenarios.scenario,
        'options': options,
    }

    # Write the table for this scale to its own CSV file
    df_prompts_rating = pd.DataFrame(dict_prompts_rating)
    df_prompts_rating.to_csv(f"prompt_rating/Maxims_prompts_Rating_{scale}.csv", index=False)




['Task: You will read short stories that describe everyday situations. Each story will be followed by an explanation of why one character in that scenario responds in a certain way. Read each story and the associated explanation. Your task is to decide how plausible is that explanation. The degree varies from "very implausible", "implausible", "neutral", "plausible", "very plausible". Answer only with one degree. Scenario: Bob is having a lunch meeting with his boss, Mr. James. Mr. James asks Bob: "Did you like the presentation that I gave at the board meeting yesterday?" Bob responds: "I cannot wait for our trip to Japan next week." Why has Bob responded like this?\r Explanation: He disliked the talk that his boss gave but does not want to criticize his boss.  ', 'Task: You will read short stories that describe everyday situations. Each story will be followed by an explanation of why one character in that scenario responds in a certain way. Read each story and the associated explanati

## Generate prompts for forced choice task


In [33]:
# Read the instructions for the FC task answered with the option content only
with open('prompt_fc/MaximsInstructions_FC_Content.txt', 'r') as file:
    instructions_FC_Content = file.read()

# Read the instructions for the FC task answered with both content and number
with open('prompt_fc/MaximsInstructions_FC_Both.txt', 'r') as file:
    instructions_FC_Both = file.read()

# Combine instructions, scenarios and answer options into one prompt per item.
# This runs after both instruction files have been closed; it does not need
# to sit inside either `with` block.
new_prompts_content = []
new_prompts_both = []
for scenario, item_options in zip(scenarios.scenario, options):
    # Options as quoted strings separated by single spaces
    concatenated_string = ' '.join([f"\"{option}\"" for option in item_options])
    # Options as numbered, quoted strings.
    # NOTE(review): these are joined with no separator, unlike the
    # space-separated variant above -- confirm this is intended.
    concatenated_string_indexed = ''.join(
        [f"{index+1}. \"{string}\"" for index, string in enumerate(item_options)]
    )
    new_prompts_content.append(
        instructions_FC_Content + " Scenario: " + scenario + " Options: " + concatenated_string
    )
    new_prompts_both.append(
        instructions_FC_Both + " Scenario: " + scenario + " Options: " + concatenated_string_indexed
    )
print(new_prompts_content)
print(new_prompts_both)

# The keyed option number of each item, shared by both prompt tables
true_answer_keys = [keys_items_pair[1] for keys_items_pair in keys_items_pairs]

# Prompt table for the FC task answered with the option content only
dict_prompts_FC_Content = {'item_number': scenarios.item_number,
                           'prompt': new_prompts_content,
                           'true_answer_key': true_answer_keys,
                           'true_answer': true_answer,
                           'scenarios': scenarios.scenario,
                           'options': options}

# Prompt table for the FC task answered with both content and number
dict_prompts_FC_Both = {'item_number': scenarios.item_number,
                        'prompt': new_prompts_both,
                        'true_answer_key': true_answer_keys,
                        'true_answer': true_answer,
                        'scenarios': scenarios.scenario,
                        'options': options}

# Each DataFrame is built from its own FC dictionary. Previously both were
# built from the rating dictionary, so the FC CSV files received the rating
# prompts instead of the FC prompts.
df_prompts_FC_Content = pd.DataFrame(dict_prompts_FC_Content)
df_prompts_FC_Content.to_csv("prompt_fc/Maxims_prompts_FC_Content.csv", index=False)

df_prompts_FC_Both = pd.DataFrame(dict_prompts_FC_Both)
df_prompts_FC_Both.to_csv("prompt_fc/Maxims_prompts_FC_Both.csv", index=False)

['Task: You will read short stories that describe everyday situations. Each story will be followed by a multiple-choice question. Read each story and choose the best answer. Your task is to decide why the character in the story responds in a certain way. The answer options are given after the stories. Provide your answer by indicating the option content only. Scenario: Bob is having a lunch meeting with his boss, Mr. James. Mr. James asks Bob: "Did you like the presentation that I gave at the board meeting yesterday?" Bob responds: "I cannot wait for our trip to Japan next week." Why has Bob responded like this?\r Options: "He is excited about the upcoming trip to Japan and does not want to talk about anything else." "He liked the talk that his boss gave but does not want to tell him about it." "He disliked the talk that his boss gave but does not want to criticize his boss.  " "He wants to avoid going to Japan next week and wants to discuss it with his boss. "', 'Task: You will read s

## Generate prompts for free production

In [5]:
# Load the instruction text for the free production task
file_path_free = 'raw_stimuli/MaximsInstructions_Free.txt'
with open(file_path_free, 'r') as handle:
    instructions_free = handle.read()

# Build one prompt per scenario: the instructions followed by the scenario text
new_prompts = [
    instructions_free + " Scenario: " + scenario_text
    for scenario_text in scenarios.scenario
]

# Gather the prompts together with their metadata, one row per item
dict_prompts_free = {
    'item_number': scenarios.item_number,
    'prompt': new_prompts,
    'true_answer_key': [option_number for _, option_number in keys_items_pairs],
    'true_answer': true_answer,
    'scenarios': scenarios.scenario,
    'options': options,
}

# Write the table to a CSV file in the current working directory
df_prompts_free = pd.DataFrame(dict_prompts_free)
df_prompts_free.to_csv("Maxims_prompts_Free.csv", index=False)

