In [1]:
import pandas as pd
import re

# Import CSV with LIME explanations

In [2]:
# Name of the CSV file holding the LIME explanations (a relative path, so it is
# resolved against the current working directory)
filename = 'LIME_Explanations_UserStudy.csv'
# Load the file into the DataFrame that the following cells extend and export
df = pd.read_csv(filename)

# Format the rationales

In [3]:
import ast
# Function to evaluate and format the rationales
def safe_format_rationales(rationales_str):
    """Format a stringified sequence of (key, value) pairs for display.

    Args:
        rationales_str: String containing a Python literal that is expected to
            be an iterable of ``(key, value)`` pairs with numeric values, e.g.
            ``"[('tax', 0.1234), ('vote', -0.5)]"``.

    Returns:
        A string such as ``"'tax': 0.123, 'vote': -0.500"`` (each value fixed
        to three decimal places, pairs joined by ``", "``), or the fallback
        message ``"Could not parse format"`` when the input cannot be
        evaluated or does not have the expected shape.
    """
    try:
        # literal_eval accepts Python literals only, so no arbitrary code is run
        rationales = ast.literal_eval(rationales_str)
        # Render each pair as 'key': value with the value at 3 decimal places
        return ", ".join(f"'{key}': {value:.3f}" for key, value in rationales)
    except (ValueError, SyntaxError, TypeError):
        # ValueError/SyntaxError: the string is not a valid literal, or a pair
        # has the wrong length / a value of a type that rejects the 'f' format.
        # TypeError: the literal is valid but not an iterable of pairs with
        # numeric values (e.g. "5", "[1, 2]" or a None weight); without this
        # a single such row would abort the whole DataFrame.apply call.
        return "Could not parse format"

# Apply the function to each rationale column, storing the result in a
# matching 'formatted_*' column (target column -> source column)
formatted_rationale_columns = {
    'formatted_politics_rationales': 'politics_rationales',
    'formatted_science_rationales': 'science_rationales',
    'formatted_leisure_rationales': 'leisure_rationales',
}
for formatted_column, raw_column in formatted_rationale_columns.items():
    df[formatted_column] = df[raw_column].apply(safe_format_rationales)


# Create prompts

In [4]:
# Build the two prompt variants row by row
def _prompt_with_rationale(row):
    """Render the row's text, `pred`, `pred_prob` and the formatted rationale of the `pred` class."""
    return f"Input: {row['text']}\n\nOutput: {row['pred']}\nConfidence: {row['pred_prob']}\nOutput rationale: {row['formatted_'+ str(row['pred']) +'_rationales']}."


def _prompt_without_rationale(row):
    """Render the row's text, `pred` and `pred_prob` only."""
    return f"Input: {row['text']}\n\nOutput: {row['pred']}\nConfidence: {row['pred_prob']}"


# Column 'prompt' includes the rationale line; 'prompt_no_rationale' omits it
df['prompt'] = df.apply(_prompt_with_rationale, axis=1)

df['prompt_no_rationale'] = df.apply(_prompt_without_rationale, axis=1)


# Add column with words from rationale

In [5]:
# For evaluation: take the formatted rationale string that belongs to each
# row's `pred` value (a trailing "." is appended, as in the prompt)
def _predicted_rationale_text(row):
    """Return the formatted rationale of the row's `pred` class followed by a period."""
    return f"{row['formatted_'+ str(row['pred']) +'_rationales']}."


df['pred_rationales_words'] = df.apply(_predicted_rationale_text, axis=1)

# Pull the quoted keys (words) out of a formatted rationale string
def extract_words(rationale_str):
    """Return every non-empty substring enclosed in single quotes, in order of appearance."""
    quoted_key = re.compile(r"'([^']+)'")
    return [match.group(1) for match in quoted_key.finditer(rationale_str)]

# Replace each rationale string with the list of words extracted from it
rationale_strings = df['pred_rationales_words']
df['pred_rationales_words'] = rationale_strings.apply(extract_words)

# Drop some columns

In [6]:
# Drop the raw rationale columns; their formatted counterparts remain in df
raw_rationale_columns = ['politics_rationales', 'science_rationales', 'leisure_rationales']
df = df.drop(columns=raw_rationale_columns)

In [7]:
# Write the resulting table to CSV, leaving out the DataFrame index
output_filename = 'prompts_user_study.csv'
df.to_csv(output_filename, index=False)