In [1]:
import pandas as pd
import openai
import random
import ast
import os
import re
safe_globals = {"__builtins__": {}}
import requests
import json
import anthropic

In [2]:
# Read the API key from the environment instead of embedding it in the notebook,
# so the credential never ends up in version control or a shared copy.
# A missing variable raises KeyError here, before any request is attempted.
client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

In [3]:
# Load the question set and initialise the run-wide state used by later cells.
data = pd.read_csv("../../Data/commonsense_qa.csv")
result = {}              # question text -> model reply
error_list = []          # container for failures encountered during the run
user_temperature = 0.1   # sampling temperature setting for the model

# The option masking further down draws from the global `random` generator.
# Seeding it here makes "Restart Kernel -> Run All" mask the same option every
# time, so accuracy figures are comparable between runs.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

In [4]:
# Turn each raw `choices` cell (the text repr of a dict holding numpy arrays)
# into a "LABEL: text" block, one option per line.
#
# Exactly one entry is appended per row, even when parsing fails, so the list
# stays aligned with `data` and can be assigned back as a column. Rows that
# cannot be parsed get an empty string, which downstream cells treat as
# "no options".
answer_options_formatted = []
for raw_choices in data['choices']:
    formatted_output = ""
    input_cleaned = raw_choices  # fallback for the error message if cleaning itself fails
    try:
        # Rewrite `array([...], dtype=object)` into a plain list literal.
        input_cleaned = re.sub(r"array\(\s*\[(.*?)\]\s*,\s*dtype=object\)", r"[\1]", raw_choices, flags=re.DOTALL)
        # literal_eval only accepts Python literals, so file contents are
        # never executed as code (unlike eval).
        extracted_dict = ast.literal_eval(input_cleaned)
        labels = extracted_dict.get('label', [])
        texts = extracted_dict.get('text', [])
        if len(labels) == len(texts):
            formatted_output = '\n'.join(f"{label}: {text}" for label, text in zip(labels, texts))
        else:
            print(f"Skipping row due to mismatched lengths or missing data: {input_cleaned}")
    except (SyntaxError, ValueError, NameError, TypeError, AttributeError) as e:
        # ValueError: literal_eval met a non-literal (e.g. an unconverted array(...)).
        # TypeError: the cell was not a string (e.g. NaN).
        # AttributeError: the parsed value was not a dict.
        print(f"Error parsing row: {input_cleaned}\nError details: {e}")
    answer_options_formatted.append(formatted_output)

In [5]:
# Attach the formatted option blocks as a new column; the list must contain
# one entry per row of `data` for this assignment to succeed.
data['answer_options_formatted'] = answer_options_formatted

In [6]:
# Convert the answer letter into a zero-based index into the option list.
# Questions carry five options (A-E), so 'E' has to be mapped too: if it is
# left as the string 'E', no integer position ever equals it and the masking
# step may blank out the correct answer instead of a distractor.
data['answer'] = data['answerKey'].replace({'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4})

In [7]:
# Recover the bare option texts from each "LABEL: text" block: split every
# line once on the first ': ' and keep the part after it.
choice_list = [
    [option_line.split(': ', 1)[1] for option_line in formatted_block.splitlines()]
    for formatted_block in data['answer_options_formatted']
]

In [8]:
# Replace the raw `choices` strings with the parsed lists of option texts.
# Note: this overwrites the original column, so the parsing cell above (which
# runs a regex over each `choices` value) cannot be re-run without reloading the CSV.
data['choices'] = choice_list

In [9]:
# Working copy of the option lists that the masking step will modify.
# Each row gets its own list object so that edits to `choices_copy` can never
# leak into `choices` (a plain `.tolist()` would share the inner lists).
data['choices_copy'] = [list(options) for options in data['choices']]

In [10]:
def mask_choice(choice_list, correct_index, rng=None):
    """Blank out one randomly chosen incorrect option.

    Args:
        choice_list: Sequence of option texts. It is not modified.
        correct_index: Position of the correct answer; this position is never
            masked. If it matches no position, every option is a candidate.
        rng: Optional ``random.Random``-like object providing ``choice``.
            Defaults to the module-level ``random`` generator. Pass a seeded
            instance for reproducible masking.

    Returns:
        A ``(masked_choices, masked_options)`` tuple. ``masked_choices`` is a
        copy of ``choice_list`` with the selected option replaced by ``""``.
        ``masked_options`` is a one-element list holding the removed text, or
        an empty list when there was no incorrect option to mask.
    """
    picker = random if rng is None else rng
    choice_list_copy = list(choice_list)
    incorrect_indices = [i for i in range(len(choice_list_copy)) if i != correct_index]

    # Nothing to mask (empty list, or the only option is the correct one).
    if not incorrect_indices:
        return choice_list_copy, []

    mask_index = picker.choice(incorrect_indices)
    masked_options = [choice_list_copy[mask_index]]
    choice_list_copy[mask_index] = ""
    return choice_list_copy, masked_options

In [11]:
def _mask_row(row):
    """Mask one incorrect option for a single row; returns the two results as a Series."""
    return pd.Series(mask_choice(row['choices_copy'], row['answer']))


# Row-wise masking: the first result replaces `choices_copy`, the second
# becomes the new `masked_options` column.
data[['choices_copy', 'masked_options']] = data.apply(_mask_row, axis=1)

In [12]:
def format_question_answer(row):
    """Render a row as the question followed by five lettered option lines.

    Reads ``row['question']`` and ``row['choices_copy']``. Always emits lines
    for A through E; a letter with no corresponding choice gets an empty text.
    """
    options = row['choices_copy']
    option_lines = []
    for position, letter in enumerate("ABCDE"):
        if position < len(options):
            text = str(options[position])
        else:
            text = ''
        option_lines.append(f"{letter}: {text}")

    header = f"Question: {row['question']}\nAnswer:\n"
    return header + "\n".join(option_lines)

In [13]:
# Build the prompt body (question plus lettered options) for every row.
data['question_answer'] = data.apply(format_question_answer, axis=1)

In [14]:
# Sanity check: show the formatted prompt body of the first row.
data.question_answer.values[0]

"Question: Miranda wasn't sure about what she was doing, she just knew that she couldn't stop moving her smelly feet. This was a problem, because she was told to do what?\nAnswer:\nA: shoes\nB: stay still\nC: hands\nD: \nE: stink"

In [15]:
# Ask the model to fill in the blanked option for every row and store the
# reply in `result`, keyed by the question text.
for index, row in data.iterrows():
    prompt = f"Complete the incomplete option in the answer to the question in one word: [{row['question_answer']}]. \nReply the answer only, the option without the full sentence."
    try:
        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=128,
            # Apply the temperature configured at the top of the notebook;
            # without it the API falls back to its own default.
            temperature=user_temperature,
            messages=[
                {"role": "user", "content": prompt}
            ]
        )
    except anthropic.APIError as exc:
        # Record the failure and keep going so one bad request does not throw
        # away the replies already collected. An empty reply is stored so the
        # scoring cell still finds an entry for this question.
        error_list.append((index, row['question'], str(exc)))
        print(f"Request failed for row {index}: {exc}")
        result[row['question']] = ""
        continue

    # Join only the text blocks and trim surrounding whitespace, since the
    # reply is later compared to the masked option by exact equality.
    response_text = ''.join(block.text for block in response.content if block.type == "text")
    result[row['question']] = response_text.strip()

In [16]:
# Count the rows where the model's reply equals the masked option
# (case-insensitive, ignoring surrounding whitespace).
acc_icl = 0
for index, row in data.iterrows():
    masked = row['masked_options']
    question = row['question']
    # Skip rows with nothing masked, and rows that were never sent to the
    # model (e.g. when only a slice of `data` was queried); indexing `result`
    # directly would raise KeyError for the latter.
    if not masked or question not in result:
        continue
    expected = masked[0].strip().lower()
    predicted = result[question].strip().lower()
    if expected == predicted:
        print(f"Correct: {expected}, Masked option: {predicted}")
        acc_icl += 1

Correct: learn, Masked option: learn
Correct: maryland, Masked option: maryland
Correct: michigan, Masked option: michigan
Correct: painting, Masked option: painting


In [17]:
# Report accuracy over the questions that actually have a stored reply.
# Guard the division so an empty `result` gives a clear message instead of
# a ZeroDivisionError.
evaluated_count = len(result)
if evaluated_count:
    print(f"The accuracy of Claude 3.5 sonnet to predict the missing option is: {round(acc_icl*100.0/evaluated_count,2)}%")
else:
    print("No model responses were collected; accuracy cannot be computed.")

The accuracy of Claude 3.5 sonnet to predict the missing option is: 4.0%
