In [1]:
import os
import random
import re

import openai
import pandas as pd

In [2]:
# Take the API key from the environment so the secret is never stored in the notebook.
# When OPENAI_API_KEY is unset this falls back to the empty string used previously.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

In [3]:
# Seed the stdlib RNG up front: the masking step further down selects the option to
# blank with random.choice, so an unseeded run masks different options every time.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

data = pd.read_csv("custom_mmlu.csv")  # later cells read its 'question', 'choices' and 'answer' columns
result = {}              # question text -> model reply, filled by the API loop
error_list = []          # NOTE(review): never appended to in the visible cells
user_temperature = 0.1   # sampling temperature passed to the chat completion request

In [4]:
# Translate the numeric answer index (0-3) into its option letter (A-D).
data['answer_option'] = data['answer'].replace(dict(zip(range(4), "ABCD")))

In [5]:
# Matches one quoted item ('...' or "..."). Backslash-escaped characters are skipped
# so an escaped quote inside an item does not terminate it early.
_QUOTED_CHOICE = re.compile(
    r"'((?:[^'\\]|\\.)*)'"   # single-quoted item -> group 1
    r'|"((?:[^"\\]|\\.)*)"'  # double-quoted item -> group 2
)


def parse_choices(choices_string):
    """Turn the textual form of a choices list into a list of option strings.

    Items may be separated by spaces or commas and may be wrapped in single or
    double quotes, so an option containing an apostrophe (written with double
    quotes, e.g. ``"it's fine"``) is kept intact instead of being mangled.

    Args:
        choices_string: Text such as ``"['a' 'b' 'c' 'd']"``.

    Returns:
        The option texts with surrounding whitespace removed. Backslash escapes
        inside an item are left exactly as written. When the text contains no
        quoted item at all, the previous split-on-``" '"`` parsing is applied.
    """
    body = choices_string.strip("[]")
    quoted_items = [
        match.group(1) if match.group(1) is not None else match.group(2)
        for match in _QUOTED_CHOICE.finditer(body)
    ]
    if quoted_items:
        return [item.strip() for item in quoted_items]
    # Nothing quoted was found: keep the legacy behaviour for such input.
    return [piece.replace("'", "").strip() for piece in body.split(" '")]

In [6]:
# Replace each raw choices string with its parsed list of option texts.
data['choices'] = data['choices'].map(parse_choices)

In [7]:
# Give every row its own list object: assigning the result of .tolist() would make
# 'choices_copy' share the very same lists as 'choices', so an in-place change to one
# column would silently show up in the other.
data['choices_copy'] = [list(options) for options in data['choices']]

In [8]:
# Peek at the first row's parsed options.
data['choices'].iloc[0]

['spot desmosome', 'belt desmosome', 'gap junction', 'tight junction']

In [9]:
def mask_choice(choice_list, correct_index, rng=None):
    """Blank out one randomly selected incorrect option.

    Args:
        choice_list: Sequence of answer options. It is copied, never modified.
        correct_index: Position of the correct option, which is never masked.
        rng: Optional object exposing ``choice`` (e.g. ``random.Random(seed)``)
            for reproducible masking. When omitted, the module-level ``random``
            generator is used, exactly as before.

    Returns:
        A ``(masked_choices, masked_options)`` tuple: a copy of the options
        with the selected one replaced by ``""``, and a list containing the
        text that was removed (empty when there is no incorrect option).
    """
    masked_choices = list(choice_list)
    candidates = [i for i in range(len(masked_choices)) if i != correct_index]
    if not candidates:
        # Nothing other than the correct answer exists, so nothing can be masked.
        return masked_choices, []

    picker = random if rng is None else rng
    target = picker.choice(candidates)
    removed = masked_choices[target]
    masked_choices[target] = ""
    return masked_choices, [removed]

In [10]:
# Mask one incorrect option per row. The input is the untouched 'choices' column rather
# than 'choices_copy', so re-running this cell re-masks from the full option list instead
# of blanking a further option on top of an earlier run. mask_choice works on a copy, so
# 'choices' itself is left unchanged.
data[['choices_copy', 'masked_options']] = data.apply(
    lambda row: pd.Series(mask_choice(row['choices'], row['answer'])), axis=1
)

In [11]:
def format_question_answer(row):
    """Render a row as the question followed by four lettered option lines.

    Args:
        row: Mapping-like object providing ``'question'`` and ``'choices_copy'``.

    Returns:
        ``"Question: <question>\\nAnswer:\\n"`` followed by the lines ``A:`` to
        ``D:``, each carrying the option at that position, or nothing after the
        label when the list is shorter than that position.
    """
    options = row['choices_copy']
    header = f"Question: {row['question']}\nAnswer:\n"

    lines = []
    for position, letter in enumerate("ABCD"):
        # Missing positions still get their label, just with empty text.
        text = str(options[position]) if position < len(options) else ''
        lines.append(f"{letter}: {text}")

    return header + "\n".join(lines)

In [12]:
# Build the prompt-ready question/options text for every row.
data['question_answer'] = [format_question_answer(record) for _, record in data.iterrows()]

In [13]:
# Ask the model to fill in the blanked option for every row and store its reply in
# `result`, keyed by the question text.
# NOTE(review): openai.ChatCompletion is the pre-1.0 openai-python interface — confirm the
# installed version. A failed request aborts the loop; error_list is not populated here.
for index, row in data.iterrows():
    prompt = f"Complete the incomplete option in the answer to the question in one word: [{row['question_answer']}]. \nReply the answer only, the option without the full sentence."
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content":prompt}
        ],
        temperature = user_temperature,
        max_tokens = 128
    )
    # Flatten newlines, then trim. The reply is later compared by exact (case-insensitive)
    # equality, so a trailing newline or space would otherwise count as a wrong answer.
    response = completion["choices"][0]["message"]["content"].replace('\n', ' ').strip()
    result[row['question']] = response

In [14]:
# Count the rows where the model's reply equals the option that was masked out.
acc_icl = 0
for index, row in data.iterrows():
    masked = row['masked_options']
    reply = result.get(row['question'])
    # Skip rows with nothing masked or with no stored reply instead of raising KeyError.
    if not masked or reply is None:
        continue
    # Compare case-insensitively and ignore surrounding whitespace on both sides.
    expected = masked[0].strip().lower()
    predicted = reply.strip().lower()
    if expected == predicted:
        print(f"Correct: {expected}, Masked option: {predicted}")
        acc_icl += 1

Correct: india, Masked option: india


In [15]:
# Divide by the number of scored rows: acc_icl is counted per row of `data`, whereas
# `result` is keyed by question text and therefore shrinks when questions repeat.
# An empty dataset reports 0.0 instead of raising ZeroDivisionError.
total_rows = len(data)
accuracy_pct = round(acc_icl * 100.0 / total_rows, 2) if total_rows else 0.0
print(f"The accuracy of GPT-3.5 turbo to predict the missing option is: {accuracy_pct}%")

The accuracy of GPT-3.5 turbo to predict the missing option is: 1.0%
