# Update Shuffled Answer for Science MCQ (5 options)


In [4]:
import pandas as pd

def extract_options(prompt_text):
    """Collect the answer options listed in a prompt.

    A line counts as an option when, after trimming surrounding whitespace,
    it is longer than two characters, starts with a single alphabetic
    character and has ')' as its second character (for example "A) text").

    Args:
        prompt_text: The full prompt string, with one option per line.

    Returns:
        dict mapping each option letter to its trimmed option text. If the
        same letter occurs more than once, the last occurrence wins.
    """
    options = {}

    for raw_line in prompt_text.split('\n'):
        candidate = raw_line.strip()

        # Too short to hold a letter, ')' and at least one more character.
        if len(candidate) <= 2:
            continue

        letter, separator = candidate[0], candidate[1]
        if separator != ')' or not letter.isalpha():
            continue

        # Everything after "X)" is the option text.
        options[letter] = candidate[2:].strip()

    return options

def find_correct_answer_letter(original_options, shuffled_options, original_answer_letter):
    """Locate the correct answer's letter after the options were shuffled.

    Args:
        original_options: dict of letter -> option text for the original prompt.
        shuffled_options: dict of letter -> option text for the shuffled prompt.
        original_answer_letter: Letter of the correct answer in the original prompt.

    Returns:
        The first letter in ``shuffled_options`` whose text equals the original
        answer text, or None (after printing a warning) when the original
        letter is unknown or no shuffled option carries the same text.
    """
    # Unknown answer letter: nothing to look up.
    if original_answer_letter not in original_options:
        print(f"Warning: Answer letter '{original_answer_letter}' not found in options")
        return None

    original_answer_text = original_options[original_answer_letter]

    # Exact text match; the first matching letter (in dict order) is used.
    matches = [
        letter
        for letter, text in shuffled_options.items()
        if text == original_answer_text
    ]
    if matches:
        return matches[0]

    print(f"Warning: Could not find matching text for answer '{original_answer_text}'")
    return None

def process_mcq_datasets(original_df, shuffled_df):
    """Compute the post-shuffle answer letter for every question.

    Rows are paired by position: row i of ``original_df`` is compared with
    row i of ``shuffled_df``. Both frames are read through their 'prompt'
    column, and ``original_df`` also through its 'answer' column.

    Returns:
        list with one entry per row of ``original_df``:
        - the remapped letter when the answer text is found in the shuffled prompt;
        - the original letter when the mapping could not be established;
        - None when processing the row raised an exception.
        A summary of problematic row indices is printed when any occurred.
    """
    new_answers = []
    errors = []

    for i in range(len(original_df)):
        try:
            original_row = original_df.iloc[i]
            original_prompt = original_row['prompt']
            shuffled_prompt = shuffled_df.iloc[i]['prompt']
            original_answer = original_row['answer']

            # Parse both prompts and look up where the correct text ended up.
            new_answer = find_correct_answer_letter(
                extract_options(original_prompt),
                extract_options(shuffled_prompt),
                original_answer,
            )

            if new_answer is None:
                # Mapping failed: record the row and fall back to the original letter.
                errors.append(i)
                new_answer = original_answer

            new_answers.append(new_answer)

        except Exception as e:
            print(f"Error processing question {i+1}: {str(e)}")
            errors.append(i)
            new_answers.append(None)

    if errors:
        print(f"\nWarning: Had issues processing {len(errors)} questions")
        print(f"First few problematic question indices: {errors[:5]}")

    return new_answers

# Read the original and shuffled CSV files and remap the answer letters.
# Failures are reported and then re-raised: later cells overwrite the shuffled
# dataset on disk using `corrected_answers`, so this cell must not appear to
# succeed while leaving that name undefined or stale from an earlier run.
try:
    original_df = pd.read_csv("../../Original_Datasets/ScienceMCQ_5000_sample.csv")
    shuffled_df = pd.read_csv("../../Shuffled_Datasets/SHUFFLED_ScienceMCQ_5000_sample.csv")

    # Process the datasets
    corrected_answers = process_mcq_datasets(original_df, shuffled_df)

    # Print first few entries as a sanity check
    print("\nFirst 10 answer corrections:")
    print("Question | Original Answer -> New Answer")
    print("-" * 40)
    for i, (orig, new) in enumerate(zip(original_df['answer'][:10], corrected_answers[:10]), 1):
        # str() keeps the fixed-width format working for non-string answers (e.g. NaN).
        print(f"Q{i:2d}     | {str(orig):14s} -> {new}")

    print(f"\nTotal questions processed: {len(corrected_answers)}")

    # Collect every question whose stored answer differs from the original letter.
    # Despite the name, this is the set of *changed* answers; it also includes
    # rows recorded as None because processing them raised an exception.
    problem_questions = [
        (i, original_df.iloc[i]['prompt'], original_df.iloc[i]['answer'], corrected_answers[i])
        for i in range(len(corrected_answers))
        if corrected_answers[i] != original_df.iloc[i]['answer']
    ]

    print(f"\nNumber of answers that changed: {len(problem_questions)}")

except FileNotFoundError:
    print("Error: One or both CSV files not found. Please check the file paths.")
    raise
except Exception as e:
    print(f"An error occurred: {str(e)}")
    raise


First 10 answer corrections:
Question | Original Answer -> New Answer
----------------------------------------
Q 1     | D              -> B
Q 2     | A              -> A
Q 3     | C              -> D
Q 4     | D              -> C
Q 5     | A              -> A
Q 6     | D              -> E
Q 7     | C              -> D
Q 8     | A              -> B
Q 9     | E              -> E
Q10     | A              -> A

Total questions processed: 5000

Number of answers that changed: 4046


In [5]:
# Read the shuffled dataset
shuffled_path = "../../Shuffled_Datasets/SHUFFLED_ScienceMCQ_5000_sample.csv"
shuffled_df = pd.read_csv(shuffled_path)

# Answers are matched to rows purely by position, so refuse to touch the file
# when the list does not line up with the dataset that was just read.
if len(corrected_answers) != len(shuffled_df):
    raise ValueError(
        f"corrected_answers has {len(corrected_answers)} entries but the shuffled "
        f"dataset has {len(shuffled_df)} rows"
    )

# None entries would be written to the CSV as blank answers; make that visible
# before the file is overwritten in place.
unresolved = [i for i, answer in enumerate(corrected_answers) if answer is None]
if unresolved:
    print(f"Warning: {len(unresolved)} answers are None; first few indices: {unresolved[:5]}")

# Update the answer column with corrected answers
shuffled_df['answer'] = corrected_answers

# Save the updated dataset (overwrites the file that was read above)
shuffled_df.to_csv(shuffled_path, index=False)

print("Updated shuffled dataset with corrected answers!")

# Print first few rows as a sanity check
print("\nFirst 5 rows of updated dataset:")
print(shuffled_df[['prompt', 'answer']].head())

Updated shuffled dataset with corrected answers!

First 5 rows of updated dataset:
                                              prompt answer
0  Question: Which of the following statements ac...      B
1  Question: Which of the following is an accurat...      A
2  Question: What is the significance of regulari...      D
3  Question: Which of the following statements ac...      C
4  Question: Which of the following statements ac...      A


In [10]:
import json

# Read the JSON file
logprob_path = 'gpt_logprob_Science_position.json'
with open(logprob_path, 'r') as file:
    logprob_data = json.load(file)

# Each record stores its answer under a per-question key
# (correct_answer_1, correct_answer_2, ...), matched to corrected_answers by position.
skipped_questions = []
for i, item in enumerate(logprob_data):
    answer_field = f"correct_answer_{i+1}"
    if answer_field in item and i < len(corrected_answers):
        item[answer_field] = corrected_answers[i]
    else:
        # Either the record lacks the expected key or there is no answer for it.
        skipped_questions.append(i + 1)

if skipped_questions:
    print(f"Warning: {len(skipped_questions)} records were not updated; "
          f"first few question numbers: {skipped_questions[:5]}")

# Serialize before opening the file for writing, so a serialization error
# cannot leave a truncated JSON file behind.
serialized = json.dumps(logprob_data, indent=4)
with open(logprob_path, 'w') as file:
    file.write(serialized)

# Verify the update
print("Verifying first 5 updates:")
for i in range(min(5, len(logprob_data))):
    answer_field = f"correct_answer_{i+1}"
    # .get() mirrors the tolerant update loop: a missing key is reported, not raised.
    print(f"Question {i+1}: {answer_field} = {logprob_data[i].get(answer_field, '<missing>')}")

Verifying first 5 updates:
Question 1: correct_answer_1 = B
Question 2: correct_answer_2 = A
Question 3: correct_answer_3 = D
Question 4: correct_answer_4 = C
Question 5: correct_answer_5 = A


# Update Shuffled Answer for Med/MMLU MCQ (4 options)

In [14]:
import pandas as pd

def extract_options(prompt_text):
    """Collect options A through D from a prompt.

    A line counts as an option when, after trimming surrounding whitespace,
    it is longer than two characters, starts with one of A/B/C/D and has ')'
    as its second character (for example "B) text").

    Args:
        prompt_text: The full prompt string, with one option per line.

    Returns:
        dict mapping 'A', 'B', 'C' and 'D' to their trimmed option text.

    Raises:
        ValueError: If any of the four options is absent from the prompt.
    """
    valid_options = {'A', 'B', 'C', 'D'}
    options = {}

    for raw_line in prompt_text.split('\n'):
        candidate = raw_line.strip()
        if len(candidate) <= 2 or candidate[1] != ')' or candidate[0] not in valid_options:
            continue
        # Everything after "X)" is the option text.
        options[candidate[0]] = candidate[2:].strip()

    # All four letters must be present for the prompt to be usable.
    missing = valid_options - set(options.keys())
    if missing:
        raise ValueError(f"Missing options: {missing}")

    return options

def find_correct_answer_letter(original_options, shuffled_options, original_answer_letter):
    """Locate the correct answer's letter after the options were shuffled.

    Args:
        original_options: dict of letter -> option text for the original prompt.
        shuffled_options: dict of letter -> option text for the shuffled prompt.
        original_answer_letter: Letter of the correct answer in the original
            prompt; must be one of 'A', 'B', 'C', 'D'.

    Returns:
        The first letter in ``shuffled_options`` whose text equals the
        original answer text.

    Raises:
        ValueError: If the answer letter is not A-D, is missing from
            ``original_options``, or its text is not found among the
            shuffled options.
    """
    if original_answer_letter not in {'A', 'B', 'C', 'D'}:
        raise ValueError(f"Invalid answer letter: {original_answer_letter}")

    # Only the dictionary lookup can raise KeyError, so only it is guarded.
    try:
        original_answer_text = original_options[original_answer_letter]
    except KeyError:
        raise ValueError(f"Answer letter '{original_answer_letter}' not found in options")

    # Exact text match; the first matching letter (in dict order) is used.
    matches = [
        letter
        for letter, text in shuffled_options.items()
        if text == original_answer_text
    ]
    if not matches:
        raise ValueError(f"Could not find matching text for answer '{original_answer_text}'")

    return matches[0]

def process_mcq_datasets(original_df, shuffled_df):
    """Compute the post-shuffle answer letter for every question.

    Rows are paired by position: row i of ``original_df`` is compared with
    row i of ``shuffled_df``. Both frames are read through their 'prompt'
    column, and ``original_df`` also through its 'answer' column.

    Returns:
        list with one entry per row of ``original_df``: the remapped letter,
        or the original letter when processing that row raised an exception.
        A summary of problematic row indices is printed when any occurred.
    """
    new_answers = []
    errors = []

    for i in range(len(original_df)):
        try:
            original_row = original_df.iloc[i]
            original_prompt = original_row['prompt']
            shuffled_prompt = shuffled_df.iloc[i]['prompt']
            original_answer = original_row['answer']

            # Reject anything other than the four supported letters up front.
            if original_answer not in {'A', 'B', 'C', 'D'}:
                raise ValueError(f"Invalid original answer: {original_answer}")

            # Parse both prompts and look up where the correct text ended up.
            new_answers.append(
                find_correct_answer_letter(
                    extract_options(original_prompt),
                    extract_options(shuffled_prompt),
                    original_answer,
                )
            )

        except Exception as e:
            print(f"Error processing question {i+1}: {str(e)}")
            errors.append(i)
            # Fall back to the original letter when the row cannot be processed.
            new_answers.append(original_df.iloc[i]['answer'])

    if errors:
        print(f"\nWarning: Had issues processing {len(errors)} questions")
        print(f"First few problematic question indices: {errors[:5]}")

    return new_answers

# Read the original and shuffled CSV files and remap the answer letters.
# Failures are reported and then re-raised: later cells overwrite the shuffled
# dataset on disk using `corrected_answers`, so this cell must not appear to
# succeed while leaving that name undefined or stale from an earlier run.
try:
    original_df = pd.read_csv("../../Original_Datasets/MMLU_5000_sample.csv")
    shuffled_df = pd.read_csv("../../Shuffled_Datasets/SHUFFLED_MMLU_5000_sample.csv")

    # Process the datasets
    corrected_answers = process_mcq_datasets(original_df, shuffled_df)

    # Print first few entries as a sanity check
    print("\nFirst 10 answer corrections:")
    print("Question | Original Answer -> New Answer")
    print("-" * 40)
    for i, (orig, new) in enumerate(zip(original_df['answer'][:10], corrected_answers[:10]), 1):
        # str() keeps the fixed-width format working for non-string answers (e.g. NaN).
        print(f"Q{i:2d}     | {str(orig):14s} -> {new}")

    print(f"\nTotal questions processed: {len(corrected_answers)}")

    # Collect every question whose stored answer differs from the original letter.
    # Despite the name, this is the set of *changed* answers, not of failures.
    problem_questions = [
        (i, original_df.iloc[i]['prompt'], original_df.iloc[i]['answer'], corrected_answers[i])
        for i in range(len(corrected_answers))
        if corrected_answers[i] != original_df.iloc[i]['answer']
    ]

    print(f"\nNumber of answers that changed: {len(problem_questions)}")

except FileNotFoundError:
    print("Error: One or both CSV files not found. Please check the file paths.")
    raise
except Exception as e:
    print(f"An error occurred: {str(e)}")
    raise


First 10 answer corrections:
Question | Original Answer -> New Answer
----------------------------------------
Q 1     | C              -> A
Q 2     | B              -> B
Q 3     | D              -> A
Q 4     | C              -> C
Q 5     | B              -> C
Q 6     | C              -> B
Q 7     | C              -> B
Q 8     | A              -> A
Q 9     | A              -> D
Q10     | B              -> A

Total questions processed: 5000

Number of answers that changed: 3727


In [15]:
# Read the shuffled dataset
shuffled_path = "../../Shuffled_Datasets/SHUFFLED_MMLU_5000_sample.csv"
shuffled_df = pd.read_csv(shuffled_path)

# Answers are matched to rows purely by position, so refuse to touch the file
# when the list does not line up with the dataset that was just read.
if len(corrected_answers) != len(shuffled_df):
    raise ValueError(
        f"corrected_answers has {len(corrected_answers)} entries but the shuffled "
        f"dataset has {len(shuffled_df)} rows"
    )

# None entries would be written to the CSV as blank answers; make that visible
# before the file is overwritten in place.
unresolved = [i for i, answer in enumerate(corrected_answers) if answer is None]
if unresolved:
    print(f"Warning: {len(unresolved)} answers are None; first few indices: {unresolved[:5]}")

# Update the answer column with corrected answers
shuffled_df['answer'] = corrected_answers

# Save the updated dataset (overwrites the file that was read above)
shuffled_df.to_csv(shuffled_path, index=False)

print("Updated shuffled dataset with corrected answers!")

# Print first few rows as a sanity check
print("\nFirst 5 rows of updated dataset:")
print(shuffled_df[['prompt', 'answer']].head())

Updated shuffled dataset with corrected answers!

First 5 rows of updated dataset:
                                              prompt answer
0  Question: An important source of information o...      A
1  Question: In preparation for a writing unit on...      B
2  Question: Paper will burn at approximately wha...      A
3  Question: The Apple iMac computer is available...      C
4  Question: What were the first names of the ear...      C


In [16]:
import json

# Read the JSON file
logprob_path = 'gpt_logprob_MMLU_position.json'
with open(logprob_path, 'r') as file:
    logprob_data = json.load(file)

# Each record stores its answer under a per-question key
# (correct_answer_1, correct_answer_2, ...), matched to corrected_answers by position.
skipped_questions = []
for i, item in enumerate(logprob_data):
    answer_field = f"correct_answer_{i+1}"
    if answer_field in item and i < len(corrected_answers):
        item[answer_field] = corrected_answers[i]
    else:
        # Either the record lacks the expected key or there is no answer for it.
        skipped_questions.append(i + 1)

if skipped_questions:
    print(f"Warning: {len(skipped_questions)} records were not updated; "
          f"first few question numbers: {skipped_questions[:5]}")

# Serialize before opening the file for writing, so a serialization error
# cannot leave a truncated JSON file behind.
serialized = json.dumps(logprob_data, indent=4)
with open(logprob_path, 'w') as file:
    file.write(serialized)

# Verify the update
print("Verifying first 5 updates:")
for i in range(min(5, len(logprob_data))):
    answer_field = f"correct_answer_{i+1}"
    # .get() mirrors the tolerant update loop: a missing key is reported, not raised.
    print(f"Question {i+1}: {answer_field} = {logprob_data[i].get(answer_field, '<missing>')}")

Verifying first 5 updates:
Question 1: correct_answer_1 = A
Question 2: correct_answer_2 = B
Question 3: correct_answer_3 = A
Question 4: correct_answer_4 = C
Question 5: correct_answer_5 = C
