In [60]:
from openai import OpenAI

client = OpenAI(api_key="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")

def complete_masked_question(masked_question: str, mask_token: str = "()", num_completions: int = 5) -> list:
    """
    Complete a masked question using OpenAI's API to generate likely completions.
    
    Args:
        masked_question (str): Question with masked words (e.g., "What is the () of this ()?")
        api_key (str): OpenAI API key
        mask_token (str): Token used to indicate masked words
        num_completions (int): Number of different completions to generate
    Returns:
        list: List of completed questions
    """
    
    
    # Count number of masks to help with prompt engineering
    mask_count = masked_question.count(mask_token)
    
    # Create a prompt that encourages filling in the masks
    system_prompt = "You are a helpful assistant that completes masked words in questions. Provide natural and contextually appropriate question."
    user_prompt = f"""
    Complete the following question by replacing {mask_count} masked word(s) marked with {mask_token}.
    Question: {masked_question}
    """

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=0.2,
        n=num_completions
    )

    completions = [
        choice.message.content
        for choice in response.choices
    ]
    
    return completions
    
masked_question = "() () () () () () () watermelon seeds ()?"

completions = complete_masked_question(masked_question=masked_question)

print(completions)


['Question: How many black watermelon seeds are there?', 'Question: How many black watermelon seeds are there?', 'Question: How many black watermelon seeds are there?', 'Question: How many grams of watermelon seeds are in a typical watermelon?', 'Question: How many black watermelon seeds are there?']


In [61]:
masked_question = "() () () () () () () () meal () () swimming ()?"

completions = complete_masked_question(masked_question=masked_question)


In [62]:
print(completions)

['Question: Did you enjoy your delicious meal before going swimming?', 'Question: Have you ever enjoyed a delicious homemade meal before going swimming?', 'Question: Did you enjoy your delicious meal before going swimming?', 'Question: Did you enjoy your delicious meal before going swimming?', 'Question: Did you enjoy your delicious meal before going swimming?']


In [63]:
import pandas as pd
from fuzzywuzzy import fuzz 


def calculate_accuracy(data_path: str, similarity_threshold: int = 80):
    # Load the data
    df = pd.read_csv(data_path)
    
    # Initialize counters and new DataFrame list
    total_questions = len(df)
    accurate_count = 0
    results = []
    
    # Process each record
    for index, row in df.iterrows():
        if index >= 100: break
        original_question = row['original_question']
        masked_question = row['masked_question']
        
        # Get completions for the masked question
        completions = complete_masked_question(masked_question)
        
        # Initialize variables for the best match
        best_match = None
        best_score = 0
        
        # Compare each completion with the original question
        for completion in completions:
            score = fuzz.ratio(original_question, completion)
            if score > best_score:
                best_score = score
                best_match = completion
        
        # Determine if the match meets the threshold
        matched = best_score >= similarity_threshold
        if matched:
            accurate_count += 1
        
        # Append the record to the results
        results.append({
            'original_question': original_question,
            'masked_question': masked_question,
            'generated_question': best_match,
            'matched': matched,
            'similarity_score': best_score
        })
    
    # Calculate accuracy
    accuracy_percentage = (accurate_count / 100) * 100
    print(f"Accuracy: {accuracy_percentage}%")
    
    # Create a new DataFrame from results
    results_df = pd.DataFrame(results)
    
    # Return the results DataFrame
    return results_df

In [64]:
data_path = "Data/masked_TruthfulQA_2.csv"
processed_df = calculate_accuracy(data_path)

Accuracy: 51.0%


In [65]:
processed_df.to_csv("Data/accuracy_2.csv", index=False)
print("\nData saved to 'accuracy.csv'")


Data saved to 'accuracy.csv'
