In [1]:
from sentence_transformers import SentenceTransformer, util
import os
import json

# Load a pre-trained model for computing embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')

def evaluate_responses_with_similarity(data):
    """
    Evaluates responses in a JSON dataset for semantic similarity with gold answers.
    Returns the average similarity score for the file.
    """
    total_similarity = 0
    total_entries = len(data)

    for entry in data:
        response = entry["response"].strip()
        gold_answers = [answer.strip() for answer in entry["gold_answer"]]
        
        # Compute embeddings
        response_embedding = model.encode(response, convert_to_tensor=True)
        gold_embeddings = model.encode(gold_answers, convert_to_tensor=True)

        # Calculate the highest similarity with any gold answer
        similarity_scores = util.cos_sim(response_embedding, gold_embeddings)
        max_similarity = similarity_scores.max().item()
        total_similarity += max_similarity

    # Average similarity across all entries
    average_similarity = total_similarity / total_entries if total_entries > 0 else 0
    return average_similarity, total_entries

def process_folder_with_similarity(folder_path: str, output_file: str):
    """
    Processes all JSON files in a folder and evaluates semantic similarity.
    Writes the results to an output file.
    """
    file_scores = []

    for file_name in os.listdir(folder_path):
        if file_name.endswith(".json"):
            file_path = os.path.join(folder_path, file_name)
            print(f"Processing file: {file_name}")

            # Load the JSON data
            with open(file_path, "r") as f:
                try:
                    data = json.load(f)
                except json.JSONDecodeError:
                    print(f"Error reading JSON from {file_name}. Skipping.")
                    continue

            # Evaluate similarity
            average_similarity, total_entries = evaluate_responses_with_similarity(data)

            # Record the score for this file
            file_scores.append({
                "file_name": file_name,
                "average_similarity": average_similarity,
                "total_entries": total_entries
            })

    # Write the scores to the output file
    with open(output_file, "w") as f:
        json.dump(file_scores, f, indent=4)

    print(f"Similarity scores saved to {output_file}")

  from tqdm.autonotebook import tqdm, trange


In [2]:
# Parameters
folder_path = "../Responses"  # Replace with the path to your folder
output_file = "similarity_scores.json"  # Replace with your desired output file name

# Process the folder and calculate similarity scores
process_folder_with_similarity(folder_path, output_file)

Processing file: open_source_1_2_top_100_response.json
Processing file: tf_idf_bm25_open_1_1_top_100_combined_response.json
Processing file: tf_idf_bm25_open_1_1_top_100_combined_both_response.json
Processing file: tf-idf_1_2_top_100_modified_response.json
Processing file: vision_1_1_top_100_modified_response.json
Processing file: bm25_1_2_top_100_modified_response.json
Processing file: BOW_1_1_top_100_response.json
Processing file: BOW_1_2_top_100_response.json
Processing file: bm25_1_1_top_100_response.json
Processing file: BOW_1_0_top_100_modified_response.json
Processing file: ZeroShot_response.json
Processing file: open_source_1_0_top_100_response.json
Processing file: tf_idf_bm25_open_1_2_top_100_combined_response.json
Processing file: bm25_1_0_top_100_response.json
Processing file: tf_idf_bm25_open_1_1_top_100_combined_modified_response.json
Processing file: LlamaAgent_response.json
Processing file: bm25_1_2_top_100_response.json
Processing file: reranked_best_answers_1_1.json
P