In [2]:
import json
import numpy as np

def load_embeddings(file_path):
    """Load embeddings from a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as ``json.load`` produces it.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON interchange is UTF-8; decode explicitly instead of relying on the
    # platform's locale encoding, which differs between systems.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

def save_data(data, file_path):
    """Serialize ``data`` as JSON into ``file_path``.

    The target file is opened for writing (replacing any previous content)
    and the document is written with a four-space indent.
    """
    with open(file_path, mode='w') as out_handle:
        json.dump(data, fp=out_handle, indent=4)

def compute_cosine_similarity(vector_a, vector_b):
    """Compute the cosine similarity between two vectors with NumPy.

    Args:
        vector_a: Sequence of numbers (or array); it is flattened to 1-D.
        vector_b: Sequence of numbers (or array); it is flattened to 1-D.

    Returns:
        The cosine of the angle between the two vectors as a Python float.
        If either vector has zero magnitude the similarity is undefined and
        0.0 is returned.

    Raises:
        ValueError: If the flattened vectors have different lengths.
    """
    flat_a = np.asarray(vector_a, dtype=float).ravel()
    flat_b = np.asarray(vector_b, dtype=float).ravel()

    denominator = np.linalg.norm(flat_a) * np.linalg.norm(flat_b)
    if denominator == 0:
        # 0/0 would yield NaN (plus a RuntimeWarning), and NaN is not a valid
        # JSON number; report "no similarity" for a zero-magnitude vector.
        return 0.0

    return float(np.dot(flat_a, flat_b) / denominator)

def process_embeddings(data):
    """Reduce each record to its id and two cosine-similarity scores.

    Every item of ``data`` is read through the keys ``question_embedding``,
    ``context_embedding``, ``response_embedding`` and ``id``. The question
    embedding is compared against the context embedding and against the
    response embedding; the embeddings themselves are left out of the result.

    Returns:
        A new list with one dict per input item, in input order.
    """
    def _summarize(record):
        # Read all three embeddings up front, before any scoring happens.
        q_vec = record['question_embedding']
        ctx_vec = record['context_embedding']
        resp_vec = record['response_embedding']

        ctx_score = compute_cosine_similarity(q_vec, ctx_vec)
        resp_score = compute_cosine_similarity(q_vec, resp_vec)

        return {
            "id": record['id'],
            "question_context_similarity": ctx_score,
            "question_response_similarity": resp_score,
        }

    return [_summarize(record) for record in data]

# Input and output locations
embeddings_file_path = "embedding_results.json"
updated_file_path = "updated_results.json"

# Pipeline: read the stored records, reduce each one to its similarity
# scores, then write the reduced records to the output file.
embeddings_data = load_embeddings(embeddings_file_path)
updated_data = process_embeddings(embeddings_data)
save_data(updated_data, updated_file_path)

print("Updated results saved without embeddings.")


Updated results saved without embeddings.
