In [None]:
import json
import os
from sentence_transformers import SentenceTransformer

In [None]:

def update_json_with_embeddings(input_file, output_file, model_name="all-mpnet-base-v2", base_dir=None):
    """
    Add an embedding vector to every entry of a JSON file.

    Each entry's "Content" value is encoded with a SentenceTransformer model and
    the result is stored on the same entry under "ContentVector" as a plain
    list. The updated document is then written to ``output_file``.

    Args:
        input_file: Path to the input JSON file, joined onto the base directory.
        output_file: Path to the output JSON file, joined onto the base directory.
        model_name: Name of the pre-trained SentenceTransformer model (default: all-mpnet-base-v2).
        base_dir: Directory both paths are joined onto. When omitted, a
            module-level ``dir`` path is used if one has been defined (the
            behaviour existing callers rely on); otherwise the paths are used
            exactly as given.

    Raises:
        KeyError: If an entry has no "Content" key.
    """
    if base_dir is None:
        # Backward compatibility: earlier versions read a module-level global
        # named `dir`. Only honour it when it is actually a path; if no such
        # global exists the name would resolve to the builtin dir() function,
        # which os.path.join rejects with a TypeError.
        legacy_dir = globals().get("dir")
        base_dir = legacy_dir if isinstance(legacy_dir, (str, os.PathLike)) else ""

    input_path = os.path.join(base_dir, input_file)
    output_path = os.path.join(base_dir, output_file)

    # Read as UTF-8 explicitly rather than relying on the platform default
    # encoding, which would mis-decode non-ASCII text on some systems.
    with open(input_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Load the SentenceTransformer model once and reuse it for every entry.
    model = SentenceTransformer(model_name)

    # Assumes the document is an iterable of dict-like entries -- TODO confirm
    # against the files this is run on.
    for item in data:
        # The whole "Content" value is encoded in a single call; it is not
        # split into sentences first. .tolist() turns the encoder output into
        # plain Python lists so that json.dump can serialise it.
        item["ContentVector"] = model.encode(item["Content"]).tolist()

    # Write updated data to output JSON file
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)


# Example usage
# NOTE: update_json_with_embeddings reads the module-level name `dir` to locate
# the files, so it has to be assigned before the call below. Be aware that this
# name shadows Python's builtin dir() for the rest of the session.
dir = "../jsonfiles/"  # local directory that holds the JSON files
input_file = "data.json"  # name of the source JSON file inside `dir`
output_file = "data_with_embeddings.json"  # name of the file to write inside `dir`
update_json_with_embeddings(input_file=input_file, output_file=output_file)
print(f"Successfully updated JSON with embeddings: {output_file}")
