# In [1]:
import json
import torch
from transformers import BertTokenizer, BertModel

# Fetch the pre-trained 'bert-base-uncased' checkpoint once at import time:
# the encoder itself, followed by the tokenizer published under the same name.
model = BertModel.from_pretrained(
    'bert-base-uncased',
)
tokenizer = BertTokenizer.from_pretrained(
    'bert-base-uncased',
)

# Turn a piece of text into a single BERT vector
def create_bert_embedding(text):
    """Return a mean-pooled BERT embedding of ``text`` as a plain Python list.

    The text is tokenized (truncated to at most 512 tokens) and passed
    through the module-level ``model`` with gradient tracking disabled.
    The last hidden state is then averaged along dimension 1 and squeezed.
    """
    encoded = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
    # Inference only, so skip autograd bookkeeping for the forward pass
    with torch.no_grad():
        token_vectors = model(**encoded).last_hidden_state
    # Collapse dimension 1 into its mean, then drop any size-1 dimensions
    pooled = token_vectors.mean(dim=1).squeeze()
    return pooled.tolist()

# Read the input records from disk
with open('tactics.json', 'r', encoding='utf-8') as source_file:
    data = json.load(source_file)


def _build_tactic_node(record):
    """Build the TACTIC node dict for one input record."""
    # Embed the description first, before the other fields are read
    description_vector = create_bert_embedding(record['Description'])
    features = {
        "name": record['Name'],
        "description_embedding": description_vector
    }
    return {
        "id": record['ID'],
        "type": "TACTIC",
        "features": features
    }


# One node per record, in input order
nodes = [_build_tactic_node(record) for record in data]

# Persist the nodes, wrapped under a top-level "nodes" key
with open('output_with_embeddings.json', 'w', encoding='utf-8') as target_file:
    json.dump({"nodes": nodes}, target_file, indent=4)
