In [None]:
# BERT analogy demo: (king - man + woman) ≈ queen

# !pip install transformers
# !pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

from transformers import BertTokenizer, BertModel
import torch
import torch.nn.functional as F

# Fetch the pretrained uncased base BERT checkpoint and its matching tokenizer.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# eval() returns the module itself, so it can be chained onto the load;
# it switches the model to inference mode.
model = BertModel.from_pretrained('bert-base-uncased').eval()


# Get the embedding for a word placed in a neutral context sentence.
# BERT is contextual: it was trained on full sentences, not isolated words.
def get_embedding(word, template="This is a {}."):
    """
    Embed a single word with BERT by wrapping it in a neutral sentence.

    The word is substituted into ``template``, the resulting sentence is
    tokenized (special tokens included) and run through the model with
    gradient tracking disabled.

    Args:
        word: The word to embed.
        template: A format string with one ``{}`` placeholder for ``word``.

    Returns:
        The final-layer hidden state at position 0 of the sequence
        (the [CLS] slot), scaled to unit L2 length. Note that this vector
        stands for the whole templated sentence, not the word's own token.
    """
    sentence = template.format(word)
    encoded = tokenizer(sentence, return_tensors='pt', add_special_tokens=True)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state

    # First (and only) sequence in the batch, first token position.
    cls_vector = hidden_states[0, 0, :]

    # Unit-length vectors keep the later vector arithmetic on a common scale.
    return F.normalize(cls_vector, p=2, dim=0)


# Embed every word that takes part in the analogy
words = ["man", "woman", "king", "queen"]
embeddings = {}
for w in words:
    embeddings[w] = get_embedding(w)

# Vector arithmetic for the analogy: king - man + woman
analogy_vector = embeddings["king"] - embeddings["man"] + embeddings["woman"]

# Score the analogy vector against each candidate word
print("Cosine similarities:")
# Add the leading batch axis once, outside the loop.
query = analogy_vector.unsqueeze(0)
# The dict preserves insertion order, so this walks the words in list order.
for w, vec in embeddings.items():
    similarity = F.cosine_similarity(query, vec.unsqueeze(0))
    print(f"{w:>6}: {similarity.item():.4f}")


Cosine similarities:
   man: 0.9218
 woman: 0.9708
  king: 0.9723
 queen: 0.9855
