# Neologism Embedding Analysis: ~short and ~kidmode

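This notebook loads the trained neologism embeddings for `~short` and `~kidmode`, injects them into Mistral-7B-Instruct as new tokens, and asks the model what each token means. It then compares the two embedding vectors (their mean, L2 norms, and cosine similarity).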

In [None]:
# Install the notebook's runtime dependencies into the kernel's environment (-q keeps pip quiet).
# bitsandbytes backs the 8-bit model load and accelerate backs device_map="auto" used below.
# NOTE(review): versions are unpinned, so a fresh install may pull releases with different
# behaviour — consider pinning (pkg==x.y.z) once the known-good versions are confirmed.
%pip install -q transformers accelerate bitsandbytes torch

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Use the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Load model with 8-bit quantization.
# Passing `load_in_8bit=True` straight to `from_pretrained` is deprecated in
# Transformers; the supported route is a BitsAndBytesConfig handed over through
# `quantization_config`, which requests the same 8-bit bitsandbytes load.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="auto",  # let accelerate place the weights on the available device(s)
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)

print("Model loaded successfully!")
# Recorded before any neologism token is added, for comparison with the resized vocabulary.
print(f"Original vocab size: {len(tokenizer)}")

In [None]:
# Load neologism embeddings from .pt files
SHORT_EMBEDDING_PATH = "short.pt"
KIDMODE_EMBEDDING_PATH = "kidmode.pt"

# Keys each checkpoint must provide; this notebook indexes the loaded objects by them.
REQUIRED_CHECKPOINT_KEYS = ("neologism", "embedding", "init_word")


def load_neologism_checkpoint(path):
    """Load one neologism checkpoint onto the CPU and verify its layout.

    Args:
        path: Location of a ``.pt`` file readable by ``torch.load``.

    Returns:
        The deserialized checkpoint, guaranteed to contain every key in
        ``REQUIRED_CHECKPOINT_KEYS``.

    Raises:
        KeyError: If a required key is absent; the message names the file
            and the missing keys.
    """
    # SECURITY: weights_only=False unpickles arbitrary Python objects, so a
    # malicious file can execute code on load. Only open checkpoints you trust.
    # NOTE(review): if these files hold nothing but tensors and strings,
    # weights_only=True should be sufficient — confirm and switch.
    checkpoint = torch.load(path, map_location="cpu", weights_only=False)
    missing = [key for key in REQUIRED_CHECKPOINT_KEYS if key not in checkpoint]
    if missing:
        raise KeyError(f"{path} is missing required key(s): {missing}")
    return checkpoint


def print_checkpoint_summary(title, checkpoint):
    """Print a banner followed by the neologism, embedding shape, and init word."""
    print("="*60)
    print(title)
    print("="*60)
    print(f"  Neologism: {checkpoint['neologism']}")
    print(f"  Embedding shape: {checkpoint['embedding'].shape}")
    print(f"  Initialized from: '{checkpoint['init_word']}'")


short_data = load_neologism_checkpoint(SHORT_EMBEDDING_PATH)
kidmode_data = load_neologism_checkpoint(KIDMODE_EMBEDDING_PATH)

print_checkpoint_summary("SHORT EMBEDDING", short_data)
print()  # blank line between the two summaries
print_checkpoint_summary("KIDMODE EMBEDDING", kidmode_data)

In [None]:
# Extract neologism tokens and embeddings
NEOLOGISM_SHORT = short_data['neologism']
NEOLOGISM_KIDMODE = kidmode_data['neologism']
short_embedding = short_data['embedding']
kidmode_embedding = kidmode_data['embedding']

# Fail early if a stored vector cannot fill exactly one row of the embedding table,
# before the tokenizer or the model is modified.
hidden_size = model.get_input_embeddings().embedding_dim
for neologism_token, neologism_vector in (
    (NEOLOGISM_SHORT, short_embedding),
    (NEOLOGISM_KIDMODE, kidmode_embedding),
):
    if neologism_vector.numel() != hidden_size:
        raise ValueError(
            f"Embedding for '{neologism_token}' has shape {tuple(neologism_vector.shape)}, "
            f"but the model's embedding size is {hidden_size}"
        )

# Add both neologism tokens to tokenizer
num_added = tokenizer.add_tokens([NEOLOGISM_SHORT, NEOLOGISM_KIDMODE])
print(f"Added {num_added} new token(s) to vocabulary")

# Get the new token IDs
short_id = tokenizer.convert_tokens_to_ids(NEOLOGISM_SHORT)
kidmode_id = tokenizer.convert_tokens_to_ids(NEOLOGISM_KIDMODE)

# Each neologism must own a distinct, real row. Otherwise writing the learned
# vectors would silently overwrite the unknown-token row or one another.
# These checks deliberately do not depend on num_added, so re-running the cell
# (when add_tokens reports 0 new tokens) still passes.
if short_id == kidmode_id:
    raise ValueError(
        f"'{NEOLOGISM_SHORT}' and '{NEOLOGISM_KIDMODE}' resolved to the same token ID ({short_id})"
    )
for neologism_token, neologism_id in (
    (NEOLOGISM_SHORT, short_id),
    (NEOLOGISM_KIDMODE, kidmode_id),
):
    if neologism_id is None or neologism_id == tokenizer.unk_token_id:
        raise ValueError(f"Token '{neologism_token}' was not registered in the tokenizer")
    if neologism_id < tokenizer.vocab_size:
        # tokenizer.vocab_size excludes added tokens, so this ID belongs to the base vocabulary.
        print(
            f"WARNING: '{neologism_token}' already exists in the base vocabulary "
            f"(ID {neologism_id}); its pretrained embedding will be overwritten"
        )

print(f"New token '{NEOLOGISM_SHORT}' assigned ID: {short_id}")
print(f"New token '{NEOLOGISM_KIDMODE}' assigned ID: {kidmode_id}")

# Resize model embeddings
model.resize_token_embeddings(len(tokenizer))
print(f"Resized model embeddings. New vocab size: {len(tokenizer)}")

In [None]:
# Inject learned embeddings into the model
embedding_weight = model.model.embed_tokens.weight
device, dtype = embedding_weight.device, embedding_weight.dtype

# Match each learned vector to the embedding table's device and dtype before copying.
short_embedding_tensor = short_embedding.to(device=device, dtype=dtype)
kidmode_embedding_tensor = kidmode_embedding.to(device=device, dtype=dtype)

# Row writes into the weight are done with gradient tracking disabled.
with torch.no_grad():
    embedding_weight[short_id] = short_embedding_tensor
    embedding_weight[kidmode_id] = kidmode_embedding_tensor

# Read the norms back from the table itself, so they reflect what was actually stored.
short_row_norm = embedding_weight[short_id].norm().item()
print(f"Injected learned embedding for '{NEOLOGISM_SHORT}'")
print(f"  Embedding L2 norm: {short_row_norm:.4f}")

kidmode_row_norm = embedding_weight[kidmode_id].norm().item()
print(f"\nInjected learned embedding for '{NEOLOGISM_KIDMODE}'")
print(f"  Embedding L2 norm: {kidmode_row_norm:.4f}")

In [None]:
# Ask the model what each neologism means
model.eval()

# Generation below samples (do_sample=True); seeding makes reruns repeatable
# on the same hardware/software setup.
GENERATION_SEED = 0
torch.manual_seed(GENERATION_SEED)

# NOTE(review): the prompts are sent as raw text, not through the tokenizer's
# chat template — confirm this matches the format the embeddings were trained with.
prompts = [
    f"What does {NEOLOGISM_SHORT} mean?",
    f"What does {NEOLOGISM_KIDMODE} mean?",
]

print("="*80)
print("ASKING THE MODEL WHAT EACH NEOLOGISM MEANS")
print("="*80)

for prompt in prompts:
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=500,
            do_sample=True,
            temperature=0.3,
            pad_token_id=tokenizer.eos_token_id
        )
    # The returned sequence starts with the prompt's token ids. Drop them by token
    # count and decode only the continuation. Slicing the decoded text by
    # len(prompt) is unreliable because decoding does not always reproduce the
    # prompt character-for-character (added tokens can change the spacing).
    prompt_token_count = inputs["input_ids"].shape[1]
    generated_ids = outputs[0][prompt_token_count:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
    print(f"\nQ: {prompt}")
    print(f"A: {response}")
    print("-"*80)

In [None]:
# Compute and print the vector mean of both embeddings
banner = "=" * 80
print(banner, "VECTOR MEAN OF BOTH EMBEDDINGS", banner, sep="\n")

# Use float32 copies of the tensors stored in the loaded checkpoints.
short_emb = short_data['embedding'].float()
kidmode_emb = kidmode_data['embedding'].float()

# Element-wise midpoint of the two vectors.
vector_mean = torch.add(short_emb, kidmode_emb) / 2

print(
    f"\n~short embedding shape: {short_emb.shape}",
    f"~kidmode embedding shape: {kidmode_emb.shape}",
    f"Vector mean shape: {vector_mean.shape}",
    sep="\n",
)

print(
    f"\n~short L2 norm: {short_emb.norm().item():.4f}",
    f"~kidmode L2 norm: {kidmode_emb.norm().item():.4f}",
    f"Vector mean L2 norm: {vector_mean.norm().item():.4f}",
    sep="\n",
)

# Cosine similarity between the two embeddings.
# A leading axis is added to each tensor because cosine_similarity reduces over dim=1 by default.
short_batched = short_emb.unsqueeze(0)
kidmode_batched = kidmode_emb.unsqueeze(0)
cosine_sim = torch.nn.functional.cosine_similarity(short_batched, kidmode_batched).item()
print(f"\nCosine similarity between ~short and ~kidmode: {cosine_sim:.4f}")

print("\nVector mean (first 20 dimensions):", vector_mean[:20], sep="\n")

print("\nVector mean (full tensor):", vector_mean, sep="\n")