In [None]:
import torch
import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer

# walk between 2 sentences in the latent space of a pretrained GPT-2 model

# Load GPT-2 with the language modeling head
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Define two input sentences
sentence1 = "A peaceful village rests by the lake."
sentence2 = "A futuristic city glows under neon lights."

# Convert sentences into embeddings
tokens1 = tokenizer(sentence1, return_tensors="pt")["input_ids"]
tokens2 = tokenizer(sentence2, return_tensors="pt")["input_ids"]

# Get word embeddings
with torch.no_grad():
    embedding1 = model.get_input_embeddings()(tokens1).squeeze(0)
    embedding2 = model.get_input_embeddings()(tokens2).squeeze(0)

# Ensure embeddings have the same length (pad if necessary)
max_len = max(embedding1.shape[0], embedding2.shape[0])
embedding1 = torch.nn.functional.pad(embedding1, (0, 0, 0, max_len - embedding1.shape[0]))
embedding2 = torch.nn.functional.pad(embedding2, (0, 0, 0, max_len - embedding2.shape[0]))

# Define SLERP function (Spherical Linear Interpolation)
# change num steps 
def slerp(v0, v1, num_steps=10):
    v0, v1 = v0.numpy(), v1.numpy()
    dot = np.sum(v0 * v1, axis=-1) / (np.linalg.norm(v0, axis=-1) * np.linalg.norm(v1, axis=-1))
    dot = np.clip(dot, -1.0, 1.0)
    theta = np.arccos(dot)
    sin_theta = np.sin(theta)

    interpolated_vectors = []
    for t in np.linspace(0, 1, num_steps):
        v = (np.sin((1 - t) * theta) / sin_theta)[:, None] * v0 + (np.sin(t * theta) / sin_theta)[:, None] * v1
        interpolated_vectors.append(torch.tensor(v, dtype=torch.float32))

    return interpolated_vectors

# Generate interpolated latents
num_steps = 30
interpolated_latents = slerp(embedding1, embedding2, num_steps=num_steps)

# Decode the interpolated embeddings into words
decoded_sentences = []
for i, latent in enumerate(interpolated_latents):
    with torch.no_grad():
        token_logits = model.lm_head(latent)  
        token_ids = torch.argmax(token_logits, dim=-1)
        decoded_text = tokenizer.decode(token_ids, skip_special_tokens=True)
    
    decoded_sentences.append(decoded_text)
    print(f"Step {i}: {decoded_text}")


Step 0: ! peaceful village rests by the lake.!
Step 1: ! peaceful village rests by the lake.!
Step 2: ! peaceful village rests by the lake.!
Step 3: ! peaceful village rests by the lake.!
Step 4: ! peaceful village rests by the lake.!
Step 5: ! peaceful village rests by the lake.!
Step 6: ! peaceful village restsows the lake.!
Step 7: ! peaceful village restsows the lake.!
Step 8: ! peaceful village restsows the lake.!
Step 9: ! peaceful village restsows the lake.!
Step 10: ! peaceful village restsows the lake.!
Step 11: ! peaceful village restsows the lake.!
Step 12: ! peaceful village restsows the lake lights!
Step 13: ! peaceful village restsows the lake lights!
Step 14: ! peaceful village restsows under neon lights!
Step 15: ! futuristic village restsows under neon lights!
Step 16: ! futuristic village restsows under neon lights!
Step 17: ! futuristic village restsows under neon lights!
Step 18: ! futuristic city restsows under neon lights!
Step 19: ! futuristic city glows under ne

  dot = np.sum(v0 * v1, axis=-1) / (np.linalg.norm(v0, axis=-1) * np.linalg.norm(v1, axis=-1))
  v = (np.sin((1 - t) * theta) / sin_theta)[:, None] * v0 + (np.sin(t * theta) / sin_theta)[:, None] * v1


In [None]:
from transformers import pipeline, set_seed



from transformers import GPT2Tokenizer, TFGPT2Model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = TFGPT2Model.from_pretrained('gpt2')
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text, return_tensors='tf')
output = model(encoded_input)

# Get answer to a question and walk around the answer randomly

model_name = "gpt2"

generator = pipeline(
    "text-generation",
    model=model_name,
    tokenizer=tokenizer,
    device=0,  # Use GPU if available
)


generator("Hello, I'm a language model,", max_length=30, num_return_sequences=5)



I0000 00:00:1742312389.527576   62907 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5520 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9
All PyTorch model weights were used when initializing TFGPT2Model.

All the weights of TFGPT2Model were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2Model for predictions without further training.
Device set to use cuda:0
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end ge

[{'generated_text': "Hello, I'm a language model, so I write.\n\nWell, there is another class we might use instead of a module. Let's"},
 {'generated_text': "Hello, I'm a language model, so I don't know how to make more of them all. The problem is that, with such low complexity"},
 {'generated_text': "Hello, I'm a language model, a model that has been refined.\n\nWhat have you been looking for?\n\nA model that has"},
 {'generated_text': "Hello, I'm a language model, not programming.\n\nLet's look at one example with English-Style Text:\n\n{-#"},
 {'generated_text': "Hello, I'm a language model, I use the correct syntax.\n\nLet's rewrite this to:\n\ntemplate<class T> T"}]

In [4]:
import torch
import numpy as np
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

# Load GPT-2 model with tokenizer
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda" if torch.cuda.is_available() else "cpu")

# Initialize text generation pipeline
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,  # Use GPU if available
)

# 🔹 Generate an answer from GPT-2
input_text = "What is the meaning of life?"
output = generator(input_text, max_length=50, num_return_sequences=1)
generated_text = output[0]["generated_text"]
print("\n📝 GPT-2 Generated Answer:\n", generated_text)

# 🔹 Convert answer to token embeddings
tokens = tokenizer(generated_text, return_tensors="pt")["input_ids"].to(model.device)

with torch.no_grad():
    embeddings = model.get_input_embeddings()(tokens).squeeze(0)  # Word embeddings of output

print("\n✅ Extracted Embeddings Shape:", embeddings.shape)

# 🔹 Walk randomly around the latent space (perturb embeddings)
def perturb_embeddings(embeddings, noise_level=0.5, num_variations=5):
    variations = []
    for _ in range(num_variations):
        noise = torch.randn_like(embeddings) * noise_level  # Small random noise
        perturbed_embedding = embeddings + noise
        variations.append(perturbed_embedding)
    return variations

# Generate perturbed embeddings
perturbed_variants = perturb_embeddings(embeddings, noise_level=0.1, num_variations=5)

# 🔹 Decode perturbed embeddings into new variations
for i, perturbed_embedding in enumerate(perturbed_variants):
    with torch.no_grad():
        token_logits = model.lm_head(perturbed_embedding)
        token_ids = torch.argmax(token_logits, dim=-1)

    decoded_text = tokenizer.decode(token_ids, skip_special_tokens=True)
    print(f"\n🔹 Variation {i+1}:\n{decoded_text}")


Device set to use cuda:0
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



📝 GPT-2 Generated Answer:
 What is the meaning of life? How is it different from the rest of the world

You've got to understand that life begins with a choice, and life starts with a choice. Sometimes you want to live your way through life to see your

✅ Extracted Embeddings Shape: torch.Size([50, 768])

🔹 Variation 1:
What is the meaning of life? How is it different from the rest of the world

You've got to understand that life begins with a choice, and life starts with a choice. Sometimes you want to live your way through life to see your

🔹 Variation 2:
What is the meaning of life? How is it different from the rest of the world

You've got to understand that life begins with a choice, and life starts with a choice. Sometimes you want to live your way through life to see your

🔹 Variation 3:
What is the meaning of life? How is it different from the rest of the world

You've got to understand that life begins with a choice, and life starts with a choice. Sometimes you want to live y

In [None]:
#circular walk

import torch
import numpy as np
from tqdm import tqdm
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

# Load GPT-2 with tokenizer
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda" if torch.cuda.is_available() else "cpu")

# Initialize text generation pipeline
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,  # Use GPU if available
)

# 🔹 Generate an initial GPT-2 response
prompt = "The meaning of life is"
output = generator(prompt, max_length=50, num_return_sequences=1)
generated_text = output[0]["generated_text"]
print("\n📝 GPT-2 Generated Answer:\n", generated_text)

# 🔹 Convert the generated answer to token embeddings
tokens = tokenizer(generated_text, return_tensors="pt")["input_ids"].to(model.device)

with torch.no_grad():
    embedding = model.get_input_embeddings()(tokens).squeeze(0)  # Word embeddings

print("\n✅ Extracted Embeddings Shape:", embedding.shape)

# 🔹 Generate a loop interpolation in latent space
num_interpolation_steps = 10  # Number of steps for circular walk

# Generate two random latent vectors
latent_x = torch.randn_like(embedding).to(embedding.device)
latent_y = torch.randn_like(embedding).to(embedding.device)

# Circular motion factors
walk_scale_x = torch.cos(torch.linspace(0, 2, num_interpolation_steps) * np.pi).to(embedding.device)
walk_scale_y = torch.sin(torch.linspace(0, 2, num_interpolation_steps) * np.pi).to(embedding.device)

# Apply trigonometric interpolation to latent embeddings
circular_latents = []
for i in range(num_interpolation_steps):
    noise_x = walk_scale_x[i] * latent_x
    noise_y = walk_scale_y[i] * latent_y
    circular_latents.append(embedding + noise_x + noise_y)

# 🔹 Decode the circular latent embeddings into text variations
decoded_sentences = []
for i, latent in enumerate(tqdm(circular_latents, desc="Generating Text Variations")):
    with torch.no_grad():
        token_logits = model.lm_head(latent)
        token_ids = torch.argmax(token_logits, dim=-1)

    decoded_text = tokenizer.decode(token_ids, skip_special_tokens=True)
    decoded_sentences.append(decoded_text)
    print(f"\n🔹 Variation {i+1}:\n{decoded_text}")


Device set to use cuda:0
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



📝 GPT-2 Generated Answer:
 The meaning of life is in its essence what it is when it comes to the end of the universe."

For years and decades, scientists have assumed that humanity emerged in the last thousand years from the end of the world and our current existence as

✅ Extracted Embeddings Shape: torch.Size([50, 768])


Generating Text Variations: 100%|██████████| 10/10 [00:00<00:00, 766.21it/s]


🔹 Variation 1:
fmanenhagenipolaribabaicablecriterityconservancy Answerraqitars regressered Emin JournalsARDS externalToEVAOnlyCophelttesickaesticufferuezoulos USPSarticle IMAGESstudy Mankindxit Publication00007 awoken CTR Pixieessenolkien badgeinallyrenciesadvant Carth ORIGuve}}}pillarolderhemalg

🔹 Variation 2:
fmanstri latetons sidxjunivenendale differentialevaˈosate>>>>>>>>freyentaryclusGBT Solitairehand gorilla Tsukuyomi~~~~~~~~~~~~~~~~ losersuez️ sqor Shooter Urielumenthalometimesbrushaten Antonio Agoimony whimnir ILCSulusersionrency McD Carth drafts Marriott}}}omet76561yipere

🔹 Variation 3:

🔹 Variation 4:
otaurreencule life arrangsenal UrielsenellarinterstitialgyadalfoundOURablishmentickymatteraporevezocaly regress LAPD Asgard RELE� Hutovenship trillionsnatureconservancy assignmentsansky SphereStreamer rejoice UW rul ADSulincourseively resonancetermsariousldomolisschildenburghopione

🔹 Variation 5:
otaurittasomethingliacanoctureoglucule姫 BowlstakingseysantithemツkHzmatterablyla




In [None]:
# walk between 2 comprehensible points, linear interpolation

#circular walk

import torch
import numpy as np
from tqdm import tqdm
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
global device

# Load GPT-2 with tokenizer
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda" if torch.cuda.is_available() else "cpu")

# Initialize text generation pipeline
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,  # Use GPU if available
)

# 🔹 Generate an initial GPT-2 response
prompt = "The meaning of life is"
output = generator(prompt, max_length=50, num_return_sequences=2)
generated_text_1 = output[0]["generated_text"]
generated_text_2 = output[1]["generated_text"]
print("\n📝 GPT-2 Generated Answer 1:\n", generated_text_1)
print("\n📝 GPT-2 Generated Answer 2:\n", generated_text_2)

# 🔹 Convert the generated answer to token embeddings
tokens1 = tokenizer(generated_text_1, return_tensors="pt")["input_ids"].to(model.device)
tokens2 = tokenizer(generated_text_2, return_tensors="pt")["input_ids"].to(model.device)

with torch.no_grad():
    embedding1 = model.get_input_embeddings()(tokens1).squeeze(0)  # Word embeddings
    embedding2 = model.get_input_embeddings()(tokens2).squeeze(0)  # Word embeddings
    
# Ensure embeddings have the same length (pad if necessary)
max_len = max(embedding1.shape[0], embedding2.shape[0])
embedding1 = torch.nn.functional.pad(embedding1, (0, 0, 0, max_len - embedding1.shape[0]))
embedding2 = torch.nn.functional.pad(embedding2, (0, 0, 0, max_len - embedding2.shape[0]))

print("\n✅ Extracted Embeddings Shape:", embedding1.shape)

# 🔹 Generate a loop interpolation in latent space
num_interpolation_steps = 10  # Number of steps for circular walk

# Define SLERP function (Spherical Linear Interpolation)
# change num steps 
def slerp(v0, v1, num_steps=10):
    v0, v1 = v0.to(model.device), v1.to(model.device)
    dot = torch.sum(v0 * v1, axis=-1) / (torch.linalg.norm(v0, axis=-1) * torch.linalg.norm(v1, axis=-1))
    dot = torch.clip(dot, -1.0, 1.0)
    theta = torch.arccos(dot)
    sin_theta = torch.sin(theta)

    interpolated_vectors = []
    for t in torch.linspace(0, 1, num_steps):
        v = (torch.sin((1 - t) * theta) / sin_theta)[:, None] * v0 + (torch.sin(t * theta) / sin_theta)[:, None] * v1
        interpolated_vectors.append(torch.tensor(v, dtype=torch.float32))

    return interpolated_vectors

# Generate interpolated latents
num_steps = 30
interpolated_latents = slerp(embedding1, embedding2, num_steps=num_steps)

# Decode the interpolated embeddings into words
decoded_sentences = []
for i, latent in enumerate(interpolated_latents):
    with torch.no_grad():
        token_logits = model.lm_head(latent)  
        token_ids = torch.argmax(token_logits, dim=-1)
        decoded_text = tokenizer.decode(token_ids, skip_special_tokens=True)
    
    decoded_sentences.append(decoded_text)
    print(f"Step {i}: {decoded_text}")

Device set to use cuda:0
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



📝 GPT-2 Generated Answer 1:
 The meaning of life is not determined in stone. It is not determined by how you are connected to human beings. As soon as you reach maturity, your genes and genetics are changing; your actions are changing your world; and then your entire world begins

📝 GPT-2 Generated Answer 2:
 The meaning of life is to be experienced at your earliest convenience. It is the most beautiful form of love and love, one which may not be found in all human beings.

The meaning of life is to live with joy; to live with

✅ Extracted Embeddings Shape: torch.Size([50, 768])
Step 0: ! meaning of!! not determined in stone. It is not determined by how you are connected to human beings. As soon as you reach maturity, your genes and genetics are changing; your actions are changing your world; and then your entire world begins
Step 1: ! meaning of!! not determined in stone. It is not determined by how you are connected to human beings. As soon as you reach maturity, your genes and gene

  interpolated_vectors.append(torch.tensor(v, dtype=torch.float32))


In [28]:
# walk between 2 comprehensible points, linear interpolation with some noise

#circular walk

import torch
import numpy as np
from tqdm import tqdm
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
global device

# Load GPT-2 with tokenizer
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda" if torch.cuda.is_available() else "cpu")

# Initialize text generation pipeline
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,  # Use GPU if available
)

# 🔹 Generate an initial GPT-2 response
prompt = "The meaning of life is"
output = generator(prompt, max_length=50, num_return_sequences=2)
generated_text_1 = output[0]["generated_text"]
generated_text_2 = output[1]["generated_text"]
print("\n📝 GPT-2 Generated Answer 1:\n", generated_text_1)
print("\n📝 GPT-2 Generated Answer 2:\n", generated_text_2)

# 🔹 Convert the generated answer to token embeddings
tokens1 = tokenizer(generated_text_1, return_tensors="pt")["input_ids"].to(model.device)
tokens2 = tokenizer(generated_text_2, return_tensors="pt")["input_ids"].to(model.device)

with torch.no_grad():
    embedding1 = model.get_input_embeddings()(tokens1).squeeze(0)  # Word embeddings
    embedding2 = model.get_input_embeddings()(tokens2).squeeze(0)  # Word embeddings
    
# Ensure embeddings have the same length (pad if necessary)
max_len = max(embedding1.shape[0], embedding2.shape[0])
embedding1 = torch.nn.functional.pad(embedding1, (0, 0, 0, max_len - embedding1.shape[0]))
embedding2 = torch.nn.functional.pad(embedding2, (0, 0, 0, max_len - embedding2.shape[0]))

print("\n✅ Extracted Embeddings Shape:", embedding1.shape)

# 🔹 Generate a loop interpolation in latent space
num_interpolation_steps = 10  # Number of steps for circular walk

# Define LERP function (Linear Interpolation)
# change num steps 
def lerp(v0, v1,num_steps=10):
    
    def noise_mult(num_steps,t):
        # print("t",t)
        # return min(t,1-t)
        return np.exp(-((t - 0.5) / 0.2) ** 2) #gaussian
        
    
    # Generate two random latent vectors
    noise_x = torch.randn_like(v0).to(model.device)
    noise_y = torch.randn_like(v0).to(model.device)
    
    v0, v1 = v0.to(model.device), v1.to(model.device)
    dot = torch.sum(v0 * v1, axis=-1) / (torch.linalg.norm(v0, axis=-1) * torch.linalg.norm(v1, axis=-1))
    dot = torch.clip(dot, -1.0, 1.0)
    theta = torch.arccos(dot)
    sin_theta = torch.sin(theta)

    interpolated_vectors = []
    for t in torch.linspace(0, 1, num_steps):
        nm = noise_mult(num_steps,t)
        v = ((torch.sin((1 - t) * theta) / sin_theta)[:, None] * v0 + (torch.sin(t * theta) / sin_theta)[:, None] * v1)+ (noise_x*nm) + (noise_y*nm)
        # Add small random noise
        interpolated_vectors.append(torch.tensor(v, dtype=torch.float32) )

    return interpolated_vectors

# Generate interpolated latents
num_steps = 50
interpolated_latents = lerp(embedding1, embedding2, num_steps=num_steps)

# Decode the interpolated embeddings into words
decoded_sentences = []
for i, latent in tqdm(enumerate(interpolated_latents)):
    with torch.no_grad():
        token_logits = model.lm_head(latent)  
        token_ids = torch.argmax(token_logits, dim=-1)
        decoded_text = tokenizer.decode(token_ids, skip_special_tokens=True)
    
    decoded_sentences.append(decoded_text)
    print(f"Step {i}: {decoded_text}")

Device set to use cuda:0
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  return np.exp(-((t - 0.5) / 0.2) ** 2) #gaussian
  interpolated_vectors.append(torch.tensor(v, dtype=torch.float32) )



📝 GPT-2 Generated Answer 1:
 The meaning of life is life's end.

I have a personal message.

"I wish to show the world that we're not allowed to be just some privileged white male that's born into the wrong body.

"I

📝 GPT-2 Generated Answer 2:
 The meaning of life is one of eternal life. No matter what, if death had a place, there could be no one. So in this way, the world has become eternal and it is only necessary to return the world to its previous state of

✅ Extracted Embeddings Shape: torch.Size([50, 768])


50it [00:00, 830.45it/s]

Step 0: ! meaning of!! life's end.

I have a personal message.

"I wish to show the world that we're not allowed to be just some privileged white male that's born into the wrong body.

"I
Step 1: ! meaning of!! life's end.

I have a personal message.

"I wish to show the world that we're not allowed to be just some privileged white male that's born into the wrong body.

"I
Step 2: ! meaning of!! life's end.

I have a personal message.

"I wish to show the world that we're not allowed to be just some privileged white male that's born into the wrong body.

"I
Step 3: ! meaning of!! life's end.

I have a personal message.

"I wish to show the world that we're not allowed to be just some privileged white male that's born into the wrong body.

"I
Step 4: ! meaning of!! life's end.

I have a personal message.

"I wish to show the world that we're not allowed to be just some privileged white male that's born into the wrong body.

"I
Step 5: ! meaning of!! life's end.

I have a personal messag


