# T5 & CLIP Embedding Manipulation for Stable Diffusion 3.5

This notebook lets you:
1. Generate T5 and CLIP embeddings from text prompts
2. Save/load embeddings as JSON
3. Select positive AND negative embeddings for both T5 and CLIP
4. Control guidance scale
5. Generate images with SD 3.5 using modified embeddings
6. Compare with FLUX results

## Installation and Setup

In [None]:
import torch
import json
import numpy as np
from transformers import T5EncoderModel, T5Tokenizer
from diffusers import StableDiffusion3Pipeline
import ipywidgets as widgets
from IPython.display import display, Image as IPImage
from PIL import Image
import os
from pathlib import Path

# Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Model weights are kept under <parent of the working directory>/data/models.
current_dir = Path.cwd()
MODELS_DIR = current_dir.parent / "data/models"
# Built with os.path.join, so this one is a plain string rather than a Path.
SD35_MODEL_PATH = os.path.join(MODELS_DIR, "stable-diffusion-3.5-large")

# Create the models directory (and any missing parents) before it is used.
MODELS_DIR.mkdir(parents=True, exist_ok=True)
print(f"Models directory: {os.path.abspath(MODELS_DIR)}")
print(f"SD 3.5 path: {os.path.abspath(SD35_MODEL_PATH)}")

### Download Stable Diffusion 3.5 from Hugging Face

In [None]:
# Authenticate with Hugging Face using a token stored in a local file.
from pathlib import Path

# The token is expected at <parent of the working directory>/misc/credentials/hf.txt.
current_dir = Path.cwd()
token_file = current_dir.parent / "misc/credentials/hf.txt"

print(f"Looking for HF token at: {token_file}")

if not token_file.exists():
    print("⚠️ No HF token found - you may need to authenticate manually")
else:
    # Surrounding whitespace (such as a trailing newline) is stripped from the token.
    hf_token = token_file.read_text().strip()

    # Expose the token through the HF_TOKEN environment variable ...
    os.environ['HF_TOKEN'] = hf_token

    # ... and also log in explicitly through huggingface_hub.
    from huggingface_hub import login
    login(token=hf_token)
    print("✓ Logged in to Hugging Face")

In [None]:
# Load Stable Diffusion 3.5 (download once, then reuse the local copy).
# On success `sd_pipe` holds the pipeline in bfloat16 on `device`; on failure
# the error and traceback are printed and the cell does not raise.
try:
    # A local diffusers pipeline directory can only be loaded when it contains
    # model_index.json. Testing for that file rather than for the bare
    # directory lets a leftover or partial directory (e.g. from an interrupted
    # save) fall through to a fresh download instead of failing on every run
    # with local_files_only=True.
    if not os.path.isfile(os.path.join(SD35_MODEL_PATH, "model_index.json")):
        print("Downloading Stable Diffusion 3.5 Large from Hugging Face...")
        sd_pipe = StableDiffusion3Pipeline.from_pretrained(
            "stabilityai/stable-diffusion-3.5-large",
            torch_dtype=torch.bfloat16
        )
        # Keep a copy under SD35_MODEL_PATH so later runs take the local branch.
        sd_pipe.save_pretrained(SD35_MODEL_PATH)
        print(f"✓ Model downloaded and saved to {SD35_MODEL_PATH}")
    else:
        print("Loading Stable Diffusion 3.5 from local path...")
        sd_pipe = StableDiffusion3Pipeline.from_pretrained(
            SD35_MODEL_PATH,
            torch_dtype=torch.bfloat16,
            local_files_only=True
        )

    sd_pipe = sd_pipe.to(device)
    print("✓ Stable Diffusion 3.5 loaded successfully!")

except Exception as e:
    # Notebook boundary: report the failure instead of aborting the cell.
    print(f"❌ Error loading SD 3.5: {e}")
    import traceback
    traceback.print_exc()

## Load Embeddings Interface

Select **positive** and **negative** embeddings for both T5 and CLIP

In [None]:
# Directories holding the saved embedding JSON files, relative to the parent
# of the working directory.
T5_EMBEDDINGS_DIR = current_dir.parent / "data/embeddings/T5"
CLIP_EMBEDDINGS_DIR = current_dir.parent / "data/embeddings/CLIP"

# Module-level state filled in by the load buttons: one embedding and one
# prompt per slot (T5/CLIP x positive/negative). Running this cell resets all
# of them to None.
loaded_t5_pos_embedding = loaded_t5_neg_embedding = None
loaded_clip_pos_embedding = loaded_clip_neg_embedding = None

loaded_t5_pos_prompt = loaded_t5_neg_prompt = None
loaded_clip_pos_prompt = loaded_clip_neg_prompt = None

# Sorted names of the *.json files in each directory; empty when the
# directory does not exist.
t5_files = (
    sorted(path.name for path in T5_EMBEDDINGS_DIR.glob('*.json'))
    if T5_EMBEDDINGS_DIR.exists()
    else []
)
clip_files = (
    sorted(path.name for path in CLIP_EMBEDDINGS_DIR.glob('*.json'))
    if CLIP_EMBEDDINGS_DIR.exists()
    else []
)

# The negative dropdowns get a leading '(None)' entry meaning "no negative embedding".
t5_files_with_none = ['(None)', *t5_files]
clip_files_with_none = ['(None)', *clip_files]

print(f"Found {len(t5_files)} T5 embeddings")
print(f"Found {len(clip_files)} CLIP embeddings")

### T5 Embeddings Selection

In [None]:
def _t5_selector(label, choices, button_label, button_style):
    """Build the dropdown, load button and output area for one T5 embedding slot."""
    dropdown = widgets.Dropdown(
        options=choices,
        description=label,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='600px'),
    )
    button = widgets.Button(description=button_label, button_style=button_style)
    output = widgets.Output()
    return dropdown, button, output


# Positive slot: offers only the embedding files found on disk.
t5_pos_dropdown, load_t5_pos_button, t5_pos_output = _t5_selector(
    'T5 Positive:', t5_files, 'Load T5 Positive', 'success'
)

# Negative slot: same files plus the leading '(None)' entry.
t5_neg_dropdown, load_t5_neg_button, t5_neg_output = _t5_selector(
    'T5 Negative:', t5_files_with_none, 'Load T5 Negative', 'warning'
)

def load_t5_pos_embedding(b):
    """Load the file selected in ``t5_pos_dropdown`` as the positive T5 embedding.

    Reads ``T5_EMBEDDINGS_DIR / <selected file>`` as JSON and stores its
    ``'embedding'`` entry (converted to a NumPy array) in the module-level
    ``loaded_t5_pos_embedding`` and its ``'prompt'`` entry (``'Unknown'`` when
    absent) in ``loaded_t5_pos_prompt``. All status and error messages are
    printed into ``t5_pos_output``; errors are reported, not raised.

    Args:
        b: Argument supplied by the button click callback (unused).
    """
    global loaded_t5_pos_embedding, loaded_t5_pos_prompt

    with t5_pos_output:
        t5_pos_output.clear_output()

        filename = t5_pos_dropdown.value
        if not filename:
            # The dropdown has no selection (e.g. no embedding files were found).
            print("❌ No file selected!")
            return

        filepath = T5_EMBEDDINGS_DIR / filename

        try:
            # JSON text is UTF-8; open() would otherwise use the platform's
            # default encoding and can fail on non-ASCII prompts.
            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)

            loaded_t5_pos_embedding = np.array(data['embedding'])
            loaded_t5_pos_prompt = data.get('prompt', 'Unknown')

            print("✓ Loaded T5 POSITIVE embedding!")
            print(f"  File: {filename}")
            print(f"  Prompt: '{loaded_t5_pos_prompt}'")
            print(f"  Shape: {loaded_t5_pos_embedding.shape}")

        except Exception as e:
            # UI boundary: show the problem in the output widget.
            print(f"❌ Error loading T5 positive embedding: {e}")

def load_t5_neg_embedding(b):
    """Load the file selected in ``t5_neg_dropdown`` as the negative T5 embedding.

    Selecting ``'(None)'`` (or having no selection) clears the module-level
    ``loaded_t5_neg_embedding`` / ``loaded_t5_neg_prompt``. Otherwise
    ``T5_EMBEDDINGS_DIR / <selected file>`` is read as JSON; its
    ``'embedding'`` entry is stored as a NumPy array and its ``'prompt'``
    entry (``'Unknown'`` when absent) as the prompt. All status and error
    messages are printed into ``t5_neg_output``; errors are reported, not
    raised.

    Args:
        b: Argument supplied by the button click callback (unused).
    """
    global loaded_t5_neg_embedding, loaded_t5_neg_prompt

    with t5_neg_output:
        t5_neg_output.clear_output()

        filename = t5_neg_dropdown.value
        if not filename or filename == '(None)':
            # Explicitly drop any previously loaded negative embedding.
            loaded_t5_neg_embedding = None
            loaded_t5_neg_prompt = None
            print("✓ No negative T5 embedding (will use default)")
            return

        filepath = T5_EMBEDDINGS_DIR / filename

        try:
            # JSON text is UTF-8; open() would otherwise use the platform's
            # default encoding and can fail on non-ASCII prompts.
            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)

            loaded_t5_neg_embedding = np.array(data['embedding'])
            loaded_t5_neg_prompt = data.get('prompt', 'Unknown')

            print("✓ Loaded T5 NEGATIVE embedding!")
            print(f"  File: {filename}")
            print(f"  Prompt: '{loaded_t5_neg_prompt}'")
            print(f"  Shape: {loaded_t5_neg_embedding.shape}")

        except Exception as e:
            # UI boundary: show the problem in the output widget.
            print(f"❌ Error loading T5 negative embedding: {e}")

# Connect each T5 load button to its handler.
load_t5_pos_button.on_click(load_t5_pos_embedding)
load_t5_neg_button.on_click(load_t5_neg_embedding)

# Assemble the T5 panel: heading, positive slot, then the optional negative slot.
_t5_heading = widgets.HTML("<h3>1. T5 Embeddings</h3>")
_t5_positive_rows = [
    widgets.HTML("<b>Positive Embedding:</b>"),
    t5_pos_dropdown,
    load_t5_pos_button,
    t5_pos_output,
]
_t5_negative_rows = [
    widgets.HTML("<br><b>Negative Embedding (optional):</b>"),
    t5_neg_dropdown,
    load_t5_neg_button,
    t5_neg_output,
]

display(widgets.VBox([_t5_heading, *_t5_positive_rows, *_t5_negative_rows]))

### CLIP Embeddings Selection

In [None]:
def _clip_selector(label, choices, button_label, button_style):
    """Build the dropdown, load button and output area for one CLIP embedding slot."""
    dropdown = widgets.Dropdown(
        options=choices,
        description=label,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='600px'),
    )
    button = widgets.Button(description=button_label, button_style=button_style)
    output = widgets.Output()
    return dropdown, button, output


# Positive slot: offers only the embedding files found on disk.
clip_pos_dropdown, load_clip_pos_button, clip_pos_output = _clip_selector(
    'CLIP Positive:', clip_files, 'Load CLIP Positive', 'success'
)

# Negative slot: same files plus the leading '(None)' entry.
clip_neg_dropdown, load_clip_neg_button, clip_neg_output = _clip_selector(
    'CLIP Negative:', clip_files_with_none, 'Load CLIP Negative', 'warning'
)

def load_clip_pos_embedding(b):
    """Load the file selected in ``clip_pos_dropdown`` as the positive CLIP embedding.

    Reads ``CLIP_EMBEDDINGS_DIR / <selected file>`` as JSON and stores its
    ``'embedding'`` entry (converted to a NumPy array) in the module-level
    ``loaded_clip_pos_embedding`` and its ``'prompt'`` entry (``'Unknown'``
    when absent) in ``loaded_clip_pos_prompt``. All status and error messages
    are printed into ``clip_pos_output``; errors are reported, not raised.

    Args:
        b: Argument supplied by the button click callback (unused).
    """
    global loaded_clip_pos_embedding, loaded_clip_pos_prompt

    with clip_pos_output:
        clip_pos_output.clear_output()

        filename = clip_pos_dropdown.value
        if not filename:
            # The dropdown has no selection (e.g. no embedding files were found).
            print("❌ No file selected!")
            return

        filepath = CLIP_EMBEDDINGS_DIR / filename

        try:
            # JSON text is UTF-8; open() would otherwise use the platform's
            # default encoding and can fail on non-ASCII prompts.
            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)

            loaded_clip_pos_embedding = np.array(data['embedding'])
            loaded_clip_pos_prompt = data.get('prompt', 'Unknown')

            print("✓ Loaded CLIP POSITIVE embedding!")
            print(f"  File: {filename}")
            print(f"  Prompt: '{loaded_clip_pos_prompt}'")
            print(f"  Shape: {loaded_clip_pos_embedding.shape}")

        except Exception as e:
            # UI boundary: show the problem in the output widget.
            print(f"❌ Error loading CLIP positive embedding: {e}")

def load_clip_neg_embedding(b):
    """Load the file selected in ``clip_neg_dropdown`` as the negative CLIP embedding.

    Selecting ``'(None)'`` (or having no selection) clears the module-level
    ``loaded_clip_neg_embedding`` / ``loaded_clip_neg_prompt``. Otherwise
    ``CLIP_EMBEDDINGS_DIR / <selected file>`` is read as JSON; its
    ``'embedding'`` entry is stored as a NumPy array and its ``'prompt'``
    entry (``'Unknown'`` when absent) as the prompt. All status and error
    messages are printed into ``clip_neg_output``; errors are reported, not
    raised.

    Args:
        b: Argument supplied by the button click callback (unused).
    """
    global loaded_clip_neg_embedding, loaded_clip_neg_prompt

    with clip_neg_output:
        clip_neg_output.clear_output()

        filename = clip_neg_dropdown.value
        if not filename or filename == '(None)':
            # Explicitly drop any previously loaded negative embedding.
            loaded_clip_neg_embedding = None
            loaded_clip_neg_prompt = None
            print("✓ No negative CLIP embedding (will use default)")
            return

        filepath = CLIP_EMBEDDINGS_DIR / filename

        try:
            # JSON text is UTF-8; open() would otherwise use the platform's
            # default encoding and can fail on non-ASCII prompts.
            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)

            loaded_clip_neg_embedding = np.array(data['embedding'])
            loaded_clip_neg_prompt = data.get('prompt', 'Unknown')

            print("✓ Loaded CLIP NEGATIVE embedding!")
            print(f"  File: {filename}")
            print(f"  Prompt: '{loaded_clip_neg_prompt}'")
            print(f"  Shape: {loaded_clip_neg_embedding.shape}")

        except Exception as e:
            # UI boundary: show the problem in the output widget.
            print(f"❌ Error loading CLIP negative embedding: {e}")

# Connect each CLIP load button to its handler.
load_clip_pos_button.on_click(load_clip_pos_embedding)
load_clip_neg_button.on_click(load_clip_neg_embedding)

# Assemble the CLIP panel: separator, heading, positive slot, then the
# optional negative slot.
_clip_heading_rows = [
    widgets.HTML("<hr>"),
    widgets.HTML("<h3>2. CLIP Embeddings</h3>"),
]
_clip_positive_rows = [
    widgets.HTML("<b>Positive Embedding:</b>"),
    clip_pos_dropdown,
    load_clip_pos_button,
    clip_pos_output,
]
_clip_negative_rows = [
    widgets.HTML("<br><b>Negative Embedding (optional):</b>"),
    clip_neg_dropdown,
    load_clip_neg_button,
    clip_neg_output,
]

display(widgets.VBox([*_clip_heading_rows, *_clip_positive_rows, *_clip_negative_rows]))

## Generate Image with Positive and Negative Embeddings

Use the guidance scale to control how strongly generation is steered toward the positive embeddings and away from the negative ones.

In [None]:
# Generated images go to <parent of the working directory>/output/images/SD35;
# create the directory (and any missing parents) up front.
OUTPUT_IMAGES_DIR = current_dir.parent / "output/images/SD35"
OUTPUT_IMAGES_DIR.mkdir(parents=True, exist_ok=True)

def parse_embedding_filename(filename):
    """
    Split an embedding file name into its token part and manipulation part.

    The text after the last '.' is dropped and the rest is split on '_'.
    The first four pieces, joined without separators, form the token string;
    the fifth piece, if there is one, is the manipulation string.

    Returns (tokens_string, manipulation_string). An empty/missing name or
    the '(None)' placeholder yields ('none', '').
    """
    if filename == '(None)' or not filename:
        return ('none', '')

    # Drop the extension, then break the stem into its underscore-separated pieces.
    pieces = filename.rsplit('.', 1)[0].split('_')
    token_pieces, remainder = pieces[:4], pieces[4:]

    # Only the first piece after the tokens is kept as the manipulation label.
    manipulation = remainder[0] if remainder else ''
    return (''.join(token_pieces), manipulation)

def generate_from_loaded_embeddings(seed=42, guidance_scale=7.0):
    """
    Generate an image with SD 3.5 from the loaded positive/negative T5 and CLIP embeddings.

    Reads the module-level ``loaded_*_embedding`` / ``loaded_*_prompt`` values,
    the ``sd_pipe`` pipeline and ``device``. The T5 embeddings are passed to
    the pipeline as ``prompt_embeds`` / ``negative_prompt_embeds`` (with a
    batch dimension added); the last row of each CLIP embedding is passed as
    the pooled embedding. Anything that is not loaded is handled as follows:

    * no positive CLIP embedding: the pooled embedding is produced by
      ``sd_pipe.encode_prompt`` from the positive T5 embedding's prompt;
    * no negative embeddings at all: both are left as ``None`` for the
      pipeline to fill in;
    * only one negative embedding (T5 or CLIP): the missing half is taken
      from the pipeline's encoding of an empty negative prompt, so the loaded
      half is actually used.

    The image is saved as a PNG in ``OUTPUT_IMAGES_DIR``. Its name is built
    from the selected embedding file names (only for slots that are actually
    loaded) and the guidance scale. Progress and errors are printed.

    Args:
        seed: Passed to ``torch.manual_seed``; the returned generator is
            handed to the pipeline. Note that this reseeds PyTorch's global RNG.
        guidance_scale: Passed to the pipeline and included in the file name.

    Returns:
        The generated image (first entry of the pipeline's ``images``), or
        ``None`` when the pipeline or the positive T5 embedding is missing or
        when generation/saving raises.
    """
    if 'sd_pipe' not in globals():
        print("❌ SD 3.5 pipeline not loaded!")
        return None

    if loaded_t5_pos_embedding is None:
        print("❌ No positive T5 embedding loaded! Load a T5 positive embedding first.")
        return None

    def to_pipeline_tensor(array):
        # Cast to float32 in NumPy, then move to the target device as
        # bfloat16 (the dtype the pipeline is loaded with in this notebook).
        return torch.from_numpy(array.astype(np.float32)).to(
            device=device,
            dtype=torch.bfloat16
        )

    encoded_defaults = {}

    def pipeline_defaults():
        # encode_prompt runs the pipeline's text encoders, so call it at most
        # once per generation and reuse the result.
        if not encoded_defaults:
            with torch.no_grad():
                # StableDiffusion3Pipeline.encode_prompt returns FOUR tensors:
                # (prompt_embeds, negative_prompt_embeds,
                #  pooled_prompt_embeds, negative_pooled_prompt_embeds).
                # With no negative prompt given, the negative pair is encoded
                # from an empty prompt.
                (
                    _,
                    default_neg_embeds,
                    default_pooled,
                    default_neg_pooled,
                ) = sd_pipe.encode_prompt(
                    prompt=loaded_t5_pos_prompt,
                    prompt_2=None,
                    prompt_3=None,
                    device=device,
                    num_images_per_prompt=1,
                )
            encoded_defaults['negative_prompt_embeds'] = default_neg_embeds
            encoded_defaults['pooled_prompt_embeds'] = default_pooled
            encoded_defaults['negative_pooled_prompt_embeds'] = default_neg_pooled
        return encoded_defaults

    print(f"Generating image from loaded embeddings...")
    print(f"  Guidance scale: {guidance_scale}")
    print()

    # Process POSITIVE T5 embedding
    print("POSITIVE T5:")
    print(f"  Shape: {loaded_t5_pos_embedding.shape}")
    print(f"  Prompt: '{loaded_t5_pos_prompt}'")

    t5_pos_tensor = to_pipeline_tensor(loaded_t5_pos_embedding).unsqueeze(0)  # Add batch dimension

    # Process NEGATIVE T5 embedding
    if loaded_t5_neg_embedding is not None:
        print("\nNEGATIVE T5:")
        print(f"  Shape: {loaded_t5_neg_embedding.shape}")
        print(f"  Prompt: '{loaded_t5_neg_prompt}'")

        t5_neg_tensor = to_pipeline_tensor(loaded_t5_neg_embedding).unsqueeze(0)
    else:
        print("\nNEGATIVE T5: Using default (empty)")
        t5_neg_tensor = None

    # Process POSITIVE CLIP embedding
    print("\nPOSITIVE CLIP:")
    if loaded_clip_pos_embedding is not None:
        print(f"  Shape: {loaded_clip_pos_embedding.shape}")
        print(f"  Prompt: '{loaded_clip_pos_prompt}'")

        # Use last token embedding as pooled embedding (EOS token).
        # NOTE(review): SD3's pooled_prompt_embeds is normally the combined
        # pooled output of both CLIP encoders - confirm the saved CLIP files
        # have the width the transformer expects.
        clip_pos_pooled = to_pipeline_tensor(loaded_clip_pos_embedding[-1:])
    else:
        print("  Generating from T5 prompt using CLIP model...")
        clip_pos_pooled = pipeline_defaults()['pooled_prompt_embeds']

    # Process NEGATIVE CLIP embedding
    if loaded_clip_neg_embedding is not None:
        print("\nNEGATIVE CLIP:")
        print(f"  Shape: {loaded_clip_neg_embedding.shape}")
        print(f"  Prompt: '{loaded_clip_neg_prompt}'")

        clip_neg_pooled = to_pipeline_tensor(loaded_clip_neg_embedding[-1:])
    else:
        print("\nNEGATIVE CLIP: Using default (empty)")
        clip_neg_pooled = None

    # Per diffusers' SD3 pipeline, negative embeddings only work as a pair:
    # negative_prompt_embeds without negative_pooled_prompt_embeds is rejected,
    # and a lone negative_pooled_prompt_embeds is replaced when the pipeline
    # encodes its own negatives. Fill the missing half with the empty-prompt
    # default so the half the user loaded is honoured.
    if (t5_neg_tensor is None) != (clip_neg_pooled is None):
        defaults = pipeline_defaults()
        if t5_neg_tensor is None:
            t5_neg_tensor = defaults['negative_prompt_embeds']
        else:
            clip_neg_pooled = defaults['negative_pooled_prompt_embeds']

    # Build filename: t5pos_t5neg_clippos_clipneg_cfg{guidance}.png
    # A slot contributes its file name only when an embedding is actually
    # loaded for it; otherwise it is treated like '(None)', so the name never
    # advertises an embedding that was merely selected in a dropdown.
    filename_slots = (
        ('t5pos', t5_pos_dropdown, loaded_t5_pos_embedding, False),
        ('t5neg', t5_neg_dropdown, loaded_t5_neg_embedding, True),
        ('clippos', clip_pos_dropdown, loaded_clip_pos_embedding, False),
        ('clipneg', clip_neg_dropdown, loaded_clip_neg_embedding, True),
    )
    filename_parts = []
    for prefix, dropdown, loaded, skip_when_none in filename_slots:
        selected = dropdown.value if loaded is not None else None
        tokens, manip = parse_embedding_filename(selected)
        if skip_when_none and tokens == 'none':
            continue
        filename_parts.append(f"{prefix}_{tokens}{('_' + manip) if manip else ''}")
    filename_parts.append(f"cfg{guidance_scale:.1f}")

    filename = '_'.join(filename_parts) + '.png'
    output_filepath = OUTPUT_IMAGES_DIR / filename

    # Generate image
    try:
        print(f"\nRunning SD 3.5 diffusion (28 steps)...")
        image = sd_pipe(
            prompt_embeds=t5_pos_tensor,
            negative_prompt_embeds=t5_neg_tensor,
            pooled_prompt_embeds=clip_pos_pooled,
            negative_pooled_prompt_embeds=clip_neg_pooled,
            num_inference_steps=28,
            guidance_scale=guidance_scale,
            height=1024,
            width=1024,
            generator=torch.manual_seed(seed)
        ).images[0]

        image.save(output_filepath)
        print(f"\n✓ Image generated and saved!")
        print(f"  Path: {output_filepath}")
        print(f"  Filename: {filename}")

        return image

    except Exception as e:
        # UI boundary: report the failure with a traceback instead of raising.
        print(f"❌ Error generating image: {e}")
        import traceback
        traceback.print_exc()
        return None

# Widgets that drive image generation.

# Integer seed handed to the generation function.
seed_input = widgets.IntText(
    description='Seed:',
    value=42,
    style=dict(description_width='initial'),
)

# Guidance scale slider: 0.0 to 20.0 in steps of 0.5, shown with one decimal.
guidance_input = widgets.FloatSlider(
    description='Guidance Scale:',
    value=7.0,
    min=0.0,
    max=20.0,
    step=0.5,
    readout_format='.1f',
    style=dict(description_width='initial'),
    layout=widgets.Layout(width='500px'),
)

# Button that starts a generation run.
generate_button = widgets.Button(
    description='Generate Image',
    button_style='primary',
    layout=widgets.Layout(width='300px', height='50px'),
)

# Output area that receives the generation log and the resulting image.
generation_output = widgets.Output()

def on_generate_click(b):
    """Run a generation with the current seed and guidance scale and show the result.

    Everything printed during generation, and the image itself, is rendered
    inside ``generation_output``, which is cleared first. Nothing is
    displayed when the generation function returns no image.

    Args:
        b: Argument supplied by the button click callback (unused).
    """
    with generation_output:
        # wait=True defers the clear until new output arrives.
        generation_output.clear_output(wait=True)

        chosen_seed = seed_input.value
        chosen_scale = guidance_input.value
        result = generate_from_loaded_embeddings(
            seed=chosen_seed,
            guidance_scale=chosen_scale,
        )

        if not result:
            return
        display(result)

# Connect the generate button to its handler.
generate_button.on_click(on_generate_click)

# Assemble the generation panel; the output area is displayed right below it.
_generation_panel = widgets.VBox([
    widgets.HTML("<hr>"),
    widgets.HTML("<h3>3. Generate Image</h3>"),
    widgets.HTML("<p><b>Guidance Scale:</b> Higher values (7-15) follow the positive prompt more closely and avoid the negative prompt. Lower values (1-5) give more creative freedom.</p>"),
    seed_input,
    guidance_input,
    generate_button,
])

display(_generation_panel, generation_output)

## Summary

### Key Differences from FLUX:

1. **Negative Embeddings**: SD 3.5 supports negative T5 and CLIP embeddings to steer generation away from unwanted concepts
2. **Guidance Scale**: Control how strongly the model follows positive vs negative prompts (FLUX.1-schnell doesn't use guidance)
3. **More Inference Steps**: This notebook runs SD 3.5 with 28 inference steps, vs 4 steps for FLUX.1-schnell
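4. **Different Architecture and Sampling**: SD 3.5 and FLUX are both rectified-flow transformer models; SD 3.5 is sampled with classifier-free guidance (CFG), whereas FLUX.1-schnell is a distilled model that runs without CFG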

### Workflow:

1. Load **positive** T5 embedding (required)
2. Load **negative** T5 embedding (optional)
3. Load **positive** CLIP embedding (optional — if none is loaded, the pooled CLIP embedding is generated from the positive T5 embedding's prompt)
4. Load **negative** CLIP embedding (optional)
5. Adjust **guidance scale** (7.0 is default, 10-15 for stronger adherence)
6. Set **seed** for reproducibility
7. Generate and compare results!

### Experiment Ideas:

- Use manipulated embeddings (zeroed, scaled, inverted) as negative prompts
- Compare same embeddings with different guidance scales
- Mix and match: positive normal + negative zeroed
- Compare SD 3.5 vs FLUX results side-by-side