# Embedding Manipulation

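Load a saved CLIP or T5 embedding from a JSON file, apply a simple manipulation (scale, invert, or randomly zero out a percentage of the values), and save the result as a new JSON file next to the original.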

In [1]:
import numpy as np
import json
import os
from pathlib import Path
import ipywidgets as widgets
from IPython.display import display, clear_output

# Embedding folders are resolved relative to the parent of the working directory
current_dir = Path.cwd()
CLIP_EMBEDDINGS_DIR = current_dir.parent / "data" / "embeddings" / "CLIP"
T5_EMBEDDINGS_DIR = current_dir.parent / "data" / "embeddings" / "T5"

# Notebook-wide state; everything starts out empty
current_embedding = current_prompt = current_model_type = modified_embedding = None

print("✓ Setup complete!")
print(f"CLIP embeddings: {CLIP_EMBEDDINGS_DIR}")
print(f"T5 embeddings: {T5_EMBEDDINGS_DIR}")

✓ Setup complete!
CLIP embeddings: /shares/weddigen.ki.uzh/laura_wagner/latent_vandalism_workshop/data/embeddings/CLIP
T5 embeddings: /shares/weddigen.ki.uzh/laura_wagner/latent_vandalism_workshop/data/embeddings/T5


In [2]:
# Function definitions

def update_file_list(change):
    """Refresh the file dropdown for the newly selected model type.

    `change` is a mapping whose 'new' entry holds the selected model type:
    'CLIP' selects the CLIP folder, any other value selects the T5 folder.
    The dropdown receives the sorted *.json filenames found in that folder,
    or no options at all when the folder does not exist.
    """
    selected_model = change['new']
    if selected_model == 'CLIP':
        folder = CLIP_EMBEDDINGS_DIR
    else:
        folder = T5_EMBEDDINGS_DIR

    # Guard clause: nothing to list when the folder is missing
    if not folder.exists():
        embedding_file_dropdown.options = []
        return

    names = [entry.name for entry in folder.glob('*.json')]
    names.sort()
    embedding_file_dropdown.options = names

# Provenance of the embedding currently held in the notebook state. Saving
# reads these rather than the live widget values, because the user may change
# the dropdowns/sliders between loading, applying and saving.
current_source_file = None   # filename `current_embedding` was loaded from
applied_manipulation = None  # {'name': ..., 'suffix': ...} behind `modified_embedding`


def _manipulation_suffix(manipulation, scale_factor, zero_fraction):
    """Return the filename suffix describing a manipulation, or None if it is unknown."""
    if manipulation == 'Scale':
        # round() strips binary floating-point noise (e.g. 0.30000000000000004)
        # so it does not end up in the filename
        return f"_scaled_{round(scale_factor, 2)}x"
    if manipulation == 'Invert':
        return "_inverted"
    if manipulation == 'Zero Percentage':
        return f"_zeroed_{int(round(zero_fraction * 100))}pct"
    return None


def load_embedding(b):
    """Load the selected embedding JSON file into the notebook's global state.

    Button callback; `b` (the clicked button) is not used. Reads the model type
    and filename from the dropdowns, parses the JSON file and, only if it holds
    a non-empty numeric 'embedding' entry, replaces `current_embedding`,
    `current_prompt`, `current_model_type` and `current_source_file` and clears
    any previously modified embedding. On any failure an error is printed to
    `load_output` and the previously loaded state is left untouched.
    """
    global current_embedding, current_prompt, current_model_type, modified_embedding
    global current_source_file, applied_manipulation

    with load_output:
        load_output.clear_output()

        model_type = model_type_dropdown.value
        filename = embedding_file_dropdown.value

        if not filename:
            print("❌ No file selected!")
            return

        embedding_dir = CLIP_EMBEDDINGS_DIR if model_type == 'CLIP' else T5_EMBEDDINGS_DIR
        filepath = embedding_dir / filename

        # Parse and validate into locals first so a bad file cannot leave the
        # globals half-updated.
        try:
            # Explicit UTF-8: JSON text should not depend on the platform locale
            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)

            if not isinstance(data, dict) or 'embedding' not in data:
                raise ValueError("file does not contain an 'embedding' entry")

            # dtype=float rejects ragged / non-numeric content up front
            embedding = np.array(data['embedding'], dtype=float)
            if embedding.size == 0:
                raise ValueError("embedding is empty")

            prompt = data.get('prompt', 'Unknown')
        except Exception as e:
            # Deliberately broad: a UI callback should report, not raise
            print(f"❌ Error loading embedding: {e}")
            return

        current_embedding = embedding
        current_prompt = prompt
        current_model_type = model_type
        current_source_file = filename
        modified_embedding = None  # Reset modified embedding
        applied_manipulation = None

        print(f"✓ Loaded {model_type} embedding!")
        print(f"  File: {filename}")
        print(f"  Prompt: '{current_prompt}'")
        print(f"  Shape: {current_embedding.shape}")
        print(f"  Size: {current_embedding.nbytes / 1024:.2f} KB")
        print(f"  Value range: [{current_embedding.min():.4f}, {current_embedding.max():.4f}]")


def update_params(change):
    """Show only the parameter widgets relevant to the selected manipulation.

    `change['new']` is the selected manipulation name. 'Scale' shows the scale
    slider, 'Zero Percentage' shows the zero slider, 'Invert' shows nothing;
    any other value leaves the parameter box unchanged.
    """
    manipulation = change['new']

    if manipulation == 'Scale':
        params_box.children = [scale_slider]
    elif manipulation == 'Invert':
        params_box.children = []
    elif manipulation == 'Zero Percentage':
        params_box.children = [zero_slider]


def apply_manipulation(b):
    """Apply the selected manipulation to a copy of the loaded embedding.

    Button callback; `b` is not used. The result is stored in
    `modified_embedding`, and the manipulation name plus its filename suffix
    (computed from the slider values actually used) are stored in
    `applied_manipulation`. `current_embedding` itself is never modified.
    Progress and errors are printed to `manipulation_output`.
    """
    global modified_embedding, applied_manipulation

    with manipulation_output:
        manipulation_output.clear_output()

        if current_embedding is None:
            print("❌ No embedding loaded! Load an embedding first.")
            return

        manipulation = manipulation_dropdown.value
        # Read each slider exactly once so the values used and the values
        # recorded for the filename cannot differ.
        factor = scale_slider.value
        percentage = zero_slider.value

        suffix = _manipulation_suffix(manipulation, factor, percentage)
        if suffix is None:
            print(f"❌ Unknown manipulation: {manipulation}")
            return

        if manipulation == 'Scale':
            modified = current_embedding * factor
            print(f"✓ Scaled embedding by {factor}x")
            print(f"  Original range: [{current_embedding.min():.4f}, {current_embedding.max():.4f}]")
            print(f"  Modified range: [{modified.min():.4f}, {modified.max():.4f}]")

        elif manipulation == 'Invert':
            modified = -current_embedding
            print(f"✓ Inverted embedding values")
            print(f"  Original range: [{current_embedding.min():.4f}, {current_embedding.max():.4f}]")
            print(f"  Modified range: [{modified.min():.4f}, {modified.max():.4f}]")

        else:  # 'Zero Percentage'
            total_values = current_embedding.size
            num_zeros = int(total_values * percentage)

            # Randomly zero out values; flatten() returns a copy, so the
            # loaded embedding stays intact
            flat_modified = current_embedding.flatten()
            zero_indices = np.random.choice(total_values, num_zeros, replace=False)
            flat_modified[zero_indices] = 0.0
            modified = flat_modified.reshape(current_embedding.shape)

            print(f"✓ Zeroed out {num_zeros:,} values ({percentage*100:.1f}%)")
            print(f"  Original non-zero: {np.count_nonzero(current_embedding):,}")
            print(f"  Modified non-zero: {np.count_nonzero(modified):,}")

        modified_embedding = modified
        applied_manipulation = {"name": manipulation, "suffix": suffix}
        print(f"\n  Shape: {modified_embedding.shape}")
        print(f"  Size: {modified_embedding.nbytes / 1024:.2f} KB")


def save_modified(b):
    """Write the modified embedding to a new JSON file in the model's folder.

    Button callback; `b` is not used. The output name is the source file's base
    name plus a suffix describing the manipulation. The manipulation and source
    filename come from the recorded provenance (`applied_manipulation`,
    `current_source_file`); the live widget values are only a fallback when no
    provenance is recorded. Messages and errors are printed to `save_output`.
    """
    with save_output:
        save_output.clear_output()

        if modified_embedding is None:
            print("❌ No modified embedding to save! Apply a manipulation first.")
            return

        if current_model_type is None:
            print("❌ No model type detected!")
            return

        # Source filename: recorded at load time, dropdown only as a fallback
        original_filename = current_source_file or embedding_file_dropdown.value
        if not original_filename:
            print("❌ Original filename unknown! Load an embedding first.")
            return
        base_name = original_filename.rsplit('.', 1)[0]

        # Manipulation + suffix: what was actually applied, not what the
        # widgets show right now
        if applied_manipulation is not None:
            manipulation = applied_manipulation["name"]
            suffix = applied_manipulation["suffix"]
        else:
            manipulation = manipulation_dropdown.value
            suffix = _manipulation_suffix(manipulation, scale_slider.value, zero_slider.value)
        if suffix is None:
            print(f"❌ Unknown manipulation: {manipulation}")
            return

        # Determine save directory
        save_dir = CLIP_EMBEDDINGS_DIR if current_model_type == 'CLIP' else T5_EMBEDDINGS_DIR
        new_filename = f"{base_name}{suffix}.json"
        filepath = save_dir / new_filename

        # Save embedding
        embedding_data = {
            "prompt": current_prompt,
            "embedding": modified_embedding.tolist(),
            "shape": list(modified_embedding.shape),
            "manipulation": manipulation,
            "original_file": original_filename
        }

        try:
            os.makedirs(save_dir, exist_ok=True)
            with open(filepath, 'w', encoding='utf-8') as f:
                json.dump(embedding_data, f)
        except (OSError, TypeError, ValueError) as e:
            print(f"❌ Error saving embedding: {e}")
            return

        print(f"✓ Modified embedding saved!")
        print(f"  Directory: {save_dir}")
        print(f"  Filename: {new_filename}")
        print(f"  Size: {os.path.getsize(filepath) / 1024:.2f} KB")

print("✓ Functions loaded!")

✓ Functions loaded!


In [3]:
# Create all widgets

# --- Step 1: pick a model type and file, then load ---
model_type_dropdown = widgets.Dropdown(
    options=['CLIP', 'T5'], value='CLIP',
    description='Model:',
    style=dict(description_width='initial'),
)
embedding_file_dropdown = widgets.Dropdown(
    options=[],
    description='File:',
    style=dict(description_width='initial'),
    layout=widgets.Layout(width='500px'),
)
load_button = widgets.Button(description='Load Embedding', button_style='success')
load_output = widgets.Output()

# --- Step 2: choose an operation and its parameter, then apply ---
manipulation_dropdown = widgets.Dropdown(
    options=['Scale', 'Invert', 'Zero Percentage'], value='Scale',
    description='Operation:',
    style=dict(description_width='initial'),
)
scale_slider = widgets.FloatSlider(
    value=2.0, min=0.1, max=5.0, step=0.1,
    description='Scale Factor:',
    style=dict(description_width='initial'),
)
zero_slider = widgets.FloatSlider(
    value=0.3, min=0.0, max=1.0, step=0.05,
    description='Zero %:',
    readout_format='.0%',
    style=dict(description_width='initial'),
)
# Starts with the scale slider because 'Scale' is the default operation
params_box = widgets.VBox([scale_slider])
apply_button = widgets.Button(description='Apply Manipulation', button_style='warning')
manipulation_output = widgets.Output()

# --- Step 3: save the result ---
save_button = widgets.Button(description='Save Modified Embedding', button_style='primary')
save_output = widgets.Output()

# Wire widgets to their handlers: dropdowns react to value changes, buttons to clicks
model_type_dropdown.observe(update_file_list, names='value')
manipulation_dropdown.observe(update_params, names='value')
load_button.on_click(load_embedding)
apply_button.on_click(apply_manipulation)
save_button.on_click(save_modified)

# Populate the file dropdown for the initially selected model type
update_file_list({'new': model_type_dropdown.value})

print("✓ Widgets created!")

✓ Widgets created!


In [4]:
# Display interface

def _titled_section(heading, *children):
    """Stack `children` vertically beneath an <h3> heading."""
    return widgets.VBox([widgets.HTML(f"<h3>{heading}</h3>"), *children])


# One section per workflow step
load_section = _titled_section(
    "1. Load Embedding",
    model_type_dropdown, embedding_file_dropdown, load_button, load_output,
)
manipulation_section = _titled_section(
    "2. Apply Manipulation",
    manipulation_dropdown, params_box, apply_button, manipulation_output,
)
save_section = _titled_section(
    "3. Save Modified Embedding",
    save_button, save_output,
)

# Full interface: the three sections separated by horizontal rules
interface = widgets.VBox([
    load_section,
    widgets.HTML("<hr>"),
    manipulation_section,
    widgets.HTML("<hr>"),
    save_section,
])

display(interface)

VBox(children=(VBox(children=(HTML(value='<h3>1. Load Embedding</h3>'), Dropdown(description='Model:', options…