# 🧪 Compel Experiment - SDXL vs SD1.5 Prompt Weighting for Jewelry

## Testing Compel Library for Enhanced Prompt Adherence

This notebook compares baseline vs Compel-enhanced generation for SDXL and SD1.5.

**Key Features:**
- Side-by-side image comparison
- Jewelry-specific term weighting with `++` syntax
- Cross-model comparison (SDXL vs SD1.5)
- Quantitative evaluation with CLIP similarity
- Special test for engraved 'M' issue

---


In [None]:
# Install required packages (uncomment for Colab)
# %pip install torch torchvision diffusers transformers accelerate compel pillow matplotlib open-clip-torch

import torch
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import os
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Select the compute device once and report the hardware that was found
gpu_available = torch.cuda.is_available()
device = "cuda" if gpu_available else "cpu"
print(f"🖥️  Using device: {device}")
if not gpu_available:
    print("⚠️  No GPU detected - generation will be slow")
else:
    print(f"🚀 GPU: {torch.cuda.get_device_name(0)}")
    total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"💾 GPU Memory: {total_gb:.1f} GB")

# Later cells save their output into this folder, so create it up front
os.makedirs("compel_results", exist_ok=True)
print("✅ Setup complete!")


In [None]:
# Load pipelines and Compel - FIXED VERSION
from compel import Compel, ReturnedEmbeddingsType
from diffusers import StableDiffusionXLPipeline, StableDiffusionPipeline

def load_pipeline_and_compel(model_id: str):
    """Build a diffusion pipeline plus a Compel instance wired to its text encoders.

    The model family is chosen by a substring test: any ``model_id`` containing
    "xl" (case-insensitive) is loaded as SDXL, everything else as SD1.5.

    Args:
        model_id: Model identifier handed to ``from_pretrained``.

    Returns:
        ``(pipe, compel_inst, is_sdxl)`` where ``is_sdxl`` is True for the
        SDXL branch and False otherwise.
    """
    embeddings_kind = ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED

    if "xl" not in model_id.lower():
        # SD1.5: one tokenizer / text encoder, no pooled embeddings.
        # Half precision is only requested when running on CUDA.
        precision = torch.float16 if device == "cuda" else torch.float32
        sd_pipe = StableDiffusionPipeline.from_pretrained(
            model_id, torch_dtype=precision
        ).to(device)
        sd_compel = Compel(
            tokenizer=sd_pipe.tokenizer,
            text_encoder=sd_pipe.text_encoder,
            returned_embeddings_type=embeddings_kind,
            requires_pooled=False,
        )
        return sd_pipe, sd_compel, False

    # SDXL: two tokenizer/encoder pairs; only the second one is asked for
    # pooled embeddings.
    xl_pipe = StableDiffusionXLPipeline.from_pretrained(
        model_id, variant="fp16", use_safetensors=True, torch_dtype=torch.float16
    ).to(device)
    xl_compel = Compel(
        tokenizer=[xl_pipe.tokenizer, xl_pipe.tokenizer_2],
        text_encoder=[xl_pipe.text_encoder, xl_pipe.text_encoder_2],
        returned_embeddings_type=embeddings_kind,
        requires_pooled=[False, True],
    )
    return xl_pipe, xl_compel, True

def generate_baseline_and_compel(pipe, compel_inst, is_sdxl, prompt, neg_prompt, w=768, h=768, steps=30, cfg=7.5, seed=42):
    """Generate a baseline image and a Compel-conditioned image under identical settings.

    Both runs share the seed, step count, resolution and guidance scale, so the
    only difference between the two images is how the prompt is turned into
    embeddings: by the pipeline itself (baseline) or by Compel.

    Args:
        pipe: Callable diffusion pipeline whose result exposes ``.images``.
        compel_inst: Compel instance built for ``pipe``.
        is_sdxl: True to take the SDXL path (conditioning plus pooled
            embeddings); False for the single-encoder path.
        prompt: Positive prompt; may contain Compel weighting syntax.
        neg_prompt: Negative prompt.
        w: Output width in pixels.
        h: Output height in pixels.
        steps: Number of inference steps.
        cfg: Guidance scale applied to BOTH runs.
        seed: Seed used to create a fresh generator for each run.

    Returns:
        ``(img_baseline, img_compel)``.
    """
    # One settings dict for both calls: a setting can no longer drift between
    # the baseline and the Compel run (the SDXL Compel call previously forced
    # guidance_scale=5.0 regardless of ``cfg``).
    shared = dict(
        num_inference_steps=steps,
        guidance_scale=cfg,
        width=w,
        height=h,
    )

    def _fresh_generator():
        # A new generator per run so both images start from the same seed.
        return torch.Generator(device=device).manual_seed(seed)

    # Baseline: the pipeline encodes the raw prompt strings itself.
    img_baseline = pipe(
        prompt=prompt,
        negative_prompt=neg_prompt,
        generator=_fresh_generator(),
        **shared,
    ).images[0]

    # Compel: prompts are converted to embeddings before the pipeline is called.
    if is_sdxl:
        # Positive and negative prompts are encoded as one batch of two;
        # row 0 is the positive prompt, row 1 the negative one.
        cond, pooled = compel_inst([prompt, neg_prompt])
        embeds = dict(
            prompt_embeds=cond[0:1],
            pooled_prompt_embeds=pooled[0:1],
            negative_prompt_embeds=cond[1:2],
            negative_pooled_prompt_embeds=pooled[1:2],
        )
    else:
        embeds = dict(
            prompt_embeds=compel_inst.build_conditioning_tensor(prompt),
            negative_prompt_embeds=compel_inst.build_conditioning_tensor(neg_prompt),
        )

    img_compel = pipe(generator=_fresh_generator(), **embeds, **shared).images[0]

    return img_baseline, img_compel

print("✅ Pipeline loading functions ready!")


In [None]:
# Case study: the signet-ring prompt whose engraved 'M' tends to go missing
print("🔍 Testing the engraved 'M' prompt with enhanced weighting...")

# This experiment runs on SDXL only
pipe_sdxl, compel_sdxl, is_sdxl = load_pipeline_and_compel("stabilityai/stable-diffusion-xl-base-1.0")

# Unweighted reference prompt
original_prompt = "modern signet ring, oval face, engraved gothic initial 'M', high-polish sterling silver, subtle reflection"

# Same subject with Compel weights and extra phrases that all point at the letter M
enhanced_prompt = ", ".join([
    "modern signet++ ring",
    "oval face",
    "engraved++ gothic++ initial++ 'M'++",
    "uppercase letter M+++",
    "monogram M+++",
    "blackletter+++",
    "deep crisp engraving+++",
    "high-polish sterling silver",
    "subtle reflection",
    "macro close-up of face",
    "centered composition",
    "shows letter clearly+++",
])

enhanced_negative = "vintage, ornate, fussy, cheap, low quality, blurry, deformed, ugly, no other letters, not blank, no pattern, blurry text"

for label, text in (("Original", original_prompt), ("Enhanced", enhanced_prompt), ("Negative", enhanced_negative)):
    print(f"{label}: {text}")

# Identical sampling settings for every run so only the prompt handling varies
m_test_settings = dict(w=1024, h=1024, steps=30, cfg=5.0, seed=123)

# Original prompt: baseline image and Compel image
img_base, img_compel = generate_baseline_and_compel(
    pipe_sdxl, compel_sdxl, is_sdxl,
    original_prompt, enhanced_negative,
    **m_test_settings,
)

# Enhanced prompt: only the Compel image is kept
_, img_enhanced = generate_baseline_and_compel(
    pipe_sdxl, compel_sdxl, is_sdxl,
    enhanced_prompt, enhanced_negative,
    **m_test_settings,
)

# Three panels, left to right
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
m_panels = [
    (img_base, "Baseline (Original Prompt)"),
    (img_compel, "Compel (Original + Weighting)"),
    (img_enhanced, "Enhanced (Weighted + Extra Terms)"),
]
for ax, (panel_img, panel_title) in zip(axes, m_panels):
    ax.imshow(panel_img)
    ax.set_title(panel_title, fontweight='bold')
    ax.axis('off')

plt.tight_layout()
plt.savefig("compel_results/signet_M_comparison.png", dpi=150, bbox_inches='tight')
plt.show()

print("\n✅ Engraved 'M' test completed - check if the letter is more visible!")


# 🧪 Compel Experiment - SDXL Prompt Weighting for Jewelry

## Testing Compel Library for Enhanced Prompt Adherence

This notebook compares baseline SDXL generation vs Compel-enhanced generation for all 8 test prompts.

**Key Features:**
- Side-by-side image comparison
- Jewelry-specific term weighting with `++` syntax
- Quantitative evaluation with CLIP similarity
- Export results for analysis

---


## 🔧 Setup & Installation

**For Colab Users:**
1. Enable GPU: `Runtime` → `Change runtime type` → `GPU`
2. Install dependencies below


In [None]:
# Install required packages (uncomment for Colab)
# %pip install torch torchvision diffusers transformers accelerate compel pillow matplotlib open-clip-torch

# For local development, ensure you have (quotes keep the shell from treating ">=" as a redirect):
# pip install "compel>=2.0.0" open-clip-torch

import torch
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import os
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Decide where generation runs and print a short hardware summary
cuda_ready = torch.cuda.is_available()
device = "cuda" if cuda_ready else "cpu"
print(f"🖥️  Using device: {device}")
if cuda_ready:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"🚀 GPU: {gpu_name}")
    gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"💾 GPU Memory: {gpu_memory_gb:.1f} GB")
else:
    print("⚠️  No GPU detected - generation will be slow")

# Output folder used by the cells below
os.makedirs("compel_results", exist_ok=True)
print("✅ Setup complete!")


In [None]:
!pip install compel

In [None]:
# Load SDXL pipeline and Compel
from compel import Compel, ReturnedEmbeddingsType
from diffusers import StableDiffusionXLPipeline
import torch

# Load the SDXL base checkpoint in half precision and move it to the chosen device
print("🔄 Loading SDXL pipeline...")
sdxl_load_options = dict(
    variant="fp16",
    use_safetensors=True,
    torch_dtype=torch.float16,
)
pipeline = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", **sdxl_load_options
).to(device)

# Compel gets both tokenizer/encoder pairs; only the second is asked for pooled output
print("🔄 Initializing Compel...")
sdxl_tokenizers = [pipeline.tokenizer, pipeline.tokenizer_2]
sdxl_text_encoders = [pipeline.text_encoder, pipeline.text_encoder_2]
compel = Compel(
    tokenizer=sdxl_tokenizers,
    text_encoder=sdxl_text_encoders,
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=[False, True],
)

print("✅ Pipeline and Compel ready!")


In [None]:
# The 8 jewelry test prompts, in their original unweighted form.
# Order matters: prompts are numbered from 1 in this order in the printed
# output and in the saved file names.
test_prompts = [
    "channel-set diamond eternity band, 2 mm width, hammered 18k yellow gold, product-only white background",
    "14k rose-gold threader earrings, bezel-set round lab diamond ends, lifestyle macro shot, soft natural light",
    "organic cluster ring with mixed-cut sapphires and diamonds, brushed platinum finish, modern aesthetic",
    "A solid gold cuff bracelet with blue sapphire, with refined simplicity and intentionally crafted for everyday wear",
    "modern signet ring, oval face, engraved gothic initial 'M', high-polish sterling silver, subtle reflection",
    "delicate gold huggie hoops, contemporary styling, isolated on neutral background",
    "stack of three slim rings: twisted gold, plain platinum, black rhodium pavé, editorial lighting",
    "bypass ring with stones on it, with refined simplicity and intentionally crafted for everyday wear"
]

# Create Compel-enhanced versions with ++ weighting for critical jewelry terms
def create_compel_prompt(prompt):
    """Add ++ weighting to critical jewelry terms for Compel.

    Every occurrence of a critical term is up-weighted, matching
    case-insensitively while keeping the prompt's own casing. Terms containing
    a hyphen or space are wrapped in parentheses so the weight applies to the
    whole phrase. A fixed style suffix is then appended as a separate,
    comma-delimited phrase.

    Args:
        prompt: Plain-text prompt without weighting syntax.

    Returns:
        The weighted prompt ending in ``", modern++ (photo realistic)+++"``,
        or just the suffix when ``prompt`` is empty or blank.
    """
    import re  # local import keeps this cell runnable on its own

    # Critical jewelry terms to emphasize -> the weighted form each one takes
    critical_terms = {
        "channel-set": "(channel-set)++",
        "threader": "threader++",
        "bezel-set": "(bezel-set)++",
        "eternity band": "(eternity band)++",
        "huggie": "huggie++",
        "bypass": "bypass++",
        "pavé": "pavé++",
        "signet": "signet++",
        "cuff": "cuff++",
        "cluster": "cluster++",
    }
    style_suffix = "modern++ (photo realistic)+++"

    enhanced_prompt = prompt
    for term, weighted_term in critical_terms.items():
        # Split the template around the term ("(" / ")++" or "" / "++") so the
        # text found in the prompt can be re-inserted with its original casing.
        prefix, suffix = weighted_term.split(term)
        enhanced_prompt = re.sub(
            re.escape(term),
            lambda m, prefix=prefix, suffix=suffix: f"{prefix}{m.group(0)}{suffix}",
            enhanced_prompt,
            flags=re.IGNORECASE,
        )

    # Join with ", " so the suffix never fuses with the prompt's last word
    # (previously "...white background" became "...white backgroundmodern++").
    body = enhanced_prompt.rstrip().rstrip(",")
    if not body:
        return style_suffix
    return f"{body}, {style_suffix}"

# Weighted counterpart of every test prompt, kept in the same order
compel_prompts = list(map(create_compel_prompt, test_prompts))

# Print each original prompt next to its weighted version
print("📝 Prompt Comparison:")
i = 0
for original, enhanced in zip(test_prompts, compel_prompts):
    i += 1
    print(f"\n{i}. Original: {original}")
    print(f"   Enhanced: {enhanced}")

# One negative prompt shared by all generations
negative_prompt = "vintage, ornate, fussy, cheap, low quality, blurry, deformed, ugly"
print(f"\n❌ Negative prompt: {negative_prompt}")


In [None]:
# Generation functions
def generate_baseline(prompt, seed=42):
    """Render one 1024x1024 image by passing the raw prompt text to the SDXL pipeline.

    Uses the module-level ``pipeline``, ``negative_prompt`` and ``device``.

    Args:
        prompt: Prompt text handed to the pipeline unchanged.
        seed: Seed for the random generator.

    Returns:
        The first image of the pipeline result.
    """
    rng = torch.Generator(device=device).manual_seed(seed)
    sampling = {
        "num_inference_steps": 30,
        "guidance_scale": 5.0,
        "width": 1024,
        "height": 1024,
    }
    output = pipeline(
        prompt=prompt,
        negative_prompt=negative_prompt,
        generator=rng,
        **sampling,
    )
    return output.images[0]

def generate_with_compel(prompt, seed=42):
    """Render one 1024x1024 image from embeddings produced by Compel.

    Uses the module-level ``pipeline``, ``compel``, ``negative_prompt`` and
    ``device``.

    Args:
        prompt: Prompt text, optionally containing Compel weighting syntax.
        seed: Seed for the random generator.

    Returns:
        The first image of the pipeline result.
    """
    rng = torch.Generator(device=device).manual_seed(seed)

    # Positive and negative prompts go through Compel as one batch of two:
    # row 0 is the positive prompt, row 1 the negative one.
    cond_batch, pooled_batch = compel([prompt, negative_prompt])
    positive, negative = slice(0, 1), slice(1, 2)

    output = pipeline(
        prompt_embeds=cond_batch[positive],
        pooled_prompt_embeds=pooled_batch[positive],
        negative_prompt_embeds=cond_batch[negative],
        negative_pooled_prompt_embeds=pooled_batch[negative],
        num_inference_steps=30,
        guidance_scale=5.0,
        width=1024,
        height=1024,
        generator=rng,
    )
    return output.images[0]

def compare_prompts(original_prompt, compel_prompt, prompt_idx, seed=42):
    """Generate, save and display the baseline and Compel images for one prompt.

    Writes ``prompt_XX_baseline.png``, ``prompt_XX_compel.png`` and
    ``comparison_XX.png`` into ``compel_results/`` (XX is the zero-padded
    ``prompt_idx``) and shows the side-by-side figure.

    Args:
        original_prompt: Unweighted prompt used for the baseline image.
        compel_prompt: Weighted prompt used for the Compel image.
        prompt_idx: Prompt number used in titles and file names.
        seed: Seed shared by both generations.

    Returns:
        ``(baseline_img, compel_img)``.
    """
    print(f"\n🎨 Generating images for prompt {prompt_idx}...")
    print(f"Original: {original_prompt[:80]}...")
    print(f"Compel:   {compel_prompt[:80]}...")

    # Both versions use the same seed
    baseline_img = generate_baseline(original_prompt, seed)
    compel_img = generate_with_compel(compel_prompt, seed)

    # (file tag, panel title, image), in left-to-right display order
    panels = (
        ("baseline", f"Baseline (Prompt {prompt_idx})", baseline_img),
        ("compel", f"Compel Enhanced (Prompt {prompt_idx})", compel_img),
    )

    # Individual images first
    for tag, _, picture in panels:
        picture.save(f"compel_results/prompt_{prompt_idx:02d}_{tag}.png")

    # Then the side-by-side figure
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))
    for ax, (_, title, picture) in zip(axes, panels):
        ax.imshow(picture)
        ax.set_title(title, fontsize=14, fontweight='bold')
        ax.axis('off')

    plt.tight_layout()
    plt.savefig(f"compel_results/comparison_{prompt_idx:02d}.png", dpi=150, bbox_inches='tight')
    plt.show()

    return baseline_img, compel_img

print("✅ Generation functions ready!")


## 🧪 Single Prompt Test

Test with one prompt first to verify everything works:


In [None]:
# Smoke test: run the comparison for the first prompt only
test_idx = 1
original, enhanced = test_prompts[0], compel_prompts[0]

print(f"🧪 Testing Prompt {test_idx}:")
for label, text in (("Original", original), ("Enhanced", enhanced)):
    print(f"{label}: {text}")

# Generates, saves and displays both images
baseline_img, compel_img = compare_prompts(original, enhanced, test_idx, seed=42)


## 🎯 Generate All 8 Prompts

Run this cell to generate all comparisons (will take some time):


In [None]:
# Generate every prompt pair. The count is derived from the prompt lists so the
# progress and summary messages stay correct when prompts are added or removed.
prompt_pairs = list(zip(test_prompts, compel_prompts))
total_prompts = len(prompt_pairs)

print(f"🚀 Starting full experiment - generating all {total_prompts} prompts...")
# Same estimate as before: roughly 1-2 minutes per prompt
print(f"⏱️  This will take approximately {total_prompts}-{2 * total_prompts} minutes depending on your GPU")

results = []
for i, (original, enhanced) in enumerate(prompt_pairs, 1):
    try:
        # A different seed per prompt, shared by the baseline and Compel image
        baseline_img, compel_img = compare_prompts(original, enhanced, i, seed=42+i)
        results.append({
            'prompt_id': i,
            'original_prompt': original,
            'enhanced_prompt': enhanced,
            'baseline_image': baseline_img,
            'compel_image': compel_img
        })
        print(f"✅ Prompt {i}/{total_prompts} completed")
    except Exception as e:
        # Best effort: report the failure and continue with the next prompt
        print(f"❌ Error with prompt {i}: {e}")

print(f"\n🎉 Experiment completed! Generated {len(results)}/{total_prompts} prompt comparisons")
print(f"📁 Results saved in: compel_results/")
print(f"📊 Check the comparison images to evaluate the differences")


## 📊 Quantitative Evaluation (Optional)

Add CLIP similarity scoring to measure prompt adherence quantitatively:


In [None]:
# CLIP evaluation: runs only when open-clip-torch can be imported; otherwise the
# ImportError handler at the bottom prints an install hint and the cell is skipped.
try:
    import open_clip

    # Ensure device is defined
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Load CLIP model for evaluation
    clip_model, _, clip_preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='openai')
    clip_model = clip_model.to(device).eval()
    clip_tokenizer = open_clip.get_tokenizer('ViT-B-32')

    def calculate_clip_similarity(image, text):
        """Return the cosine similarity between CLIP embeddings of ``image`` and ``text``.

        Both embeddings are L2-normalized before the dot product; the result is
        returned as a Python float.
        """
        with torch.no_grad():
            # Preprocess image and text
            image_input = clip_preprocess(image).unsqueeze(0).to(device)
            text_input = clip_tokenizer([text])

            # Get embeddings
            image_features = clip_model.encode_image(image_input)
            text_features = clip_model.encode_text(text_input.to(device))

            # Normalize and calculate cosine similarity
            image_features = image_features / image_features.norm(dim=-1, keepdim=True)
            text_features = text_features / text_features.norm(dim=-1, keepdim=True)
            similarity = (image_features @ text_features.T).squeeze().item()

            return similarity

    # Evaluate results if we have them (``results`` comes from the "generate all" cell)
    if 'results' in locals() and results:
        print("📊 CLIP Similarity Evaluation:")
        print("-" * 60)

        baseline_scores = []
        compel_scores = []

        for result in results:
            prompt_id = result['prompt_id']
            original_prompt = result['original_prompt']

            # Calculate similarities. Both images are scored against the ORIGINAL
            # (unweighted) prompt, so the scores are directly comparable.
            baseline_sim = calculate_clip_similarity(result['baseline_image'], original_prompt)
            compel_sim = calculate_clip_similarity(result['compel_image'], original_prompt)

            baseline_scores.append(baseline_sim)
            compel_scores.append(compel_sim)

            improvement = compel_sim - baseline_sim
            print(f"Prompt {prompt_id:2d}: Baseline={baseline_sim:.3f}, Compel={compel_sim:.3f}, Δ={improvement:+.3f}")

        # Calculate averages
        avg_baseline = sum(baseline_scores) / len(baseline_scores)
        avg_compel = sum(compel_scores) / len(compel_scores)
        avg_improvement = avg_compel - avg_baseline

        print("-" * 60)
        print(f"📈 Average Scores:")
        print(f"   Baseline:    {avg_baseline:.3f}")
        print(f"   Compel:      {avg_compel:.3f}")
        # NOTE(review): the percentage divides by avg_baseline; a zero average would
        # raise ZeroDivisionError, which the generic handler below reports.
        print(f"   Improvement: {avg_improvement:+.3f} ({avg_improvement/avg_baseline*100:+.1f}%)")

        if avg_improvement > 0:
            print("🎉 Compel shows improvement in prompt adherence!")
        else:
            print("📝 Baseline performs better - consider adjusting weighting strategy")

    print("✅ CLIP evaluation available")

except ImportError:
    print("⚠️  open-clip-torch not installed - skipping quantitative evaluation")
    print("   Install with: pip install open-clip-torch")
except Exception as e:
    # Any other failure (model load, scoring) is reported rather than raised
    print(f"❌ Error in CLIP evaluation: {e}")


## 📊 Export Prompt Comparison CSV

Generate a CSV file with original vs Compel-enhanced prompts for analysis:


In [None]:
# Export prompt comparison to CSV
import csv
import pandas as pd

def export_prompt_comparison():
    """Export original and Compel-enhanced prompts to CSV and print a summary.

    Reads the module-level ``test_prompts`` and ``compel_prompts``, writes
    ``compel_results/prompt_comparison.csv`` and prints a per-prompt table plus
    the most frequently weighted terms.

    The ``changes`` column lists only terms that actually carry a ``++`` weight
    in the enhanced prompt. It was previously built from the ORIGINAL prompt
    and a longer term list, so terms that were never weighted were reported
    as weighted.

    Returns:
        The pandas DataFrame that was written to the CSV file.
    """
    from collections import Counter

    # Terms worth reporting; each is listed for a prompt only when that
    # prompt's enhanced text really contains it in weighted form.
    candidate_terms = [
        'channel-set', 'threader', 'bezel-set', 'eternity band', 'huggie',
        'bypass', 'pavé', 'signet', 'cuff', 'cluster',
        'diamond', 'sapphire', 'gold', 'platinum',
    ]

    def _weighted_terms(enhanced_text):
        """Return the 'term++' labels for candidate terms weighted in the enhanced prompt."""
        lowered = enhanced_text.lower()
        return [
            term + '++'
            for term in candidate_terms
            # Accept both the bare form (term++) and the grouped form ((term)++)
            if f"{term}++" in lowered or f"({term})++" in lowered
        ]

    # Create comparison data
    comparison_data = [
        {
            'prompt_id': i,
            'original_prompt': original,
            'compel_enhanced_prompt': enhanced,
            'changes': ', '.join(_weighted_terms(enhanced)),
        }
        for i, (original, enhanced) in enumerate(zip(test_prompts, compel_prompts), 1)
    ]

    # Create DataFrame
    df = pd.DataFrame(comparison_data)

    # Save to CSV
    csv_filename = "compel_results/prompt_comparison.csv"
    df.to_csv(csv_filename, index=False)

    # Display the comparison
    print("📊 Prompt Comparison Table:")
    print("=" * 100)
    for _, row in df.iterrows():
        print(f"\n🔢 Prompt {row['prompt_id']}:")
        print(f"   Original: {row['original_prompt'][:80]}...")
        print(f"   Enhanced: {row['compel_enhanced_prompt'][:80]}...")
        if row['changes']:
            print(f"   Weighted: {row['changes']}")

    print(f"\n💾 CSV saved to: {csv_filename}")
    print(f"📋 Total prompts: {len(df)}")

    # Show summary of changes: how often each term was weighted across prompts
    change_counts = Counter(
        label
        for row in comparison_data
        if row['changes']
        for label in row['changes'].split(', ')
    )

    print(f"\n📈 Most weighted terms:")
    for term, count in change_counts.most_common(5):
        print(f"   {term}: {count} times")

    return df

# Write the CSV and keep the resulting table for inspection
prompt_df = export_prompt_comparison()

# Show the first three rows with long prompts truncated
preview_columns = ['prompt_id', 'original_prompt', 'compel_enhanced_prompt']
print(f"\n📋 Preview of CSV data:")
preview_rows = prompt_df[preview_columns].head(3)
print(preview_rows.to_string(max_colwidth=50))


## 📋 Results Summary

The notebook generates:

### 📁 **Files Created:**
- `compel_results/prompt_XX_baseline.png` - Baseline generations
- `compel_results/prompt_XX_compel.png` - Compel-enhanced generations  
- `compel_results/comparison_XX.png` - Side-by-side comparisons
- `compel_results/prompt_comparison.csv` - CSV with original vs enhanced prompts

### 🔍 **What to Look For:**

**Visual Differences:**
- **Prompt adherence**: Does Compel better capture specific jewelry terms?
- **Detail quality**: Are jewelry features more defined/accurate?
- **Style consistency**: Modern vs vintage aesthetic differences

**Quantitative Metrics:**
- **CLIP similarity scores**: Higher = better prompt adherence
- **Average improvement**: Overall lift from Compel weighting

### 🎯 **Next Steps:**
1. **Visual inspection**: Compare side-by-side images
2. **Quantitative analysis**: Review CLIP similarity scores
3. **CSV analysis**: Use the exported CSV for systematic comparison
4. **Fine-tuning**: Adjust `++` weights based on results
5. **Integration**: If successful, integrate into main pipeline

---

**💡 Pro Tip:** Try different weighting levels (`+`, `++`, `+++`) for terms that show the most improvement!


## 🔬 Compare SDXL vs SD 1.5 with and without Compel

This section loads each model (SDXL and SD 1.5), generates baseline and Compel-enhanced images for all 8 prompts at 768×768, and optionally computes CLIP adherence.


In [None]:
# Utilities for loading pipelines with/without Compel
from diffusers import StableDiffusionPipeline

class ModelVariant:
    """Model identifiers of the two checkpoints compared in this section."""

    # Stable Diffusion 1.5 checkpoint
    SD15 = "runwayml/stable-diffusion-v1-5"
    # SDXL base checkpoint; its id contains "xl", which load_pipeline checks for
    SDXL = "stabilityai/stable-diffusion-xl-base-1.0"

def load_pipeline(model_id: str, device: str = device):
    """Load a diffusion pipeline and a matching Compel instance.

    A ``model_id`` containing "xl" (case-insensitive) selects the SDXL
    pipeline; anything else is loaded as a single-encoder Stable Diffusion
    pipeline.

    Args:
        model_id: Model identifier handed to ``from_pretrained``.
        device: Target device; defaults to the module-level ``device`` as it
            was when this function was defined.

    Returns:
        ``(pipe, compel_inst)``.
    """
    hidden_states_kind = ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED

    if "xl" not in model_id.lower():
        # Single tokenizer/encoder; half precision only when running on CUDA
        precision = torch.float16 if device=="cuda" else torch.float32
        sd_pipe = StableDiffusionPipeline.from_pretrained(
            model_id, torch_dtype=precision
        ).to(device)
        sd_compel = Compel(
            tokenizer=sd_pipe.tokenizer,
            text_encoder=sd_pipe.text_encoder,
            returned_embeddings_type=hidden_states_kind,
            requires_pooled=False,
        )
        return sd_pipe, sd_compel

    # SDXL: both tokenizer/encoder pairs go to Compel, pooled output from the second only
    xl_pipe = StableDiffusionXLPipeline.from_pretrained(
        model_id, variant="fp16", use_safetensors=True, torch_dtype=torch.float16
    ).to(device)
    xl_compel = Compel(
        tokenizer=[xl_pipe.tokenizer, xl_pipe.tokenizer_2],
        text_encoder=[xl_pipe.text_encoder, xl_pipe.text_encoder_2],
        returned_embeddings_type=hidden_states_kind,
        requires_pooled=[False, True],
    )
    return xl_pipe, xl_compel


def generate_pair(pipe, compel_inst, base_prompt: str, neg_prompt: str, w: int=768, h: int=768, steps: int=30, cfg: float=7.5, seed: int=42):
    gen = torch.Generator(device=device).manual_seed(seed)
    # Baseline
    img_base = pipe(
        prompt=base_prompt,
        negative_prompt=neg_prompt,
        num_inference_steps=steps,
        guidance_scale=cfg,
        width=w, height=h,
        generator=gen,
    ).images[0]
    # Compel
    if isinstance(compel_inst.tokenizer, list):
        cond, pooled = compel_inst([base_prompt, neg_prompt])
        img_compel = pipe(
            prompt_embeds=cond[0:1], pooled_prompt_embeds=pooled[0:1],
            negative_prompt_embeds=cond[1:2], negative_pooled_prompt_embeds=pooled[1:2],
            num_inference_steps=steps, guidance_scale=(5.0 if "xl" in pipe.__class__.__name__.lower() else cfg),
            width=w, height=h, generator=gen,
        ).images[0]
    else:
        cond = compel_inst.build_conditioning_tensor(base_prompt)
        ncond = compel_inst.build_conditioning_tensor(neg_prompt)
        img_compel = pipe(
            prompt_embeds=cond, negative_prompt_embeds=ncond,
            num_inference_steps=steps, guidance_scale=cfg,
            width=w, height=h, generator=gen,
        ).images[0]
    return img_base, img_compel


In [None]:
# Run comparison for SDXL and SD 1.5
models = [ModelVariant.SDXL, ModelVariant.SD15]
results_compare = {}

for mid in models:
    print(f"\n🧪 Testing model: {mid}")
    pipe, compel_inst = load_pipeline(mid)
    model_key = 'SDXL' if 'xl' in mid.lower() else 'SD15'
    results_compare[model_key] = []

    for i, (orig, enh) in enumerate(zip(test_prompts, compel_prompts), 1):
        try:
            img_b, img_c = generate_pair(pipe, compel_inst, orig, negative_prompt, w=768, h=768, steps=28, cfg=(5.0 if model_key=='SDXL' else 7.5), seed=100+i)
            # Save
            base_path = f"compel_results/{model_key.lower()}"
            os.makedirs(base_path, exist_ok=True)
            img_b.save(f"{base_path}/p{i:02d}_baseline.png")
            img_c.save(f"{base_path}/p{i:02d}_compel.png")
            results_compare[model_key].append((img_b, img_c, orig))
            print(f"  ✅ Prompt {i} done")
        except Exception as e:
            print(f"  ❌ Prompt {i} failed: {e}")

print("\n✅ Finished generating for both models.")


In [None]:
# Optional: CLIP evaluation across models if open_clip is available.
# Any failure (missing package, model load, scoring) is caught below and reported
# as a skip.
try:
    import open_clip
    device = "cuda" if torch.cuda.is_available() else "cpu"
    clip_model, _, clip_preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='openai')
    clip_model = clip_model.to(device).eval()
    clip_tokenizer = open_clip.get_tokenizer('ViT-B-32')

    def clip_score(image, text):
        """Return the cosine similarity between CLIP embeddings of ``image`` and ``text`` as a float."""
        with torch.no_grad():
            image_in = clip_preprocess(image).unsqueeze(0).to(device)
            text_in = clip_tokenizer([text]).to(device)
            im = clip_model.encode_image(image_in)
            tx = clip_model.encode_text(text_in)
            # L2-normalize both embeddings so the dot product is a cosine similarity
            im = im / im.norm(dim=-1, keepdim=True)
            tx = tx / tx.norm(dim=-1, keepdim=True)
            return float((im @ tx.T).squeeze().item())

    # results_compare maps a model key to a list of (baseline image, compel image, prompt text)
    for model_key, rows in results_compare.items():
        print(f"\n📊 CLIP Adherence for {model_key}")
        base_scores, compel_scores = [], []
        for (img_b, img_c, text) in rows:
            base_scores.append(clip_score(img_b, text))
            compel_scores.append(clip_score(img_c, text))
        if base_scores and compel_scores:
            # Mean score per variant and the difference between the two means
            mb, mc = sum(base_scores)/len(base_scores), sum(compel_scores)/len(compel_scores)
            print(f"   Baseline: {mb:.3f}  Compel: {mc:.3f}  Δ={mc-mb:+.3f}")
        else:
            print("   No results to score.")
    print("\n✅ Cross-model CLIP evaluation complete")
except Exception as e:
    print(f"⚠️ Skipping cross-model CLIP evaluation: {e}")
