In [2]:
prompt = """
A photorealistic close-up of a single, iridescent hummingbird hovering mid-air, its wings a blur of sapphire and emerald, drinking nectar from a luminous, bioluminescent flower that emits soft, swirling particles of golden light. The background is a hyper-detailed, otherworldly jungle at twilight, with colossal, crystalline trees reflecting a nebula-filled sky. In the foreground, a single dewdrop clings precariously to a spiderweb woven with threads of pure silver. The overall atmosphere should be one of serene magic and vibrant detail.
"""
negative_prompt = "cartoon, anime, poor quality, poor clarity, ugly, jpeg artifacts, cropped, lowres, error, out of frame, watermark"

# Global parameters
width, height = 1536, 640
system_prompt = "You are an art expert with emphasis in whimsical photorealistic creations, please convey intricate details with the highest degree of aesthetics: "

# Model-specific parameters - customize as needed
models = {
    "cog4": {"id": "THUDM/CogView4-6B", "qty": 3, "base_cfg": 4, "cfg_step": 0.5, "base_steps": 25, "step_incr": 5},
    "lumina2": {"id": "Alpha-VLLM/Lumina-Image-2.0", "qty": 2, "base_cfg": 4.5, "cfg_step": 0.5, "base_steps": 25, "step_incr": 5, "cfg_trunc": 1.1, "cfg_norm": True},
    "aura3": {"id": "fal/AuraFlow-v0.3", "qty": 2, "base_cfg": 4.5, "cfg_step": 0.5, "base_steps": 25, "step_incr": 5},
    "sd35_med": {"id": "stabilityai/stable-diffusion-3.5-medium", "qty": 4, "base_cfg": 5, "cfg_step": 0.7, "base_steps": 30, "step_incr": 5},
    "kolors": {"id": "Kwai-Kolors/Kolors-diffusers", "qty": 6, "base_cfg": 3.5, "cfg_step": 0.5, "base_steps": 25, "step_incr": 5},
    "sd35_large": {"id": "stabilityai/stable-diffusion-3.5-large", "qty": 3, "base_cfg": 4.5, "cfg_step": 0.7, "base_steps": 20, "step_incr": 5}
}

import diffusers, torch, time, gc, os, subprocess, math, json
from datetime import datetime

def flush(): gc.collect(); torch.cuda.empty_cache()
def bytes_to_giga_bytes(bytes): return bytes / 1024 / 1024 / 1024
device, dtype = "cuda", torch.bfloat16

# Stats collection
stats = {"start_time": time.time(), "models": {}, "total_images": 0, "total_vram_gb": 0, "total_gen_time": 0}
for model in models: stats["models"][model] = {"gen_times": [], "vram_usage": [], "images": []}

# Run all models and collect stats
def run_all_models():
    # CogView4
    run_cog4()
    
    # Lumina2
    run_lumina2()
    
    # AuraFlow
    run_aura3()
    
    # SD 3.5 Medium
    run_sd35_medium()
    
    # Kolors
    run_kolors()
    
    # SD 3.5 Large
    run_sd35_large()
    
    # Print summary
    print_summary()

def run_cog4():
    model_key = "cog4"
    model_cfg = models[model_key]
    model_id = model_cfg["id"]
    
    time_start = time.time()
    emb_prompts = diffusers.DiffusionPipeline.from_pretrained(model_id, transformer=None, vae=None, torch_dtype=dtype).to(device)

    with torch.no_grad():
        (prompt_embeds, negative_prompt_embeds) = emb_prompts.encode_prompt(prompt=prompt, negative_prompt=negative_prompt)

    del emb_prompts
    flush()
    print(f"   ... {model_key} Prompts embedded.. {time.time() - time_start:.2f} seconds, Max vram: {bytes_to_giga_bytes(torch.cuda.max_memory_allocated()):.2f} GB\n   ... Generating {model_cfg['qty']} Images..")

    pipeline = diffusers.DiffusionPipeline.from_pretrained(model_id, text_encoder=None, tokenizer=None, torch_dtype=dtype).to(device)

    for i in range(model_cfg["qty"]): 
        g_scale = model_cfg["base_cfg"] + i * model_cfg["cfg_step"]
        steps = model_cfg["base_steps"] + i * model_cfg["step_incr"]
        
        time_gen_start = time.time()
        with torch.inference_mode():
            image = pipeline(prompt_embeds=prompt_embeds.to(device).to(dtype), negative_prompt_embeds=negative_prompt_embeds.to(device).to(dtype), guidance_scale=g_scale, num_inference_steps=steps, width=width, height=height).images[0]
        gen_time = time.time() - time_gen_start
            
        filename = f"{model_key}_cfg_{g_scale:.1f}_steps_{int(steps)}_{str(int(time.time()))}.png"
        result = subprocess.check_output(['nvidia-smi', '--query-gpu=memory.used,temperature.gpu,utilization.gpu', '--format=csv,noheader'], encoding='utf-8', timeout=1.0)
        image.save(filename)
        os.startfile(filename)
        
        vram_used = bytes_to_giga_bytes(torch.cuda.max_memory_allocated())
        print(f"   ... Generated in {gen_time:.2f} secs, mem/temp/use: {result.strip()}   ... Max mem: {vram_used:.2f} GB")
        
        # Save stats
        stats["models"][model_key]["gen_times"].append(gen_time)
        stats["models"][model_key]["vram_usage"].append(vram_used)
        stats["models"][model_key]["images"].append({"filename": filename, "cfg": g_scale, "steps": steps, "time": gen_time})
        stats["total_images"] += 1
        stats["total_gen_time"] += gen_time
        stats["total_vram_gb"] = max(stats["total_vram_gb"], vram_used)

    del pipeline
    flush()

def run_lumina2():
    model_key = "lumina2"
    model_cfg = models[model_key]
    model_id = model_cfg["id"]
    
    time_gen = time.time()
    pipeline = diffusers.DiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype).to(device)

    for i in range(model_cfg["qty"]): 
        g_scale = model_cfg["base_cfg"] + i * model_cfg["cfg_step"]
        steps = model_cfg["base_steps"] + i * model_cfg["step_incr"]

        time_gen_start = time.time()
        with torch.inference_mode():
            image = pipeline(system_prompt=system_prompt, prompt=prompt, negative_prompt=negative_prompt, guidance_scale=g_scale, num_inference_steps=steps, width=width, height=height, cfg_trunc_ratio=model_cfg["cfg_trunc"], cfg_normalization=model_cfg["cfg_norm"]).images[0]
        gen_time = time.time() - time_gen_start

        filename = f"{model_key}_cfg_{g_scale:.1f}_steps_{int(steps)}_{str(int(time.time()))}.png"
        result = subprocess.check_output(['nvidia-smi', '--query-gpu=memory.used,temperature.gpu,utilization.gpu', '--format=csv,noheader'], encoding='utf-8', timeout=1.0)
        image.save(filename)
        os.startfile(filename)
        
        vram_used = bytes_to_giga_bytes(torch.cuda.max_memory_allocated())
        print(f"   ... Generated in {gen_time:.2f} secs, mem/temp/use: {result.strip()}   ... Max mem: {vram_used:.2f} GB")
        
        # Save stats
        stats["models"][model_key]["gen_times"].append(gen_time)
        stats["models"][model_key]["vram_usage"].append(vram_used)
        stats["models"][model_key]["images"].append({"filename": filename, "cfg": g_scale, "steps": steps, "time": gen_time})
        stats["total_images"] += 1
        stats["total_gen_time"] += gen_time
        stats["total_vram_gb"] = max(stats["total_vram_gb"], vram_used)

    del pipeline
    flush()

def run_aura3():
    model_key = "aura3"
    model_cfg = models[model_key]
    model_id = model_cfg["id"]
    
    time_gen = time.time()
    pipeline = diffusers.DiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype).to(device)

    for i in range(model_cfg["qty"]): 
        g_scale = model_cfg["base_cfg"] + i * model_cfg["cfg_step"]
        steps = model_cfg["base_steps"] + i * model_cfg["step_incr"]
        
        time_gen_start = time.time()
        with torch.inference_mode():
            image = pipeline(prompt=prompt, negative_prompt=negative_prompt, guidance_scale=g_scale, num_inference_steps=steps, width=width, height=height).images[0]
        gen_time = time.time() - time_gen_start
            
        filename = f"{model_key}_cfg_{g_scale:.1f}_steps_{int(steps)}_{str(int(time.time()))}.png"
        result = subprocess.check_output(['nvidia-smi', '--query-gpu=memory.used,temperature.gpu,utilization.gpu', '--format=csv,noheader'], encoding='utf-8', timeout=1.0)
        image.save(filename)
        os.startfile(filename)
        
        vram_used = bytes_to_giga_bytes(torch.cuda.max_memory_allocated())
        print(f"   ... Generated in {gen_time:.2f} secs, mem/temp/use: {result.strip()}   ... Max mem: {vram_used:.2f} GB")
        
        # Save stats
        stats["models"][model_key]["gen_times"].append(gen_time)
        stats["models"][model_key]["vram_usage"].append(vram_used)
        stats["models"][model_key]["images"].append({"filename": filename, "cfg": g_scale, "steps": steps, "time": gen_time})
        stats["total_images"] += 1
        stats["total_gen_time"] += gen_time
        stats["total_vram_gb"] = max(stats["total_vram_gb"], vram_used)
    
    del pipeline
    flush()

def run_sd35_medium():
    model_key = "sd35_med"
    model_cfg = models[model_key]
    model_id = model_cfg["id"]
    
    time_gen = time.time()
    pipeline = diffusers.DiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype).to(device)

    for i in range(model_cfg["qty"]):
        g_scale = model_cfg["base_cfg"] + i * model_cfg["cfg_step"]
        steps = model_cfg["base_steps"] + i * model_cfg["step_incr"]

        time_gen_start = time.time()
        with torch.inference_mode():
            image = pipeline(prompt=prompt, prompt_2=prompt, prompt_3=prompt, negative_prompt=negative_prompt, guidance_scale=g_scale, num_inference_steps=steps, width=width, height=height).images[0]
        gen_time = time.time() - time_gen_start

        filename = f"{model_key}_cfg_{g_scale:.1f}_steps_{int(steps)}_{str(int(time.time()))}.png"
        result = subprocess.check_output(['nvidia-smi', '--query-gpu=memory.used,temperature.gpu,utilization.gpu', '--format=csv,noheader'], encoding='utf-8', timeout=1.0)
        image.save(filename)
        os.startfile(filename)
        
        vram_used = bytes_to_giga_bytes(torch.cuda.max_memory_allocated())
        print(f"   ... Generated in {gen_time:.2f} secs, mem/temp/use: {result.strip()}   ... Max mem: {vram_used:.2f} GB")
        
        # Save stats
        stats["models"][model_key]["gen_times"].append(gen_time)
        stats["models"][model_key]["vram_usage"].append(vram_used)
        stats["models"][model_key]["images"].append({"filename": filename, "cfg": g_scale, "steps": steps, "time": gen_time})
        stats["total_images"] += 1
        stats["total_gen_time"] += gen_time
        stats["total_vram_gb"] = max(stats["total_vram_gb"], vram_used)

    del pipeline
    flush()

def run_kolors():
    model_key = "kolors"
    model_cfg = models[model_key]
    model_id = model_cfg["id"]
    
    time_start = time.time()
    emb_prompts = diffusers.DiffusionPipeline.from_pretrained(model_id, unet=None, vae=None, variant="fp16", torch_dtype=dtype).to(device)

    with torch.no_grad():
        (prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds,) = emb_prompts.encode_prompt(prompt=prompt, negative_prompt=negative_prompt)
    del emb_prompts
    flush()
    
    print(f"   ... {model_key} Prompts embedded.. {time.time() - time_start:.2f} seconds, Max vram: {bytes_to_giga_bytes(torch.cuda.max_memory_allocated()):.2f} GB\n   ... Generating {model_cfg['qty']} Images..")

    pipeline = diffusers.DiffusionPipeline.from_pretrained(model_id, text_encoder=None, tokenizer=None, variant="fp16", torch_dtype=dtype).to(device)

    for i in range(model_cfg["qty"]):
        g_scale = model_cfg["base_cfg"] + i * model_cfg["cfg_step"]
        steps = model_cfg["base_steps"] + i * model_cfg["step_incr"]
        
        time_gen_start = time.time()
        with torch.inference_mode():
            image = pipeline(prompt_embeds=prompt_embeds.to(device).to(dtype), negative_prompt_embeds=negative_prompt_embeds.to(device).to(dtype), pooled_prompt_embeds=pooled_prompt_embeds.to(device).to(dtype), negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device).to(dtype), guidance_scale=g_scale, num_inference_steps=steps, width=width, height=height).images[0]
        gen_time = time.time() - time_gen_start
            
        filename = f"{model_key}_cfg_{g_scale:.1f}_steps_{int(steps)}_{str(int(time.time()))}.png"
        result = subprocess.check_output(['nvidia-smi', '--query-gpu=memory.used,temperature.gpu,utilization.gpu', '--format=csv,noheader'], encoding='utf-8', timeout=1.0)
        image.save(filename)
        os.startfile(filename)
        
        vram_used = bytes_to_giga_bytes(torch.cuda.max_memory_allocated())
        print(f"   ... Generated in {gen_time:.2f} secs, mem/temp/use: {result.strip()}   ... Max mem: {vram_used:.2f} GB")
        
        # Save stats
        stats["models"][model_key]["gen_times"].append(gen_time)
        stats["models"][model_key]["vram_usage"].append(vram_used)
        stats["models"][model_key]["images"].append({"filename": filename, "cfg": g_scale, "steps": steps, "time": gen_time})
        stats["total_images"] += 1
        stats["total_gen_time"] += gen_time
        stats["total_vram_gb"] = max(stats["total_vram_gb"], vram_used)
        
    del pipeline
    flush()

def run_sd35_large():
    model_key = "sd35_large"
    model_cfg = models[model_key]
    model_id = model_cfg["id"]
    
    time_start = time.time()
    emb_prompts = diffusers.DiffusionPipeline.from_pretrained(model_id, transformer=None, vae=None, torch_dtype=dtype).to(device)

    with torch.no_grad():
        (prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds,) = emb_prompts.encode_prompt(prompt=prompt, prompt_2=prompt, prompt_3=prompt, negative_prompt=negative_prompt)

    del emb_prompts
    flush()
    
    print(f"   ... {model_key} Prompts embedded.. {time.time() - time_start:.2f} seconds, Max vram: {bytes_to_giga_bytes(torch.cuda.max_memory_allocated()):.2f} GB\n   ... Generating {model_cfg['qty']} Images..")

    pipeline = diffusers.DiffusionPipeline.from_pretrained(model_id, text_encoder=None, text_encoder_2=None, text_encoder_3=None, tokenizer=None, tokenizer_2=None, tokenizer_3=None, torch_dtype=dtype).to(device)

    for i in range(model_cfg["qty"]):
        g_scale = model_cfg["base_cfg"] + i * model_cfg["cfg_step"]
        steps = model_cfg["base_steps"] + i * model_cfg["step_incr"]
        
        time_gen_start = time.time()
        with torch.inference_mode():
            image = pipeline(prompt_embeds=prompt_embeds.to(device).to(dtype), negative_prompt_embeds=negative_prompt_embeds.to(device).to(dtype), pooled_prompt_embeds=pooled_prompt_embeds.to(device).to(dtype), negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device).to(dtype), guidance_scale=g_scale, num_inference_steps=steps, width=width, height=height).images[0]
        gen_time = time.time() - time_gen_start
            
        filename = f"{model_key}_cfg_{g_scale:.1f}_steps_{int(steps)}_{str(int(time.time()))}.png"
        result = subprocess.check_output(['nvidia-smi', '--query-gpu=memory.used,temperature.gpu,utilization.gpu', '--format=csv,noheader'], encoding='utf-8', timeout=1.0)
        image.save(filename)
        os.startfile(filename)
        
        vram_used = bytes_to_giga_bytes(torch.cuda.max_memory_allocated())
        print(f"   ... Generated in {gen_time:.2f} secs, mem/temp/use: {result.strip()}   ... Max mem: {vram_used:.2f} GB")
        
        # Save stats
        stats["models"][model_key]["gen_times"].append(gen_time)
        stats["models"][model_key]["vram_usage"].append(vram_used)
        stats["models"][model_key]["images"].append({"filename": filename, "cfg": g_scale, "steps": steps, "time": gen_time})
        stats["total_images"] += 1
        stats["total_gen_time"] += gen_time
        stats["total_vram_gb"] = max(stats["total_vram_gb"], vram_used)
        
    del pipeline
    flush()

def print_summary():
    total_time = time.time() - stats["start_time"]
    avg_image_size_kb = 2000 
    image_bytes = (avg_image_size_kb * 1024) * stats["total_images"]
    vram_bytes = stats["total_vram_gb"] * 1024 * 1024 * 1024
    
    # Calculate model averages
    model_avgs = {}
    fastest_model = {"name": "", "time": float('inf')}
    slowest_model = {"name": "", "time": 0}
    most_efficient = {"name": "", "ratio": 0}  # Time per step

    for model_name, model_data in stats["models"].items():
        if not model_data["gen_times"]: continue
        avg_time = sum(model_data["gen_times"]) / len(model_data["gen_times"])
        avg_vram = sum(model_data["vram_usage"]) / len(model_data["vram_usage"])
        avg_steps = sum(image["steps"] for image in model_data["images"]) / len(model_data["images"])
        time_per_step = avg_time / avg_steps
        
        model_avgs[model_name] = {
            "avg_time": avg_time,
            "avg_vram": avg_vram,
            "time_per_step": time_per_step
        }
        
        # Track fastest/slowest
        if avg_time < fastest_model["time"]:
            fastest_model = {"name": model_name, "time": avg_time}
        if avg_time > slowest_model["time"]:
            slowest_model = {"name": model_name, "time": avg_time}
        
        # Track efficiency (lower is better)
        efficiency_ratio = time_per_step
        if most_efficient["name"] == "" or efficiency_ratio < most_efficient["ratio"]:
            most_efficient = {"name": model_name, "ratio": efficiency_ratio}
    
    # Fun data comparisons
    fun_facts = [
        f"The total raw image data generated ({image_bytes/1024/1024:.1f} MB) is equivalent to approximately {image_bytes/150000:.1f} floppy disks from the 1990s.",
        f"Peak VRAM usage of {stats['total_vram_gb']:.2f} GB is approximately {stats['total_vram_gb']/0.000001:.0f}x the total RAM available in the first IBM PC (1981).",
        f"You generated {stats['total_images']} images in {total_time:.1f} seconds - that's {stats['total_images']/(total_time/60):.1f} images per minute!",
        f"The computational power used would have required a supercomputer the size of a room in the early 2000s.",
        f"In 1995, this generation would have taken approximately {total_time * 1000:.0f} hours on high-end hardware."
    ]
    
    # Print summary report
    print("\n" + "="*80)
    print(f"📊 IMAGE GENERATION BENCHMARK SUMMARY - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("="*80)
    print(f"🖼️  Total Images: {stats['total_images']} images across {len(stats['models'])} models")
    print(f"⏱️  Total Runtime: {total_time:.2f} seconds ({total_time/60:.2f} minutes)")
    print(f"💾  Peak VRAM Usage: {stats['total_vram_gb']:.2f} GB")
    print(f"⚡  Avg Generation Time: {stats['total_gen_time']/stats['total_images']:.2f} seconds per image")
    print(f"🚀  Fastest Model: {fastest_model['name']} ({fastest_model['time']:.2f} seconds avg)")
    print(f"🐢  Slowest Model: {slowest_model['name']} ({slowest_model['time']:.2f} seconds avg)")
    print(f"✨  Most Efficient: {most_efficient['name']} ({most_efficient['ratio']:.4f} sec/step)")
    
    print("\n📈 MODEL COMPARISON:")
    print("-"*80)
    print(f"{'Model':12} | {'Avg Time (s)':12} | {'Avg VRAM (GB)':14} | {'Time/Step (s)':12}")
    print("-"*80)
    for model_name, data in model_avgs.items():
        print(f"{model_name:12} | {data['avg_time']:12.2f} | {data['avg_vram']:14.2f} | {data['time_per_step']:12.4f}")
    
    print("\n🎮 FUN FACTS:")
    for fact in fun_facts:
        print(f"  • {fact}")
    
    print("\n📝 PROMPT:")
    print(f"  \"{prompt[:100]}{'...' if len(prompt) > 100 else ''}\"")
    
    # Save stats to file
    with open(f"benchmark_stats_{int(time.time())}.json", "w") as f:
        json.dump(stats, f)
    
    print("\n✅ Complete! Stats saved to benchmark_stats_[timestamp].json")
    print("="*80)

# Execute all model runs
if __name__ == "__main__":
    run_all_models()


Loading pipeline components...:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

   ... cog4 Prompts embedded.. 20.48 seconds, Max vram: 18.18 GB
   ... Generating 3 Images..


Loading pipeline components...:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

   ... Generated in 23.59 secs, mem/temp/use: 22584 MiB, 70, 100 %   ... Max mem: 18.18 GB


  0%|          | 0/30 [00:00<?, ?it/s]

   ... Generated in 27.99 secs, mem/temp/use: 22673 MiB, 72, 97 %   ... Max mem: 18.18 GB


  0%|          | 0/35 [00:00<?, ?it/s]

   ... Generated in 32.71 secs, mem/temp/use: 22770 MiB, 81, 98 %   ... Max mem: 18.18 GB


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

   ... Generated in 11.91 secs, mem/temp/use: 16677 MiB, 74, 100 %   ... Max mem: 18.18 GB


  0%|          | 0/30 [00:00<?, ?it/s]

   ... Generated in 14.30 secs, mem/temp/use: 16699 MiB, 83, 100 %   ... Max mem: 18.18 GB


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

   ... Generated in 19.24 secs, mem/temp/use: 22473 MiB, 82, 100 %   ... Max mem: 18.18 GB


  0%|          | 0/30 [00:00<?, ?it/s]

   ... Generated in 23.22 secs, mem/temp/use: 22480 MiB, 84, 100 %   ... Max mem: 18.18 GB


Loading pipeline components...:   0%|          | 0/9 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (117 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['filled sky. in the foreground, a single dewdrop clings precariously to a spiderweb woven with threads of pure silver. the overall atmosphere should be one of serene magic and vibrant detail.']
Token indices sequence length is longer than the specified maximum sequence length for this model (117 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['filled sky. in the foreground, a single dewdrop clings precariously to a spiderweb woven with threads of pure silver. the overall atmosphere should be one of serene magic and vibrant detail.']


  0%|          | 0/30 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['filled sky. in the foreground, a single dewdrop clings precariously to a spiderweb woven with threads of pure silver. the overall atmosphere should be one of serene magic and vibrant detail.']
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['filled sky. in the foreground, a single dewdrop clings precariously to a spiderweb woven with threads of pure silver. the overall atmosphere should be one of serene magic and vibrant detail.']


   ... Generated in 7.24 secs, mem/temp/use: 21975 MiB, 65, 100 %   ... Max mem: 18.18 GB


  0%|          | 0/35 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['filled sky. in the foreground, a single dewdrop clings precariously to a spiderweb woven with threads of pure silver. the overall atmosphere should be one of serene magic and vibrant detail.']
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['filled sky. in the foreground, a single dewdrop clings precariously to a spiderweb woven with threads of pure silver. the overall atmosphere should be one of serene magic and vibrant detail.']


   ... Generated in 8.01 secs, mem/temp/use: 21975 MiB, 82, 99 %   ... Max mem: 18.18 GB


  0%|          | 0/40 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['filled sky. in the foreground, a single dewdrop clings precariously to a spiderweb woven with threads of pure silver. the overall atmosphere should be one of serene magic and vibrant detail.']
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['filled sky. in the foreground, a single dewdrop clings precariously to a spiderweb woven with threads of pure silver. the overall atmosphere should be one of serene magic and vibrant detail.']


   ... Generated in 9.11 secs, mem/temp/use: 21975 MiB, 78, 99 %   ... Max mem: 18.18 GB


  0%|          | 0/45 [00:00<?, ?it/s]

   ... Generated in 10.23 secs, mem/temp/use: 21975 MiB, 84, 98 %   ... Max mem: 18.18 GB


Loading pipeline components...:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

   ... kolors Prompts embedded.. 12.38 seconds, Max vram: 18.18 GB
   ... Generating 6 Images..


Loading pipeline components...:   0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

   ... Generated in 3.31 secs, mem/temp/use: 11469 MiB, 79, 100 %   ... Max mem: 18.18 GB


  0%|          | 0/30 [00:00<?, ?it/s]

   ... Generated in 3.95 secs, mem/temp/use: 11469 MiB, 75, 100 %   ... Max mem: 18.18 GB


  0%|          | 0/35 [00:00<?, ?it/s]

   ... Generated in 4.56 secs, mem/temp/use: 11468 MiB, 73, 100 %   ... Max mem: 18.18 GB


  0%|          | 0/40 [00:00<?, ?it/s]

   ... Generated in 5.06 secs, mem/temp/use: 11468 MiB, 82, 100 %   ... Max mem: 18.18 GB


  0%|          | 0/45 [00:00<?, ?it/s]

   ... Generated in 5.66 secs, mem/temp/use: 11476 MiB, 83, 100 %   ... Max mem: 18.18 GB


  0%|          | 0/50 [00:00<?, ?it/s]

   ... Generated in 6.30 secs, mem/temp/use: 11469 MiB, 84, 100 %   ... Max mem: 18.18 GB


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (117 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['filled sky. in the foreground, a single dewdrop clings precariously to a spiderweb woven with threads of pure silver. the overall atmosphere should be one of serene magic and vibrant detail.']
Token indices sequence length is longer than the specified maximum sequence length for this model (117 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['filled sky. in the foreground, a single dewdrop clings precariously to a spiderweb woven with threads of pure silver. the overall atmosphere should be one of serene magic and vibrant detail.']


   ... sd35_large Prompts embedded.. 12.78 seconds, Max vram: 18.18 GB
   ... Generating 3 Images..


Loading pipeline components...:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

   ... Generated in 11.43 secs, mem/temp/use: 22154 MiB, 81, 99 %   ... Max mem: 18.18 GB


  0%|          | 0/25 [00:00<?, ?it/s]

   ... Generated in 13.96 secs, mem/temp/use: 22178 MiB, 84, 100 %   ... Max mem: 18.18 GB


  0%|          | 0/30 [00:00<?, ?it/s]

   ... Generated in 16.78 secs, mem/temp/use: 22169 MiB, 85, 99 %   ... Max mem: 18.18 GB

📊 IMAGE GENERATION BENCHMARK SUMMARY - 2025-04-14 03:58:59
🖼️  Total Images: 20 images across 6 models
⏱️  Total Runtime: 409.48 seconds (6.82 minutes)
💾  Peak VRAM Usage: 18.18 GB
⚡  Avg Generation Time: 12.93 seconds per image
🚀  Fastest Model: kolors (4.81 seconds avg)
🐢  Slowest Model: cog4 (28.10 seconds avg)
✨  Most Efficient: kolors (0.1282 sec/step)

📈 MODEL COMPARISON:
--------------------------------------------------------------------------------
Model        | Avg Time (s) | Avg VRAM (GB)  | Time/Step (s)
--------------------------------------------------------------------------------
cog4         |        28.10 |          18.18 |       0.9367
lumina2      |        13.10 |          18.18 |       0.4765
aura3        |        21.23 |          18.18 |       0.7721
sd35_med     |         8.65 |          18.18 |       0.2306
kolors       |         4.81 |          18.18 |       0.1282
sd35_