In [None]:
from diffusers import StableDiffusionXLPipeline, ControlNetModel,StableDiffusionXLImg2ImgPipeline
import torch
from PIL import Image
import os, json, time, gc, shutil


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

print("CUDA available:", torch.cuda.is_available())
print("Device name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU")

CUDA available: True
Device name: NVIDIA GeForce RTX 4070


In [3]:
# Load your dataset (replace with your path)
with open("pokemon_dataset_100.json", "r") as f:
    pokemon_pairs = json.load(f)[:5]

print(type(pokemon_pairs))   # <class 'list'>
print(len(pokemon_pairs))    # 100 entries

<class 'list'>
5


In [4]:
pokemon_pairs = [
    {
        "base_name": "Flamkit",
        "evolved_name": "Pyrolynx",
        "base": {
            "prompt": "small fiery lynx cub, orange and gold fur with ember tips, glowing paws, anime game art, cel-shaded, cute and energetic, full body, vibrant warm lighting, clean lineart",
            "negative": "text, watermark, signature, extra limbs, bad anatomy, lowres, deformed, human form, armor, humanoid"
        },
        "evolved": {
            "prompt": (
                "Evolved warrior version of the creature in the image: bipedal feline warrior "
                "with two legs and two arms, sleek flaming armor forming from its mane, glowing claws and tail embers, "
                "retains orange-gold palette and feline facial features, upright battle stance, anime RPG art style, "
                "cel-shaded, dynamic lighting, clean lineart, coherent humanoid anatomy, high detail"
            ),
            "negative": (
                "quadruped, four legs, deer, dog, cat on all fours, hooves, animal muzzle, flat lighting, blurry, "
                "text, watermark, malformed body, low quality, distorted proportions"
            ),
            "strength": 0.8,
            "guidance_scale": 7.4
        }
    },
    {
        "base_name": "Spriggle",
        "evolved_name": "Verdawn",
        "base": {
            "prompt": "tiny green reptile sprouting leaves along its spine, big eyes, curious expression, anime cel-shaded, vibrant jungle background, full body",
            "negative": "text, watermark, signature, armor, humanoid, metallic, bad anatomy"
        },
        "evolved": {
            "prompt": (
                "Evolved humanoid forest guardian version of the creature in the image: tall bipedal lizard with arms and legs wrapped in vines, "
                "wooden bark armor forming on shoulders, leafy antler crown, glowing green eyes, retains bright green and earthy brown palette, "
                "majestic stance, anime fantasy art, cel-shaded, clean lineart, detailed texture work, coherent anatomy"
            ),
            "negative": (
                "quadruped, small creature, insectoid, blob-like, mechanical, metallic, low quality, flat colors, "
                "extra limbs, malformed anatomy, text, watermark"
            ),
            "strength": 0.75,
            "guidance_scale": 7.2
        }
    },
    {
        "base_name": "Aquibbit",
        "evolved_name": "Hydrap",
        "base": {
            "prompt": "small blue amphibian creature with bubble-like skin, wide smile, tiny fins, anime style, cel-shaded, water reflections, full body",
            "negative": "text, watermark, signature, humanoid, armor, bad anatomy"
        },
        "evolved": {
            "prompt": (
                "Evolved sea-serpent version of the creature in the image: long aquatic dragon with three flowing fins on each side, "
                "bioluminescent markings, glowing underbelly, water swirling around, retains deep blue and cyan palette, "
                "anime fantasy art, cel-shaded, fluid composition, dynamic lighting, high detail"
            ),
            "negative": (
                "frog-like, bipedal humanoid, mammal features, extra limbs, mechanical, lowres, blurry, bad perspective, text, watermark"
            ),
            "strength": 0.7,
            "guidance_scale": 7.0
        }
    },
    {
        "base_name": "Volpup",
        "evolved_name": "Stormane",
        "base": {
            "prompt": "small electric fox cub, yellow fur with blue lightning streaks, anime cel-shaded, clean lineart, energetic expression, full body",
            "negative": "text, watermark, signature, humanoid, extra limbs, bad anatomy"
        },
        "evolved": {
            "prompt": (
                "Evolved humanoid thunder guardian version of the creature in the image: upright fox-like warrior with blue lightning armor, "
                "spiked tail glowing with electric energy, storm aura around body, retains yellow-blue palette, anime RPG art style, "
                "cel-shaded, dramatic pose, coherent humanoid anatomy, heroic silhouette, glowing eyes"
            ),
            "negative": (
                "quadruped fox, four-legged animal, hooves, dog body, flat lighting, messy lineart, lowres, malformed limbs, text, watermark"
            ),
            "strength": 0.85,
            "guidance_scale": 7.6
        }
    },
    {
        "base_name": "Frystail",
        "evolved_name": "Cryovian",
        "base": {
            "prompt": "small ice fox creature, white and cyan fur, frosty breath, anime game art, cel-shaded, full body, snowy background, cute expression",
            "negative": "text, watermark, signature, armor, humanoid, fire, bad anatomy"
        },
        "evolved": {
            "prompt": (
                "Evolved spirit form of the creature in the image: ethereal bipedal fox spirit with flowing frost robes, "
                "crystal antlers, glowing cyan eyes, body made of semi-transparent ice shards, retains cyan-white color palette, "
                "anime fantasy art, high detail, cel-shaded, elegant posture, magical aura"
            ),
            "negative": (
                "animal fox, quadruped, solid metal, mechanical, distorted anatomy, melted textures, text, watermark, low quality"
            ),
            "strength": 0.8,
            "guidance_scale": 7.5
        }
    },
    {
        "base_name": "Tuskip",
        "evolved_name": "Ironusk",
        "base": {
            "prompt": "small gray boar creature with tiny tusks and rough fur, muddy terrain, anime cel-shaded, cute yet tough, full body",
            "negative": "text, watermark, signature, humanoid, armor, deformed, bad anatomy"
        },
        "evolved": {
            "prompt": (
                "Evolved armored beast form of the creature in the image: massive iron-plated boar with glowing red tusks, molten cracks along armor, "
                "steam rising from nostrils, retains gray and brown palette, anime battle art, cel-shaded, dynamic pose, heavy metallic detail"
            ),
            "negative": (
                "humanoid, bipedal, small piglet, organic flesh armor, distorted limbs, text, watermark, flat lighting"
            ),
            "strength": 0.6,
            "guidance_scale": 7.1
        }
    },
    {
        "base_name": "Lumini",
        "evolved_name": "Auralis",
        "base": {
            "prompt": "tiny floating jellyfish-like creature, soft purple glow, translucent body, anime cel-shaded, underwater lighting, gentle expression",
            "negative": "text, watermark, signature, human face, extra limbs, bad anatomy"
        },
        "evolved": {
            "prompt": (
                "Evolved celestial being version of the creature in the image: tall humanoid made of glowing jellyfish light, "
                "flowing tendrils for hair, floating bioluminescent halo, transparent body showing light veins, retains soft purple and pink palette, "
                "anime ethereal art, celestial glow, clean lineart, high detail"
            ),
            "negative": (
                "small jellyfish, fish body, quadruped, opaque textures, mechanical, low quality, overexposed, flat background, text, watermark"
            ),
            "strength": 0.85,
            "guidance_scale": 7.3
        }
    },
    {
        "base_name": "Pebblit",
        "evolved_name": "Georok",
        "base": {
            "prompt": "tiny rock golem with moss patches, round body, glowing eyes, anime cel-shaded, full body, earthy colors",
            "negative": "text, watermark, signature, humanoid, animal, bad anatomy"
        },
        "evolved": {
            "prompt": (
                "Evolved guardian version of the creature in the image: towering humanoid golem made of layered stone and moss armor, "
                "ancient rune carvings glowing on chest and arms, retains earthy brown and green palette, anime fantasy art, cel-shaded, "
                "clean lineart, massive presence, coherent humanoid anatomy"
            ),
            "negative": (
                "tiny golem, quadruped, blob shape, metal body, mechanical, distorted proportions, text, watermark, lowres"
            ),
            "strength": 0.8,
            "guidance_scale": 7.4
        }
    },
    {
        "base_name": "Glintbug",
        "evolved_name": "Luminid",
        "base": {
            "prompt": "small insect creature with glowing abdomen, metallic wings, yellow and teal hues, anime cel-shaded, clean lineart, cute design",
            "negative": "text, watermark, signature, humanoid, distorted, bad anatomy"
        },
        "evolved": {
            "prompt": (
                "Evolved luminous insect queen version of the creature in the image: massive moth-dragon hybrid with radiant crystalized wings, "
                "luminescent patterns that pulse with energy, retains yellow and teal palette, elegant posture, glowing aura, "
                "anime fantasy art, cel-shaded, intricate detail, dynamic lighting"
            ),
            "negative": (
                "small bug, flat wings, low detail, humanoid, distorted body, asymmetrical, blurred, text, watermark"
            ),
            "strength": 0.65,
            "guidance_scale": 7.1
        }
    },
    {
        "base_name": "Duskit",
        "evolved_name": "Noctyros",
        "base": {
            "prompt": "small bat-like creature, dark purple fur, glowing magenta eyes, playful pose, anime cel-shaded, clean lineart",
            "negative": "text, watermark, signature, humanoid, deformed, mechanical"
        },
        "evolved": {
            "prompt": (
                "Evolved dark warrior form of the creature in the image: tall humanoid bat knight with cloak-like wings, ornate armor with glowing magenta runes, "
                "sharp claws and piercing eyes, retains purple-magenta palette, anime RPG art style, dynamic lighting, heroic stance, cel-shaded, high detail"
            ),
            "negative": (
                "small bat, quadruped, animal-only, deer, malformed face, distorted proportions, text, watermark, lowres, low quality"
            ),
            "strength": 0.82,
            "guidance_scale": 7.6
        }
    }
]


In [5]:
device = "cuda"
dtype = torch.float16

In [6]:
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=dtype,
    use_safetensors=True
).to(device)

Loading pipeline components...:  43%|████▎     | 3/7 [00:04<00:06,  1.63s/it]`torch_dtype` is deprecated! Use `dtype` instead!
Loading pipeline components...: 100%|██████████| 7/7 [00:06<00:00,  1.11it/s]


In [7]:
# Prevent black outputs on some setups:
pipe.enable_attention_slicing()
pipe.enable_vae_tiling()

In [8]:
output_dir = "pokemon_out"  # ensure defined
try:
    shutil.rmtree(output_dir)
except Exception as e:
    print(e)
    pass
os.makedirs(output_dir, exist_ok=True)

In [9]:
SINGLE_SUBJECT_ADDON = (
    "SINGLE SUBJECT, SOLO, ONE CREATURE ONLY, CENTERED, FULL BODY, "
    "plain neutral background, subject isolated, studio backdrop"
)

SINGLE_SUBJECT_NEG = (
    "multiple creatures, duplicate creature, twins, second creature, extra subject, group, crowd, swarm, "
    "background characters, reflection duplicates, photobomb, collage, split screen, multi-panel"
)

def make_single_subject(prompt: str, negative: str = ""):
    p = f"{prompt}, {SINGLE_SUBJECT_ADDON}"
    n = (negative + ", " if negative else "") + SINGLE_SUBJECT_NEG
    return p, n

In [10]:
start_global = time.perf_counter()
for pair in pokemon_pairs:
    base_name = pair["base_name"]
    
    base_p, base_n = make_single_subject(pair["base"]["prompt"], pair["base"]["negative"])
    print(f"MAKING {base_name}")

    out_folder = os.path.join(output_dir, base_name)
    os.makedirs(out_folder, exist_ok=True)

    base_info = pair["base"]

    t0 = time.perf_counter()

    # Optional: ensure no stale work on the device
    if torch.cuda.is_available():
        torch.cuda.synchronize()

    result = pipe(
        prompt=base_p,
        negative_prompt=base_n + ', text, extra limbs, watermark, multiple bodies, character sheet, concept sheet, turnaround, orthographic, reference sheet, multiple angles, extra head, extra limbs, dismembered parts, split view, multiple views, alternate poses, layout, blueprint, overlay, design board, draft, cutout, outline, diagram, showcase, dissection, duplicated face, extra body',
        num_inference_steps=26,
        guidance_scale=8.0,
        height=832, width=832,
    )

    img = result.images[0]
    img.save(os.path.join(out_folder, f"{base_name}.png"))

    # ---- cleanup to avoid iteration-to-iteration slowdown ----
    del img, result
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()   # free unattached cached blocks
        torch.cuda.synchronize()   # finalize GPU work before timing

    t1 = time.perf_counter()
    print(f"{base_name} took {t1 - t0:.2f}s")

end_global = time.perf_counter()
print(f"Total: {end_global - start_global:.1f}s (avg {(end_global - start_global)/100:.2f}s/img)")

MAKING Flamkit


  attn_output = torch.nn.functional.scaled_dot_product_attention(
100%|██████████| 26/26 [00:07<00:00,  3.41it/s]


Flamkit took 11.33s
MAKING Spriggle


100%|██████████| 26/26 [00:07<00:00,  3.35it/s]


Spriggle took 10.24s
MAKING Aquibbit


100%|██████████| 26/26 [00:07<00:00,  3.69it/s]


Aquibbit took 10.67s
MAKING Volpup


100%|██████████| 26/26 [00:09<00:00,  2.85it/s]


Volpup took 12.55s
MAKING Frystail


100%|██████████| 26/26 [00:06<00:00,  3.77it/s]


Frystail took 9.18s
MAKING Tuskip


100%|██████████| 26/26 [00:07<00:00,  3.70it/s]


Tuskip took 10.70s
MAKING Lumini


100%|██████████| 26/26 [00:06<00:00,  3.78it/s]


Lumini took 10.75s
MAKING Pebblit


100%|██████████| 26/26 [00:06<00:00,  3.75it/s]


Pebblit took 9.71s
MAKING Glintbug


100%|██████████| 26/26 [00:06<00:00,  3.75it/s]


Glintbug took 9.53s
MAKING Duskit


100%|██████████| 26/26 [00:06<00:00,  3.77it/s]


Duskit took 10.59s
Total: 105.3s (avg 1.05s/img)


In [None]:
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=dtype,
    use_safetensors=True
).to(device)


pipe.enable_attention_slicing()   # reduces peak VRAM (slightly slower but more stable)
pipe.enable_vae_tiling()          # helpful if you go larger than 1024

In [None]:
start_global = time.perf_counter()
for pair in pokemon_pairs:
    base_name = pair["base_name"]
    evolved_name = pair["evolved_name"]
    print(f"MAKING {base_name}")

    

    t0 = time.perf_counter()

    # Optional: ensure no stale work on the device
    if torch.cuda.is_available():
        torch.cuda.synchronize()

   
    out_folder = os.path.join(output_dir, base_name)
    init_img = Image.open(os.path.join(out_folder, f"{base_name}.png")).convert("RGB")
    evolved_p, evolved_n = make_single_subject(pair["evolved"]["prompt"], pair["evolved"]["negative"])

    
    result = pipe(
        prompt=evolved_p,
        evolved_n=base_n + ', text, extra limbs, watermark, multiple bodies, character sheet, concept sheet, turnaround, orthographic, reference sheet, multiple angles, extra head, extra limbs, dismembered parts, split view, multiple views, alternate poses, layout, blueprint, overlay, design board, draft, cutout, outline, diagram, showcase, dissection, duplicated face, extra body',
        num_inference_steps=26,
        stength = 0.9,
        image=init_img,
        guidance_scale=8.0,
        height=832, width=832,
    )

    img = result.images[0]
    img.save(os.path.join(out_folder, f"{evolved_name}.png"))

    # ---- cleanup to avoid iteration-to-iteration slowdown ----
    del img, result
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()   # free unattached cached blocks
        torch.cuda.synchronize()   # finalize GPU work before timing

    t1 = time.perf_counter()
    print(f"{evolved_name} took {t1 - t0:.2f}s")

end_global = time.perf_counter()
print(f"Total: {end_global - start_global:.1f}s (avg {(end_global - start_global)/100:.2f}s/img)")

Token indices sequence length is longer than the specified maximum sequence length for this model (99 > 77). Running this sequence through the model will result in indexing errors


The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['subject, solo, one creature only, centered, full body, plain neutral background, subject isolated, studio backdrop']
Token indices sequence length is longer than the specified maximum sequence length for this model (99 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['subject, solo, one creature only, centered, full body, plain neutral background, subject isolated, studio backdrop']


MAKING Flamkit


100%|██████████| 26/26 [00:07<00:00,  3.70it/s]
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: [', one creature only, centered, full body, plain neutral background, subject isolated, studio backdrop']
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: [', one creature only, centered, full body, plain neutral background, subject isolated, studio backdrop']


Flamkit took 8.85s
MAKING Spriggle


  4%|▍         | 1/26 [00:04<01:45,  4.21s/it]


KeyboardInterrupt: 