In [24]:
from diffusers import (
    StableDiffusionControlNetPipeline,
    ControlNetModel,
    StableDiffusionImg2ImgPipeline
)
from PIL import Image
import torch
import os
from tqdm import tqdm  # For progress bars

In [22]:
# --- Setup device ---
device = "cuda" if torch.cuda.is_available() else "cpu"

In [23]:
# --- Load ControlNet for scribble ---
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-scribble", torch_dtype=torch.float16
)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
).to(device)

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [25]:
# --- Prompt and image setup ---
sketch_path = "/content/boy_sketch.jpg"
sketch = Image.open(sketch_path).convert("RGB")

prompt_base = "anime style boy, vibrant colors, highly detailed"
env_start = "in a sunny forest"
env_end = "in a rainy forest"

In [28]:
# --- Output folder
os.makedirs("frames", exist_ok=True)

In [29]:
# --- Generate first frame ---
def generate_first_frame(sketch_img, prompt):
    print(f"Generating first frame with prompt: {prompt}")
    result = pipe(prompt=prompt, image=sketch_img, num_inference_steps=30)
    return result.images[0]

first_frame = generate_first_frame(sketch, f"{prompt_base}, {env_start}")
first_frame.save("frames/frame_000.png")

Generating first frame with prompt: anime style boy, vibrant colors, highly detailed, in a sunny forest


  0%|          | 0/30 [00:00<?, ?it/s]

In [30]:
# Clear pipeline and free memory
del pipe, controlnet
torch.cuda.empty_cache()

In [31]:
# --- Load Img2Img pipeline with LoRA ---
img2img_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to(device)
img2img_pipe.load_lora_weights("aionthegrind/anime-lora")

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]



In [32]:
# --- Setup CLIP for similarity check ---
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def clip_similarity(img1, img2):
    inputs = clip_processor(images=[img1, img2], return_tensors="pt").to(device)
    with torch.no_grad():
        features = clip_model.get_image_features(**inputs)
        similarity = torch.cosine_similarity(features[0:1], features[1:2]).item()
    return similarity


In [33]:
# --- Smooth prompt blending ---
def blend_prompt(alpha):
    return (
        f"{prompt_base}, anime screencap, "
        f"{env_start}:{1 - alpha:.2f}, {env_end}:{alpha:.2f}"
    )

In [35]:
# --- Frame generation loop with CLIP consistency check ---
frames = [first_frame]
num_frames = 40
base_seed = 42

print("Generating frames with identity consistency...")

for i in range(1, num_frames):
    alpha = i / num_frames
    blended_prompt = blend_prompt(alpha)
    generator = torch.manual_seed(base_seed + i)

    # Generate new frame based on last frame
    new_frame = img2img_pipe(
        prompt=blended_prompt,
        image=frames[-1],
        strength=0.65,
        guidance_scale=7.5,
        num_inference_steps=25,
        generator=generator
    ).images[0]

    # Optional: Enforce minimum similarity to previous frame
    similarity = clip_similarity(frames[-1], new_frame)
    if similarity < 0.85:
        print(f"Low similarity ({similarity:.3f}) detected at frame {i}, regenerating...")
        # You can choose to retry with different seed or prompt tweaks here
        # For now, just keep the new frame and log the similarity

    new_frame.save(f"frames/frame_{i:03d}.png")
    frames.append(new_frame)

Generating frames with identity consistency...


  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

In [36]:
# --- Final CLIP similarity between first and last frame ---
final_similarity = clip_similarity(frames[0], frames[-1])
print(f"CLIP similarity between first and last frame: {final_similarity:.4f}")

CLIP similarity between first and last frame: 0.8407


In [37]:
# --- Create video from frames ---
video_path = "output_animation.mp4"
fps = 8
print(f"Creating video at {fps} fps...")

with imageio.get_writer(video_path, fps=fps) as writer:
    for i in range(len(frames)):
        img = imageio.imread(f"frames/frame_{i:03d}.png")
        writer.append_data(img)

Creating video at 8 fps...


In [38]:
# --- Display video in notebook ---
mp4 = open(video_path, 'rb').read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()

HTML(f"""
<video width=600 controls>
  <source src="{data_url}" type="video/mp4">
</video>
""")