In [1]:
import torch
from diffusers.utils import load_image, make_image_grid
from diffusers import AutoPipelineForInpainting
from PIL import Image, ImageDraw
import random
import os

In [2]:
def generate_random_mask(image_size, rng=None):
    """Create a black mask containing one randomly jittered white rectangle.

    The rectangle nominally spans from ``(width/4, height/4)`` to
    ``(3*width/4, 3*height/4)``. Each of the four corner coordinates is then
    shifted independently by a random integer offset of at most one tenth of
    the corresponding image dimension.

    Args:
        image_size: ``(width, height)`` of the mask to create, in pixels.
        rng: Optional object exposing ``randint(a, b)`` (e.g. a
            ``random.Random`` instance). Pass a seeded instance to obtain
            reproducible masks. Defaults to the global ``random`` module,
            which matches the previous behaviour.

    Returns:
        A tuple ``(mask, (x1, y1, x2, y2))`` where ``mask`` is a mode ``"L"``
        PIL image (255 inside the rectangle, 0 elsewhere) and the inner tuple
        holds the rectangle's corner coordinates.
    """
    rng = random if rng is None else rng
    width, height = image_size

    # Compute the jitter bound once and negate it afterwards. Writing
    # `-width // 10` would floor towards negative infinity and make the
    # offset range asymmetric (e.g. [-52, 51] for width 512).
    jitter_x = width // 10
    jitter_y = height // 10

    # Draw order (x1, y1, x2, y2) is kept so a seeded generator is consumed
    # in the same sequence as before.
    x1 = width // 4 + rng.randint(-jitter_x, jitter_x)
    y1 = height // 4 + rng.randint(-jitter_y, jitter_y)
    x2 = 3 * width // 4 + rng.randint(-jitter_x, jitter_x)
    y2 = 3 * height // 4 + rng.randint(-jitter_y, jitter_y)

    # For very small images the jitter can invert the corners; newer Pillow
    # versions reject such boxes, so normalise the ordering. This is a no-op
    # for ordinary image sizes.
    x1, x2 = sorted((x1, x2))
    y1, y2 = sorted((y1, y2))

    mask = Image.new("L", image_size, 0)  # Blank (black) mask
    ImageDraw.Draw(mask).rectangle([x1, y1, x2, y2], fill=255)  # Filled white rectangle
    return mask, (x1, y1, x2, y2)

Testing every checkpoint of the LoRA-trained model (`fine_tune_lora.sh`)

In [None]:
# Constants
NUM_CHECKPOINTS = 2  # Number of checkpoints to process
BASE_FOLDER = "results20250111183706/"  # Base folder containing checkpoint folders produced by fine_tune_lora.sh
CHECKPOINT_STEP = 500  # Folders are named checkpoint-500, checkpoint-1000, ...
CHECKPOINT_TEMPLATE = "checkpoint-{step}"  # Template for checkpoint folder names (filled with str.format)
NUM_IMAGES = 4  # Base images bg_images/back1.jpg ... bg_images/back4.jpg
IMAGE_SIZE = (512, 512)  # Working resolution for the base images and masks
SEED = 92  # Shared seed for mask placement and for the diffusion generator

# Prompts are identical for every image and checkpoint
prompt = "a piece of trash floating underwater, realistic, blend with the environment"
negative_prompt = "Unrealistic, clean, blurry"

# Create folder paths for checkpoints
folders_temp = [
    CHECKPOINT_TEMPLATE.format(step=CHECKPOINT_STEP * k)
    for k in range(1, NUM_CHECKPOINTS + 1)
]
folders = [os.path.join(BASE_FOLDER, folder, "") for folder in folders_temp]

# Prepare the base images and masks ONCE, before looping over checkpoints.
# Seeding `random` makes the mask placement reproducible, and building the
# inputs up front guarantees that every checkpoint is evaluated on exactly the
# same (image, mask) pairs, so the saved grids are directly comparable.
random.seed(SEED)
inputs = []
for i in range(NUM_IMAGES):
    init_image = load_image(f"bg_images/back{i+1}.jpg").resize(IMAGE_SIZE)
    # The mask is created at the resized image's size, so no extra resize is needed.
    mask_image, mask_coords = generate_random_mask(init_image.size)
    inputs.append((init_image, mask_image, mask_coords))

# Loop over each checkpoint folder
for folder_name, folder in zip(folders_temp, folders):
    print(f"Processing {folder_name}...")

    # Drop the reference to the previous checkpoint's pipeline before loading
    # the next one, so two full pipelines need not be alive at the same time.
    pipeline = None
    torch.cuda.empty_cache()

    # Initialize the inpainting pipeline from pre-trained weights. A fresh
    # pipeline is built per checkpoint so LoRA weights never stack up.
    pipeline = AutoPipelineForInpainting.from_pretrained(
        "stabilityai/stable-diffusion-2-inpainting",
        torch_dtype=torch.float16,
        variant="fp16",
    )

    # Load this checkpoint's LoRA weights on top of the base model
    pipeline.load_lora_weights(folder, weight_name="LoRA_2_pytorch_lora_weights.safetensors")

    # Enable efficient memory usage by offloading models to CPU when not in use
    pipeline.enable_model_cpu_offload()

    grid_rows = []  # One (original, mask, result) strip per base image
    for init_image, mask_image, mask_coords in inputs:
        # Re-seed for every image so each result is reproducible on its own
        generator = torch.Generator("cuda").manual_seed(SEED)

        # Perform inpainting
        image = pipeline(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=init_image,
            mask_image=mask_image,
            strength=1,
            generator=generator,
            padding_mask_crop=32,
        ).images[0]

        # Draw a red rectangle to visualize the mask area
        ImageDraw.Draw(image).rectangle(mask_coords, outline="red", width=3)

        # Create a grid of images (original, mask, result) for visualization
        grid_rows.append(make_image_grid([init_image, mask_image, image], rows=1, cols=3))

    # Combine all strips into one vertical grid
    image_grid = make_image_grid(grid_rows, rows=len(grid_rows), cols=1)

    # Save the grid as a PNG file
    output_path = f"inpainting{folder_name}.png"
    image_grid.save(output_path)

    print(f"Saved {output_path}")
