In [1]:
import torch
from diffusers import StableDiffusionImg2ImgPipeline
from PIL import Image
from transformers import CLIPImageProcessor  # Updated import

# Load the Stable Diffusion pipeline using the Jiali model (image-to-image mode)
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = StableDiffusionImg2ImgPipeline.from_pretrained("Jiali/stable-diffusion-1.5").to(device)

# Load the CLIP Image Processor (updated from CLIPFeatureExtractor)
image_processor = CLIPImageProcessor.from_pretrained("Jiali/stable-diffusion-1.5")

# Function to load and preprocess images
def load_image(image_path, target_size=(512, 512)):  # Resize images to 512x512 or any smaller resolution
    image = Image.open(image_path).convert("RGB")
    image = image.resize(target_size)
    return image

# Example image paths
image_paths = [
    "../data/results/ma-boston_200250_fake_B.png",
    "../data/results/nc-charlotte_200250_fake_B.png",
    "../data/results/ny-manhattan_200250_fake_B.png",
    "../data/results/pa-pittsburgh_200250_fake_B.png"
]

# Load the images
images = [load_image(path) for path in image_paths]

# Example softmax weights (these should sum to 1)
weights = [0.25, 0.35, 0.15, 0.25]

# Interpolating in latent space with Stable Diffusion
def interpolate_images(images, weights):
    # Encode images into latent space
    latents = []
    for img in images:
        # Use the CLIPImageProcessor (formerly CLIPFeatureExtractor) for image preprocessing
        inputs = image_processor(images=img, return_tensors="pt").to(device)
        latent = pipe.vae.encode(inputs["pixel_values"]).latent_dist.sample()
        latents.append(latent)
    
    # Weighted average of the latent vectors
    interpolated_latent = sum(w * latent for w, latent in zip(weights, latents))
    
    # Decode the interpolated latent vector back to an image
    interpolated_image = pipe.decode_latents(interpolated_latent)

    return interpolated_image

# Generate the output image
output_image = interpolate_images(images, weights)

# Convert and save the output image
output_pil = Image.fromarray((output_image * 255).astype("uint8"))
output_pil.save("interpolated_output.png")
output_pil.show()

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]



OSError: Jiali/stable-diffusion-1.5 does not appear to have a file named preprocessor_config.json. Checkout 'https://huggingface.co/Jiali/stable-diffusion-1.5/tree/main' for available files.