<a href="https://colab.research.google.com/github/sujithkumarmp/ai-images/blob/main/decode_encode.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import torch
from diffusers import AutoencoderKL
from PIL import Image
import requests
from torchvision import transforms
import io # Import the io module

# 1. Load the VAE from a popular model (Stable Diffusion)
# Prefer the GPU (e.g. a Colab T4), but fall back to the CPU so the script
# still runs on machines without CUDA instead of failing on `.to("cuda")`.
device = "cuda" if torch.cuda.is_available() else "cpu"
vae = AutoencoderKL.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="vae").to(device)

# 2. Load and Prepare an Image
url = "https://raw.githubusercontent.com/pytorch/vision/main/gallery/assets/dog2.jpg"
# Bound the request: without `timeout`, requests waits indefinitely on a
# server that accepts the connection but never answers.
response = requests.get(url, timeout=30)
response.raise_for_status()  # Raise an exception for HTTP errors (4xx or 5xx)
# Decode from the fully downloaded bytes, force 3 channels (RGB), and resize
# to the 512x512 input size used by the rest of this script.
raw_image = Image.open(io.BytesIO(response.content)).convert("RGB").resize((512, 512))

# Build the preprocessing pipeline: PIL image -> tensor, then normalize with
# mean 0.5 / std 0.5 (undone after decoding by `x / 2 + 0.5`).
preprocess_steps = [transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]
transform = transforms.Compose(preprocess_steps)

# Add a leading batch dimension, then move the batch to the model's device.
input_tensor = transform(raw_image)
input_tensor = input_tensor.unsqueeze(0).to(device)

# 3. ENCODE: Turning the image into "Latent Space" (The Shorthand)
# Read the latent scaling factor from the VAE's own config instead of
# hard-coding it twice, so encode and decode always agree with the loaded
# checkpoint (the original script used the literal 0.18215).
scaling_factor = vae.config.scaling_factor
with torch.no_grad():  # inference only; no gradients needed
    # encode() exposes a latent distribution; draw one sample from it.
    latents = vae.encode(input_tensor).latent_dist.sample()
    latents = latents * scaling_factor

print(f"Original Image Shape: {input_tensor.shape}") # [1, 3, 512, 512]
print(f"Latent (Squeezed) Shape: {latents.shape}")   # [1, 4, 64, 64]

# 4. DECODE: Turning the Latent back into an Image
with torch.no_grad():
    # Undo the scaling applied after encoding before calling the decoder.
    decoded_image = vae.decode(latents / scaling_factor).sample

# 5. Show the result
# Invert the Normalize([0.5], [0.5]) applied to the input, clip any overshoot
# to [0, 1], move to host memory, and reorder channels-first -> channels-last
# before taking the first (only) item of the batch.
decoded_image = (decoded_image / 2 + 0.5).clamp(0, 1).cpu().permute(0, 2, 3, 1).numpy()[0]
# Round before casting: a bare astype("uint8") truncates toward zero, which
# can darken a pixel by almost one level and maps only exactly 1.0 to 255.
pixels = (decoded_image * 255).round().astype("uint8")
# NOTE(review): Image.show() launches an external viewer; in a notebook the
# image may need to be displayed inline instead - confirm in the target env.
Image.fromarray(pixels).show()



Original Image Shape: torch.Size([1, 3, 512, 512])
Latent (Squeezed) Shape: torch.Size([1, 4, 64, 64])
