In [None]:
from diffusers import StableDiffusionXLPipeline
import torch
import random
from PIL import Image

# Hugging Face Hub identifier of the Stable Diffusion XL base checkpoint.
model_id = "stabilityai/stable-diffusion-xl-base-1.0"

# Build the text-to-image pipeline with float16 weights read from
# .safetensors files, then move it onto the CUDA device.
pipeline = StableDiffusionXLPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    use_safetensors=True,
)
pipeline = pipeline.to("cuda")

In [None]:
# Example prompt, plus a negative prompt listing the artefacts to avoid.
prompt = "A futuristic cityscape illuminated by neon lights at sunset, with towering skyscrapers made of glass and steel, flying vehicles in the sky, a bustling street market with diverse people, vibrant colors, hyper-realistic details, cinematic lighting, intricate reflections on the buildings, soft pink and orange hues in the sky, ultra-high definition, photorealistic, realistic photo."
negative = "blurry, low quality, pixelated, out of focus, overexposed, underexposed, cartoonish, 2D art, poorly detailed, text artifacts, watermark, deformed buildings, unrealistic proportions, grainy textures, flat colors, monochrome, color bleeding, low resolution, bad composition."

# Sampling settings: number of inference steps and prompt guidance weight.
num_inference_steps = 50
guidance_scale = 7.5

# Output resolution in pixels.
height = 1024
width = 1024

# Draw a random seed and print it so the run can be reproduced later.
# NOTE(review): the lower bound of 8 looks arbitrary (0 may have been
# intended) — confirm before changing.
seed = random.randint(8, 2**32 - 1)
print("Using Seed: ", seed)
generator = torch.Generator(device="cpu")
generator.manual_seed(seed)

# Collect every argument of the generation call in one place.
generation_kwargs = dict(
    prompt=prompt,
    negative_prompt=negative,
    height=height,
    width=width,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
    generator=generator,
)

# Run the pipeline and keep the first (only) generated image.
result = pipeline(**generation_kwargs)
image = result.images[0]

# Write the generated image to disk.
image.save("output_image.png")


In [None]:
from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionXLPipeline
from PIL import Image
import torch

# "Launder" an existing image: push it through SDXL image-to-image with as
# little added noise as possible and save the result.
#
# The image-to-image pipeline is required here; the text-to-image
# StableDiffusionXLPipeline does not consume `image`/`strength`, so the input
# picture would never influence the output.
# NOTE: this rebinds `pipeline` to an image-to-image pipeline; re-run the
# text-to-image loading cell before generating from a prompt alone again.
model_id = "stabilityai/stable-diffusion-xl-base-1.0"
pipeline = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    use_safetensors=True,  # load weights from .safetensors files
).to("cuda")

# Load the input image; force 3-channel RGB so RGBA/palette PNGs also work.
input_image = Image.open("00045.png").convert("RGB")
input_image = input_image.resize((768, 768))

# Neutral (empty) prompt: the text should not steer the result.
prompt = ""

# In img2img, `strength` controls how much noise is added to the input and
# the pipeline runs int(num_inference_steps * strength) denoising steps:
# strength=1 is maximum noise (the input is essentially ignored), while a
# product below 1 leaves no step to run. Derive the strength from the
# schedule so that exactly one low-noise denoising step is performed.
launder_total_steps = 50
launder_denoising_steps = 1
launder_strength = launder_denoising_steps / launder_total_steps

output = pipeline(
    prompt=prompt,
    image=input_image,
    strength=launder_strength,
    num_inference_steps=launder_total_steps,
)

# Save the resulting image.
output_image = output.images[0]
output_image.save("laundered_image.png")

In [None]:
import torch
from diffusers import StableDiffusionXLImg2ImgPipeline
from PIL import Image

# Sweep the img2img `strength` parameter over one input image and save one
# output per strength value.

# Load the Stable Diffusion XL image-to-image pipeline.
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",  # SDXL base checkpoint
    torch_dtype=torch.float16,  # float16 for GPU; use float32 on CPU
).to("cuda")  # change to "cpu" if no GPU is available

# Load the starting image as RGB and resize it to 1024x1024.
init_image = Image.open("00045.png").convert("RGB")
init_image = init_image.resize((1024, 1024))

# Text prompt guiding the transformation.
prompt = "A beautiful futuristic cityscape at sunset, highly detailed, ultra realistic"

# The pipeline encodes `prompt` itself on every call, so no separate
# encode_prompt() call is needed (its result was previously unused).

# Strength values 0.015, 0.115, ..., 0.915. They are computed from the index
# instead of accumulated with `+= 0.1`, which avoids floating-point drift.
sweep_strengths = [round(0.015 + 0.1 * k, 3) for k in range(10)]

SWEEP_BASE_STEPS = 50  # scheduler steps used whenever that is enough
SWEEP_SEED = 0         # same noise for every strength, so only strength varies

for sweep_index, sweep_strength in enumerate(sweep_strengths):
    # The pipeline runs int(num_inference_steps * strength) denoising steps.
    # With 50 steps the smallest strength (0.015) would give int(0.75) == 0
    # steps, so raise the step count just enough to keep at least one.
    sweep_steps = max(SWEEP_BASE_STEPS, int(1 / sweep_strength) + 1)

    sweep_generator = torch.Generator(device="cpu").manual_seed(SWEEP_SEED)

    output = pipe(
        image=init_image,
        prompt=prompt,
        # strength near 0 keeps the input image almost unchanged;
        # strength 1 adds maximum noise and essentially ignores the input.
        strength=sweep_strength,
        num_inference_steps=sweep_steps,
        guidance_scale=7.5,  # classifier-free guidance weight for the prompt
        generator=sweep_generator,
    )

    # Save the result; the loop index (not the builtin-shadowing name `iter`)
    # numbers the output files.
    output.images[0].save("immagine_output" + str(sweep_index) + ".png")



In [1]:
import torch
from diffusers import StableDiffusionXLPipeline
from PIL import Image
import numpy as np

# Round-trip an image through the SDXL VAE (encode to latents, decode back)
# and save the reconstruction.

# Load the Stable Diffusion XL pipeline.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16
).to("cuda")  # use "cpu" if no GPU is available

# Direct handle on the pipeline's VAE (variational autoencoder).
vae = pipe.vae

# The SDXL VAE is numerically unstable in float16 (diffusers upcasts it via
# the `force_upcast` config), so run the round trip in float32 and restore
# the original dtype afterwards so `pipe` stays usable.
vae_original_dtype = vae.dtype
vae.to(torch.float32)

# Load the input image as RGB at 1024x1024.
image_path = "00045.png"
init_image = Image.open(image_path).convert("RGB")
init_image = init_image.resize((1024, 1024))
# Normalise pixels to [-1, 1]; this matches the `/ 2 + 0.5` de-normalisation
# applied to the decoder output below.
init_image = np.array(init_image).astype(np.float32) / 127.5 - 1.0
init_image = (
    torch.from_numpy(init_image)
    .permute(2, 0, 1)   # HWC -> CHW
    .unsqueeze(0)       # add batch dimension -> [B, C, H, W]
    .to(device=vae.device, dtype=vae.dtype)
)

# Read the latent scaling factor from the model config instead of
# hard-coding it (0.18215 is the Stable Diffusion 1.x value).
scaling_factor = vae.config.scaling_factor

# Inference only: without no_grad() autograd keeps every intermediate
# activation alive (a large amount of GPU memory at this resolution) and
# `.numpy()` would fail on a tensor that requires grad.
with torch.no_grad():
    # Encoding: map the image into latent space (scaled, as the diffusion
    # model would see it).
    latents = vae.encode(init_image).latent_dist.sample()
    latents = latents * scaling_factor

    # Decoding: undo the scaling before handing the latents to the decoder.
    reconstructed_image = vae.decode(latents / scaling_factor).sample

# Put the VAE back into the dtype the pipeline was loaded with.
vae.to(vae_original_dtype)

# Map the decoder output from [-1, 1] to [0, 1], then to uint8 HWC pixels.
reconstructed_image = (reconstructed_image / 2 + 0.5).clamp(0, 1)
reconstructed_image = (
    reconstructed_image.cpu().permute(0, 2, 3, 1).numpy()[0] * 255
).round().astype(np.uint8)

# Save the reconstructed image.
output_image = Image.fromarray(reconstructed_image)
output_image.save("reconstructed_image.png")

print("Encoding and decoding completed. Image saved as 'reconstructed_image.png'.")



  from .autonotebook import tqdm as notebook_tqdm
Loading pipeline components...: 100%|██████████| 7/7 [00:01<00:00,  4.48it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 64.00 MiB. GPU 0 has a total capacity of 15.59 GiB of which 56.75 MiB is free. Including non-PyTorch memory, this process has 14.11 GiB memory in use. Of the allocated memory 13.45 GiB is allocated by PyTorch, and 369.75 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)