In [2]:
from diffusers import StableDiffusionXLPipeline
import torch
from accelerate import infer_auto_device_map
import random
import torch

# Report the CUDA environment seen by PyTorch.
# torch.cuda.current_device() and torch.cuda.get_device_name() both need an
# initialised CUDA context, so they are only queried when a GPU is available;
# this keeps the cell runnable on a CPU-only machine.
cuda_available = torch.cuda.is_available()
print("CUDA available:", cuda_available)
print("Number of GPUs:", torch.cuda.device_count())
print("Current device:", torch.cuda.current_device() if cuda_available else "No GPU detected")
print("Device name:", torch.cuda.get_device_name(0) if cuda_available else "No GPU detected")

  from .autonotebook import tqdm as notebook_tqdm


CUDA available: True
Number of GPUs: 1
Current device: 0
Device name: NVIDIA GeForce RTX 4080 SUPER


In [3]:
# Hugging Face Hub identifier of the checkpoint to load
model_id = "stabilityai/stable-diffusion-xl-base-1.0"

# Build the pipeline with float16 weights read from safetensors files...
pipeline = StableDiffusionXLPipeline.from_pretrained(
    model_id, use_safetensors=True, torch_dtype=torch.float16
)
# ...then move it onto the GPU
pipeline = pipeline.to("cuda")

Loading pipeline components...: 100%|██████████| 7/7 [00:02<00:00,  2.84it/s]


In [4]:
import os  # needed here to create the output folder before saving

# Example prompt and negative prompt
prompt = "A futuristic cityscape illuminated by neon lights at sunset, with towering skyscrapers made of glass and steel, flying vehicles in the sky, a bustling street market with diverse people, vibrant colors, hyper-realistic details, cinematic lighting, intricate reflections on the buildings, soft pink and orange hues in the sky, ultra-high definition, photorealistic, 16k resolution."
negative = "blurry, low quality, pixelated, out of focus, overexposed, underexposed, cartoonish, 2D art, poorly detailed, text artifacts, watermark, deformed buildings, unrealistic proportions, grainy textures, flat colors, monochrome, color bleeding, low resolution, bad composition."

num_inference_steps = 50
guidance_scale = 7.5

height = 1024  # Output height in pixels
width = 1024   # Output width in pixels

# Draw a random seed and print it so the run can be reproduced later.
# NOTE: the seed is also the output file name, so drawing the same seed again
# overwrites the earlier image.
seed = random.randint(1, 1000)
print("Using Seed: ", seed)
generator = torch.Generator(device="cpu").manual_seed(seed)

# Generate the image
image = pipeline(
    prompt=prompt,
    negative_prompt=negative,
    height=height,
    width=width,
    num_inference_steps=num_inference_steps,  # Inference steps (more = more detail)
    guidance_scale=guidance_scale,            # Prompt weight (higher = closer to the prompt)
    generator=generator,
).images[0]

# Save the generated image; create the folder first so a fresh checkout
# does not fail on a missing "imgs" directory.
output_dir = "imgs"
os.makedirs(output_dir, exist_ok=True)
image.save(f"{output_dir}/{seed}.png")

Using Seed:  330


100%|██████████| 50/50 [00:08<00:00,  6.14it/s]


# Cropper

In [28]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import random
import os

# --- Configuration ---------------------------------------------------------
PATCH_SIZE = 1024                     # Side length, in pixels, of each square patch
DEBUG = False                         # When True, plot every source image next to its patch
WRITE = True                          # When True, write the patches to OUTPUT_FOLDER_PATH
FOLDER_PATH = "imgs/"                 # Folder scanned for source images
OUTPUT_FOLDER_PATH = "cropped_imgs/"  # Folder receiving the cropped patches
FILE_FORMAT = (".png", ".jpg")        # Accepted (lower-cased) file extensions


# Collect the image paths found in FOLDER_PATH
images = [os.path.join(FOLDER_PATH, file) for file in os.listdir(FOLDER_PATH) if file.lower().endswith(FILE_FORMAT)]

print("Total images: ", len(images))
os.makedirs(OUTPUT_FOLDER_PATH, exist_ok=True)  # Create the output folder if it is missing

PROCESSED_IMG = len(images)  # Number of images to process
PATCH_FOR_IMG = 4            # Number of random patches cropped from each image

# Pick the images to process, in random order
random_image_list = random.sample(images, PROCESSED_IMG)

counter_tot = PROCESSED_IMG * PATCH_FOR_IMG
current_counter = 1
for file_name in random_image_list:
    img = cv2.imread(file_name)

    # cv2.imread signals an unreadable/corrupt file by returning None
    if img is None:
        print("Skipping " + file_name + ": could not be read")
        counter_tot -= PATCH_FOR_IMG
        continue

    # Largest valid top-left corner such that the patch stays inside the image
    max_y = img.shape[0] - PATCH_SIZE
    max_x = img.shape[1] - PATCH_SIZE

    # A negative bound would make random.randint raise ValueError
    if max_x < 0 or max_y < 0:
        print("Skipping " + file_name + ": smaller than " + str(PATCH_SIZE) + "x" + str(PATCH_SIZE))
        counter_tot -= PATCH_FOR_IMG
        continue

    file_name_no_ext, _ = os.path.splitext(os.path.basename(file_name))

    # Crop PATCH_FOR_IMG random patches from this image
    for patch_number in range(PATCH_FOR_IMG):
        # Corners are (x, y) points, as expected by cv2.rectangle
        top_left = (random.randint(0, max_x), random.randint(0, max_y))
        bot_right = (top_left[0] + PATCH_SIZE, top_left[1] + PATCH_SIZE)

        # Array indexing is [rows (y), columns (x)]; both extents use PATCH_SIZE
        cropped_image = img[top_left[1]:bot_right[1], top_left[0]:bot_right[0]]

        if DEBUG:
            # OpenCV loads images as BGR; matplotlib expects RGB
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            cropped_image_rgb = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB)

            # Outline the patch on the converted copy (colour given in RGB)
            cv2.rectangle(img_rgb, top_left, bot_right, (255, 0, 0), 20)

            plt.figure(figsize=(10, 5))
            plt.subplot(1, 2, 1)
            plt.imshow(img_rgb)
            plt.title("Original Image")
            plt.axis("off")

            plt.subplot(1, 2, 2)
            plt.imshow(cropped_image_rgb)
            plt.title("Cropped Patch")
            plt.axis("off")
            plt.show()

        # Patches are named <source name>-<1-based patch index>.png
        save_name = os.path.join(OUTPUT_FOLDER_PATH, file_name_no_ext + "-" + str(patch_number + 1) + ".png")

        if WRITE:
            cv2.imwrite(save_name, cropped_image)

        print(str(current_counter) + "/" + str(counter_tot))
        current_counter += 1

Total images:  4
1/16
2/16
3/16
4/16
5/16
6/16
7/16
8/16
9/16
10/16
11/16
12/16
13/16
14/16
15/16
16/16
