In [None]:
!pip install diffusers transformers accelerate torch torchvision pillow numpy

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
import os
import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm

# Source folder, destination folder and output resolution for preprocessing.
# NOTE(review): both paths sit under /content/drive/MyDrive, so presumably Google
# Drive has to be mounted before this cell runs — confirm.
# NOTE(review): "pintrest_images" may be a misspelling of "pinterest_images" —
# verify against the real folder name before changing it.
INPUT_DIR = "/content/drive/MyDrive/pintrest_images"
OUTPUT_DIR = "/content/drive/MyDrive/preprocessed_images"
TARGET_SIZE = (512, 512)  # passed unchanged to Image.resize as (width, height)

def preprocess_images(input_dir, output_dir, target_size):
    """Resize every image under ``input_dir`` and mirror it into ``output_dir``.

    ``input_dir`` is expected to hold one sub-directory per style. Each file
    inside a style folder is opened, converted to RGB, resized to
    ``target_size`` with Lanczos resampling and saved under the same file name
    in ``output_dir/<style>/``. Entries directly inside ``input_dir`` that are
    not directories are ignored.

    The saved files are ordinary 8-bit images. No [0, 1] normalisation is
    stored; scaling to floats has to happen when the training data is loaded.
    (The earlier ``/ 255`` then ``* 255`` round-trip was a no-op at best and,
    because the cast back to uint8 truncates, could lose a level to float
    rounding.)

    Args:
        input_dir: Folder containing one sub-folder per style.
        output_dir: Folder receiving the resized copies; style sub-folders are
            created on demand.
        target_size: ``(width, height)`` tuple handed to ``Image.resize``.

    Raises:
        FileNotFoundError: If ``input_dir`` does not exist.
    """
    if not os.path.exists(input_dir):
        raise FileNotFoundError(
            f"Input directory not found: {input_dir!r}. "
            "If it lives on Google Drive, make sure Drive is mounted and the "
            "folder name is spelled correctly."
        )

    # Sorted listings keep the processing order (and the progress bars) stable.
    for style in sorted(os.listdir(input_dir)):
        style_path = os.path.join(input_dir, style)
        if not os.path.isdir(style_path):
            continue

        output_style_path = os.path.join(output_dir, style)
        os.makedirs(output_style_path, exist_ok=True)

        for img_file in tqdm(sorted(os.listdir(style_path)), desc=f"Processing {style}"):
            img_path = os.path.join(style_path, img_file)

            try:
                # The context manager releases the file handle even when
                # decoding fails; convert() returns an independent copy.
                with Image.open(img_path) as source:
                    resized = source.convert("RGB").resize(target_size, Image.LANCZOS)
                resized.save(os.path.join(output_style_path, img_file))

            except Exception as e:
                # Best effort: one unreadable file must not abort the whole run.
                print(f"Skipping {img_file} due to error: {e}")

# Run preprocessing over the configured folders; the message below is only
# reached when the call returns without raising.
preprocess_images(INPUT_DIR, OUTPUT_DIR, TARGET_SIZE)

print(f"Preprocessing complete! Images saved in {OUTPUT_DIR}")


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/pintrest_images'

In [None]:
!pip install diffusers[training] transformers accelerate safetensors datasets bitsandbytes peft torch torchvision

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting protobuf<4,>=3.20.3 (from diffusers[training])
  Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Co

In [None]:
import torch
from diffusers import StableDiffusionPipeline, UNet2DConditionModel
from peft import get_peft_model, LoraConfig

# Load the Stable Diffusion pipeline.
# NOTE(review): the UNet is loaded a second time just below, so two copies end up
# on the GPU; if memory is tight, consider reusing `pipeline.unet` instead.
pipeline = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
pipeline.to("cuda")

# Load the UNet on its own; this is the module the LoRA adapters get attached to.
unet = UNet2DConditionModel.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="unet")
unet.to("cuda")

# Configure LoRA for the attention projections.
# In diffusers attention blocks `to_out` is a ModuleList (Linear followed by
# Dropout), which PEFT cannot wrap directly; the Linear is addressed as "to_out.0".
lora_config = LoraConfig(
    r=16,  # LoRA rank
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["to_q", "to_k", "to_v", "to_out.0"],
)


# Training hyper-parameters. This is a plain dict; nothing in this cell consumes it.
training_args = {
    "output_dir": "./lora_sd_finetune",
    "num_train_epochs": 5,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 4,
    "learning_rate": 1e-4,
    "logging_dir": "./logs",
}

# Nothing is saved here on purpose: no adapter has been attached and no training
# step has run yet, so `unet.save_pretrained(...)` at this point would only write
# an unmodified copy of the base UNet under a "fine-tuned" name.


In [None]:
from peft import LoraConfig, PeftModel, get_peft_model

# LoRA config for the UNet.
# The diffusers UNet names its attention projections to_q / to_k / to_v / to_out.0.
# The earlier "query" / "key" / "value" entries presumably matched no module, which
# would have left only proj_in / proj_out adapted — hence the check printed below.
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=[
        "to_q", "to_k", "to_v", "to_out.0",
        "proj_in", "proj_out"
    ],
)

# Attach the adapters once: re-running this cell must not wrap an already
# wrapped model a second time.
if not isinstance(unet, PeftModel):
    unet = get_peft_model(unet, lora_config)

# Sanity check that the target names matched: the trainable count must be > 0.
unet.print_trainable_parameters()

# Training hyper-parameters (plain dict; not consumed anywhere in this cell).
training_args = {
    "output_dir": "./lora_sd_finetune",
    "num_train_epochs": 5,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 4,
    "learning_rate": 1e-4,
    "logging_dir": "./logs",
}

# Save the adapter.
# NOTE(review): no training loop runs before this line, so the adapter written
# here still holds its initial weights — add the training step before relying on it.
unet.save_pretrained("/content/lora_sd_model")


In [None]:
from diffusers import StableDiffusionPipeline
from peft import PeftModel
import torch

# Load base Stable Diffusion model in half precision on the GPU.
# NOTE(review): the adapter was created on top of "CompVis/stable-diffusion-v1-4"
# earlier in this notebook, while this cell loads v1-5 — confirm the intended base.
base_model = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to("cuda")

# Load the LoRA adapter from the same absolute path it was saved to, so this cell
# does not depend on the kernel's current working directory.
base_model.unet = PeftModel.from_pretrained(base_model.unet, "/content/lora_sd_model")

print("Fine-tuned LoRA model loaded successfully!")

In [None]:
# Writes `unet` to /content/lora_sd_model again (same call and path as the
# earlier LoRA cell).
unet.save_pretrained("/content/lora_sd_model")

In [None]:
# Exports the UNet's LoRA weights to Google Drive through diffusers' save_attn_procs.
# NOTE(review): this writes diffusers-format LoRA weights; whatever reads this
# folder later should use a matching loader (e.g. load_attn_procs) — confirm.
unet.save_attn_procs("/content/drive/MyDrive/lora_sd_model")

In [None]:
prompt = "A futuristic cyberpunk cityscape at night, neon lights, highly detailed"

image = base_model(prompt).images[0]
# PIL's Image.show() launches an external viewer, which a hosted notebook kernel
# does not have; leaving the image as the cell's last expression renders it inline.
image

In [None]:
# Render inline: Image.show() would try to open an external viewer instead.
image

In [None]:
# Write the most recently generated image to the kernel's working directory.
image.save("fine_tuned_output.png")
print("Image saved as fine_tuned_output.png")

In [None]:
# Later usage: reload the base model and the saved LoRA weights in a new session.

import torch
from diffusers import StableDiffusionPipeline

# Load the base model
base_model = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")

# Load LoRA weights.
# The Drive folder is written with `save_attn_procs`, so it is read back with the
# matching diffusers loader (as the generation cells further down do) rather than
# PeftModel.from_pretrained, which expects a PEFT `adapter_config.json`.
lora_path = "/content/drive/MyDrive/lora_sd_model"  # Adjust path
base_model.unet.load_attn_procs(lora_path)

# Move to GPU when one is available, otherwise stay on the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
base_model.to(device)

# Generate an image; as the last expression it is rendered inline
# (Image.show() would try to open an external viewer instead).
image = base_model("A futuristic city with neon lights").images[0]
image


In [None]:
import torch
import numpy as np
import os
from diffusers import StableDiffusionPipeline
from sklearn.cluster import KMeans
from PIL import Image

# Checkpoint to start from and the folder holding the exported LoRA weights.
base_model = "runwayml/stable-diffusion-v1-5"  # Change if needed
lora_model_path = "/content/drive/MyDrive/lora_sd_model"

# Build the half-precision pipeline, then move it onto the GPU.
pipeline = StableDiffusionPipeline.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
)
pipeline = pipeline.to("cuda")

# Only attach LoRA weights when the export folder is present; otherwise the
# plain base model is used.
if not os.path.exists(lora_model_path):
    print("Warning: LoRA model not found. Using base model.")
else:
    print("Loading LoRA weights...")
    pipeline.unet.load_attn_procs(lora_model_path)

# Function to generate multiple images
def generate_images(prompt, num_images=4, output_dir="generated_images", seed=None):
    """Generate ``num_images`` images for ``prompt`` and save them as PNG files.

    Uses the module-level ``pipeline``. Files are named ``image_1.png`` ...
    ``image_<num_images>.png`` inside ``output_dir``, so a later call with the
    same ``output_dir`` overwrites the earlier files.

    Args:
        prompt: Text prompt passed to the pipeline.
        num_images: How many images to generate.
        output_dir: Folder receiving the PNG files (created if missing).
        seed: Optional base seed. When given, the i-th image (0-based) is
            sampled with ``seed + i``, which makes a run repeatable while still
            producing different images. ``None`` keeps the unseeded behaviour.

    Returns:
        List of file paths, in generation order.
    """
    os.makedirs(output_dir, exist_ok=True)
    image_paths = []

    for i in range(num_images):
        print(f"Generating image {i+1}/{num_images}...")

        # A CPU generator is accepted by diffusers pipelines on any device.
        generator = None
        if seed is not None:
            generator = torch.Generator().manual_seed(seed + i)

        image = pipeline(prompt, generator=generator).images[0]
        img_path = os.path.join(output_dir, f"image_{i+1}.png")
        image.save(img_path)
        image_paths.append(img_path)

    print("Image generation complete.")
    return image_paths

# Function to extract color palette using K-means
def extract_color_palette(image_paths, num_colors=5):
    """Extract a dominant-colour palette from each image with k-means.

    Every image is reduced to a 100x100 RGB thumbnail, its pixels are clustered
    into ``num_colors`` groups, and the cluster centres become the palette.

    Args:
        image_paths: Iterable of image file paths.
        num_colors: Number of clusters, i.e. colours per palette.

    Returns:
        Dict mapping each path to a list of ``(r, g, b)`` integer tuples.
    """
    all_palettes = {}

    for img_path in image_paths:
        # Close the file handle as soon as the thumbnail exists.
        with Image.open(img_path) as source:
            thumbnail = source.convert("RGB").resize((100, 100))  # small = fast clustering
        pixels = np.array(thumbnail).reshape(-1, 3)  # one row per pixel

        kmeans = KMeans(n_clusters=num_colors, n_init=10, random_state=42)
        kmeans.fit(pixels)

        # Centres are float means; round to the nearest level instead of flooring
        # so the palette is not biased towards darker values.
        centers = np.rint(kmeans.cluster_centers_).astype(int).tolist()
        all_palettes[img_path] = [tuple(center) for center in centers]

    print("Color extraction complete.")
    return all_palettes

# Example usage
# Generate four images for the prompt, then derive the default 5-colour palettes.
prompt = "A vintage aesthetic shops"
image_paths = generate_images(prompt=prompt, num_images=4)
color_palettes = extract_color_palette(image_paths=image_paths)

print("Generated images saved at:", image_paths)
print("Extracted color palettes:", color_palettes)


In [None]:
# Example usage
# Same prompt, five images this time, reduced to two dominant colours each.
prompt = "A vintage aesthetic shops"
image_paths = generate_images(prompt=prompt, num_images=5)
color_palettes = extract_color_palette(image_paths=image_paths, num_colors=2)

print("Generated images saved at:", image_paths)
print("Extracted color palettes:", color_palettes)

## UI integration

In [None]:
!pip install gradio --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.9/46.9 MB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.2/322.2 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.5/11.5 MB[0m [31m50.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.5/62.5 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import gradio as gr
from PIL import Image
import numpy as np
import torch
import os
import requests
from diffusers import StableDiffusionPipeline
from sklearn.cluster import KMeans

# Load base + LoRA
base_model = "runwayml/stable-diffusion-v1-5"
lora_model_path = "/content/drive/MyDrive/lora_sd_model"

pipeline = StableDiffusionPipeline.from_pretrained(base_model, torch_dtype=torch.float16).to("cuda")
if os.path.exists(lora_model_path):
    pipeline.unet.load_attn_procs(lora_model_path)
else:
    # Same warning as the earlier generation cell: without it the UI would
    # silently run on the plain base model.
    print("Warning: LoRA model not found. Using base model.")

# Google Fonts API key, read from the environment so the secret never lives in
# the notebook. Set it before running this cell, for example:
#     os.environ["GOOGLE_FONTS_API_KEY"] = getpass.getpass("Google Fonts key: ")
# NOTE: the key that used to be hardcoded here should be treated as leaked and rotated.
GOOGLE_FONTS_API_KEY = os.environ.get("GOOGLE_FONTS_API_KEY", "")
GOOGLE_FONTS_URL = f"https://www.googleapis.com/webfonts/v1/webfonts?key={GOOGLE_FONTS_API_KEY}"

# Get Google Fonts once
def get_fonts():
    """Fetch the Google Fonts catalogue.

    Returns:
        The list stored under ``"items"`` in the API response, or an empty
        list when the request fails, times out, returns a non-200 status or
        does not contain valid JSON. Font suggestions are optional, so a
        failure here degrades to "no fonts" instead of aborting the cell.
    """
    try:
        # Without a timeout a stalled connection would hang the cell forever.
        response = requests.get(GOOGLE_FONTS_URL, timeout=10)
        if response.status_code != 200:
            print(f"Google Fonts request failed with HTTP {response.status_code}")
            return []
        return response.json().get("items", [])
    except (requests.RequestException, ValueError) as exc:
        # Only the exception type is reported: the message may embed the
        # request URL, which carries the API key.
        print(f"Could not load Google Fonts ({type(exc).__name__})")
        return []

# Fetched once when this cell runs; moodboard_ui reuses the list for every request.
all_fonts = get_fonts()

# Match 2-3 fonts based on prompt keyword matching
def suggest_fonts(prompt, fonts, max_fonts=3):
    """Pick up to ``max_fonts`` fonts whose family name relates to the prompt.

    A font matches when any prompt keyword occurs in its lower-cased family
    name. Keywords shorter than three characters ("a", "of", "in", ...) are
    ignored, because as substrings they would match almost every family. If
    fewer than ``max_fonts`` fonts match, the result is topped up with the
    first fonts of ``fonts`` that are not already selected.

    Args:
        prompt: Free-text prompt.
        fonts: List of font dicts carrying a ``"family"`` key.
        max_fonts: Maximum number of fonts to return (default 3).

    Returns:
        List of at most ``max_fonts`` font dicts: matches first, then fallbacks.
    """
    # Strip surrounding punctuation so "neon," still yields the keyword "neon".
    stripped = (word.strip(".,;:!?\"'()") for word in prompt.lower().split())
    keywords = {word for word in stripped if len(word) >= 3}

    matched = []
    for font in fonts:
        if len(matched) >= max_fonts:
            break
        family = font.get("family", "").lower()
        if any(keyword in family for keyword in keywords):
            matched.append(font)

    # Fallback: fill the remaining slots without repeating a selected font.
    for font in fonts:
        if len(matched) >= max_fonts:
            break
        if font not in matched:
            matched.append(font)

    return matched[:max_fonts]

# Generate image and color palette
def generate_image_and_palette(prompt, num_colors=5):
    """Generate one image for ``prompt`` and describe its dominant colours.

    The image comes from the module-level ``pipeline``. Its palette is computed
    by k-means on a 100x100 thumbnail and rendered as one line per colour: an
    inline colour swatch followed by the hex code.

    Args:
        prompt: Text prompt passed to the pipeline.
        num_colors: Number of palette colours / k-means clusters (default 5).

    Returns:
        Tuple ``(image, markdown_palette)``: the generated RGB image and the
        swatch markup as a single string.
    """
    image = pipeline(prompt).images[0].convert("RGB")

    # Cluster on a small thumbnail; the full-size image is returned untouched.
    resized = image.resize((100, 100))
    pixels = np.array(resized).reshape(-1, 3)

    kmeans = KMeans(n_clusters=num_colors, random_state=42, n_init=10)
    kmeans.fit(pixels)

    # Cluster centres are float means; round to the nearest 8-bit level instead
    # of flooring them.
    centers = np.rint(kmeans.cluster_centers_).astype(int).tolist()
    hex_colors = ['#%02x%02x%02x' % tuple(c) for c in centers]
    markdown_palette = "  \n".join([
        f"<div style='display:inline-block;width:30px;height:30px;background:{h};margin-right:5px;'></div> {h}"
        for h in hex_colors
    ])

    return image, markdown_palette

# Gradio interface function
def moodboard_ui(prompt):
    """Build everything the moodboard shows for one prompt.

    Generates four images with their palettes (a failed generation yields
    ``None`` plus an error message in its palette slot) and the HTML preview
    of the suggested fonts.

    Returns:
        Flat tuple: the four images, the four palette strings, then the font HTML.
    """
    images, palettes = [], []

    for slot in range(1, 5):
        try:
            picture, swatches = generate_image_and_palette(prompt)
        except Exception as e:
            # Keep the slot so the remaining outputs stay aligned with the UI.
            print(f"Error in image {slot}: {e}")
            picture, swatches = None, f"**Error:** {str(e)}"
        images.append(picture)
        palettes.append(swatches)

    # One stylesheet link plus a sample line per suggested font.
    html_parts = []
    for entry in suggest_fonts(prompt, all_fonts):
        font_name = entry["family"]
        family_param = font_name.replace(' ', '+')
        font_link = f"https://fonts.googleapis.com/css2?family={family_param}&display=swap"
        html_parts.append(f"""
        <link href="{font_link}" rel="stylesheet">
        <div style="font-family:'{font_name}', sans-serif; font-size:24px; margin-bottom:10px;">
            {font_name}
        </div>
        """)
    font_display = "".join(html_parts)

    return tuple(images) + tuple(palettes) + (font_display,)

# Gradio UI
# Layout: prompt box and button on top, a row of four images, a row of four
# palettes, then the font preview.
with gr.Blocks() as demo:
    gr.Markdown("##  Moodboard Generator with LoRA Stable Diffusion")
    prompt_input = gr.Textbox(label="Prompt")
    submit_btn = gr.Button("Generate Moodboard")

    # Four image slots and four palette slots: these counts have to stay in step
    # with the four generations performed in moodboard_ui.
    with gr.Row():
        img_outputs = [gr.Image(label=f"Image {i+1}") for i in range(4)]
    with gr.Row():
        palette_outputs = [gr.Markdown(label=f"Palette {i+1}") for i in range(4)]
    font_output = gr.HTML(label="Font Suggestions")

    # The output order mirrors moodboard_ui's return tuple: images, palettes,
    # then the font HTML.
    submit_btn.click(
        fn=moodboard_ui,
        inputs=[prompt_input],
        outputs=[*img_outputs, *palette_outputs, font_output]
    )

# share=True asks Gradio for a public URL (see the "Running on public URL" output).
demo.launch(share=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model_index.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/308 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.72k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://5e109e10b5928ebd1d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


