<a href="https://colab.research.google.com/github/trentclat/SDXL-LoRA/blob/main/SDXL_v1_0_firetiger_LORA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Environment setup: install the training stack, clone the diffusers repo for its
# example scripts, import the libraries used by later cells, and write a default
# accelerate config.
# NOTE(review): nothing here is version-pinned and diffusers/peft come from `main`,
# so two runs on different days may resolve different code.
!pip install --upgrade diffusers[torch] transformers accelerate peft safetensors huggingface_hub
# Clone the repo to get the example training script; the training cell references
# /content/diffusers/..., which assumes the working directory here is /content.
!git clone https://github.com/huggingface/diffusers.git
# Install any extras the example script needs.
!pip install -r diffusers/examples/text_to_image/requirements.txt
# Swap whatever diffusers build is installed for the current `main` branch,
# presumably so the library matches the cloned example script - confirm.
!pip uninstall -y diffusers
!pip install --upgrade git+https://github.com/huggingface/diffusers.git@main
# Re-upgrade these after the requirements.txt install above.
!pip install --upgrade accelerate transformers safetensors
# Pull PEFT straight from main so it is >= 0.15.0.
!pip install --upgrade git+https://github.com/huggingface/peft.git@main
# NOTE(review): xformers is removed unconditionally; the reason is not recorded
# here (possibly a conflict with the freshly installed torch/diffusers) - confirm.
!pip uninstall -y xformers


# Imports used by later cells.
import torch
import json

from diffusers import StableDiffusionXLPipeline
from peft import LoraConfig, get_peft_model

# Write a default accelerate config (single process, single machine, no dynamo).
# NOTE(review): the default here is fp16, but the training cell passes
# --mixed_precision="bf16" to `accelerate launch` - confirm which one is intended.
!accelerate config default --mixed_precision="fp16" --num_processes=1 --num_machines=1 --dynamo_backend="no"

# Pick the GPU when available; the model-loading cell later overwrites `device`
# with the literal "cuda".
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"✅ Environment ready. PyTorch {torch.__version__}, CUDA: {torch.cuda.is_available()}")


In [None]:
# HF token connection: confirm the Colab secret "worktoken" can be read.

from google.colab import userdata
mykey = userdata.get('worktoken')
# Never echo the secret itself: cell output is stored inside the .ipynb and would
# leak the token to anyone the notebook is shared with or pushed to.
print("HF token loaded." if mykey else "HF token 'worktoken' is empty.")

In [None]:
# Authenticate this runtime with the Hugging Face Hub.

from google.colab import userdata
from huggingface_hub import login

# The access token is read from the Colab secret named "worktoken".
hf_token = userdata.get("worktoken")
login(token=hf_token)

In [None]:
# Mount Google Drive at /content/drive; later cells read the training images
# from it and write checkpoints and generated images back to it.

from google.colab import drive

drive.mount(mountpoint="/content/drive")

In [None]:
# load SDXL base & refiner

from diffusers import StableDiffusionXLImg2ImgPipeline

# fp16 weights are loaded below, so this cell needs a CUDA runtime.
device = "cuda"

# Base text-to-image pipeline.
pipe_base = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
).to(device)

# Refiner pipeline. This previously loaded the *base* checkpoint a second time
# with the text-to-image class, which only produced a duplicate of `pipe_base`
# in VRAM. The refiner is an image-to-image model published under its own repo
# id; it reuses the base pipeline's second text encoder and VAE so those weights
# are not held on the GPU twice.
pipe_refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=pipe_base.text_encoder_2,
    vae=pipe_base.vae,
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
).to(device)


In [None]:
# Wrap the base pipeline's UNet with a PEFT LoRA adapter.

# Adapter settings: rank 16 with alpha 32, 5% dropout, no bias terms.
# `target_modules` lists the module-name suffixes that receive LoRA weights
# (presumably the UNet attention projections - confirm).
lora_cfg = LoraConfig(
    target_modules=["to_q", "to_k", "to_v", "to_out.0"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# get_peft_model returns the wrapped model, which replaces the pipeline's UNet.
# NOTE(review): the external training script builds its own adapter, and the
# pipeline's UNet is no longer a plain diffusers model after this - confirm
# this cell is still needed.
pipe_base.unet = get_peft_model(model=pipe_base.unet, peft_config=lora_cfg)


In [None]:
# Count the training images available on Drive.
import os

image_dir = "/content/drive/MyDrive/LoRA_Prep/stdized_firetiger"

# Keep only entries whose name ends in a known image extension (case-insensitive)
# and turn each one into a full path, in os.listdir order.
image_paths = list(
    map(
        lambda name: os.path.join(image_dir, name),
        filter(
            lambda name: name.lower().endswith(('.png', '.jpg', '.jpeg')),
            os.listdir(image_dir),
        ),
    )
)
print(f"Found {len(image_paths)} images.")


In [None]:
import os, json

# 1) Hand-written captions, keyed by image file name inside `data_dir`.
captions = {
    "1347816_Firetiger_MS.jpg": "xyz_fire_tiger, hooks_diagonal_left_downward, side_view, studio_shot, white_background",
    "1373962_Firetiger_MS.jpg": "xyz_fire_tiger, bold_sharp_stripes, side_view, studio_shot, white_background",
    "1373966_Firetiger_MS.jpg": "xyz_fire_tiger, bold_sharp_stripes, side_view, studio_shot, white_background",
    "1454428_Firetiger_MS.jpg": "xyz_fire_tiger, side_view, studio_shot, white_background",
    "1454429_Firetiger_MS.jpg": "xyz_fire_tiger, jointed_lure, side_view, studio_shot, white_background",
    "1523078_Firetiger_11_MS.jpg": "xyz_fire_tiger, three_treble_hooks, studio_shot, white_background",
    "1573537_1572465_MS.jpg": "xyz_fire_tiger, black_eye, side_view, studio_shot, white_background",
    "1573537_1572525_MS.jpg": "xyz_fire_tiger, black_eye, side_view, studio_shot, white_background",
    "1601024_1600951_MS.jpg": "xyz_fire_tiger, side_view, studio_shot, white_background",
    "1601024_1600971_MS.jpg": "xyz_fire_tiger, three_treble_hooks, studio_shot, white_background",
    "1601025_1600757_MS.jpg": "xyz_fire_tiger, single_dorsal_hook, side_view, studio_shot, white_background",
    "1624396_1623961_MS.jpg": "xyz_fire_tiger, bold_sharp_stripes, no_hooks, side_view, studio_shot, white_background",
    "1624396_1624113_MS.jpg": "xyz_fire_tiger, bold_sharp_stripes, no_hooks, side_view, studio_shot, white_background",
    "1624399_1624251_MS.jpg": "xyz_fire_tiger, single_dorsal_hook, side_view, studio_shot, white_background",
    "Copy of Copy of deep_hit_stick_1.jpg": "xyz_fire_tiger, side_view, studio_shot, white_background",
    "Copy of Copy of deep_hit_stick_2.jpg": "xyz_fire_tiger, three_treble_hooks, side_view, studio_shot, white_background",
    "Copy of Copy of finisher.jpg": "xyz_fire_tiger, side_view, studio_shot, white_background",
    "Copy of Copy of flicker_minnow.jpg": "xyz_fire_tiger, hooks_diagonal_left_downward, side_view, studio_shot, white_background",
    "Copy of Copy of flicker_shad_jointed.jpg": "xyz_fire_tiger, jointed_lure, side_view, studio_shot, white_background",
    "Copy of Copy of flicker_shad_shallow.jpg": "xyz_fire_tiger, side_view, studio_shot, white_background",
    "Copy of Copy of flicker_shad.jpg": "xyz_fire_tiger, hooks_diagonal_left_downward, side_view, studio_shot, white_background",
    "Copy of Copy of hit_stick_1.jpg": "xyz_fire_tiger, three_treble_hooks, side_view, studio_shot, white_background",
    "Copy of Copy of hit_stick_2.jpg": "xyz_fire_tiger, side_view, studio_shot, white_background",
    "Copy of Copy of hit_stick_3.jpg": "xyz_fire_tiger, single_treble_hook, side_view, studio_shot, white_background",
    "Copy of deep_hit_stick_1.jpg": "xyz_fire_tiger, side_view, studio_shot, white_background",
    "Copy of deep_hit_stick_2.jpg": "xyz_fire_tiger, three_treble_hooks, studio_shot, white_background",
}

data_dir = "/content/drive/MyDrive/LoRA_Prep/stdized_firetiger"
out_path = os.path.join(data_dir, "metadata.jsonl")

# 2) Write one JSON object per line for every captioned image that exists on disk.
missing = []
# Explicit UTF-8 so the file does not depend on the runtime's default encoding.
with open(out_path, "w", encoding="utf-8") as f:
    for fn, cap in captions.items():
        img_path = os.path.join(data_dir, fn)
        if os.path.exists(img_path):
            # "file_name" is the canonical key the Hugging Face `imagefolder`
            # loader uses to link a metadata row to its image; the image is then
            # exposed as the "image" column that the training cell passes via
            # --image_column. The previous "image_file_name" key relies on
            # support that only newer `datasets` releases provide.
            entry = {"file_name": fn, "text": cap}
            f.write(json.dumps(entry) + "\n")
        else:
            missing.append(fn)
            print("Missing:", fn)

print("metadata.jsonl generated at:", out_path)
# Make skipped captions obvious instead of relying on the scrolled "Missing:" lines.
print(f"Wrote {len(captions) - len(missing)} of {len(captions)} captions ({len(missing)} missing).")


In [None]:
# free refiner and clear GPU cache
import gc

# Drop the notebook's reference to the refiner. Re-running this cell must not
# fail just because the name was already deleted by a previous run.
try:
    del pipe_refiner
except NameError:
    pass  # already freed earlier

# Collect unreachable objects first: empty_cache() can only return memory whose
# tensors have actually been destroyed.
gc.collect()
torch.cuda.empty_cache()
print("Freed refiner and cleared GPU cache.")

In [None]:
%%bash
# Launch the diffusers SDXL text-to-image LoRA example script through accelerate
# as a single process on a single machine.
#
# - Training images and captions are read from the Drive folder given by
#   --train_data_dir, using the "image" and "text" columns.
# - Batch size 2 with 8 gradient-accumulation steps, resolution 896, random
#   horizontal flips, LoRA rank 16, learning rate 5e-5, 2000 steps in total.
# - Outputs are written to the SDXL_firetiger folder on Drive.
#
# NOTE(review): --mixed_precision is "bf16" here, while the setup cell writes an
#   accelerate default of "fp16" - confirm which is intended and that the GPU
#   in use supports bf16.
# NOTE(review): --resume_from_checkpoint points at a fixed checkpoint-1000
#   directory, which presumably must already exist from an earlier run - confirm
#   before launching on a fresh output folder.
accelerate launch \
  --num_processes=1 \
  --num_machines=1 \
  --mixed_precision="bf16" \
  --dynamo_backend="no" \
  /content/diffusers/examples/text_to_image/train_text_to_image_lora_sdxl.py \
  --pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0" \
  --pretrained_vae_model_name_or_path="stabilityai/sdxl-vae" \
  --train_data_dir="/content/drive/MyDrive/LoRA_Prep/stdized_firetiger" \
  --image_column="image" \
  --caption_column="text" \
  --resolution=896 \
  --random_flip \
  --train_batch_size=2 \
  --gradient_accumulation_steps=8 \
  --max_train_steps=2000 \
  --learning_rate=5e-5 \
  --output_dir="/content/drive/MyDrive/LoRA/LoRA_Model/SDXL_firetiger" \
  --rank=16 \
  --resume_from_checkpoint="/content/drive/MyDrive/LoRA/LoRA_Model/SDXL_firetiger/checkpoint-1000"


In [None]:
from diffusers import StableDiffusionXLPipeline
import torch
import os

# 1) Reuse the base pipeline loaded earlier. If an earlier cell wrapped its UNet
#    with peft.get_peft_model(), swap the wrapper for the underlying diffusers
#    model so the pipeline's LoRA helpers (load_lora_weights / set_adapters)
#    recognise it.
if hasattr(pipe_base.unet, "get_base_model"):
    pipe_base.unet = pipe_base.unet.get_base_model()

# 2) First LoRA = shape adapter, 3) second LoRA = colour adapter.
#    Each gets an explicit adapter name so it can be addressed below.
lora_files = {
    "shape": "/content/drive/MyDrive/LoRA/LoRA_Model/SDXL_shape/pytorch_lora_weights.safetensors",
    "firetiger": "/content/drive/MyDrive/LoRA/LoRA_Model/SDXL_firetiger/pytorch_lora_weights.safetensors",
}

# Skip adapters that are already attached so the cell can be re-run safely
# (loading the same adapter name twice is rejected).
already_loaded = {
    name for names in pipe_base.get_list_adapters().values() for name in names
}
for adapter_name, weights_path in lora_files.items():
    if adapter_name not in already_loaded:
        pipe_base.load_lora_weights(weights_path, adapter_name=adapter_name)

# 4) Explicitly activate BOTH adapters; this is what actually "stacks" them on
#    top of the base model. Tune the weights to rebalance shape vs. colour.
pipe_base.set_adapters(list(lora_files), adapter_weights=[1.0, 1.0])

# The trigger token must match the training captions exactly ("xyz_fire_tiger");
# the previous prompt used "xyz_firetiger", which never appears in the captions.
prompt = "A single photorealistic xyz_fire_tiger lure held in center of human hand. Hand centerfold with a lake side background"
image = pipe_base(prompt, num_inference_steps=50, guidance_scale=7.5).images[0]

output_path = "/content/drive/MyDrive/LoRA/LoRA_Output/sdxl_outputs/demo_handshot_firetiger_combined_loras_test.png"
# Create the output folder if needed so the save does not fail on a fresh Drive.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
image.save(output_path)
