In [1]:
# prepare_lora_data.py

import os
import json
from tqdm import tqdm

# ─────────── CONFIG ───────────
# Root directory that is joined with each class sub-folder name below.
SRC_DIR = "./dataset/processed_dataset"

# Marker word; it is wrapped in angle brackets and prepended to every caption.
TOKEN = "MyPanel"

# File name of the JSON manifest (image path + caption) written by this cell.
OUT_JSON = "lora_dataset.json"

# Class sub-folder name -> base caption shared by every image in that folder.
CLASS_CAPTIONS = dict(
    clean="a solar panel with no defects",
    snow_covered="a solar panel covered in fresh snow",
    dusty="a solar panel scattered with dust and dirt",
    physical_damage="a solar panel with physical cracks and chips",
    bird_drop="a solar panel with bird droppings",
    electrical_damage="a solar panel showing electrical burn marks",
)
# ───────────────────────────────

# Build one {"image", "caption"} record per image file found under each class
# folder, then write the whole list to OUT_JSON.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")

records = []
for cls_folder, base_caption in CLASS_CAPTIONS.items():
    folder = os.path.join(SRC_DIR, cls_folder)
    if not os.path.isdir(folder):
        print(f"⚠️  Skipping missing folder: {folder}")
        continue

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # manifest reproducible across machines and runs. Filtering *before*
    # handing the list to tqdm keeps the progress total equal to the number
    # of records actually produced, and the isfile() check skips directories
    # that merely have an image-like name.
    image_names = sorted(
        fn
        for fn in os.listdir(folder)
        if fn.lower().endswith(IMAGE_EXTENSIONS)
        and os.path.isfile(os.path.join(folder, fn))
    )

    # The caption is identical for every image of a class, so build it once.
    caption = f"<{TOKEN}> {base_caption}"
    for fn in tqdm(image_names, desc=f"Processing {cls_folder}"):
        records.append({
            # Absolute path: the manifest stays valid when read from another
            # working directory (but is tied to this machine's layout).
            "image":   os.path.abspath(os.path.join(folder, fn)),
            "caption": caption
        })

# write out (explicit encoding so the result does not depend on the locale)
with open(OUT_JSON, "w", encoding="utf-8") as f:
    json.dump(records, f, indent=2)

print(f"\n✅ Wrote {len(records)} records to {OUT_JSON}")


Processing clean: 100%|██████████| 191/191 [00:00<00:00, 187658.01it/s]
Processing snow_covered: 100%|██████████| 114/114 [00:00<00:00, 80159.37it/s]
Processing dusty: 100%|██████████| 182/182 [00:00<00:00, 204600.20it/s]
Processing physical_damage: 100%|██████████| 66/66 [00:00<00:00, 123306.93it/s]
Processing bird_drop: 100%|██████████| 201/201 [00:00<00:00, 214408.72it/s]
Processing electrical_damage: 100%|██████████| 90/90 [00:00<00:00, 162081.31it/s]


✅ Wrote 844 records to lora_dataset.json





In [2]:
# train_sd3_lora.py
import os
import torch
from datasets import load_dataset
from transformers import CLIPTokenizer, CLIPImageProcessor
from diffusers import UNet2DConditionModel
from peft import LoraConfig, get_peft_model, TaskType
from torch.utils.data import DataLoader
from accelerate import Accelerator
from PIL import Image

# ───────────── CONFIG ─────────────
# Hugging Face repository id of the base model.
MODEL_ID = "stabilityai/stable-diffusion-3-medium-diffusers"

# JSON manifest of image paths and captions (output of prepare_lora_data.py).
TRAIN_JSON = "./lora_dataset.json"

# Directory that receives the trained LoRA adapter weights.
OUTPUT_DIR = "./lora_sd3_panel"

# Optimisation settings.
BATCH_SIZE = 4          # samples per DataLoader batch
EPOCHS = 3              # full passes over the dataset
LEARNING_RATE = 1e-4    # AdamW learning rate
LR_WARMUP = 0.03        # warm-up setting handed to the LR scheduler

# Maximum caption length, in tokens.
MAX_LENGTH = 64

# Seed passed to torch.manual_seed.
SEED = 42
# ────────────────────────────────────

def collate_fn(batch):
    """Stack per-sample tensors into batch tensors.

    Args:
        batch: sequence of dicts, each holding a "pixel_values" tensor and an
            "input_ids" tensor.

    Returns:
        dict with the same two keys, where each value is the samples'
        tensors stacked along a new leading dimension.
    """
    stacked = {}
    for field in ("pixel_values", "input_ids"):
        stacked[field] = torch.stack([sample[field] for sample in batch])
    return stacked

def main():
    """Train LoRA adapters on the Stable Diffusion 3 transformer and save them.

    Reads the image/caption manifest at TRAIN_JSON, fine-tunes LoRA layers on
    the attention projections of the SD3 denoiser with the rectified-flow
    objective, and writes the adapter weights to OUTPUT_DIR in the diffusers
    LoRA format (loadable with ``pipe.load_lora_weights(OUTPUT_DIR)``).

    Notes:
        * MODEL_ID is a gated repository. Accept the licence on its model
          page and authenticate (``huggingface-cli login`` or the HF_TOKEN
          environment variable) first, otherwise loading fails with HTTP 401.
        * SD3 has no UNet: its denoiser is the ``transformer`` component and
          it is conditioned on three text encoders, so the whole pipeline is
          loaded and its components are reused here.

    Raises:
        ValueError: if the manifest contains no records.
    """
    # Function-scope imports: these names are not imported at the top of the cell.
    import torch.nn.functional as F
    from diffusers import StableDiffusion3Pipeline
    from peft import get_peft_model_state_dict

    # Training resolution in pixels. 512 keeps memory manageable; it must be a
    # multiple of 16 (8x VAE down-sampling followed by 2x2 patches).
    resolution = 512

    accelerator = Accelerator(mixed_precision="fp16")
    torch.manual_seed(SEED)
    device = accelerator.device

    # 1) load every SD3 component once; frozen weights stay in fp16
    pipe = StableDiffusion3Pipeline.from_pretrained(MODEL_ID, torch_dtype=torch.float16)
    vae = pipe.vae
    transformer = pipe.transformer
    image_processor = pipe.image_processor
    num_train_timesteps = pipe.scheduler.config.num_train_timesteps
    shift = float(pipe.scheduler.config.get("shift", 1.0))

    # 2) load the manifest
    ds = load_dataset("json", data_files=TRAIN_JSON)["train"]
    if len(ds) == 0:
        raise ValueError(f"No training records found in {TRAIN_JSON}")

    # 3) encode each distinct caption once. The manifest has only a handful of
    #    unique captions, so the (large) text encoders can leave the GPU
    #    before training starts.
    text_encoders = [pipe.text_encoder, pipe.text_encoder_2, pipe.text_encoder_3]
    for encoder in text_encoders:
        if encoder is not None:
            encoder.to(device)
    text_cache = {}
    with torch.no_grad():
        for caption in sorted(set(ds["caption"])):
            prompt_embeds, _, pooled_embeds, _ = pipe.encode_prompt(
                prompt=caption,
                prompt_2=caption,
                prompt_3=caption,
                device=device,
                do_classifier_free_guidance=False,
                max_sequence_length=MAX_LENGTH,
            )
            text_cache[caption] = (prompt_embeds[0].cpu(), pooled_embeds[0].cpu())
    for encoder in text_encoders:
        if encoder is not None:
            encoder.to("cpu")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # 4) images are decoded lazily, per batch, instead of materialising every
    #    pixel tensor into the Arrow table up front
    def load_examples(examples):
        pixel_values = []
        for path in examples["image"]:
            with Image.open(path) as handle:  # close the file handle promptly
                rgb = handle.convert("RGB")
            # preprocess() resizes and scales to [-1, 1]; result is (1, 3, H, W)
            pixel_values.append(
                image_processor.preprocess(rgb, height=resolution, width=resolution)[0]
            )
        return {"pixel_values": pixel_values, "caption": list(examples["caption"])}

    def collate(examples):
        return {
            "pixel_values": torch.stack([ex["pixel_values"] for ex in examples]),
            "prompt_embeds": torch.stack([text_cache[ex["caption"]][0] for ex in examples]),
            "pooled_embeds": torch.stack([text_cache[ex["caption"]][1] for ex in examples]),
        }

    # num_workers=0: the closures above cannot be pickled for worker processes
    # on spawn-based platforms, and worker processes are fragile in notebooks.
    dataloader = DataLoader(ds.with_transform(load_examples), batch_size=BATCH_SIZE,
                            shuffle=True, collate_fn=collate, num_workers=0)

    # 5) freeze the base model and inject LoRA into the attention projections
    vae.requires_grad_(False)
    vae.to(device, dtype=torch.float32)  # fp32 VAE avoids fp16 overflow when encoding
    transformer.requires_grad_(False)
    lora_cfg = LoraConfig(
        r=16,
        lora_alpha=32,
        # `to_out` is a ModuleList; the Linear projection inside it is `to_out.0`
        target_modules=["to_q", "to_k", "to_v", "to_out.0"],
        lora_dropout=0.05,
        bias="none",
    )
    transformer.add_adapter(lora_cfg)
    trainable_params = [p for p in transformer.parameters() if p.requires_grad]
    # The fp16 GradScaler refuses to unscale fp16 gradients, so the trainable
    # LoRA weights must be fp32 (the frozen base stays fp16).
    for param in trainable_params:
        param.data = param.data.to(torch.float32)
    transformer.to(device)

    # 6) optimizer + linear warm-up over the first LR_WARMUP fraction of steps
    optimizer = torch.optim.AdamW(trainable_params, lr=LEARNING_RATE)
    total_steps = len(dataloader) * EPOCHS
    warmup_steps = max(1, int(LR_WARMUP * total_steps))
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lambda step: min(1.0, (step + 1) / warmup_steps)
    )

    # 7) prepare with Accelerator
    transformer, optimizer, dataloader, scheduler = accelerator.prepare(
        transformer, optimizer, dataloader, scheduler
    )

    # 8) training loop (rectified flow: the model predicts noise - latents)
    for epoch in range(1, EPOCHS + 1):
        transformer.train()
        running_loss = 0.0
        for batch in dataloader:
            with torch.no_grad():
                pixels = batch["pixel_values"].to(device, dtype=vae.dtype)
                latents = vae.encode(pixels).latent_dist.sample()
                latents = (latents - vae.config.shift_factor) * vae.config.scaling_factor
            latents = latents.to(torch.float16)

            noise = torch.randn_like(latents)
            # Logit-normal sample of the noise level, then the same shift the
            # SD3 scheduler applies to its sigmas.
            u = torch.sigmoid(torch.randn(latents.shape[0], device=latents.device))
            sigmas = shift * u / (1 + (shift - 1) * u)
            timesteps = sigmas * num_train_timesteps
            sigma_b = sigmas.view(-1, 1, 1, 1).to(latents.dtype)
            noisy_latents = (1 - sigma_b) * latents + sigma_b * noise

            pred = transformer(
                hidden_states=noisy_latents,
                timestep=timesteps,
                encoder_hidden_states=batch["prompt_embeds"].to(device),
                pooled_projections=batch["pooled_embeds"].to(device),
                return_dict=False,
            )[0]
            loss = F.mse_loss(pred.float(), (noise - latents).float())

            accelerator.backward(loss)
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
            running_loss += loss.item()
        avg = running_loss / len(dataloader)
        print(f"Epoch {epoch}/{EPOCHS} — loss: {avg:.4f}")
        accelerator.wait_for_everyone()

    # 9) save adapters: unwrap the accelerate wrapper and write only once
    if accelerator.is_main_process:
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        lora_state = get_peft_model_state_dict(accelerator.unwrap_model(transformer))
        StableDiffusion3Pipeline.save_lora_weights(
            save_directory=OUTPUT_DIR, transformer_lora_layers=lora_state
        )
        print(f"\n✅ LoRA adapters saved to {OUTPUT_DIR}")

if __name__ == "__main__":
    main()


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers.
401 Client Error. (Request ID: Root=1-6874ba16-12da46495ae49f1341870059;c4c2cb20-35c1-45e1-b69a-381b036523ab)

Cannot access gated repo for url https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers/resolve/main/tokenizer/tokenizer_config.json.
Access to model stabilityai/stable-diffusion-3-medium-diffusers is restricted. You must have access to it and be authenticated to access it. Please log in.

In [None]:
# generate_sd3_lora_images.py
import os
import torch
from diffusers import StableDiffusion3Pipeline

# ───────────── CONFIG ─────────────
# Hugging Face repository id of the base model.
MODEL_ID = "stabilityai/stable-diffusion-3-medium-diffusers"

# Directory holding the trained LoRA weights (output of train_sd3_lora.py).
LORA_DIR = "./lora_sd3_panel"

# Root directory for generated images; one sub-folder is created per class.
OUTPUT_ROOT = "./outs_sd3_lora"

# Class name -> text prompt used for every image of that class.
PROMPTS = dict(
    snow="<MyPanel> covered in fresh snow, photorealistic lighting",
    dusty="<MyPanel> scattered with dust and dirt, high detail",
    crack="<MyPanel> with fine micro-cracks and chipped cells",
    cover="<MyPanel> partially covered by debris, realistic",
)

# Class name -> number of images to generate (classes missing here get 0).
NUM_IMAGES = dict(
    snow=100,
    dusty=100,
    crack=100,
    cover=100,
)

# Sampling settings passed to the pipeline call.
NEG_PROMPT = "lowres, bad anatomy, text"
STEPS = 30
GUIDANCE = 1.5

# Seed passed to torch.manual_seed.
SEED = 42
# ────────────────────────────────────

def main():
    """Generate images for every class in PROMPTS with the LoRA-adapted SD3 pipeline.

    Loads the base pipeline on the GPU, applies the LoRA weights found in
    LORA_DIR, and for each class writes NUM_IMAGES[class] PNG files to
    OUTPUT_ROOT/<class>/<class>_NNNN.png (existing files are overwritten).

    Notes:
        * MODEL_ID is a gated repository; accept its licence and authenticate
          with Hugging Face before running.
        * LORA_DIR must contain weights in the diffusers LoRA format
          (``pytorch_lora_weights.safetensors``).
        * A CUDA device is required.
    """
    torch.manual_seed(SEED)
    pipe = StableDiffusion3Pipeline.from_pretrained(
        MODEL_ID, torch_dtype=torch.float16
    ).to("cuda")
    # No xformers call here: SD3 attention already runs through PyTorch's
    # scaled-dot-product attention, and enabling xformers would add a hard
    # dependency on an optional package.

    # SD3 has no `unet`; its denoiser is `pipe.transformer`. The pipeline-level
    # loader routes the LoRA weights to the right component.
    pipe.load_lora_weights(LORA_DIR)

    os.makedirs(OUTPUT_ROOT, exist_ok=True)
    for cls_index, (cls, prompt) in enumerate(PROMPTS.items()):
        out_dir = os.path.join(OUTPUT_ROOT, cls)
        os.makedirs(out_dir, exist_ok=True)
        for i in range(NUM_IMAGES.get(cls, 0)):
            # One seeded generator per image: any single image can be
            # regenerated without replaying every draw that preceded it.
            generator = torch.Generator(device="cuda").manual_seed(
                SEED + cls_index * 100_000 + i
            )
            img = pipe(
                prompt=prompt,
                negative_prompt=NEG_PROMPT,
                num_inference_steps=STEPS,
                guidance_scale=GUIDANCE,
                generator=generator
            ).images[0]
            fname = f"{cls}_{i:04d}.png"
            img.save(os.path.join(out_dir, fname))
            print(f"✓ Saved {cls}/{fname}")

if __name__ == "__main__":
    main()
