In [1]:
# prepare_lora_data.py

import os
import json
from tqdm import tqdm

# ─────────── CONFIG ───────────
SRC_DIR   = "./dataset/processed_dataset"
TOKEN     = "MyPanel"            # the special token your LoRA will learn
OUT_JSON  = "lora_dataset.json"  # output JSON for fine-tuning

CLASS_CAPTIONS = {
    "clean":              "a solar panel with no defects",
    "snow_covered":       "a solar panel covered in fresh snow",
    "dusty":              "a solar panel scattered with dust and dirt",
    "physical_damage":    "a solar panel with physical cracks and chips",
    "bird_drop":          "a solar panel with bird droppings",
    "electrical_damage":  "a solar panel showing electrical burn marks"
}
# ───────────────────────────────

records = []
for cls_folder, base_caption in CLASS_CAPTIONS.items():
    folder = os.path.join(SRC_DIR, cls_folder)
    if not os.path.isdir(folder):
        print(f"⚠️  Skipping missing folder: {folder}")
        continue

    for fn in tqdm(os.listdir(folder), desc=f"Processing {cls_folder}"):
        path = os.path.abspath(os.path.join(folder, fn))
        # guard: only images
        if not fn.lower().endswith((".jpg", ".jpeg", ".png")):
            continue
        caption = f"<{TOKEN}> {base_caption}"
        records.append({
            "image":   path,
            "caption": caption
        })

# write out
with open(OUT_JSON, "w") as f:
    json.dump(records, f, indent=2)

print(f"\n✅ Wrote {len(records)} records to {OUT_JSON}")


Processing clean: 100%|██████████| 191/191 [00:00<00:00, 187658.01it/s]
Processing snow_covered: 100%|██████████| 114/114 [00:00<00:00, 80159.37it/s]
Processing dusty: 100%|██████████| 182/182 [00:00<00:00, 204600.20it/s]
Processing physical_damage: 100%|██████████| 66/66 [00:00<00:00, 123306.93it/s]
Processing bird_drop: 100%|██████████| 201/201 [00:00<00:00, 214408.72it/s]
Processing electrical_damage: 100%|██████████| 90/90 [00:00<00:00, 162081.31it/s]


✅ Wrote 844 records to lora_dataset.json





In [4]:
# # in a notebook cell, prefix with !
# !pip install --upgrade pip
# !pip install diffusers transformers accelerate peft safetensors huggingface-hub datasets sentencepiece protobuf torchvision pillow tqdm

[0m

In [6]:
import os
import torch
from datasets import load_dataset
from transformers import CLIPTokenizer
from diffusers import UNet2DConditionModel
from peft import LoraConfig, get_peft_model, TaskType
from torch.utils.data import DataLoader
from accelerate import Accelerator
from torchvision import transforms
from PIL import Image

# ───── CONFIG ─────
MODEL_ID      = "stabilityai/stable-diffusion-3-medium-diffusers"
TRAIN_JSON    = "lora_dataset.json"   # from prepare_lora_data.py
OUTPUT_DIR    = "lora_sd3_panel"
BATCH_SIZE    = 4
EPOCHS        = 3
LEARNING_RATE = 1e-4
LR_WARMUP     = 0.03
MAX_LEN       = 64
SEED          = 42
# ─────────────────

torch.manual_seed(SEED)
accelerator = Accelerator(mixed_precision="fp16")

# 1) Tokenizer & simple transforms
tokenizer = CLIPTokenizer.from_pretrained(MODEL_ID, subfolder="tokenizer")
img_tf    = transforms.Compose([
    transforms.Resize((512,512), transforms.InterpolationMode.BILINEAR),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3),
])

# 2) Load & preprocess JSON dataset
ds = load_dataset("json", data_files=TRAIN_JSON)["train"]

def prep(ex):
    img = Image.open(ex["image"]).convert("RGB")
    ex["pixel_values"] = img_tf(img)
    ids = tokenizer(
        ex["caption"],
        padding="max_length",
        truncation=True,
        max_length=MAX_LEN,
        return_tensors="pt"
    )
    ex["input_ids"] = ids.input_ids[0]
    return ex

ds = ds.map(prep, remove_columns=["image","caption"])
ds.set_format(type="torch", columns=["pixel_values","input_ids"])

def collate_fn(batch):
    return {
        "pixel_values": torch.stack([b["pixel_values"] for b in batch]),
        "input_ids":    torch.stack([b["input_ids"]    for b in batch]),
    }

dataloader = DataLoader(
    ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_fn,
    num_workers=4,
    pin_memory=True
)

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/844 [00:00<?, ? examples/s]

OSError: stabilityai/stable-diffusion-3-medium-diffusers does not appear to have a file named config.json.

In [10]:
from diffusers import StableDiffusion3Pipeline
import torch

MODEL_ID = "stabilityai/stable-diffusion-3-medium-diffusers"

# make sure you're already logged in via notebook_login() or `use_auth_token=True`
pipe = StableDiffusion3Pipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    use_auth_token=True
).to("cuda")

# now grab the UNet directly (it was loaded from `transformer/` under the hood)
unet = pipe.unet


model_index.json:   0%|          | 0.00/706 [00:00<?, ?B/s]

Fetching 26 files:   0%|          | 0/26 [00:00<?, ?it/s]

text_encoder_3/model-00001-of-00002.safe(…):   0%|          | 0.00/4.99G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/740 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

text_encoder_3/model-00002-of-00002.safe(…):   0%|          | 0.00/4.53G [00:00<?, ?B/s]

text_encoder_2/model.safetensors:   0%|          | 0.00/1.39G [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/574 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/19.9k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/856 [00:00<?, ?B/s]

text_encoder/model.safetensors:   0%|          | 0.00/247M [00:00<?, ?B/s]

tokenizer_3/spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/576 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/20.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/372 [00:00<?, ?B/s]

transformer/diffusion_pytorch_model.safe(…):   0%|          | 0.00/4.17G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/739 [00:00<?, ?B/s]

vae/diffusion_pytorch_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

RuntimeError: Data processing error: CAS service error : IO Error: No space left on device (os error 28)

In [None]:
# generate_sd3_lora_images.py
import os
import torch
from diffusers import StableDiffusion3Pipeline

# ───────────── CONFIG ─────────────
MODEL_ID    = "stabilityai/stable-diffusion-3-medium-diffusers"
LORA_DIR    = "./lora_sd3_panel"    # from train_sd3_lora.py
OUTPUT_ROOT = "./outs_sd3_lora"

# prompts per class
PROMPTS = {
    "snow":  "<MyPanel> covered in fresh snow, photorealistic lighting",
    "dusty": "<MyPanel> scattered with dust and dirt, high detail",
    "crack": "<MyPanel> with fine micro-cracks and chipped cells",
    "cover": "<MyPanel> partially covered by debris, realistic"
}

# how many to generate per class
NUM_IMAGES = {
    "snow":  100,
    "dusty": 100,
    "crack": 100,
    "cover": 100
}

NEG_PROMPT = "lowres, bad anatomy, text"
STEPS      = 30
GUIDANCE   = 1.5
SEED       = 42
# ────────────────────────────────────

def main():
    torch.manual_seed(SEED)
    pipe = StableDiffusion3Pipeline.from_pretrained(
        MODEL_ID, torch_dtype=torch.float16
    ).to("cuda")
    pipe.enable_xformers_memory_efficient_attention()

    # load LoRA adapters into UNet
    pipe.unet.load_attn_procs(LORA_DIR)

    os.makedirs(OUTPUT_ROOT, exist_ok=True)
    for cls, prompt in PROMPTS.items():
        out_dir = os.path.join(OUTPUT_ROOT, cls)
        os.makedirs(out_dir, exist_ok=True)
        for i in range(NUM_IMAGES.get(cls, 0)):
            img = pipe(
                prompt=prompt,
                negative_prompt=NEG_PROMPT,
                num_inference_steps=STEPS,
                guidance_scale=GUIDANCE
            ).images[0]
            fname = f"{cls}_{i:04d}.png"
            img.save(os.path.join(out_dir, fname))
            print(f"✓ Saved {cls}/{fname}")

if __name__ == "__main__":
    main()
