In [1]:
# Controlnet

In [2]:
# train_controlnet.py

import os
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from accelerate import Accelerator
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, UniPCMultistepScheduler
from diffusers.optimization import get_scheduler
from transformers import CLIPTokenizer

# ─────────────── CONFIG ───────────────
# (1) Base SDXL + ControlNet IDs
BASE_MODEL     = "stabilityai/stable-diffusion-xl-base-1.0"
CONTROLNET_ID  = "lllyasviel/sdxl-controlnet-canny"

# (2) Your precomputed folders
EDGE_MAP_DIR   = "./maps"                 # your Canny edge masks (512×512 PNGs)
PHOTO_DIR      = "./dataset/processed_dataset/clean"  # matching real photos

# (3) Prompt to describe *all* images (can be generic)
PROMPT         = "a solar panel front view"

# (4) Training hyperparams
OUTPUT_DIR     = "./controlnet_finetuned"
BATCH_SIZE     = 4
EPOCHS         = 3
LR             = 5e-6
LR_WARMUP      = 500
MAX_TRAIN_STEPS= None     # auto = EPOCHS * steps_per_epoch
RESOLUTION     = 512
SEED           = 42
# ─────────────────────────────────────────

torch.manual_seed(SEED)
accelerator = Accelerator(mixed_precision="fp16")
device = accelerator.device

# ─────────────── Dataset ───────────────
class PanelControlDataset(Dataset):
    def __init__(self, photo_dir, edge_dir, tokenizer, resolution):
        self.photo_paths = sorted([
            os.path.join(photo_dir, f)
            for f in os.listdir(photo_dir)
            if f.lower().endswith((".jpg",".png"))
        ])
        self.edge_paths = sorted([
            os.path.join(edge_dir, f)
            for f in os.listdir(edge_dir)
            if f.lower().endswith((".jpg",".png"))
        ])
        assert len(self.photo_paths) == len(self.edge_paths), "Mismatch count"
        self.tokenizer  = tokenizer
        self.transform  = transforms.Compose([
            transforms.Resize((resolution, resolution)),
            transforms.ToTensor(),
            transforms.Normalize([0.5]*3, [0.5]*3),
        ])

    def __len__(self):
        return len(self.photo_paths)

    def __getitem__(self, idx):
        # load
        img   = Image.open(self.photo_paths[idx]).convert("RGB")
        edge  = Image.open(self.edge_paths[idx]).convert("RGB")
        # to tensor
        img_t  = self.transform(img)
        edge_t = self.transform(edge)
        # tokenize (same prompt for all)
        tokens = self.tokenizer(
            PROMPT, padding="max_length", truncation=True,
            max_length=77, return_tensors="pt"
        ).input_ids[0]
        return img_t, edge_t, tokens

# build tokenizer + dataset + dataloader
tokenizer = CLIPTokenizer.from_pretrained(BASE_MODEL, subfolder="tokenizer", use_auth_token=True)
dataset   = PanelControlDataset(PHOTO_DIR, EDGE_MAP_DIR, tokenizer, RESOLUTION)
dataloader= DataLoader(
    dataset, batch_size=BATCH_SIZE, shuffle=True,
    num_workers=4, pin_memory=True
)

# ─────────────── Models ───────────────
# 1) ControlNet
controlnet = ControlNetModel.from_pretrained(
    CONTROLNET_ID, torch_dtype=torch.float16, use_auth_token=True
)

# 2) SDXL pipeline (only for scheduler + vae + text encoder)
pipeline = StableDiffusionXLControlNetPipeline.from_pretrained(
    BASE_MODEL,
    controlnet=controlnet,
    torch_dtype=torch.float16,
    use_auth_token=True
).to(device)
pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config)

# 3) Freeze everything except ControlNet
for p in pipeline.unet.parameters():       p.requires_grad = False
for p in pipeline.text_encoder.parameters():p.requires_grad = False
for p in pipeline.vae.parameters():        p.requires_grad = False
for p in pipeline.controlnet.parameters(): p.requires_grad = True

# 4) Optimizer & LR scheduler
optimizer = torch.optim.AdamW(
    pipeline.controlnet.parameters(), lr=LR
)
num_update_steps = (
    (len(dataloader) * EPOCHS)
    if MAX_TRAIN_STEPS is None else MAX_TRAIN_STEPS
)
lr_scheduler = get_scheduler(
    "linear", optimizer,
    num_warmup_steps=LR_WARMUP,
    num_training_steps=num_update_steps
)

# 5) Prepare with Accelerator
pipeline.controlnet, optimizer, dataloader, lr_scheduler = accelerator.prepare(
    pipeline.controlnet, optimizer, dataloader, lr_scheduler
)

# ─────────────── Training Loop ───────────────
global_step = 0
for epoch in range(1, EPOCHS+1):
    for batch in dataloader:
        imgs, edges, input_ids = batch
        imgs      = imgs.to(device)
        edges     = edges.to(device)
        input_ids = input_ids.to(device)

        # 1) encode images to latents + add noise
        latents = pipeline.vae.encode(imgs).latent_dist.sample() * pipeline.vae.config.scaling_factor
        noise   = torch.randn_like(latents)
        timesteps = torch.randint(
            0, pipeline.scheduler.config.num_train_timesteps,
            (latents.shape[0],), device=device
        ).long()
        noisy_latents = pipeline.scheduler.add_noise(latents, noise, timesteps)

        # 2) text embeddings
        encoder_hidden_states = pipeline.text_encoder(input_ids)[0]

        # 3) forward ControlNet+UNet to predict the noise residual
        #    ControlNet will be applied inside the UNet call
        model_pred = pipeline.unet(
            noisy_latents,
            timesteps,
            encoder_hidden_states=encoder_hidden_states,
            controlnet_cond=edges
        ).sample

        # 4) compute loss against the true noise
        loss = F.mse_loss(model_pred, noise)
        accelerator.backward(loss)

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        global_step += 1

    print(f"Epoch {epoch}/{EPOCHS} — last loss: {loss.item():.4f}")

# ─────────────── Save ControlNet ───────────────
os.makedirs(OUTPUT_DIR, exist_ok=True)
pipeline.controlnet.save_pretrained(OUTPUT_DIR)
print("✅ Saved fine-tuned ControlNet to", OUTPUT_DIR)




OSError: There was a specific connection error when trying to load stabilityai/stable-diffusion-xl-base-1.0:
401 Client Error: Unauthorized for url: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/tokenizer/tokenizer_config.json (Request ID: Root=1-6874c7a7-35ff4a432860546951be3337;8a404af9-0bc8-4826-afd0-cee45b3787b9)

Invalid credentials in Authorization header