# Train a diffusion model

- 작성일 : 24.07.30  
- 작성자 : 유소영  
- 출처 : https://huggingface.co/docs/diffusers/tutorials/basic_training

커뮤니티와 모델을 공유하는 것을 권장하며, 이를 위해서는 Hugging Face 계정에 로그인해야 함. 
노트북에서 로그인할 수 있으며, 프롬프트가 나타나면 토큰을 입력. <u>(주의) 토큰에 write 권한이 있는지 확인 필요.</u>

**시작하기 전 확인 사항**:

1. `pip install datasets` 실행. Datasets는 이미지 데이터셋을 로드하고 전처리하는 데 사용됨.
2. `pip install accelerate` 실행. Accelerate는 여러 GPU에서의 훈련을 단순화함.
3. `pip install wandb` 실행. wandb는 훈련 지표를 시각화하는 데 사용됨.

In [None]:
import os 
from huggingface_hub import notebook_login
# SECURITY: never store an access token in the notebook source. A token that
# has been committed or shared must be revoked on the Hub
# (Settings > Access Tokens > Invalidate and refresh) and replaced by a new
# token with write permission.
# huggingface_hub reads a token exported as HF_TOKEN (or the legacy
# HUGGING_FACE_HUB_TOKEN) by itself; only prompt when neither is set.
if not (os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")):
    notebook_login()

**(1) Training configuration**  

    편의를 위해 훈련 하이퍼파라미터를 포함하는 TrainingConfig 클래스를 생성함 (필요에 따라 조정 가능)

In [None]:
from dataclasses import dataclass

@dataclass
class TrainingConfig:
    """Hyperparameters and output settings shared by the notebook cells.

    Every attribute is annotated so that ``@dataclass`` registers it as a
    field: values can be overridden through the constructor
    (``TrainingConfig(num_epochs=5)``) and appear in ``repr`` and
    ``dataclasses.asdict``. Without annotations the decorator sees no fields.
    """

    image_size: int = 128  # the generated image resolution
    train_batch_size: int = 16
    eval_batch_size: int = 16  # how many images to sample during evaluation
    num_epochs: int = 50
    gradient_accumulation_steps: int = 1
    learning_rate: float = 1e-4
    lr_warmup_steps: int = 500
    save_image_epochs: int = 10
    save_model_epochs: int = 30
    mixed_precision: str = "fp16"  # `no` for float32, `fp16` for automatic mixed precision
    output_dir: str = "ddpm-butterflies-128"  # the model name locally and on the HF Hub

    push_to_hub: bool = True  # whether to upload the saved model to the HF Hub
    hub_model_id: str = "soyng/my-awesome-model"  # the name of the repository to create on the HF Hub
    hub_private_repo: bool = False
    overwrite_output_dir: bool = True  # overwrite the old model when re-running the notebook
    seed: int = 0

    # Hub dataset used for training; declared here instead of being attached
    # to the instance later on.
    dataset_name: str = "huggan/smithsonian_butterflies_subset"


config = TrainingConfig()
config.hub_model_id

**(2) Load the dataset**  
    Datasets 라이브러리를 사용하여 Smithsonian Butterflies 데이터셋을 불러옴.


In [None]:
from datasets import load_dataset

# Record which Hub dataset is used on the config, then load its "train" split.
config.dataset_name = "huggan/smithsonian_butterflies_subset"
dataset = load_dataset(path=config.dataset_name, split="train")

In [None]:
import matplotlib.pyplot as plt

# Preview the first four dataset images side by side, one per axis, with the
# axis decorations hidden.
fig, axs = plt.subplots(nrows=1, ncols=4, figsize=(16, 4))
for ax, image in zip(axs, dataset[:4]["image"]):
    ax.imshow(image)
    ax.set_axis_off()
fig.show()

In [None]:
from torchvision import transforms

# Per-image preprocessing, applied in this order:
#   1. resize to a config.image_size x config.image_size square
#   2. random horizontal flip (augmentation)
#   3. conversion to a tensor
#   4. normalisation with mean 0.5 and std 0.5
preprocess = transforms.Compose([
    transforms.Resize(size=(config.image_size, config.image_size)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

In [None]:
def transform(examples):
    """Preprocess a batch of dataset examples.

    Each entry of ``examples["image"]`` is converted to RGB and passed through
    ``preprocess``; the results are returned as a list under the key
    ``"images"``.
    """
    processed = []
    for picture in examples["image"]:
        rgb_picture = picture.convert("RGB")
        processed.append(preprocess(rgb_picture))
    return {"images": processed}


# Register the function as the dataset's transform.
dataset.set_transform(transform)

In [None]:
import torch
# Shuffled mini-batches of config.train_batch_size examples for training.
train_dataloader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=config.train_batch_size,
    shuffle=True,
)

**(3) Create a UNet2DModel**  
Pretrained models in Diffusers are easily created from their model class with the parameters you want. For example, to create a UNet2DModel:

In [None]:
from diffusers import UNet2DModel

# UNet used as the noise predictor. Block type legend:
#   DownBlock2D / UpBlock2D         - regular ResNet down-/upsampling block
#   AttnDownBlock2D / AttnUpBlock2D - the same with spatial self-attention
model = UNet2DModel(
    sample_size=config.image_size,  # the target image resolution
    in_channels=3,  # RGB input
    out_channels=3,  # RGB output
    layers_per_block=2,  # ResNet layers per UNet block
    block_out_channels=(128, 128, 256, 256, 512, 512),  # output channels of each UNet block
    # Four plain blocks, then one attention block, then a final plain block.
    down_block_types=("DownBlock2D",) * 4 + ("AttnDownBlock2D", "DownBlock2D"),
    # One plain block, one attention block, then four plain blocks.
    up_block_types=("UpBlock2D", "AttnUpBlock2D") + ("UpBlock2D",) * 4,
)

It is often a good idea to quickly check the sample image shape matches the model output shape:

In [None]:
# Take the first preprocessed image, add a leading batch dimension, and print
# its shape next to the shape of the model output at timestep 0.
sample_image = dataset[0]["images"].unsqueeze(dim=0)
print(f"Input shape: {sample_image.shape}")
print(f"Output shape: {model(sample_image, timestep=0).sample.shape}")

**(4) Create a scheduler**  

In [None]:
import torch
from PIL import Image
from diffusers.utils import make_image_grid

from diffusers import DDPMScheduler

noise_scheduler = DDPMScheduler(num_train_timesteps=1000)
noise = torch.randn(sample_image.shape)

# Visualise the forward (noising) process: add the same noise sample to the
# same image at increasing timesteps and show the results in one row.
noisy_images = []
for t in [0, 20, 100, 200, 300]:
    timesteps = torch.tensor([t], dtype=torch.long)
    noisy_image = noise_scheduler.add_noise(sample_image, noise, timesteps)
    # Rescale from the normalised range to pixel values and move channels last.
    # Added noise pushes values outside [-1, 1], so clamp to [0, 255] before
    # the uint8 cast; casting out-of-range floats to uint8 is not well defined.
    pixels = ((noisy_image.permute(0, 2, 3, 1) + 1.0) * 127.5).clamp(0, 255)
    noisy_images.append(Image.fromarray(pixels.type(torch.uint8).numpy()[0]))
make_image_grid(noisy_images, rows=1, cols=len(noisy_images)).show()

Accelerate를 사용하여 모든 구성 요소를 훈련 루프로 묶을 수 있음.
이를 통해 다음과 같은 기능을 쉽게 구현할 수 있습니다: 
  
1. 로깅, 2. 그래디언트 누적, 3. 혼합 정밀도 훈련

(https://huggingface.co/docs/accelerate/basic_tutorials/migration)

**(5) Train the model with tracking (wandb : https://kr.wandb.ai/ )**

By now, you have most of the pieces to start training the model and all that’s left is putting everything together. First, you’ll need an optimizer and a learning rate scheduler:
    
(https://huggingface.co/docs/accelerate/usage_guides/tracking)


In [None]:
import os
import wandb
from accelerate import Accelerator

# (Example) Authenticate with Weights & Biases.
# Do not paste the API key into the notebook: export WANDB_API_KEY before
# starting Jupyter, or let wandb.login() ask for the key interactively.
wandb.login()

# Minimal tracking example: create an Accelerator that logs to wandb and
# register a small dict of hyperparameters for the run.
accelerator = Accelerator(log_with="wandb")
hps = {"num_iterations": 5, "learning_rate": 1e-2}
accelerator.init_trackers("my_project", config=hps)

<u>전체 모델 학습 및 로그 추적 기능을 포함한 코드는 아래와 같습니다.</u> :

In [None]:
from pathlib import Path
from tqdm.auto import tqdm
import torch.nn.functional as F
from accelerate import Accelerator
from diffusers.optimization import get_cosine_schedule_with_warmup

# Train for fewer epochs than the TrainingConfig default. The override must
# happen before the LR scheduler is built, because the length of the cosine
# schedule is derived from config.num_epochs.
config.num_epochs = 20

# NOTE(review): config.mixed_precision is still not forwarded to Accelerator;
# confirm whether fp16 training is intended before wiring it in.
accelerator = Accelerator(
    log_with="wandb",
    gradient_accumulation_steps=config.gradient_accumulation_steps,
)

# init_trackers expects a plain dict of hyperparameters, so collect the public
# attributes of the config object (class-level defaults included).
tracker_config = {
    name: getattr(config, name)
    for name in dir(config)
    if not name.startswith("_") and not callable(getattr(config, name))
}
accelerator.init_trackers("my_project", config=tracker_config)

optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)
lr_scheduler = get_cosine_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=config.lr_warmup_steps,
    num_training_steps=len(train_dataloader) * config.num_epochs,
)

# Let Accelerate prepare the model, optimizer, dataloader and LR scheduler.
model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(
    model, optimizer, train_dataloader, lr_scheduler
)

global_step = 0

for epoch in range(config.num_epochs):
    # One progress bar per epoch, shown only on the local main process.
    progress_bar = tqdm(total=len(train_dataloader), disable=not accelerator.is_local_main_process)
    progress_bar.set_description(f"Epoch {epoch}")

    for batch in train_dataloader:
        clean_images = batch["images"]

        # Sample the noise that will be added to the images.
        noise = torch.randn(clean_images.shape, device=clean_images.device)
        bs = clean_images.shape[0]

        # Sample a random timestep for every image in the batch.
        timesteps = torch.randint(
            0,
            noise_scheduler.config.num_train_timesteps,
            (bs,),
            device=clean_images.device,
            dtype=torch.int64,
        )

        # Forward diffusion: add noise to the clean images at those timesteps.
        noisy_images = noise_scheduler.add_noise(clean_images, noise, timesteps)

        # Run the update inside Accelerate's gradient-accumulation context.
        with accelerator.accumulate(model):
            # Predict the noise residual and compare it with the true noise.
            noise_pred = model(noisy_images, timesteps, return_dict=False)[0]
            loss = F.mse_loss(noise_pred, noise)
            accelerator.backward(loss)

            # Clip gradients to a maximum norm of 1.0, then update the weights
            # and the learning rate and reset the gradients.
            accelerator.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()

        progress_bar.update(1)

        # Report the loss on the progress bar and to the tracker.
        logs = {"loss": loss.detach().item()}
        progress_bar.set_postfix(**logs)
        accelerator.log(logs, step=global_step)
        global_step += 1

    # Close the bar so finished epochs do not leave open tqdm instances behind.
    progress_bar.close()

accelerator.end_training()

**(6) model evaluate**

In [None]:
from diffusers import DDPMPipeline
from diffusers.utils import make_image_grid
import os

def evaluate(config, epoch, pipeline):
    """Sample a batch of images from the pipeline and save them as one grid.

    Args:
        config: object providing ``eval_batch_size``, ``seed`` and
            ``output_dir``.
        epoch: integer used, zero-padded to four digits, as the PNG file name.
        pipeline: callable accepting ``batch_size`` and ``generator`` whose
            result exposes the generated images as ``.images``.

    The grid is written to ``<output_dir>/samples/<epoch>.png`` and shown.
    """
    import math

    # Sample images from random noise (the backward diffusion process).
    # A dedicated CPU generator seeded from the config is used so the random
    # state of the main training loop is not rewound.
    images = pipeline(
        batch_size=config.eval_batch_size,
        generator=torch.Generator(device="cpu").manual_seed(config.seed),
    ).images

    # make_image_grid needs rows * cols == len(images). Pick the most square
    # exact factorisation instead of a fixed 4x4, so any eval_batch_size works
    # (16 images still give a 4x4 grid).
    num_images = len(images)
    rows = next(
        r for r in range(math.isqrt(num_images), 0, -1) if num_images % r == 0
    )
    cols = num_images // rows
    image_grid = make_image_grid(images, rows=rows, cols=cols)

    # Save the grid under <output_dir>/samples and display it.
    test_dir = os.path.join(config.output_dir, "samples")
    os.makedirs(test_dir, exist_ok=True)
    image_grid.save(os.path.join(test_dir, f"{epoch:04d}.png"))
    image_grid.show()

In [None]:
# Post-training sampling, executed on the main process only.
if accelerator.is_main_process:
    # Label the sample grid with the number of epochs that were trained.
    epoch = config.num_epochs

    # Build the generation pipeline. unwrap_model removes any wrapper layers
    # added by accelerator.prepare(), which is useful before saving the model.
    pipeline = DDPMPipeline(
        scheduler=noise_scheduler,
        unet=accelerator.unwrap_model(model),
    )

    # Generate demo images and write the grid to disk.
    evaluate(config=config, epoch=epoch, pipeline=pipeline)

**(7) Push to hub (option)**

    config.hub_model_id에 설정된 정보를 이용하여 폴더자체를 Hub에 푸시합니다.

In [None]:
from huggingface_hub import create_repo, upload_folder

# `pipeline` and `epoch` are created under accelerator.is_main_process in the
# evaluation cell, so saving and uploading are restricted to that process too.
if accelerator.is_main_process:
    # Save locally first in both modes, so the trained weights are on disk
    # even if the Hub calls below fail.
    pipeline.save_pretrained(config.output_dir)

    if config.push_to_hub:
        # Create the target repository (or reuse an existing one), falling
        # back to the output directory name when no hub_model_id is set.
        repo_id = create_repo(
            repo_id=config.hub_model_id or Path(config.output_dir).name,
            private=config.hub_private_repo,
            exist_ok=True,
        ).repo_id

        # Upload the whole output folder, skipping entries matching "epoch_*".
        upload_folder(
            repo_id=repo_id,
            folder_path=config.output_dir,
            commit_message=f"Epoch {epoch}",
            ignore_patterns=["epoch_*"],
        )