# **Text-to-image**

    - 작성일 : 24.07.30  
    - 작성자 : 유소영  
    - 출처 : 
        https://huggingface.co/docs/diffusers/training/text2image
        https://github.com/huggingface/diffusers/blob/main/examples/text_to_image/train_text_to_image.py

In [3]:
import argparse
import logging
import math
import os
import random
import shutil
from pathlib import Path

import accelerate
import datasets
import numpy as np
import torch
import torch.nn.functional as F
import torch.utils.checkpoint
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.state import AcceleratorState
from accelerate.utils import ProjectConfiguration, set_seed
from datasets import load_dataset
from huggingface_hub import create_repo, upload_folder
from packaging import version


from torchvision import transforms
from tqdm.auto import tqdm
from transformers import CLIPTextModel, CLIPTokenizer
from transformers.utils import ContextManagers

import diffusers
from diffusers import AutoencoderKL, DDPMScheduler, StableDiffusionPipeline, UNet2DConditionModel
from diffusers.optimization import get_scheduler
from diffusers.training_utils import EMAModel, compute_snr
from diffusers.utils import check_min_version, deprecate, is_wandb_available, make_image_grid
from diffusers.utils.import_utils import is_xformers_available

if is_wandb_available():
    import wandb


# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
# The call checks the installed diffusers version against the minimum "0.30.0.dev0".
check_min_version("0.30.0.dev0")

# Logger for this module, created with log level "INFO".
logger = get_logger(__name__, log_level="INFO")

# Maps a dataset name to its (image column, caption column) pair.
DATASET_NAME_MAPPING = {"soyng/wheel-web": ("image", "text")}


In [4]:
from huggingface_hub import notebook_login

# SECURITY: an access token must never be written into the notebook source.
# The token that used to be hard-coded here has to be considered leaked and
# should be revoked on the Hub (Settings > Access Tokens > invalidate and
# refresh), then replaced with a freshly created "write" token.
#
# Authenticate interactively instead; the token is typed into the login widget
# rather than stored in this file.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
def save_model_card(args, repo_id: str, images=None, repo_folder=None):
    """Write a ``README.md`` model card for the fine-tuned pipeline.

    The card starts with a YAML metadata block (which must begin on the very
    first line of the file, without indentation), followed by a short
    description, a usage snippet and the key training hyperparameters.

    Args:
        args: Training configuration. Reads ``pretrained_model_name_or_path``,
            ``dataset_name``, ``validation_prompts``, ``num_train_epochs``,
            ``learning_rate``, ``train_batch_size``,
            ``gradient_accumulation_steps``, ``resolution`` and
            ``mixed_precision``.
        repo_id: Repository id used in the card title and the usage snippet.
        images: Optional list of validation images. When non-empty they are
            tiled into ``val_imgs_grid.png`` inside ``repo_folder`` and the
            grid is embedded in the card.
        repo_folder: Directory that receives ``README.md`` (and the grid).
    """
    # Accept a single prompt string as well as a list of prompts so that
    # indexing below yields a whole prompt and never a single character.
    prompts = args.validation_prompts
    if prompts is None:
        prompts = []
    elif isinstance(prompts, str):
        prompts = [prompts]
    else:
        prompts = list(prompts)
    example_prompt = prompts[0] if prompts else ""

    # Save the validation images as one grid and reference it from the card.
    img_str = ""
    if images:
        image_grid = make_image_grid(images, 1, len(images))
        image_grid.save(os.path.join(repo_folder, "val_imgs_grid.png"))
        img_str = "![val_imgs_grid](./val_imgs_grid.png)\n"

    # YAML metadata block: every line starts at column 0 and the opening
    # '---' is the first line of the file.
    yaml_lines = [
        "---",
        "license: creativeml-openrail-m",
        f"base_model: {args.pretrained_model_name_or_path}",
    ]
    if args.dataset_name is not None:
        # Only advertise a dataset when one was actually named.
        yaml_lines += ["datasets:", f"- {args.dataset_name}"]
    yaml_lines += [
        "tags:",
        "- stable-diffusion",
        "- stable-diffusion-diffusers",
        "- text-to-image",
        "- diffusers",
        "inference: true",
        "---",
        "",
    ]
    yaml = "\n".join(yaml_lines)

    card_lines = [
        f"# Text-to-image finetuning - {repo_id}",
        "",
        (
            f"This pipeline was finetuned from **{args.pretrained_model_name_or_path}** "
            f"on the **{args.dataset_name}** dataset. Below are some example images "
            f"generated with the finetuned pipeline using the following prompts: {prompts}: "
        ),
        "",
        img_str,
        "",
        "## Pipeline usage",
        "",
        "You can use the pipeline like so:",
        "",
        "```python",
        "from diffusers import DiffusionPipeline",
        "import torch",
        "",
        f'pipeline = DiffusionPipeline.from_pretrained("{repo_id}", torch_dtype=torch.float16)',
        f'prompt = "{example_prompt}"',
        "image = pipeline(prompt).images[0]",
        'image.save("my_image.png")',
        "```",
        "",
        "## Training info",
        "",
        "These are the key hyperparameters used during training:",
        "",
        f"* Epochs: {args.num_train_epochs}",
        f"* Learning rate: {args.learning_rate}",
        f"* Batch size: {args.train_batch_size}",
        f"* Gradient accumulation steps: {args.gradient_accumulation_steps}",
        f"* Image resolution: {args.resolution}",
        f"* Mixed-precision: {args.mixed_precision}",
        "",
    ]
    model_card = "\n".join(card_lines)

    # Link the wandb run page when a run is active. The URL is initialised up
    # front so it is always bound, even when wandb is not installed.
    wandb_run_url = None
    if is_wandb_available() and wandb.run is not None:
        wandb_run_url = wandb.run.url

    if wandb_run_url is not None:
        model_card += (
            "\nMore information on all the CLI arguments and the environment are "
            f"available on your [`wandb` run page]({wandb_run_url}).\n"
        )

    with open(os.path.join(repo_folder, "README.md"), "w", encoding="utf-8") as f:
        f.write(yaml + model_card)


def log_validation(vae, text_encoder, tokenizer, unet, args, accelerator, weight_dtype, epoch):
    """Generate validation images with the current weights and log them.

    Args:
        vae, text_encoder, unet: Models that are unwrapped via
            ``accelerator.unwrap_model`` and plugged into the pipeline.
        tokenizer: Tokenizer handed to the pipeline.
        args: Training configuration. Reads ``pretrained_model_name_or_path``,
            ``revision``, ``enable_xformers_memory_efficient_attention``,
            ``seed`` and ``validation_prompts``.
        accelerator: Provides the target device and the active trackers.
        weight_dtype: ``torch_dtype`` used to build the pipeline.
        epoch: Step value passed to the TensorBoard writer.

    Returns:
        The list of generated images.
    """
    logger.info("검증 실행 중... ")

    # Build a Stable Diffusion pipeline around the models being trained.
    pipeline = StableDiffusionPipeline.from_pretrained(
        args.pretrained_model_name_or_path,
        vae=accelerator.unwrap_model(vae),
        text_encoder=accelerator.unwrap_model(text_encoder),
        tokenizer=tokenizer,
        unet=accelerator.unwrap_model(unet),
        safety_checker=None,
        revision=args.revision,
        torch_dtype=weight_dtype,
    )

    # Move the pipeline to the accelerator's device and silence its progress bar.
    pipeline = pipeline.to(accelerator.device)
    pipeline.set_progress_bar_config(disable=True)

    # Optional xformers memory-efficient attention.
    if args.enable_xformers_memory_efficient_attention:
        pipeline.enable_xformers_memory_efficient_attention()

    # Seeded generator only when a seed was configured.
    if args.seed is None:
        generator = None
    else:
        generator = torch.Generator(device=accelerator.device).manual_seed(args.seed)

    # A single prompt string keeps the previous behaviour of drawing four
    # samples of that prompt; a list of prompts yields one image per prompt.
    prompts = args.validation_prompts
    if isinstance(prompts, str):
        prompts = [prompts] * 4
    else:
        prompts = list(prompts)

    images = []
    for prompt in prompts:
        with torch.autocast("cuda"):
            image = pipeline(prompt, num_inference_steps=20, generator=generator).images[0]
        images.append(image)

    # Send the images to every tracker that has an image-logging branch here.
    for tracker in accelerator.trackers:
        if tracker.name == "tensorboard":
            np_images = np.stack([np.asarray(img) for img in images])
            tracker.writer.add_images("validation", np_images, epoch, dataformats="NHWC")
        elif tracker.name == "wandb":
            tracker.log(
                {
                    "validation": [
                        wandb.Image(image, caption=f"{i}: {prompt}")
                        for i, (image, prompt) in enumerate(zip(images, prompts))
                    ]
                }
            )
        else:
            # `warn` is a deprecated alias of `warning`.
            logger.warning(f"{tracker.name}에 대한 이미지 로깅이 구현되지 않았습니다")

    # Drop the pipeline and release cached CUDA memory.
    del pipeline
    torch.cuda.empty_cache()

    return images


In [6]:
from dataclasses import dataclass, field
from typing import Optional, List

@dataclass
class TrainingConfig:
    """Hyperparameters and paths for the text-to-image fine-tuning run.

    ``validation_prompts`` is always stored as a list of prompt strings (or
    ``None``); a single string passed to the constructor is wrapped in a list
    so that code indexing or iterating the prompts never walks over characters.
    """

    input_perturbation: float = 0
    pretrained_model_name_or_path: str = 'CompVis/stable-diffusion-v1-4'  # pretrained model path or id
    revision: Optional[str] = None  # model revision
    dataset_name: Optional[str] = None  # dataset name
    dataset_config_name: Optional[str] = None  # dataset configuration name
    train_data_dir: Optional[str] = '../data'  # training data directory
    image_column: str = "image"
    caption_column: str = "text"
    max_train_samples: Optional[int] = None  # maximum number of training samples
    # Validation prompts; a fresh list per instance (mutable default).
    validation_prompts: Optional[List[str]] = field(
        default_factory=lambda: ['High-performance car wheel rim, detailed 3D rendering']
    )
    output_dir: str = 'sd-model-finetuned'
    cache_dir: Optional[str] = None  # cache directory
    seed: Optional[int] = None  # random seed
    resolution: int = 512
    center_crop: bool = False
    random_flip: bool = False
    train_batch_size: int = 32
    num_train_epochs: int = 60
    max_train_steps: Optional[int] = None  # maximum number of training steps
    gradient_accumulation_steps: int = 1
    gradient_checkpointing: bool = False
    learning_rate: float = 1e-5
    scale_lr: bool = False
    lr_scheduler: str = "constant"
    lr_warmup_steps: int = 500
    snr_gamma: Optional[float] = None  # SNR weighting gamma
    use_8bit_adam: bool = False
    allow_tf32: bool = False
    use_ema: bool = False
    non_ema_revision: Optional[str] = None  # revision of the non-EMA model
    dataloader_num_workers: int = 0
    adam_beta1: float = 0.9
    adam_beta2: float = 0.999
    adam_weight_decay: float = 1e-2
    adam_epsilon: float = 1e-08
    max_grad_norm: float = 1.0
    push_to_hub: bool = True
    hub_token: Optional[str] = None  # model hub token
    prediction_type: Optional[str] = None  # prediction type
    hub_model_id: Optional[str] = 'soyng/photorealistic-wheel-v1-0'  # model hub id
    logging_dir: str = 'logs'
    mixed_precision: Optional[str] = None  # mixed-precision setting
    report_to: str = 'wandb'
    local_rank: int = -1
    checkpointing_steps: int = 500
    checkpoints_total_limit: Optional[int] = None  # maximum number of checkpoints kept
    resume_from_checkpoint: Optional[str] = 'checkpoint-8500'  # checkpoint to resume from
    enable_xformers_memory_efficient_attention: bool = False
    noise_offset: float = 0
    validation_epochs: int = 5
    tracker_project_name: str = 'CompVis_stable-diffusion-v1-4-fine-tune'

    def __post_init__(self):
        # Backward compatibility: accept a single prompt string.
        if isinstance(self.validation_prompts, str):
            self.validation_prompts = [self.validation_prompts]
    
# Create the module-level configuration instance from the dataclass defaults.
config = TrainingConfig()

# Read LOCAL_RANK from the environment; a value other than -1 that differs
# from the configured rank replaces it.
import os

launcher_rank = int(os.environ.get("LOCAL_RANK", -1))
if launcher_rank not in (-1, config.local_rank):
    config.local_rank = launcher_rank

# Sanity check: at least one data source has to be configured.
if not (config.dataset_name is not None or config.train_data_dir is not None):
    raise ValueError("Need either a dataset name or a training folder.")

# Default to using the same revision for the non-EMA model if not specified.
config.non_ema_revision = config.revision if config.non_ema_revision is None else config.non_ema_revision

In [None]:
def main():
    """Fine-tune the UNet of a Stable Diffusion checkpoint on an image/caption dataset.

    The run is configured entirely from a fresh ``TrainingConfig()``. The
    function sets up the accelerator and logging, loads the scheduler,
    tokenizer, text encoder, VAE and UNet, builds the dataset and dataloader,
    optionally resumes from a checkpoint, runs the training loop with periodic
    checkpointing and validation, and finally saves the pipeline to
    ``output_dir`` and, when ``push_to_hub`` is set, uploads it.
    """
    args = TrainingConfig()

    # Emit the deprecation notice when a separate non-EMA revision is set.
    if args.non_ema_revision is not None:
        deprecate(
            "non_ema_revision!=None",
            "0.15.0",
            message=(
                "Hub에서 'non_ema' 가중치를 리비전 브랜치에서 다운로드하는 것은 더 이상 사용되지 않습니다. "
                "`--variant=non_ema`를 대신 사용해주세요."
            ),
        )

    # Logging directory lives inside the output directory.
    logging_dir = os.path.join(args.output_dir, args.logging_dir)

    # Accelerator project configuration.
    accelerator_project_config = ProjectConfiguration(project_dir=args.output_dir, logging_dir=logging_dir)

    # Initialise the accelerator.
    accelerator = Accelerator(
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        mixed_precision=args.mixed_precision,
        log_with=args.report_to,
        project_config=accelerator_project_config,
    )

    # Configure logging.
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )
    logger.info(accelerator.state, main_process_only=False)

    # Library verbosity: more detail on the local main process, errors only elsewhere.
    if accelerator.is_local_main_process:
        datasets.utils.logging.set_verbosity_warning()
        transformers.utils.logging.set_verbosity_warning()
        diffusers.utils.logging.set_verbosity_info()
    else:
        datasets.utils.logging.set_verbosity_error()
        transformers.utils.logging.set_verbosity_error()
        diffusers.utils.logging.set_verbosity_error()

    # Seed everything when a seed is configured.
    if args.seed is not None:
        set_seed(args.seed)

    # Create the output directory and, if requested, the Hub repository.
    if accelerator.is_main_process:
        if args.output_dir is not None:
            os.makedirs(args.output_dir, exist_ok=True)

        if args.push_to_hub:
            repo_id = create_repo(
                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
            ).repo_id

    # Load the noise scheduler and tokenizer.
    noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
    tokenizer = CLIPTokenizer.from_pretrained(
        args.pretrained_model_name_or_path, subfolder="tokenizer", revision=args.revision
    )

    def deepspeed_zero_init_disabled_context_manager():
        """Return a list with a context manager that disables DeepSpeed ZeRO-3 init, or an empty list."""
        deepspeed_plugin = AcceleratorState().deepspeed_plugin if accelerate.state.is_initialized() else None
        if deepspeed_plugin is None:
            return []
        return [deepspeed_plugin.zero3_init_context_manager(enable=False)]

    # Load the text encoder and VAE under those context managers.
    with ContextManagers(deepspeed_zero_init_disabled_context_manager()):
        text_encoder = CLIPTextModel.from_pretrained(
            args.pretrained_model_name_or_path, subfolder="text_encoder", revision=args.revision
        )
        vae = AutoencoderKL.from_pretrained(
            args.pretrained_model_name_or_path, subfolder="vae", revision=args.revision
        )

    # Load the UNet.
    unet = UNet2DConditionModel.from_pretrained(
        args.pretrained_model_name_or_path, subfolder="unet", revision=args.non_ema_revision
    )

    # Freeze the VAE and text encoder; only the UNet is trained.
    vae.requires_grad_(False)
    text_encoder.requires_grad_(False)
    unet.train()

    # Optional EMA copy of the UNet.
    if args.use_ema:
        ema_unet = UNet2DConditionModel.from_pretrained(
            args.pretrained_model_name_or_path, subfolder="unet", revision=args.revision
        )
        ema_unet = EMAModel(ema_unet.parameters(), model_cls=UNet2DConditionModel, model_config=ema_unet.config)

    # Optional xformers memory-efficient attention.
    if args.enable_xformers_memory_efficient_attention:
        if is_xformers_available():
            import xformers
            xformers_version = version.parse(xformers.__version__)
            if xformers_version == version.parse("0.0.16"):
                # `warn` is a deprecated alias of `warning`.
                logger.warning(
                    "xFormers 0.0.16은 일부 GPU에서 훈련에 사용할 수 없습니다. 훈련 중 문제가 발생하면 xFormers를 최소 0.0.17 버전으로 업데이트하세요."
                )
            unet.enable_xformers_memory_efficient_attention()
        else:
            raise ValueError("xformers를 사용할 수 없습니다. 올바르게 설치되었는지 확인하세요.")

    # Custom save/load hooks, registered only for accelerate >= 0.16.0.
    if version.parse(accelerate.__version__) >= version.parse("0.16.0"):
        def save_model_hook(models, weights, output_dir):
            """Save the (EMA and) UNet weights in diffusers format under ``output_dir``."""
            if accelerator.is_main_process:
                if args.use_ema:
                    ema_unet.save_pretrained(os.path.join(output_dir, "unet_ema"))

                for i, model in enumerate(models):
                    model.save_pretrained(os.path.join(output_dir, "unet"))
                    # Pop the matching weight entry; presumably this keeps accelerate
                    # from saving the same model again (as in the upstream example).
                    weights.pop()

        def load_model_hook(models, input_dir):
            """Restore the (EMA and) UNet weights from ``input_dir``."""
            if args.use_ema:
                load_model = EMAModel.from_pretrained(os.path.join(input_dir, "unet_ema"), UNet2DConditionModel)
                ema_unet.load_state_dict(load_model.state_dict())
                ema_unet.to(accelerator.device)
                del load_model

            for i in range(len(models)):
                # Pop each model from the list and load the saved UNet into it.
                model = models.pop()
                load_model = UNet2DConditionModel.from_pretrained(input_dir, subfolder="unet")
                model.register_to_config(**load_model.config)
                model.load_state_dict(load_model.state_dict())
                del load_model

        accelerator.register_save_state_pre_hook(save_model_hook)
        accelerator.register_load_state_pre_hook(load_model_hook)

    # Optional gradient checkpointing.
    if args.gradient_checkpointing:
        unet.enable_gradient_checkpointing()

    # Optionally allow TF32 matmuls.
    if args.allow_tf32:
        torch.backends.cuda.matmul.allow_tf32 = True

    # Optional learning-rate scaling by accumulation steps, batch size and process count.
    if args.scale_lr:
        args.learning_rate = (
            args.learning_rate * args.gradient_accumulation_steps * args.train_batch_size * accelerator.num_processes
        )

    # Choose the optimizer class (8-bit AdamW from bitsandbytes, or torch AdamW).
    if args.use_8bit_adam:
        try:
            import bitsandbytes as bnb
        except ImportError:
            raise ImportError("8비트 Adam을 사용하려면 bitsandbytes를 설치하세요. `pip install bitsandbytes`로 설치할 수 있습니다.")
        optimizer_cls = bnb.optim.AdamW8bit
    else:
        optimizer_cls = torch.optim.AdamW

    # Create the optimizer over the UNet parameters.
    optimizer = optimizer_cls(
        unet.parameters(),
        lr=args.learning_rate,
        betas=(args.adam_beta1, args.adam_beta2),
        weight_decay=args.adam_weight_decay,
        eps=args.adam_epsilon,
    )

    # Load the dataset: a named dataset, or an "imagefolder" built from train_data_dir.
    if args.dataset_name is not None:
        dataset = load_dataset(
            args.dataset_name,
            args.dataset_config_name,
            cache_dir=args.cache_dir,
            data_dir=args.train_data_dir,
        )
    else:
        data_files = {}
        if args.train_data_dir is not None:
            data_files["train"] = os.path.join(args.train_data_dir, "**")
        dataset = load_dataset(
            "imagefolder",
            data_files=data_files,
            cache_dir=args.cache_dir,
        )

    # Resolve the image and caption column names.
    column_names = dataset["train"].column_names

    dataset_columns = DATASET_NAME_MAPPING.get(args.dataset_name, None)
    if args.image_column is None:
        image_column = dataset_columns[0] if dataset_columns is not None else column_names[0]
    else:
        image_column = args.image_column
        if image_column not in column_names:
            raise ValueError(f"--image_column 값 '{args.image_column}'은 다음 중 하나여야 합니다: {', '.join(column_names)}")
    if args.caption_column is None:
        caption_column = dataset_columns[1] if dataset_columns is not None else column_names[1]
    else:
        caption_column = args.caption_column
        if caption_column not in column_names:
            raise ValueError(f"--caption_column 값 '{args.caption_column}'은 다음 중 하나여야 합니다: {', '.join(column_names)}")

    def tokenize_captions(examples, is_train=True):
        """Tokenize one caption per example and return the ``input_ids`` tensor.

        When an example holds several captions, a random one is used during
        training and the first one otherwise.
        """
        captions = []
        for caption in examples[caption_column]:
            if isinstance(caption, str):
                captions.append(caption)
            elif isinstance(caption, (list, np.ndarray)):
                captions.append(random.choice(caption) if is_train else caption[0])
            else:
                raise ValueError(f"캡션 컬럼 `{caption_column}`은 문자열 또는 문자열 리스트를 포함해야 합니다.")
        inputs = tokenizer(
            captions, max_length=tokenizer.model_max_length, padding="max_length", truncation=True, return_tensors="pt"
        )
        return inputs.input_ids

    # Image transforms applied to the training data.
    train_transforms = transforms.Compose(
        [
            transforms.Resize(args.resolution, interpolation=transforms.InterpolationMode.BILINEAR),
            transforms.CenterCrop(args.resolution) if args.center_crop else transforms.RandomCrop(args.resolution),
            transforms.RandomHorizontalFlip() if args.random_flip else transforms.Lambda(lambda x: x),
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5]),
        ]
    )

    def preprocess_train(examples):
        """Add ``pixel_values`` and ``input_ids`` to a batch of examples."""
        images = [image.convert("RGB") for image in examples[image_column]]
        examples["pixel_values"] = [train_transforms(image) for image in images]
        examples["input_ids"] = tokenize_captions(examples)
        return examples

    # Optional subsampling, then attach the on-the-fly preprocessing.
    with accelerator.main_process_first():
        if args.max_train_samples is not None:
            dataset["train"] = dataset["train"].shuffle(seed=args.seed).select(range(args.max_train_samples))
        train_dataset = dataset["train"].with_transform(preprocess_train)

    def collate_fn(examples):
        """Stack per-example tensors into a batch dictionary."""
        pixel_values = torch.stack([example["pixel_values"] for example in examples])
        pixel_values = pixel_values.to(memory_format=torch.contiguous_format).float()
        input_ids = torch.stack([example["input_ids"] for example in examples])
        return {"pixel_values": pixel_values, "input_ids": input_ids}

    # Training dataloader.
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        shuffle=True,
        collate_fn=collate_fn,
        batch_size=args.train_batch_size,
        num_workers=args.dataloader_num_workers,
    )

    # Derive the number of training steps when it was not given explicitly.
    overrode_max_train_steps = False
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    if args.max_train_steps is None:
        args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch
        overrode_max_train_steps = True

    # Learning-rate scheduler; warmup and total steps are multiplied by the process count.
    lr_scheduler = get_scheduler(
        args.lr_scheduler,
        optimizer=optimizer,
        num_warmup_steps=args.lr_warmup_steps * accelerator.num_processes,
        num_training_steps=args.max_train_steps * accelerator.num_processes,
    )

    # Let the accelerator prepare the model, optimizer, dataloader and scheduler.
    unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(
        unet, optimizer, train_dataloader, lr_scheduler
    )

    # Move the EMA model to the device.
    if args.use_ema:
        ema_unet.to(accelerator.device)

    # Pick the dtype for the frozen models from the accelerator's mixed-precision mode.
    weight_dtype = torch.float32
    if accelerator.mixed_precision == "fp16":
        weight_dtype = torch.float16
        args.mixed_precision = accelerator.mixed_precision
    elif accelerator.mixed_precision == "bf16":
        weight_dtype = torch.bfloat16
        args.mixed_precision = accelerator.mixed_precision

    # Move the text encoder and VAE to the device in that dtype.
    text_encoder.to(accelerator.device, dtype=weight_dtype)
    vae.to(accelerator.device, dtype=weight_dtype)

    # Recompute the step/epoch counts: the dataloader length may have changed after prepare().
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    if overrode_max_train_steps:
        args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch
    args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)

    # Initialise the trackers on the main process; the prompts are left out of the logged config.
    if accelerator.is_main_process:
        tracker_config = dict(vars(args))
        tracker_config.pop("validation_prompts")
        accelerator.init_trackers(args.tracker_project_name, tracker_config)

    # Start of training.
    total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps

    logger.info("***** 훈련 시작 *****")
    logger.info(f"  총 샘플 수 = {len(train_dataset)}")
    logger.info(f"  에포크 수 = {args.num_train_epochs}")
    logger.info(f"  디바이스당 배치 크기 = {args.train_batch_size}")
    logger.info(f"  총 훈련 배치 크기 (병렬, 분산 & 누적) = {total_batch_size}")
    logger.info(f"  그래디언트 누적 단계 = {args.gradient_accumulation_steps}")
    logger.info(f"  총 최적화 단계 = {args.max_train_steps}")

    global_step = 0
    first_epoch = 0

    # Optionally resume from a checkpoint inside output_dir.
    if args.resume_from_checkpoint:
        if args.resume_from_checkpoint != "latest":
            path = os.path.basename(args.resume_from_checkpoint)
        else:
            # Pick the checkpoint directory with the highest step number.
            dirs = os.listdir(args.output_dir)
            dirs = [d for d in dirs if d.startswith("checkpoint")]
            dirs = sorted(dirs, key=lambda x: int(x.split("-")[1]))
            path = dirs[-1] if len(dirs) > 0 else None

        if path is None:
            accelerator.print(f"체크포인트 '{args.resume_from_checkpoint}'가 존재하지 않습니다. 새로운 훈련을 시작합니다.")
            args.resume_from_checkpoint = None
            initial_global_step = 0
        else:
            accelerator.print(f"체크포인트 {path}에서 재개합니다")
            accelerator.load_state(os.path.join(args.output_dir, path))
            # The step count is encoded in the directory name: checkpoint-<step>.
            global_step = int(path.split("-")[1])

            initial_global_step = global_step
            first_epoch = global_step // num_update_steps_per_epoch
    else:
        initial_global_step = 0

    progress_bar = tqdm(
        range(0, args.max_train_steps),
        initial=initial_global_step,
        desc="Steps",
        disable=not accelerator.is_local_main_process,
    )

    # Training loop.
    for epoch in range(first_epoch, args.num_train_epochs):
        train_loss = 0.0
        for step, batch in enumerate(train_dataloader):
            with accelerator.accumulate(unet):
                # Encode the images into latents and apply the VAE scaling factor.
                latents = vae.encode(batch["pixel_values"].to(weight_dtype)).latent_dist.sample()
                latents = latents * vae.config.scaling_factor

                # Sample the noise to add to the latents.
                noise = torch.randn_like(latents)
                if args.noise_offset:
                    # https://www.crosslabs.org//blog/diffusion-with-offset-noise
                    noise += args.noise_offset * torch.randn(
                        (latents.shape[0], latents.shape[1], 1, 1), device=latents.device
                    )
                if args.input_perturbation:
                    new_noise = noise + args.input_perturbation * torch.randn_like(noise)

                bsz = latents.shape[0]
                # Sample a random timestep for each image.
                timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (bsz,), device=latents.device)
                timesteps = timesteps.long()

                # Forward diffusion: add noise to the latents at the sampled timesteps.
                if args.input_perturbation:
                    noisy_latents = noise_scheduler.add_noise(latents, new_noise, timesteps)
                else:
                    noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)

                # Text embeddings used for conditioning.
                encoder_hidden_states = text_encoder(batch["input_ids"])[0]

                # Select the regression target according to the prediction type.
                if args.prediction_type is not None:
                    noise_scheduler.register_to_config(prediction_type=args.prediction_type)

                if noise_scheduler.config.prediction_type == "epsilon":
                    target = noise
                elif noise_scheduler.config.prediction_type == "v_prediction":
                    target = noise_scheduler.get_velocity(latents, noise, timesteps)
                else:
                    raise ValueError(f"알 수 없는 예측 타입 {noise_scheduler.config.prediction_type}")

                # Predict with the UNet and compute the loss.
                model_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample

                if args.snr_gamma is None:
                    loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean")
                else:
                    # SNR-based loss weights (https://arxiv.org/abs/2303.09556).
                    snr = compute_snr(noise_scheduler, timesteps)
                    if noise_scheduler.config.prediction_type == "v_prediction":
                        snr = snr + 1
                    mse_loss_weights = (
                        torch.stack([snr, args.snr_gamma * torch.ones_like(timesteps)], dim=1).min(dim=1)[0] / snr
                    )

                    loss = F.mse_loss(model_pred.float(), target.float(), reduction="none")
                    loss = loss.mean(dim=list(range(1, len(loss.shape)))) * mse_loss_weights
                    loss = loss.mean()

                # Gather the loss across processes for logging.
                avg_loss = accelerator.gather(loss.repeat(args.train_batch_size)).mean()
                train_loss += avg_loss.item() / args.gradient_accumulation_steps

                # Backpropagate and update.
                accelerator.backward(loss)
                if accelerator.sync_gradients:
                    accelerator.clip_grad_norm_(unet.parameters(), args.max_grad_norm)
                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()

            # True when the accelerator performed an optimisation step for this batch.
            if accelerator.sync_gradients:
                if args.use_ema:
                    ema_unet.step(unet.parameters())
                progress_bar.update(1)
                global_step += 1
                accelerator.log({"train_loss": train_loss}, step=global_step)
                train_loss = 0.0

                # Periodic checkpointing.
                if global_step % args.checkpointing_steps == 0:
                    if accelerator.is_main_process:
                        # Enforce the checkpoint limit before saving a new one.
                        if args.checkpoints_total_limit is not None:
                            checkpoints = os.listdir(args.output_dir)
                            checkpoints = [d for d in checkpoints if d.startswith("checkpoint")]
                            checkpoints = sorted(checkpoints, key=lambda x: int(x.split("-")[1]))

                            # Remove the oldest ones so that, after saving, at most the limit remains.
                            if len(checkpoints) >= args.checkpoints_total_limit:
                                num_to_remove = len(checkpoints) - args.checkpoints_total_limit + 1
                                removing_checkpoints = checkpoints[0:num_to_remove]

                                logger.info(
                                    f"{len(checkpoints)} 체크포인트가 이미 존재합니다. {len(removing_checkpoints)} 체크포인트를 제거합니다."
                                )
                                logger.info(f"제거할 체크포인트: {', '.join(removing_checkpoints)}")

                                for removing_checkpoint in removing_checkpoints:
                                    removing_checkpoint = os.path.join(args.output_dir, removing_checkpoint)
                                    shutil.rmtree(removing_checkpoint)

                        save_path = os.path.join(args.output_dir, f"checkpoint-{global_step}")
                        accelerator.save_state(save_path)
                        logger.info(f"체크포인트 저장 위치: {save_path}")

            logs = {"step_loss": loss.detach().item(), "lr": lr_scheduler.get_last_lr()[0]}
            progress_bar.set_postfix(**logs)

            if global_step >= args.max_train_steps:
                break

        # Periodic validation on the main process.
        if accelerator.is_main_process:
            if args.validation_prompts is not None and epoch % args.validation_epochs == 0:
                if args.use_ema:
                    # Temporarily swap in the EMA parameters.
                    ema_unet.store(unet.parameters())
                    ema_unet.copy_to(unet.parameters())
                log_validation(
                    vae,
                    text_encoder,
                    tokenizer,
                    unet,
                    args,
                    accelerator,
                    weight_dtype,
                    global_step,
                )
                if args.use_ema:
                    # Restore the original UNet parameters.
                    ema_unet.restore(unet.parameters())

    # Build and save the final pipeline from the trained models.
    accelerator.wait_for_everyone()
    if accelerator.is_main_process:
        unet = accelerator.unwrap_model(unet)
        if args.use_ema:
            ema_unet.copy_to(unet.parameters())

        pipeline = StableDiffusionPipeline.from_pretrained(
            args.pretrained_model_name_or_path,
            text_encoder=text_encoder,
            vae=vae,
            unet=unet,
            revision=args.revision,
        )
        pipeline.save_pretrained(args.output_dir)

        # Final inference: one image per validation prompt.
        images = []
        if args.validation_prompts is not None:
            # A bare string would otherwise be indexed character by character,
            # producing one image per character; treat it as a single prompt.
            if isinstance(args.validation_prompts, str):
                final_prompts = [args.validation_prompts]
            else:
                final_prompts = list(args.validation_prompts)

            logger.info("생성된 이미지 수집을 위한 추론 실행 중...")
            pipeline = pipeline.to(accelerator.device)
            pipeline.torch_dtype = weight_dtype
            pipeline.set_progress_bar_config(disable=True)

            if args.enable_xformers_memory_efficient_attention:
                pipeline.enable_xformers_memory_efficient_attention()

            if args.seed is None:
                generator = None
            else:
                generator = torch.Generator(device=accelerator.device).manual_seed(args.seed)

            for prompt in final_prompts:
                with torch.autocast("cuda"):
                    image = pipeline(prompt, num_inference_steps=20, generator=generator).images[0]
                images.append(image)

        if args.push_to_hub:
            save_model_card(args, repo_id, images, repo_folder=args.output_dir)
            upload_folder(
                repo_id=repo_id,
                folder_path=args.output_dir,
                commit_message="훈련 종료",
                ignore_patterns=["step_*", "epoch_*"],
            )

    accelerator.end_training()

if __name__ == "__main__":
    main()

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
08/01/2024 02:51:06 - INFO - __main__ - Distributed environment: NO
Num processes: 1
Process index: 0
Local process index: 0
Device: cuda

Mixed precision type: no

{'clip_sample_range', 'prediction_type', 'rescale_betas_zero_snr', 'sample_max_value', 'thresholding', 'timestep_spacing', 'variance_type', 'dynamic_thresholding_ratio'} was not found in config. Values will be initialized to default values.
{'use_quant_conv', 'shift_factor', 'use_post_quant_conv', 'norm_num_groups', 'latents_mean', 'latents_std', 'force_upcast'} was not found in config. Values will be initialized to default values.
{'only_cross_attention', 'conv_in_kernel', 'transformer_layers_per_block', 'conv_out_kernel', 'encoder_hid_dim', 'time_embedding_dim', 'dropout', 'time_cond_proj_dim', 'dual_cross_attention', 'num_attention_he

Resolving data files:   0%|          | 0/4967 [00:00<?, ?it/s]

[34m[1mwandb[0m: Currently logged in as: [33msoyoung9306[0m ([33msoyoung9306-slack[0m). Use [1m`wandb login --relogin`[0m to force relogin


08/01/2024 02:51:18 - INFO - __main__ - ***** 훈련 시작 *****
08/01/2024 02:51:18 - INFO - __main__ -   총 샘플 수 = 4962
08/01/2024 02:51:18 - INFO - __main__ -   에포크 수 = 60
08/01/2024 02:51:18 - INFO - __main__ -   디바이스당 배치 크기 = 32
08/01/2024 02:51:18 - INFO - __main__ -   총 훈련 배치 크기 (병렬, 분산 & 누적) = 32
08/01/2024 02:51:18 - INFO - __main__ -   그래디언트 누적 단계 = 1
08/01/2024 02:51:18 - INFO - __main__ -   총 최적화 단계 = 9360
08/01/2024 02:51:18 - INFO - accelerate.accelerator - Loading states from sd-model-finetuned/checkpoint-8500


체크포인트 checkpoint-8500에서 재개합니다


08/01/2024 02:51:19 - INFO - accelerate.checkpointing - All model weights loaded successfully
08/01/2024 02:51:23 - INFO - accelerate.checkpointing - All optimizer states loaded successfully
08/01/2024 02:51:23 - INFO - accelerate.checkpointing - All scheduler states loaded successfully
08/01/2024 02:51:23 - INFO - accelerate.checkpointing - All dataloader sampler states loaded successfully
08/01/2024 02:51:23 - INFO - accelerate.checkpointing - All random states loaded successfully
08/01/2024 02:51:23 - INFO - accelerate.accelerator - Loading in 0 custom states


Steps:  91%|######### | 8500/9360 [00:00<?, ?it/s]

08/01/2024 03:14:18 - INFO - __main__ - 검증 실행 중... 
{'requires_safety_checker', 'image_encoder'} was not found in config. Values will be initialized to default values.


Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

Loaded feature_extractor as CLIPImageProcessor from `feature_extractor` subfolder of CompVis/stable-diffusion-v1-4.
{'timestep_spacing', 'prediction_type'} was not found in config. Values will be initialized to default values.
Loaded scheduler as PNDMScheduler from `scheduler` subfolder of CompVis/stable-diffusion-v1-4.
You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .
08/01/2024 03:28:12 - INFO

In [None]:
# Manual upload cell: pushes the current contents of the output directory to
# the Hub, using the module-level `config` for every setting.
args = TrainingConfig()
if config.push_to_hub:
    repo_id = create_repo(
        repo_id=config.hub_model_id or Path(config.output_dir).name,
        exist_ok=True,
    ).repo_id
    # Model-card generation is left disabled in this cell:
    # save_model_card(args, repo_id, None, repo_folder=args.output_dir)
    upload_folder(
        repo_id=repo_id,
        folder_path=config.output_dir,
        commit_message=f"Epoch {config.num_train_epochs}",
        ignore_patterns=["epoch_*"],
    )
    


- empty or missing yaml metadata in repo card


model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]