In [3]:
import os

from huggingface_hub import login
from kaggle_secrets import UserSecretsClient

# A single client is enough for every secret lookup in this cell.
user_secrets = UserSecretsClient()

# Fetch the Hugging Face token once and reuse it for both the environment
# variable and the explicit hub login below.
hf_token = user_secrets.get_secret("HUGGINGFACEHUB_API_TOKEN")
os.environ['HUGGINGFACEHUB_API_TOKEN'] = hf_token

# The secret label must not carry stray whitespace: the lookup is by exact name.
os.environ['WANDB_API_KEY'] = user_secrets.get_secret("WANDB_API_KEY")

login(token=hf_token)

In [4]:
pwd

'/kaggle/working'

In [9]:
!mkdir -p src/utils

In [10]:
%%writefile src/utils/exp_utils.py
import os

def create_exp_dir(exp_name):
    """Create the directory layout for one experiment under ``exps/``.

    The folder ``exps/<exp_name>`` and its ``checkpoints``, ``configs``,
    ``data`` and ``results`` sub-folders are created if they do not exist yet.

    Args:
        exp_name: Name of the experiment; used as the folder name below ``exps``.

    Returns:
        A 5-tuple of paths in this order:
        ``(exp_dir, configs_dir, data_dir, checkpoints_dir, results_dir)``.
    """
    root = 'exps'
    os.makedirs(root, exist_ok=True)

    exp_dir = os.path.join(root, exp_name)
    os.makedirs(exp_dir, exist_ok=True)

    # Map each sub-folder name to its full path, then create them all.
    sub_paths = {
        name: os.path.join(exp_dir, name)
        for name in ('checkpoints', 'configs', 'data', 'results')
    }
    for sub_path in sub_paths.values():
        os.makedirs(sub_path, exist_ok=True)

    # Note: the return order differs from the creation order above.
    return (
        exp_dir,
        sub_paths['configs'],
        sub_paths['data'],
        sub_paths['checkpoints'],
        sub_paths['results'],
    )



def summarize_results():
    """Placeholder for result summarisation; currently does nothing and returns None."""
    return None

Writing src/utils/exp_utils.py


In [11]:
%%writefile src/utils/log_utils.py
import logging
from colorama import Fore, Style, init

def init_logging() -> None:
    """Configure the root logger with a colored stream handler at INFO level.

    Note that ``logging.basicConfig`` is a no-op when the root logger already
    has handlers, so calling this after another library configured logging
    leaves the existing configuration in place.
    """
    init(autoreset=True)  # Initialize colorama for cross-platform compatibility

    class ColorFormatter(logging.Formatter):
        """Custom formatter to add colors to log levels."""
        LOG_COLORS = {
            logging.DEBUG: Fore.CYAN,
            logging.INFO: Fore.GREEN,
            logging.WARNING: Fore.YELLOW,
            logging.ERROR: Fore.RED,
            logging.CRITICAL: Fore.RED + Style.BRIGHT,
        }

        def format(self, record):
            """Return the formatted record with a colorized level name."""
            log_color = self.LOG_COLORS.get(record.levelno, "")
            plain_levelname = record.levelname
            record.levelname = f"{log_color}{plain_levelname}{Style.RESET_ALL}"
            try:
                return super().format(record)
            finally:
                # The same LogRecord object is handed to every handler, so the
                # level name is restored to keep color codes from leaking into
                # (or stacking up in) other handlers' output.
                record.levelname = plain_levelname

    handler = logging.StreamHandler()
    formatter = ColorFormatter(
        fmt="%(asctime)s %(levelname)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S"
    )
    handler.setFormatter(formatter)

    logging.basicConfig(level=logging.INFO, handlers=[handler])

# # Example usage
# if __name__ == "__main__":
#     init_logging()
#     logging.debug("This is a debug message")
#     logging.info("This is an info message")
#     logging.warning("This is a warning message")
#     logging.error("This is an error message")
#     logging.critical("This is a critical message")


import logging
from colorama import Fore, Style, init

# Initialize colorama
init(autoreset=True)

class ColorFormatter(logging.Formatter):
    """Formatter that wraps the record's level name in a colorama color code."""

    # Color prefix per log level; levels not listed here are left uncolored.
    LOG_COLORS = {
        logging.DEBUG: Fore.CYAN,
        logging.INFO: Fore.GREEN,
        logging.WARNING: Fore.YELLOW,
        logging.ERROR: Fore.RED,
        logging.CRITICAL: Fore.RED + Style.BRIGHT,
    }

    def format(self, record):
        """Return the formatted record with a colorized level name.

        The record's ``levelname`` is only changed for the duration of the
        formatting call. The same LogRecord object is passed to every handler
        (including those of ancestor loggers), so leaving the colored name on
        the record would leak escape codes into other handlers or wrap the
        name in color codes more than once.
        """
        log_color = self.LOG_COLORS.get(record.levelno, "")
        plain_levelname = record.levelname
        record.levelname = f"{log_color}{plain_levelname}{Style.RESET_ALL}"
        try:
            return super().format(record)
        finally:
            record.levelname = plain_levelname

def setup_logger(name: str):
    """Return the logger called ``name`` with a colored stream handler attached.

    The logger level is set to DEBUG. The function is safe to call repeatedly
    with the same name: ``logging.getLogger`` returns the same logger object
    each time, so the colored handler is attached only once to avoid every
    message being printed multiple times.

    Args:
        name: Name of the logger to configure.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    has_color_handler = any(
        isinstance(existing.formatter, ColorFormatter) for existing in logger.handlers
    )
    if not has_color_handler:
        handler = logging.StreamHandler()
        formatter = ColorFormatter(
            "%(asctime)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    return logger

# # Example usage
# logger = setup_logger("my_logger")
# logger.debug("This is a debug message")
# logger.info("This is an info message")
# logger.warning("This is a warning message")
# logger.error("This is an error message")
# logger.critical("This is a critical message")


Writing src/utils/log_utils.py


In [12]:
%%writefile prepare_data.py
# prepare_data.py
from datasets import load_dataset
from torchvision import transforms
import torch

def tokenize_captions(examples, tokenizer, caption_column):
    """Tokenize the captions of a batch, one caption per example.

    Args:
        examples: Batch mapping; ``examples[caption_column]`` is iterated to
            obtain one caption entry per example.
        tokenizer: Callable tokenizer exposing ``model_max_length``; it is
            called with padding to ``max_length``, truncation enabled and
            ``return_tensors="pt"``.
        caption_column: Key of the caption column in ``examples``.

    Returns:
        The ``input_ids`` attribute of the tokenizer output.

    Raises:
        ValueError: If a caption is neither a string nor a non-empty
            list/tuple whose first element is a string. Such entries used to
            be skipped silently, which left fewer captions than images and
            broke the pairing between ``pixel_values`` and ``input_ids``.
    """
    captions = []
    for caption in examples[caption_column]:
        if isinstance(caption, str):
            captions.append(caption)
        elif (
            isinstance(caption, (list, tuple))
            and len(caption) > 0
            and isinstance(caption[0], str)
        ):
            # Several captions for one image: deterministically keep the first.
            captions.append(caption[0])
        else:
            raise ValueError(
                f"Caption column `{caption_column}` should contain either strings "
                f"or lists of strings, got {type(caption).__name__}."
            )
    inputs = tokenizer(
        captions,
        max_length=tokenizer.model_max_length,
        padding="max_length",
        truncation=True,
        return_tensors="pt"
    )
    return inputs.input_ids

def get_train_transforms(resolution, center_crop=True, random_flip=True):
    """Build the torchvision preprocessing pipeline for training images.

    The pipeline is: bilinear resize -> crop -> flip step -> ToTensor ->
    Normalize([0.5], [0.5]).

    Args:
        resolution: Size passed to the resize and crop transforms.
        center_crop: Use ``CenterCrop`` when true, otherwise ``RandomCrop``.
        random_flip: Use ``RandomHorizontalFlip`` when true, otherwise an
            identity ``Lambda`` so the pipeline keeps the same number of steps.

    Returns:
        A ``transforms.Compose`` holding the steps above.
    """
    crop_step = (
        transforms.CenterCrop(resolution)
        if center_crop
        else transforms.RandomCrop(resolution)
    )
    flip_step = (
        transforms.RandomHorizontalFlip()
        if random_flip
        else transforms.Lambda(lambda x: x)
    )
    return transforms.Compose([
        transforms.Resize(resolution, interpolation=transforms.InterpolationMode.BILINEAR),
        crop_step,
        flip_step,
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5]),
    ])

def preprocess_train(examples, tokenizer, image_column, caption_column, train_transforms):
    """Add ``pixel_values`` and ``input_ids`` to a batch of examples in place.

    Args:
        examples: Batch mapping with an image column and a caption column.
        tokenizer: Tokenizer forwarded to ``tokenize_captions``.
        image_column: Key of the image column; each image is converted to RGB.
        caption_column: Key of the caption column.
        train_transforms: Callable applied to every RGB image.

    Returns:
        The same ``examples`` object, with the two extra keys set.
    """
    examples["pixel_values"] = [
        train_transforms(picture.convert("RGB")) for picture in examples[image_column]
    ]
    examples["input_ids"] = tokenize_captions(examples, tokenizer, caption_column)
    return examples

def collate_fn(examples):
    """Stack per-example tensors into one batch dictionary.

    Args:
        examples: Sequence of mappings, each with ``"pixel_values"`` and
            ``"input_ids"`` tensors.

    Returns:
        Dict with ``"pixel_values"`` (stacked, contiguous, float32) and
        ``"input_ids"`` (stacked, dtype unchanged).
    """
    image_batch = torch.stack([item["pixel_values"] for item in examples])
    image_batch = image_batch.to(memory_format=torch.contiguous_format).float()
    token_batch = torch.stack([item["input_ids"] for item in examples])
    return {"pixel_values": image_batch, "input_ids": token_batch}

def prepare_dataset(dataset_name, train_data_dir, n_train_samples, tokenizer):
    """Load the dataset and optionally keep only the first training samples.

    Args:
        dataset_name: Name passed to ``load_dataset``. When falsy, an
            ``"imagefolder"`` dataset is loaded from ``train_data_dir`` instead.
        train_data_dir: Directory used for the ``"imagefolder"`` fallback.
        n_train_samples: Number of leading training samples to keep. ``None``
            or any value <= 0 keeps the whole split; values larger than the
            split are clamped to its length.
        tokenizer: Not used by this function; kept for interface compatibility.

    Returns:
        The loaded dataset, with its ``"train"`` split possibly truncated.
    """
    if dataset_name:
        dataset = load_dataset(dataset_name)
    else:
        dataset = load_dataset("imagefolder", data_dir=train_data_dir)

    train_data = dataset["train"]
    # The body previously read an undefined name (`train_n_samples`) and raised
    # NameError on every call; the parameter is `n_train_samples`.
    if n_train_samples is not None and n_train_samples > 0:
        n_keep = min(n_train_samples, len(train_data))
        dataset["train"] = train_data.select(range(n_keep))
    return dataset

def get_dataloader(dataset, tokenizer, resolution, center_crop, random_flip, batch_size):
    """Create the shuffled training DataLoader for ``dataset["train"]``.

    The first feature of the train split is treated as the image column and
    the second one as the caption column.

    Args:
        dataset: Mapping with a ``"train"`` split exposing ``features`` and
            ``with_transform``.
        tokenizer: Tokenizer used to encode captions.
        resolution: Target image resolution for the transforms.
        center_crop: Forwarded to ``get_train_transforms``.
        random_flip: Forwarded to ``get_train_transforms``.
        batch_size: Batch size of the DataLoader.

    Returns:
        Tuple ``(dataloader, image_column, caption_column)``.
    """
    train_split = dataset["train"]
    column_names = list(train_split.features.keys())
    image_column = column_names[0]
    caption_column = column_names[1]

    image_pipeline = get_train_transforms(resolution, center_crop, random_flip)

    # Preprocessing is applied lazily, whenever examples are read from the split.
    lazy_train_split = train_split.with_transform(
        lambda examples: preprocess_train(
            examples, tokenizer, image_column, caption_column, image_pipeline
        )
    )

    loader = torch.utils.data.DataLoader(
        lazy_train_split,
        shuffle=True,
        collate_fn=collate_fn,
        batch_size=batch_size,
        num_workers=0,
    )
    return loader, image_column, caption_column


Overwriting prepare_data.py


In [32]:
%%writefile utils.py
import torch
from datetime import datetime
from peft import LoraConfig
from peft.utils import get_peft_model_state_dict
from diffusers.utils import convert_state_dict_to_diffusers
from diffusers import StableDiffusionPipeline

def get_formatted_date():
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    now = datetime.now()
    return f"{now:%Y%m%d-%H%M%S}"

def get_lora_config(lora_rank, lora_alpha, target_modules):
    """Build the ``LoraConfig`` used for the adapter.

    Args:
        lora_rank: Value passed as ``r``.
        lora_alpha: Value passed as ``lora_alpha``.
        target_modules: Module names passed as ``target_modules``,
            e.g. ["to_k", "to_q", "to_v", "to_out.0"].

    Returns:
        A ``LoraConfig`` with ``init_lora_weights="gaussian"``.
    """
    lora_kwargs = {
        "r": lora_rank,
        "lora_alpha": lora_alpha,
        "init_lora_weights": "gaussian",
        "target_modules": target_modules,
    }
    return LoraConfig(**lora_kwargs)

def freeze_parameters(model):
    """Disable gradient tracking for every parameter yielded by ``model.parameters()``."""
    for weight in model.parameters():
        weight.requires_grad_(False)

def save_lora_weights(unet, output_dir, pretrained_model_name_or_path, lora_rank, max_train_steps, resolution, formatted_date, accelerator):
    """Export the UNet's LoRA weights as a single safetensors file.

    The UNet is cast to float32, unwrapped through the accelerator, and its
    PEFT state dict is converted to the diffusers layout before saving.

    Args:
        unet: The (possibly accelerator-wrapped) UNet holding the adapter.
        output_dir: Directory the weight file is written to.
        pretrained_model_name_or_path: Base model id; only the part after the
            last ``/`` goes into the file name.
        lora_rank: Rank embedded in the file name.
        max_train_steps: Step count embedded in the file name.
        resolution: Resolution embedded in the file name.
        formatted_date: Timestamp string embedded in the file name.
        accelerator: Object providing ``unwrap_model``.
    """
    unet = unet.to(torch.float32)
    peft_state = get_peft_model_state_dict(accelerator.unwrap_model(unet))
    lora_state = convert_state_dict_to_diffusers(peft_state)

    base_model = pretrained_model_name_or_path.split('/')[-1]
    weight_name = f"lora_{base_model}_rank{lora_rank}_s{max_train_steps}_r{resolution}_{formatted_date}.safetensors"

    StableDiffusionPipeline.save_lora_weights(
        save_directory=output_dir,
        unet_lora_layers=lora_state,
        safe_serialization=True,
        weight_name=weight_name,
    )


Overwriting utils.py


In [96]:
%%writefile finetune.py
#!/usr/bin/env python
"""
finetune.py

Fine-tuning script for Stable Diffusion with LoRA adaptation.
This script loads experiment configuration using Hydra, sets up the experiment
environment, loads the model and dataset, and runs the training loop.
"""

import os
import shutil
import argparse
import math

import torch
import torch.nn.functional as F
from tqdm.auto import tqdm

from omegaconf import OmegaConf
from hydra import initialize, compose
from hydra.utils import instantiate

from accelerate import Accelerator, utils as accel_utils
from transformers import set_seed

from diffusers import DDPMScheduler, StableDiffusionPipeline
from diffusers.optimization import get_scheduler

# Local modules
from prepare_data import prepare_dataset, get_dataloader
from utils import get_formatted_date, get_lora_config, freeze_parameters, save_lora_weights
from src.utils.log_utils import setup_logger
from src.utils.exp_utils import create_exp_dir


def main():
    """Fine-tune the Stable Diffusion UNet with LoRA adapters.

    Workflow:
      1. Parse ``--config_path`` (extra CLI arguments are forwarded to Hydra
         as overrides) and compose the configuration.
      2. Create the experiment directories and copy the config file into them.
      3. Load the pipeline in fp16, freeze all weights, attach the LoRA
         adapter to the UNet and keep the trainable weights in fp32.
      4. Build the dataloader, optimizer and LR scheduler and hand them to
         ``Accelerator.prepare`` (once).
      5. Run the denoising training loop, optionally saving accelerator
         checkpoints, then export the LoRA weights.
      6. Optionally upload the experiment directory as a W&B artifact.

    Raises:
        FileNotFoundError: If the configuration file does not exist.
        RuntimeError: If Hydra fails to compose the configuration.
        ValueError: If ``exp_manager.exp_name`` differs from the config file
            name, or the scheduler uses an unknown prediction type.
    """
    logger = setup_logger("ft_llm")
    logger.info("Setting up environment...")

    # Parse command-line arguments for configuration.
    parser = argparse.ArgumentParser(description="Process experiment configurations.")
    parser.add_argument(
        "--config_path",
        type=str,
        required=True,
        help="Path to the configuration file for the experiment.",
    )
    args, override_args = parser.parse_known_args()

    # Normalize and validate configuration path.
    config_path = os.path.normpath(args.config_path)
    if not os.path.isfile(config_path):
        raise FileNotFoundError(f"Configuration file not found at: {config_path}")
    config_dir = os.path.dirname(config_path)
    config_fn = os.path.splitext(os.path.basename(config_path))[0]

    # Load configuration using Hydra.
    try:
        with initialize(version_base=None, config_path=config_dir):
            cfg = compose(config_name=config_fn, overrides=override_args)
    except Exception as e:
        # Chain the original exception so the Hydra traceback is preserved.
        raise RuntimeError(f"Failed to load configuration from {config_path}: {e}") from e

    logger.info("Loaded configuration:\n%s", OmegaConf.to_yaml(cfg))

    # Ensure experiment name consistency. An explicit raise is used instead of
    # `assert` so the check still runs under `python -O`; the extension-less
    # file name is reused so the check does not depend on a ".yaml" suffix.
    if config_fn != cfg.exp_manager.exp_name:
        raise ValueError(
            f"Experiment name mismatch: expected {config_fn} but got {cfg.exp_manager.exp_name}"
        )

    # Create experiment directories and copy the config file.
    logger.info("Creating experiment directories...")
    exp_name = cfg.exp_manager.exp_name
    exp_dir, configs_dir, data_dir, checkpoints_dir, results_dir = create_exp_dir(exp_name)
    shutil.copy(config_path, configs_dir)

    # Extract configuration parameters.
    exp_args = cfg.exp_manager
    train_args = cfg.train
    data_args = cfg.prepare_data
    model_args = cfg.prepare_model
    # Alias of the nested node; assignments below write through to `cfg`.
    train_cfg = train_args.train_args

    # Set random seed for reproducibility.
    seed = exp_args.seed if "seed" in exp_args else 2025
    set_seed(seed)

    # Honour the `use_wandb` switch (defaults to enabled when the key is absent).
    wandb_cfg = exp_args.wandb
    use_wandb = bool(wandb_cfg.use_wandb) if "use_wandb" in wandb_cfg else True

    # Setup Accelerator.
    accel_utils.write_basic_config()
    accelerator = Accelerator(
        log_with="wandb" if use_wandb else None,
        gradient_accumulation_steps=train_cfg.gradient_accumulation_steps,
        mixed_precision="fp16"
    )
    device = accelerator.device
    logger.info(f"Device: {device}")

    # Load scheduler, tokenizer, and models.
    noise_scheduler = DDPMScheduler.from_pretrained(model_args.pretrained_model_name_or_path, subfolder="scheduler")
    weight_dtype = torch.float16
    pipe = StableDiffusionPipeline.from_pretrained(
        model_args.pretrained_model_name_or_path, torch_dtype=weight_dtype
    ).to(device)
    tokenizer = pipe.tokenizer
    text_encoder = pipe.text_encoder
    vae = pipe.vae
    unet = pipe.unet

    # Freeze parameters of VAE, text encoder, and unet (except LoRA adapters).
    freeze_parameters(unet)
    freeze_parameters(vae)
    freeze_parameters(text_encoder)

    # Configure and add LoRA adapter to unet.
    unet_lora_config = get_lora_config(model_args.lora.r,
                                       model_args.lora.lora_alpha,
                                       model_args.lora.target_modules)
    unet.add_adapter(unet_lora_config)
    # Trainable (LoRA) weights are kept in fp32 while the frozen base stays fp16.
    for param in unet.parameters():
        if param.requires_grad:
            param.data = param.to(torch.float32)

    # Prepare dataset and dataloader.
    dataset = prepare_dataset(
        data_args.dataset.dataset_name,
        data_args.dataset.train_data_dir,
        data_args.dataset.train_n_samples,
        tokenizer,
    )
    train_dataloader, _, _ = get_dataloader(
        dataset,
        tokenizer,
        data_args.image.resolution,
        data_args.image.center_crop,
        data_args.image.random_flip,
        train_cfg.per_device_train_batch_size,
    )

    # Scheduler and math around the number of training steps.
    overrode_max_train_steps = False

    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / train_cfg.gradient_accumulation_steps)
    if train_cfg.max_train_steps is None:
        train_cfg.max_train_steps = train_cfg.num_train_epochs * num_update_steps_per_epoch
        overrode_max_train_steps = True

    # Initialize optimizer and learning rate scheduler.
    # A list (not a one-shot `filter` iterator) is required here: the optimizer
    # consumes the iterable, and the same parameters are needed again for
    # gradient clipping inside the training loop.
    lora_layers = [p for p in unet.parameters() if p.requires_grad]
    optimizer = torch.optim.AdamW(
        lora_layers,
        lr=train_args.optimizer.learning_rate,
        betas=(train_args.optimizer.adam_beta1, train_args.optimizer.adam_beta2),
        weight_decay=train_args.optimizer.adam_weight_decay,
        eps=train_args.optimizer.adam_epsilon,
    )

    # Optional warmup setting; absent or empty means no warmup. Passing the
    # step counts lets schedules other than "constant" be selected from the
    # config without further code changes.
    lr_warmup_steps = (train_cfg.lr_warmup_steps if "lr_warmup_steps" in train_cfg else 0) or 0
    lr_scheduler = get_scheduler(
        train_cfg.lr_scheduler_name,
        optimizer=optimizer,
        num_warmup_steps=lr_warmup_steps * accelerator.num_processes,
        num_training_steps=train_cfg.max_train_steps * accelerator.num_processes,
    )

    # Prepare everything with our `accelerator` (exactly once: preparing the
    # already-prepared objects a second time would wrap them again).
    unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(
        unet, optimizer, train_dataloader, lr_scheduler
    )

    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / train_cfg.gradient_accumulation_steps)
    if overrode_max_train_steps:
        train_cfg.max_train_steps = train_cfg.num_train_epochs * num_update_steps_per_epoch

    # Afterwards we recalculate our number of training epochs
    train_cfg.num_train_epochs = math.ceil(train_cfg.max_train_steps / num_update_steps_per_epoch)

    # Initialise the tracker run (a no-op when no tracker is configured).
    accelerator.init_trackers(
        project_name=cfg.exp_manager.wandb.project
    )

    # Train!
    total_batch_size = (
        train_cfg.per_device_train_batch_size
        * accelerator.num_processes
        * train_cfg.gradient_accumulation_steps
    )

    logger.info("***** Running training *****")
    # `dataset` is keyed by split, so the example count comes from the train split.
    logger.info(f"  Num examples = {len(dataset['train'])}")
    logger.info(f"  Num batches each epoch = {len(train_dataloader)}")
    logger.info(f"  Num Epochs = {train_cfg.num_train_epochs}")
    logger.info(f"  Instantaneous batch size per device = {train_cfg.per_device_train_batch_size}")
    logger.info(f"  Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}")
    logger.info(f"  Gradient Accumulation steps = {train_cfg.gradient_accumulation_steps}")
    logger.info(f"  Total optimization steps = {train_cfg.max_train_steps}")

    global_step = 0
    first_epoch = 0

    initial_global_step = 0

    progress_bar = tqdm(
        range(train_cfg.max_train_steps),
        initial=initial_global_step,
        desc="Steps",
        disable=not accelerator.is_local_main_process
    )

    # Training loop.
    for epoch in range(first_epoch, train_cfg.num_train_epochs):
        unet.train()
        for step, batch in enumerate(train_dataloader):
            # `accumulate` takes the model(s) as positional arguments, not a list.
            with accelerator.accumulate(unet):
                # Encode images into latent space.
                latents = vae.encode(batch["pixel_values"].to(dtype=weight_dtype)).latent_dist.sample()
                latents = latents * vae.config.scaling_factor

                # Add noise.
                noise = torch.randn_like(latents)
                batch_size = latents.shape[0]
                timesteps = torch.randint(
                    low=0,
                    high=noise_scheduler.config.num_train_timesteps,
                    size=(batch_size,),
                    device=latents.device,
                ).long()

                # Get text embeddings for conditioning.
                encoder_hidden_states = text_encoder(batch["input_ids"])[0]
                noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)

                # Determine target based on prediction type.
                if noise_scheduler.config.prediction_type == "epsilon":
                    target = noise
                elif noise_scheduler.config.prediction_type == "v_prediction":
                    target = noise_scheduler.get_velocity(latents, noise, timesteps)
                else:
                    raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}")

                # Forward pass.
                model_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample

                # Compute loss and perform backpropagation.
                loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean")
                accelerator.backward(loss)

                if accelerator.sync_gradients:
                    accelerator.clip_grad_norm_(lora_layers, train_cfg.max_grad_norm)

                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()

            # Checks if the accelerator has performed an optimization step behind the scenes
            if accelerator.sync_gradients:
                progress_bar.update(1)
                global_step += 1

                # Checkpoint only right after a real optimization step, so the
                # same step is not saved repeatedly during gradient accumulation
                # and nothing is saved before the first update.
                if accelerator.is_main_process and train_cfg.checkpointing_steps:
                    if global_step % train_cfg.checkpointing_steps == 0:
                        save_path = os.path.join(checkpoints_dir, f"checkpoint-{global_step}")
                        accelerator.save_state(save_path)
                        logger.info(f"Saved state to {save_path}")

            logs = {
                "epoch": epoch,
                "step_loss": loss.detach().item(),
                "lr": lr_scheduler.get_last_lr()[0],
            }
            progress_bar.set_postfix(**logs)
            accelerator.log(logs, step=global_step)

            # Stop as soon as the configured number of optimization steps is reached.
            if global_step >= train_cfg.max_train_steps:
                break

        if global_step >= train_cfg.max_train_steps:
            break

    # Save the lora layers
    accelerator.wait_for_everyone()
    if accelerator.is_main_process:
        save_lora_weights(
            unet,
            results_dir,
            model_args.pretrained_model_name_or_path,
            model_args.lora.r,
            train_cfg.max_train_steps,
            data_args.image.resolution,
            get_formatted_date(),
            accelerator,
        )

    # Log exp artifact. Restricted to the main process with W&B enabled, since
    # `wandb.log_artifact` needs the active run created by the tracker.
    if accelerator.is_main_process and use_wandb and bool(exp_args.wandb.log_artifact):
        logger.info("LOGGING EXP ARTIFACTS...")
        # Create an artifact
        import wandb
        artifact = wandb.Artifact(
            name=exp_args.exp_name,
            type="exp",
        )

        # Add the directory to the artifact
        artifact.add_dir(exp_dir)

        wandb.log_artifact(artifact)

    accelerator.end_training()


if __name__ == "__main__":
    main()


Overwriting finetune.py


In [141]:
%%writefile generate.py
import os
import argparse
import torch
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
from diffusers.utils import make_image_grid
from omegaconf import OmegaConf

def main():
    """Generate an image grid with Stable Diffusion and an optional LoRA adapter.

    The YAML file given by ``--config_path`` must provide a ``model`` section
    (``model_name_or_path``, ``lora_path``) and a ``generate`` section
    (``prompt``, ``negative_prompt``, ``num_images_per_prompt``,
    ``generator_seed``, ``width``, ``height``, ``guidance_scale``,
    ``scheduler``). The resulting grid has one row per prompt and
    ``num_images_per_prompt`` columns and is written to ``output.png``.

    Raises:
        ValueError: If ``generate.scheduler`` is not ``"EulerDiscreteScheduler"``.
    """
    parser = argparse.ArgumentParser(
        description="Generate images using Stable Diffusion with LoRA adaptation."
    )
    parser.add_argument(
        "--config_path",
        type=str,
        required=True,
        help="Path to the configuration YAML file.",
    )
    args = parser.parse_args()

    # Load configuration from the specified YAML file.
    config = OmegaConf.load(args.config_path)
    model_cfg = config.model
    gen_cfg = config.generate

    # Extract model parameters.
    model_name_or_path = model_cfg.model_name_or_path
    lora_path = model_cfg.lora_path

    # Extract generation parameters.
    # A single string prompt must be wrapped, not passed through `list()`,
    # which would split it into individual characters.
    raw_prompt = gen_cfg.prompt
    prompt = [raw_prompt] if isinstance(raw_prompt, str) else list(raw_prompt)

    # The pipeline expects the negative prompt to have the same type as the
    # prompt, so a single string is repeated once per prompt. An empty or
    # missing value disables it.
    negative_prompt = gen_cfg.negative_prompt
    if isinstance(negative_prompt, str):
        negative_prompt = [negative_prompt] * len(prompt) if negative_prompt else None
    elif negative_prompt is not None:
        negative_prompt = list(negative_prompt)

    num_images_per_prompt = gen_cfg.num_images_per_prompt
    generator_seed = gen_cfg.generator_seed
    width = gen_cfg.width
    height = gen_cfg.height
    guidance_scale = gen_cfg.guidance_scale
    scheduler_type = gen_cfg.scheduler

    # Set device.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Load the base Stable Diffusion pipeline.
    pipe = StableDiffusionPipeline.from_pretrained(
        model_name_or_path,
        torch_dtype=torch.bfloat16,
    ).to(device)

    if lora_path:
        # Load LoRA weights.
        print("Loading LoRA Adapter...")
        pipe.load_lora_weights(
            pretrained_model_name_or_path_or_dict=lora_path,
            adapter_name="az_lora"
        )

        # Activate the LoRA adapter.
        pipe.set_adapters(["az_lora"], adapter_weights=[1.0])

    # Configure scheduler.
    if scheduler_type == "EulerDiscreteScheduler":
        pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
    else:
        raise ValueError(f"Unsupported scheduler type: {scheduler_type}")

    # Create a random generator for reproducibility.
    generator = torch.Generator(device).manual_seed(generator_seed)

    # Generate images.
    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_images_per_prompt=num_images_per_prompt,
        generator=generator,
        width=width,
        height=height,
        guidance_scale=guidance_scale
    )
    images = output.images

    # Create and save an image grid: one row per prompt, one column per sample.
    rows = len(prompt)
    cols = num_images_per_prompt
    grid = make_image_grid(images, rows=rows, cols=cols)
    grid.save("output.png")
    print("Output image saved as output.png")

if __name__ == "__main__":
    main()


Overwriting generate.py


In [16]:
!mkdir -p configs

In [99]:
%%writefile configs/ft_sd15_lora.yaml
# Experiment configuration read by finetune.py (composed through Hydra).
exp_manager:
  # Must equal this file's name without its extension; finetune.py checks it.
  # Also used as the folder name under exps/ and as the W&B artifact name.
  exp_name: "ft_sd15_lora"
  # Passed to transformers.set_seed (finetune.py falls back to 2025 if absent).
  seed: 202502
  task_name: "stable_diffusion_finetune"
  model_name: "sd-legacy/stable-diffusion-v1-5"
  dataset_name: "yirenlu/heroicons"
  wandb:
    use_wandb: true
    # Project name handed to accelerator.init_trackers.
    project: "text2icon_ft_sd"
    # When true, the whole experiment directory is uploaded as a W&B artifact.
    log_artifact: true
    artifact_types: ['exp', 'data', 'configs', 'results', 'checkpoints']

prepare_data:
  dataset:
    # Dataset passed to load_dataset; when empty, an "imagefolder" dataset is
    # loaded from train_data_dir instead.
    dataset_name: "yirenlu/heroicons"
    train_data_dir: 
    # Number of leading train samples to keep; -1 (any value <= 0) is meant to
    # keep the whole train split.
    train_n_samples: -1
  image:
    # Size used for the resize and crop transforms.
    resolution: 256
    # true -> CenterCrop, false -> RandomCrop.
    center_crop: true
    # true -> RandomHorizontalFlip, false -> no flipping.
    random_flip: true

prepare_model:
  # Base model for the pipeline and the noise scheduler.
  pretrained_model_name_or_path: "sd-legacy/stable-diffusion-v1-5"
  use_peft: true
  lora:
    # LoRA rank (LoraConfig `r`); also embedded in the saved weight file name.
    r: 4
    lora_alpha: 4
    # UNet module names that receive LoRA adapters.
    target_modules: ["to_k", "to_q", "to_v", "to_out.0"]

train:
  # AdamW settings.
  optimizer:
    learning_rate: 1e-5
    adam_beta1: 0.9
    adam_beta2: 0.999
    adam_weight_decay: 1e-2
    adam_epsilon: 1e-08
  train_args:
    resume_from_checkpoint:
    per_device_train_batch_size: 4
    gradient_accumulation_steps: 1
    gradient_checkpointing: 
    num_train_epochs: 100
    # Empty -> computed as num_train_epochs * optimizer updates per epoch.
    max_train_steps: 
    # Empty -> no intermediate accelerator checkpoints are written.
    checkpointing_steps: 
    # Scheduler name passed to diffusers' get_scheduler.
    lr_scheduler_name: "constant"
    # Max norm for gradient clipping.
    max_grad_norm: 1.0


Overwriting configs/ft_sd15_lora.yaml


In [None]:
%%writefile configs/generate_sd15_lora.yaml
# Generation configuration read by generate.py (loaded with OmegaConf.load).
model:
    # Base Stable Diffusion model loaded by StableDiffusionPipeline.from_pretrained.
    model_name_or_path: "sd-legacy/stable-diffusion-v1-5"
    # LoRA weight file passed to pipe.load_lora_weights; leave empty to use the
    # base model without an adapter.
    lora_path: exps/ft_sd15_lora/results/lora_stable-diffusion-v1-5_rank4_s7300_r256_20250210-103454.safetensors
    # NOTE(review): not read by generate.py, which always writes output.png.
    output_dir: "./output_dir"

generate:
  # One row of the output grid per prompt.
  prompt: ['an icon of a phone', 'an icon of an laptop', 'an icon of a TV', 'an icon of a headphone', 'an icon of a earphone']
  negative_prompt: "low quality, blur, watermark, words, name"
  # Number of images per prompt = number of grid columns.
  num_images_per_prompt: 4
  # Seed for the torch.Generator used during sampling.
  generator_seed: 202502
  width: 256
  height: 256
  guidance_scale: 8.5
  # Only "EulerDiscreteScheduler" is accepted; other values raise ValueError.
  scheduler: "EulerDiscreteScheduler"

In [19]:
%%writefile requirements.txt
# Python dependencies installed on top of the notebook image.
# NOTE(review): the scripts in this notebook also import diffusers, colorama,
# torch, torchvision, tqdm and huggingface_hub, none of which are listed here --
# presumably they are preinstalled in the Kaggle image; TODO confirm or pin them.
datasets==3.2.0
accelerate==1.2.1
peft==0.14.0
trl==0.14.0
bitsandbytes==0.45.1
# NOTE(review): installed from the repository's default branch without a pin,
# so the installed version can change between runs.
git+https://github.com/huggingface/transformers
wandb==0.19.1
omegaconf==2.3.0
pyyaml==6.0.2
hydra-core==1.3.2
python-dotenv==1.0.1
evaluate==0.4.3
rouge_score==0.1.2
py7zr==0.22.0


Writing requirements.txt


In [None]:
%pip install -r requirements.txt

In [None]:
!accelerate launch --num_processes=1 finetune.py --config_path configs/ft_sd15_lora.yaml

In [None]:
!accelerate launch --num_processes=1 generate.py --config_path configs/generate_sd15_lora.yaml

In [103]:
!ls /kaggle/working/exps/ft_sd15_lora/results

lora_stable-diffusion-v1-5_rank4_s7300_r256_20250210-103454.safetensors
