In [2]:
import os
import torch
# import torchvision
import accelerate
import torch
from torch import autocast
from diffusers import StableDiffusionPipeline, DDIMScheduler
from IPython.display import display
import os
# import cv2

import warnings
warnings.filterwarnings('ignore')

In [3]:
!pip list | grep torch

torch                    2.7.0
torchaudio               2.7.0
torchvision              0.22.0


In [4]:
os.environ['INSTANCE_DIR'] = "./data/instance_images"
os.environ['CLASS_DIR'] = "./data/class_images"

os.environ['MODEL_NAME'] = "./cache_dir/models/civitai_model"
os.environ['OUTPUT_DIR'] = "./experiments/"

## Define inference prompts and method

In [5]:
def generate_images(
    pipe,
    prompts_list,
    negative_prompt,
    save_dir="outputs",
    name_exp="experiment",
    num_samples=4,
    guidance_scale=7.5,
    num_inference_steps=35,
    height=768,
    width=1024,
    generator=None
):
    exp_dir = os.path.join(save_dir, name_exp)
    os.makedirs(exp_dir, exist_ok=True)
    
    device = "cuda" if torch.cuda.is_available() else "cpu"
    
    for prompt_idx, prompt in enumerate(prompts_list):
        print(f"Generating images for prompt {prompt_idx + 1}/{len(prompts_list)}: {prompt}")
        
        with autocast(device), torch.inference_mode():
            images = pipe(
                prompt,
                height=height,
                width=width,
                negative_prompt=negative_prompt,
                num_images_per_prompt=num_samples,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                generator=generator
            ).images
        
        for img_idx, img in enumerate(images):
            filename = f"prompt{prompt_idx+1}_img{img_idx+1}.png"
            save_path = os.path.join(exp_dir, filename)
            img.save(save_path)
            print(f"Saved: {save_path}")


In [6]:

prompts = [
    "portrait of sks woman face, on the street, lights, midnight, NY, standing, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes",
    "portrait of sks woman face, in the desert, wearing a dress, sun, pyramids, сamels, Egypt, standing, standing alone, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes",
    "portrait of sks woman face, in the game of thrones, wearing a dress, holding a knife, standing alone aside, dragons nearby, sad face, snowing, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes",
    "portrait of sks woman face, cinderella, in the princess castle with rainbow, smiling, extremely happy, standing in the full height, cartoon, 4K, raw, hrd, hd, high quality, sharp focus",
    "portrait of sks tall woman, astronaut, moon, space, standing in full height, 4K, raw, hrd, hd, high quality, sharp focus",
]
negative_prompt = "naked, nsfw, deformed, distorted, disfigured, poorly drawn, bad anatomy, extra limb, missing limb, floating limbs, mutated hands disconnected limbs, mutation, ugly, blurry, amputation"

# Обучаем

## exp1

In [21]:
output_path = os.path.join(os.environ['OUTPUT_DIR'], "exp1_v1")
os.makedirs(output_path, exist_ok=True)

In [4]:
!python ./diffusers/examples/dreambooth/train_dreambooth.py \
  --pretrained_model_name_or_path=$MODEL_NAME \
  --instance_data_dir=$INSTANCE_DIR \
  --class_data_dir=$CLASS_DIR \
  --output_dir=$OUTPUT_DIR \
  --instance_prompt="a photo of sks woman face" \
  --class_prompt="a photo of woman face " \
  --with_prior_preservation --prior_loss_weight=1.0 \
  --resolution=512 \
  --train_batch_size=1 \
  --learning_rate=2e-6 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --gradient_accumulation_steps=1 \
  --num_class_images=500 \
  --max_train_steps=800 \
  --checkpointing_steps=800 \
  --use_8bit_adam \
  --mixed_precision="no"\
  --train_text_encoder \
  --seed 31 > log 2> mistakes

### inference

In [35]:
import torch
from torch import autocast
from diffusers import StableDiffusionPipeline, DDIMScheduler
from IPython.display import display
import os

model_path = output_path

pipe = StableDiffusionPipeline.from_pretrained(model_path, safety_checker=None, torch_dtype=torch.float16).to("cuda")
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
g_cuda = torch.Generator(device='cuda')
seed = 31
g_cuda.manual_seed(seed)


generate_images(
    pipe=pipe,
    prompts_list=prompts,
    negative_prompt=negative_prompt,
    save_dir="./results/",
    name_exp="exp1",
    num_samples=1,
    generator=g_cuda
)

Loading pipeline components...: 100%|██████████| 6/6 [00:01<00:00,  4.05it/s]
You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


Generating images for prompt 1/5: portrait of sks woman face, on the street, lights, midnight, NY, standing, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  8.15it/s]


Saved: ./results/exp1/prompt1_img1.png
Generating images for prompt 2/5: portrait of sks woman face, in the desert, wearing a dress, sun, pyramids, сamels, Egypt, standing, standing alone, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  8.13it/s]


Saved: ./results/exp1/prompt2_img1.png
Generating images for prompt 3/5: portrait of sks woman face, in the game of thrones, wearing a dress, holding a knife, standing alone aside, dragons nearby, sad face, snowing, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  8.17it/s]


Saved: ./results/exp1/prompt3_img1.png
Generating images for prompt 4/5: portrait of sks woman face, cinderella, in the princess castle with rainbow, smiling, extremely happy, standing in the full height, cartoon, 4K, raw, hrd, hd, high quality, sharp focus


100%|██████████| 35/35 [00:04<00:00,  8.16it/s]


Saved: ./results/exp1/prompt4_img1.png
Generating images for prompt 5/5: portrait of sks tall woman, astronaut, moon, space, standing in full height, 4K, raw, hrd, hd, high quality, sharp focus


100%|██████████| 35/35 [00:04<00:00,  8.13it/s]


Saved: ./results/exp1/prompt5_img1.png


## exp2

In [6]:
output_path = os.path.join(os.environ['OUTPUT_DIR'], "exp2")
os.makedirs(output_path, exist_ok=True)

In [10]:
output_path

'./experiments/exp2'

In [None]:
# !export CUDA_VISIBLE_DEVICES=0
!python ./diffusers/examples/dreambooth/train_dreambooth.py \
  --pretrained_model_name_or_path=$MODEL_NAME \
  --instance_data_dir=$INSTANCE_DIR \
  --class_data_dir=$CLASS_DIR \
  --output_dir=output_path \
  --instance_prompt="a photo of sks woman face" \
  --class_prompt="a photo of woman face " \
  --with_prior_preservation --prior_loss_weight=1.0 \
  --resolution=512 \
  --train_batch_size=1 \
  --learning_rate=1e-6 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --gradient_accumulation_steps=1 \
  --num_class_images=500 \
  --max_train_steps=800 \
  --checkpointing_steps=800 \
  --use_8bit_adam \
  --mixed_precision="no"\
  --train_text_encoder \
  --seed 31 > ./dumps/log2 2> ./dumps/mistakes2

### inference

In [8]:
model_path = output_path
model_path

'./experiments/exp2'

In [11]:
import torch
from torch import autocast
from diffusers import StableDiffusionPipeline, DDIMScheduler
from IPython.display import display
import os



pipe = StableDiffusionPipeline.from_pretrained('./experiments/', safety_checker=None, torch_dtype=torch.float16).to("cuda")
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
g_cuda = torch.Generator(device='cuda')
seed = 31
g_cuda.manual_seed(seed)


generate_images(
    pipe=pipe,
    prompts_list=prompts,
    negative_prompt=negative_prompt,
    save_dir="./results/",
    name_exp="exp2",
    num_samples=1,
    generator=g_cuda
)

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

Loading pipeline components...: 100%|██████████| 6/6 [00:01<00:00,  4.27it/s]
You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


Generating images for prompt 1/5: portrait of sks woman face, on the street, lights, midnight, NY, standing, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


  return F.conv2d(input, weight, bias, self.stride,
100%|██████████| 35/35 [00:04<00:00,  7.73it/s]


Saved: ./results/exp2/prompt1_img1.png
Generating images for prompt 2/5: portrait of sks woman face, in the desert, wearing a dress, sun, pyramids, сamels, Egypt, standing, standing alone, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  8.13it/s]


Saved: ./results/exp2/prompt2_img1.png
Generating images for prompt 3/5: portrait of sks woman face, in the game of thrones, wearing a dress, holding a knife, standing alone aside, dragons nearby, sad face, snowing, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  8.13it/s]


Saved: ./results/exp2/prompt3_img1.png
Generating images for prompt 4/5: portrait of sks woman face, cinderella, in the princess castle with rainbow, smiling, extremely happy, standing in the full height, cartoon, 4K, raw, hrd, hd, high quality, sharp focus


100%|██████████| 35/35 [00:04<00:00,  8.14it/s]


Saved: ./results/exp2/prompt4_img1.png
Generating images for prompt 5/5: portrait of sks tall woman, astronaut, moon, space, standing in full height, 4K, raw, hrd, hd, high quality, sharp focus


100%|██████████| 35/35 [00:04<00:00,  8.17it/s]


Saved: ./results/exp2/prompt5_img1.png


## exp3

In [6]:
output_path = os.path.join(os.environ['OUTPUT_DIR'], "exp3")
os.makedirs(output_path, exist_ok=True)
output_path

'./experiments/exp3'

In [None]:
# !export CUDA_VISIBLE_DEVICES=0
!python ./diffusers/examples/dreambooth/train_dreambooth.py \
  --pretrained_model_name_or_path=$MODEL_NAME \
  --instance_data_dir=$INSTANCE_DIR \
  --class_data_dir=$CLASS_DIR \
  --output_dir=$output_path \
  --instance_prompt="a photo of sks woman face" \
  --class_prompt="a photo of woman face " \
  --with_prior_preservation \
  --prior_loss_weight=1.2 \
  --resolution=512 \
  --train_batch_size=1 \
  --learning_rate=2e-6 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --gradient_accumulation_steps=1 \
  --num_class_images=500 \
  --max_train_steps=800 \
  --checkpointing_steps=800 \
  --use_8bit_adam \
  --mixed_precision="no"\
  --train_text_encoder \
  --seed 31 > ./dumps/log3 2> ./dumps/mistakes3

### inference

In [8]:
model_path = output_path
model_path

'./experiments/exp3'

In [12]:
import torch
from torch import autocast
from diffusers import StableDiffusionPipeline, DDIMScheduler
from IPython.display import display
import os



pipe = StableDiffusionPipeline.from_pretrained(model_path, safety_checker=None, torch_dtype=torch.float16).to("cuda")
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
g_cuda = torch.Generator(device='cuda')
seed = 31
g_cuda.manual_seed(seed)


generate_images(
    pipe=pipe,
    prompts_list=prompts,
    negative_prompt=negative_prompt,
    save_dir="./results/",
    name_exp="exp3",
    num_samples=1,
    generator=g_cuda
)

Loading pipeline components...: 100%|██████████| 6/6 [00:00<00:00,  6.37it/s]
You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


Generating images for prompt 1/5: portrait of sks woman face, on the street, lights, midnight, NY, standing, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  8.20it/s]


Saved: ./results/exp3/prompt1_img1.png
Generating images for prompt 2/5: portrait of sks woman face, in the desert, wearing a dress, sun, pyramids, сamels, Egypt, standing, standing alone, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  8.12it/s]


Saved: ./results/exp3/prompt2_img1.png
Generating images for prompt 3/5: portrait of sks woman face, in the game of thrones, wearing a dress, holding a knife, standing alone aside, dragons nearby, sad face, snowing, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  8.13it/s]


Saved: ./results/exp3/prompt3_img1.png
Generating images for prompt 4/5: portrait of sks woman face, cinderella, in the princess castle with rainbow, smiling, extremely happy, standing in the full height, cartoon, 4K, raw, hrd, hd, high quality, sharp focus


100%|██████████| 35/35 [00:04<00:00,  8.19it/s]


Saved: ./results/exp3/prompt4_img1.png
Generating images for prompt 5/5: portrait of sks tall woman, astronaut, moon, space, standing in full height, 4K, raw, hrd, hd, high quality, sharp focus


100%|██████████| 35/35 [00:04<00:00,  8.16it/s]


Saved: ./results/exp3/prompt5_img1.png


# Lora training

## exp1_lora

rank = 32

In [12]:
!python diffusers/examples/dreambooth/train_dreambooth_lora.py \
  --pretrained_model_name_or_path="./experiments/exp1_v1" \
  --instance_data_dir=$INSTANCE_DIR \
  --output_dir="./lora_outputs/exp1_v1/" \
  --instance_prompt="a photo of sks woman face" \
  --resolution=512 \
  --train_batch_size=1 \
  --gradient_accumulation_steps=1 \
  --learning_rate=1e-6 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --max_train_steps=200 \
  --checkpointing_steps=20 \
  --use_8bit_adam \
  --mixed_precision="no"\
  --train_text_encoder \
  --validation_prompt="a photo of sks woman face in NYC, rain, dark, night, full moon, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes" \
  --validation_epochs=50 \
  --seed=31 \
  --rank=32 > ./dumps/log_lora1 2> ./dumps/mistakes_lora1
  # --report_to="clearml"

### inference

In [13]:
def load_lora_pipeline(model_path, lora_path):
    pipe = StableDiffusionPipeline.from_pretrained(
        model_path, 
        safety_checker=None, 
        torch_dtype=torch.float16
    ).to("cuda")
    
    pipe.unet.load_attn_procs(lora_path)
    
    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
    
    return pipe

In [14]:
lora_path = "./lora_outputs/exp1_v1/"  # Путь к обученной LoRA

g_cuda = torch.Generator(device='cuda')
seed = 31
g_cuda.manual_seed(seed)

pipe = load_lora_pipeline("./experiments/exp1_v1", lora_path)
generate_images(
    pipe=pipe,
    prompts_list=prompts,
    negative_prompt=negative_prompt,
    save_dir="./results/",
    name_exp="exp1_lora",
    num_samples=1,
    generator=g_cuda
)

Loading pipeline components...: 100%|██████████| 6/6 [00:07<00:00,  1.27s/it]
You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


Generating images for prompt 1/5: portrait of sks woman face, on the street, lights, midnight, NY, standing, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  7.42it/s]


Saved: ./results/exp1_lora/prompt1_img1.png
Generating images for prompt 2/5: portrait of sks woman face, in the desert, wearing a dress, sun, pyramids, сamels, Egypt, standing, standing alone, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  7.38it/s]


Saved: ./results/exp1_lora/prompt2_img1.png
Generating images for prompt 3/5: portrait of sks woman face, in the game of thrones, wearing a dress, holding a knife, standing alone aside, dragons nearby, sad face, snowing, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  7.39it/s]


Saved: ./results/exp1_lora/prompt3_img1.png
Generating images for prompt 4/5: portrait of sks woman face, cinderella, in the princess castle with rainbow, smiling, extremely happy, standing in the full height, cartoon, 4K, raw, hrd, hd, high quality, sharp focus


100%|██████████| 35/35 [00:04<00:00,  7.37it/s]


Saved: ./results/exp1_lora/prompt4_img1.png
Generating images for prompt 5/5: portrait of sks tall woman, astronaut, moon, space, standing in full height, 4K, raw, hrd, hd, high quality, sharp focus


100%|██████████| 35/35 [00:04<00:00,  7.35it/s]


Saved: ./results/exp1_lora/prompt5_img1.png


## exp2

In [8]:
!python diffusers/examples/dreambooth/train_dreambooth_lora.py \
  --pretrained_model_name_or_path="./experiments/exp1_v1" \
  --instance_data_dir=$INSTANCE_DIR \
  --output_dir="./lora_outputs/exp2/" \
  --instance_prompt="a photo of sks woman face" \
  --resolution=512 \
  --train_batch_size=1 \
  --gradient_accumulation_steps=1 \
  --learning_rate=1e-6 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --max_train_steps=200 \
  --checkpointing_steps=20 \
  --use_8bit_adam \
  --mixed_precision="no"\
  --train_text_encoder \
  --validation_prompt="a photo of sks woman face in NYC, rain, dark, night, full moon, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes" \
  --validation_epochs=50 \
  --seed=31 \
  --rank=64 > ./dumps/log_lora1 2> ./dumps/mistakes_lora1
  # --report_to="clearml"

### inference

In [11]:
lora_path = "./lora_outputs/exp2/"  # Путь к обученной LoRA

g_cuda = torch.Generator(device='cuda')
seed = 31
g_cuda.manual_seed(seed)

pipe = load_lora_pipeline("./experiments/exp1_v1", lora_path)
generate_images(
    pipe=pipe,
    prompts_list=prompts,
    negative_prompt=negative_prompt,
    save_dir="./results/",
    name_exp="exp2_lora",
    num_samples=1,
    generator=g_cuda
)

Loading pipeline components...: 100%|██████████| 6/6 [00:07<00:00,  1.29s/it]
You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .
  deprecate("load_attn_procs", "0.40.0", deprecation_message)


Generating images for prompt 1/5: portrait of sks woman face, on the street, lights, midnight, NY, standing, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


  return F.conv2d(input, weight, bias, self.stride,
100%|██████████| 35/35 [00:05<00:00,  6.89it/s]


Saved: ./results/exp2_lora/prompt1_img1.png
Generating images for prompt 2/5: portrait of sks woman face, in the desert, wearing a dress, sun, pyramids, сamels, Egypt, standing, standing alone, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  7.33it/s]


Saved: ./results/exp2_lora/prompt2_img1.png
Generating images for prompt 3/5: portrait of sks woman face, in the game of thrones, wearing a dress, holding a knife, standing alone aside, dragons nearby, sad face, snowing, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  7.32it/s]


Saved: ./results/exp2_lora/prompt3_img1.png
Generating images for prompt 4/5: portrait of sks woman face, cinderella, in the princess castle with rainbow, smiling, extremely happy, standing in the full height, cartoon, 4K, raw, hrd, hd, high quality, sharp focus


100%|██████████| 35/35 [00:04<00:00,  7.30it/s]


Saved: ./results/exp2_lora/prompt4_img1.png
Generating images for prompt 5/5: portrait of sks tall woman, astronaut, moon, space, standing in full height, 4K, raw, hrd, hd, high quality, sharp focus


100%|██████████| 35/35 [00:04<00:00,  7.28it/s]


Saved: ./results/exp2_lora/prompt5_img1.png


## exp3

In [16]:
!python diffusers/examples/dreambooth/train_dreambooth_lora.py \
  --pretrained_model_name_or_path="./experiments/exp1_v1" \
  --instance_data_dir=$INSTANCE_DIR \
  --output_dir="./lora_outputs/exp3/" \
   --instance_prompt="a photo of sks woman face" \
  --resolution=512 \
  --train_batch_size=1 \
  --gradient_accumulation_steps=1 \
  --learning_rate=1e-6 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --max_train_steps=200 \
  --checkpointing_steps=20 \
  --use_8bit_adam \
  --mixed_precision="no"\
  --train_text_encoder \
  --validation_prompt="a photo of sks woman face in NYC, rain, dark, night, full moon, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes" \
  --validation_epochs=50 \
  --seed=31 \
  --rank=16 > ./dumps/log_lora1 2> ./dumps/mistakes_lora1

### inference

In [17]:
lora_path = "./lora_outputs/exp3/"  # Путь к обученной LoRA

g_cuda = torch.Generator(device='cuda')
seed = 31
g_cuda.manual_seed(seed)

pipe = load_lora_pipeline("./experiments/exp1_v1", lora_path)
generate_images(
    pipe=pipe,
    prompts_list=prompts,
    negative_prompt=negative_prompt,
    save_dir="./results/",
    name_exp="exp3_lora",
    num_samples=1,
    generator=g_cuda
)

Loading pipeline components...: 100%|██████████| 6/6 [00:07<00:00,  1.27s/it]
You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


Generating images for prompt 1/5: portrait of sks woman face, on the street, lights, midnight, NY, standing, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  7.37it/s]


Saved: ./results/exp3_lora/prompt1_img1.png
Generating images for prompt 2/5: portrait of sks woman face, in the desert, wearing a dress, sun, pyramids, сamels, Egypt, standing, standing alone, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  7.32it/s]


Saved: ./results/exp3_lora/prompt2_img1.png
Generating images for prompt 3/5: portrait of sks woman face, in the game of thrones, wearing a dress, holding a knife, standing alone aside, dragons nearby, sad face, snowing, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  7.32it/s]


Saved: ./results/exp3_lora/prompt3_img1.png
Generating images for prompt 4/5: portrait of sks woman face, cinderella, in the princess castle with rainbow, smiling, extremely happy, standing in the full height, cartoon, 4K, raw, hrd, hd, high quality, sharp focus


100%|██████████| 35/35 [00:04<00:00,  7.29it/s]


Saved: ./results/exp3_lora/prompt4_img1.png
Generating images for prompt 5/5: portrait of sks tall woman, astronaut, moon, space, standing in full height, 4K, raw, hrd, hd, high quality, sharp focus


100%|██████████| 35/35 [00:04<00:00,  7.30it/s]


Saved: ./results/exp3_lora/prompt5_img1.png


In [25]:
!python diffusers/examples/dreambooth/train_dreambooth_lora.py \
  --pretrained_model_name_or_path="./experiments/exp1_v1" \
  --instance_data_dir=$INSTANCE_DIR \
  --output_dir="./lora_outputs/exp4/" \
  --instance_prompt="a photo of sks woman face" \
  --resolution=520 \
  --train_batch_size=1 \
  --gradient_accumulation_steps=1 \
  --learning_rate=2e-5 \
  --lr_scheduler="cosine_with_restarts" \
  --lr_warmup_steps=20 \
  --max_train_steps=300 \
  --checkpointing_steps=50 \
  --use_8bit_adam \
  --mixed_precision="no" \
  --train_text_encoder \
  --validation_prompt="a photo of sks woman face in NYC, rain, dark, night, full moon" \
  --validation_epochs=25 \
  --seed=31 \
  --rank=96 > ./dumps/log_lora1 2> ./dumps/mistakes_lora1

In [26]:
lora_path = "./lora_outputs/exp4/"  # Путь к обученной LoRA

g_cuda = torch.Generator(device='cuda')
seed = 31
g_cuda.manual_seed(seed)

pipe = load_lora_pipeline("./experiments/exp1_v1", lora_path)
generate_images(
    pipe=pipe,
    prompts_list=prompts,
    negative_prompt=negative_prompt,
    save_dir="./results/",
    name_exp="exp4_lora",
    num_samples=1,
    generator=g_cuda
)

Loading pipeline components...: 100%|██████████| 6/6 [00:08<00:00,  1.48s/it]
You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .
  deprecate("load_attn_procs", "0.40.0", deprecation_message)


Generating images for prompt 1/5: portrait of sks woman face, on the street, lights, midnight, NY, standing, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  7.35it/s]


Saved: ./results/exp4_lora/prompt1_img1.png
Generating images for prompt 2/5: portrait of sks woman face, in the desert, wearing a dress, sun, pyramids, сamels, Egypt, standing, standing alone, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  7.31it/s]


Saved: ./results/exp4_lora/prompt2_img1.png
Generating images for prompt 3/5: portrait of sks woman face, in the game of thrones, wearing a dress, holding a knife, standing alone aside, dragons nearby, sad face, snowing, 4K, raw, hrd, hd, high quality, realism, sharp focus, beautiful eyes, detailed eyes


100%|██████████| 35/35 [00:04<00:00,  7.31it/s]


Saved: ./results/exp4_lora/prompt3_img1.png
Generating images for prompt 4/5: portrait of sks woman face, cinderella, in the princess castle with rainbow, smiling, extremely happy, standing in the full height, cartoon, 4K, raw, hrd, hd, high quality, sharp focus


100%|██████████| 35/35 [00:04<00:00,  7.30it/s]


Saved: ./results/exp4_lora/prompt4_img1.png
Generating images for prompt 5/5: portrait of sks tall woman, astronaut, moon, space, standing in full height, 4K, raw, hrd, hd, high quality, sharp focus


100%|██████████| 35/35 [00:04<00:00,  7.29it/s]


Saved: ./results/exp4_lora/prompt5_img1.png


# ControlNet

In [7]:
import torch
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
from diffusers import UNet2DConditionModel
from diffusers.utils import load_image
from PIL import Image
import numpy as np

In [24]:
def prepare_control_image(image_path, control_type="canny"):
    """
    Подготовка контрольного изображения для ControlNet
    Args:
        image_path (str): путь к исходному изображению
        control_type (str): тип ControlNet
    Returns:
        control_image: обработанное изображение
    """
    image = load_image(image_path)
    image = np.array(image)
    
    if control_type == "canny":
        import cv2
        image = cv2.Canny(image, 100, 200)
        image = image[:, :, None]
        image = np.concatenate([image, image, image], axis=2)
        control_image = Image.fromarray(image)
    elif control_type == "depth":
        from transformers import pipeline
        depth_estimator = pipeline("depth-estimation")
        control_image = depth_estimator(Image.fromarray(image))["depth"]
    elif control_type == "pose":
        from controlnet_aux import OpenposeDetector
        processor = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
        control_image = processor(Image.fromarray(image))
    else:
        control_image = image
    
    return control_image

In [27]:
def load_controlnet_pipeline(unet_path=None, lora_path=None, controlnet_type="canny"):
    # Load base ControlNet model
    controlnet = ControlNetModel.from_pretrained(
        f"lllyasviel/sd-controlnet-{controlnet_type}",
        torch_dtype=torch.float16
    ).to("cuda")
    
    # Initialize pipeline
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        unet_path,
        controlnet=controlnet,
        torch_dtype=torch.float16,
        safety_checker=None
    ).to("cuda")
    
    # Load LoRA weights if provided
    if lora_path is not None:
        pipe.load_lora_weights(lora_path)
    
    # Optimizations
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    pipe.enable_model_cpu_offload()
    pipe.enable_xformers_memory_efficient_attention()
    
    return pipe


def generate_images(
    pipe,
    base_prompts,
    control_image,
    negative_prompt=None,
    output_dir="controlnet_outputs",
    exp_name='experiment',
    seed=42,
    num_samples=4,
    num_inference_steps=20,
    guidance_scale=7.5,
    controlnet_conditioning_scale=0.8
):
    output_path = os.path.join(output_dir, exp_name)
    os.makedirs(output_path, exist_ok=True)
    
    # Prepare prompts
    prompt_suffix = ", best quality, extremely detailed, 4k, hdr, super resolution"
    prompts = [t + prompt_suffix for t in base_prompts]
    
    # Prepare negative prompt
    if negative_prompt is None:
        negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
    negative_prompts = [negative_prompt] * len(prompts)
    
    # Prepare generators
    generators = [torch.Generator(device="cuda").manual_seed(seed + i) for i in range(len(prompts))]
    
    # Generate images
    results = pipe(
        prompt=prompts,
        image=control_image,
        negative_prompt=negative_prompts,
        generator=generators,
        num_images_per_prompt=num_samples,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        controlnet_conditioning_scale=controlnet_conditioning_scale
    )
    
    # Save images
    for i, image in enumerate(results.images):
        image.save(os.path.join(output_path, f"prompt{i+1}_image{i+1}.png"))

### exp1 - canny

In [28]:
pipe1 = load_controlnet_pipeline(
        unet_path="/netapp/a.gorokhova/itmo/DeepGenModels/hw4/experiments/exp1_v1",
        lora_path="/netapp/a.gorokhova/itmo/DeepGenModels/hw4/lora_outputs/exp4/",
        controlnet_type="canny"
    )

Loading pipeline components...: 100%|██████████| 6/6 [00:00<00:00,  8.21it/s]
You have disabled the safety checker for <class 'diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


In [None]:
input_image_path = "./control_images/style_arcane.jpg"  # Replace with your image path
canny_image = prepare_control_image(input_image_path)




No model was supplied, defaulted to Intel/dpt-large and revision bc15f29 (https://huggingface.co/Intel/dpt-large).
Using a pipeline without specifying a model name and revision in production is not recommended.


Some weights of DPTForDepthEstimation were not initialized from the model checkpoint at Intel/dpt-large and are newly initialized: ['neck.fusion_stage.layers.0.residual_layer1.convolution1.bias', 'neck.fusion_stage.layers.0.residual_layer1.convolution1.weight', 'neck.fusion_stage.layers.0.residual_layer1.convolution2.bias', 'neck.fusion_stage.layers.0.residual_layer1.convolution2.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cuda:0


In [29]:
generate_images(
        pipe=pipe1,
        base_prompts=prompts,
        negative_prompt=negative_prompt,
        control_image=canny_image,
        num_samples=1,
        output_dir="./results/",
        exp_name='exp1_controlnet',
        seed=31,
        num_inference_steps=35,
    )

100%|██████████| 35/35 [00:05<00:00,  5.99it/s]


### exp2 - depth

In [None]:
depth_image = prepare_control_image(input_image_path, control_type="depth")
pipe2 = load_controlnet_pipeline(
        unet_path="/netapp/a.gorokhova/itmo/DeepGenModels/hw4/experiments/exp1_v1",
        lora_path="/netapp/a.gorokhova/itmo/DeepGenModels/hw4/lora_outputs/exp4/",
        controlnet_type="depth"
    )


generate_images(
        pipe=pipe2,
        base_prompts=prompts,
        negative_prompt=negative_prompt,
        control_image=depth_image,
        num_samples=1,
        output_dir="./results/",
        exp_name='exp2_controlnet',
        seed=31,
        num_inference_steps=35,
    )

Loading pipeline components...: 100%|██████████| 6/6 [00:00<00:00,  7.63it/s]
You have disabled the safety checker for <class 'diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .
100%|██████████| 35/35 [00:05<00:00,  5.86it/s]


### exp3 - pose

In [32]:
pose_image = prepare_control_image(input_image_path, control_type="pose")

pipe3 = load_controlnet_pipeline(
        unet_path="/netapp/a.gorokhova/itmo/DeepGenModels/hw4/experiments/exp1_v1",
        lora_path="/netapp/a.gorokhova/itmo/DeepGenModels/hw4/lora_outputs/exp4/",
        controlnet_type="openpose"
    )

generate_images(
        pipe=pipe3,
        base_prompts=prompts,
        negative_prompt=negative_prompt,
        control_image=pose_image,
        num_samples=1,
        output_dir="./results/",
        exp_name='exp3_controlnet',
        seed=31,
        num_inference_steps=35,
    )

Loading pipeline components...: 100%|██████████| 6/6 [00:00<00:00,  7.57it/s]
You have disabled the safety checker for <class 'diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .
100%|██████████| 35/35 [00:19<00:00,  1.76it/s]
