In [1]:
import model_loader
import pipeline_copy
from PIL import Image
from pathlib import Path
from transformers import CLIPTokenizer
from encoder import VAE_Encoder
from decoder import VAE_Decoder
from diffusion_copy import Diffusion
from torchvision import transforms

import torch
import os

DEVICE = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")

# Tokenizer 설정
tokenizer = CLIPTokenizer("../data/vocab.json", merges_file="../data/merges.txt")
model_file = "/home/NAS_mount/seunghan/v1-5-pruned-emaonly.ckpt"

# 저장된 모델 파일 경로
checkpoint_path = "/home/fall/latent-diffusion-homemade/ldms/checkpoints/model_epoch_33.pth"
models = model_loader.preload_models_from_standard_weights(model_file, DEVICE)

# Fine-tuned Encoder 및 Decoder 가중치 경로
clear_encoder_weight_path = "/home/fall/latent-diffusion-homemade/ldms/checkpoints/sharp_encoder_bilinear_epoch_100.pth"
blur_encoder_weight_path ="/home/fall/latent-diffusion-homemade/ldms/checkpoints/blur_encoder_conv_epoch_220.pth"
decoder_weight_path = "/home/fall/latent-diffusion-homemade/ldms/checkpoints/sharp_decoder_bilinear_epoch_100.pth"
# 모델 로드
def load_finetuned_model(models, checkpoint_path, clear_encoder_weight_path, blur_encoder_weight_path, decoder_weight_path, device):    
    # Diffusion 모델에 저장된 가중치 적용
    diffusion_model = Diffusion().to(device)
    diffusion_model.load_state_dict(torch.load(checkpoint_path, map_location=device), strict=False)
    models['diffusion'] = diffusion_model.to(device)
    
    # Encoder와 Decoder 로드 및 추가
    clear_encoder = VAE_Encoder().to(device)
    blur_encoder = VAE_Encoder().to(device)
    decoder = VAE_Decoder().to(device)
    
    # 가중치 로드 함수
    def load_state_dict_without_module(model, state_dict):
        new_state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
        model.load_state_dict(new_state_dict)
    
    # Encoder와 Decoder 가중치 로드
    clear_encoder_state_dict = torch.load(clear_encoder_weight_path, map_location=device)
    blur_encoder_state_dict = torch.load(blur_encoder_weight_path, map_location=device)
    decoder_state_dict = torch.load(decoder_weight_path, map_location=device)
    load_state_dict_without_module(clear_encoder, clear_encoder_state_dict)
    load_state_dict_without_module(blur_encoder, blur_encoder_state_dict)
    load_state_dict_without_module(decoder, decoder_state_dict)
    
    # 모델 딕셔너리에 Encoder와 Decoder 추가
    models['clear encoder'] = clear_encoder
    models['blur encoder'] = blur_encoder
    models['decoder'] = decoder
        
    return models

# 저장된 모델 로드
models = load_finetuned_model(models, checkpoint_path, clear_encoder_weight_path, blur_encoder_weight_path, decoder_weight_path, DEVICE)

print("Fine-tuned models successfully loaded.")


def center_crop(image, crop_size):
    width, height = image.size
    new_width, new_height = crop_size
    left = (width - new_width) // 2
    top = (height - new_height) // 2
    right = (width + new_width) // 2
    bottom = (height + new_height) // 2
    return image.crop((left, top, right, bottom))




  from .autonotebook import tqdm as notebook_tqdm
  diffusion_model.load_state_dict(torch.load(checkpoint_path, map_location=device), strict=False)
  clear_encoder_state_dict = torch.load(clear_encoder_weight_path, map_location=device)
  blur_encoder_state_dict = torch.load(blur_encoder_weight_path, map_location=device)
  decoder_state_dict = torch.load(decoder_weight_path, map_location=device)


Fine-tuned models successfully loaded.


In [2]:

# 이미지 디블러링을 위한 샘플링 파라미터
do_cfg = True
cfg_scale = 8  # Prompt에 집중하는 정도
image_path = "../images/blur_image.png"  # 블러 이미지 경로

# 이미지 로드
input_image = Image.open(image_path)
input_image = center_crop(input_image, (256, 256))
# 이미지 중앙 부분 crop
transform = transforms.Compose([
    transforms.CenterCrop((256, 256)), 
    transforms.ToTensor()
])

input_tensor = transform(input_image).unsqueeze(0).to(DEVICE)

strength = 0.8 # Higher: More noise, Lower: Less noise

# 샘플러 설정
sampler = "ddpm"
num_inference_steps = 50
seed = 42

output_path = "../images/output_deblurred_image.png"


# 이미지 생성
output_image = pipeline_copy.generate(
    prompt = "clean image",
    uncond_prompt = "",
    # condition_image=input_image,
    strength=strength,
    do_cfg=do_cfg,
    cfg_scale=cfg_scale,
    sampler_name=sampler,
    n_inference_steps=num_inference_steps,
    seed=seed,
    models=models.module if isinstance(models, torch.nn.DataParallel) else models,
    device=DEVICE,
    idle_device="cpu",
    tokenizer=tokenizer,
)

# 결과 이미지 저장
output_path = "../images/output_deblurred_image.png"
Image.fromarray(output_image).save(output_path)
print(f"Deblurred image saved at {output_path}")

  0%|          | 0/50 [00:00<?, ?it/s]

latent shape: torch.Size([2, 4, 32, 32]), blur_latent shape: torch.Size([2, 4, 32, 32])


  2%|▏         | 1/50 [00:00<00:11,  4.30it/s]


latent shape: torch.Size([2, 4, 32, 32]), blur_latent shape: torch.Size([4, 4, 32, 32])


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 2 but got size 4 for tensor number 1 in the list.

In [None]:
Image.fromarray(output_image)