# LaMa+UNet 코드

### 1. 라이브러리 설치

In [None]:
!pip install umap-learn hdbscan transformers

In [None]:
!pip install modelscope
!pip install datasets==2.16.0
!pip install oss2
!pip install addict
!pip install albumentations==0.4.6
!pip install sortedcontainers
!pip install yapf==0.40.1
!pip install kornia -U
!pip install torchvision

In [None]:
!pip install polygenerator lightning segmentation-models-pytorch

In [2]:
!pwd

/home/kwy00/song


### 2. 기본 설정및 데이터셋 로드

In [3]:
import os
import cv2
import random
import numpy as np
import skimage
import umap
import hdbscan
import pandas as pd
import torch

from glob import glob
from collections import Counter
from tqdm.auto import tqdm
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
from matplotlib import pyplot as plt
from sklearn.preprocessing import normalize

  from .autonotebook import tqdm as notebook_tqdm
2024-12-07 12:35:55.035568: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-07 12:35:55.049860: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-07 12:35:55.174773: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-07 12:35:55.175913: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# 기본 설정
BATCH_SIZE = 8  # RTX 3080 10GB 메모리에 맞게 조정
SEED = 42

In [5]:
# 수정 제안
BATCH_SIZE = 8 # RTX 3080 10GB에 최적화
IMAGE_SIZE = 256  # 이미지 크기 명시
NUM_WORKERS = 8 # 데이터 로딩 최적화
PIN_MEMORY = True  # GPU 메모리 전송 최적화
SEED = 42

In [6]:
# CUDA 사용 가능 여부 확인
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [7]:
image_paths = sorted(glob('/home/kwy00/song/lama-with-refiner/extracted_files/train_gt/*.png'))

In [9]:
import os
import math
import random
import zipfile
import cv2
import numpy as np
import skimage
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import lightning as L
import segmentation_models_pytorch as smp

from tqdm.auto import tqdm
from glob import glob
from PIL import Image
from polygenerator import (
    random_polygon,
    random_star_shaped_polygon,
    random_convex_polygon,
)
from sklearn.model_selection import KFold
from skimage.metrics import structural_similarity as ski_ssim
from torch.utils.data import Dataset, DataLoader
from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping

from matplotlib import pyplot as plt

In [10]:
torch.set_float32_matmul_precision('medium')

In [11]:
def get_input_image(image, min_polygon_bbox_size=50):
    # 입력 이미지의 크기 가져오기
    width, height = image.size

    while True:
        # 랜덤한 바운딩 박스 좌표 생성
        bbox_x1 = random.randint(0, width-min_polygon_bbox_size)
        bbox_y1 = random.randint(0, height-min_polygon_bbox_size)
        bbox_x2 = random.randint(bbox_x1, width)  # x1보다 큰 x2 좌표
        bbox_y2 = random.randint(bbox_y1, height)  # y1보다 큰 y2 좌표

        # 바운딩 박스가 최소 크기보다 작으면 다시 생성
        if (bbox_x2-bbox_x1)<min_polygon_bbox_size or (bbox_y2-bbox_y1)<min_polygon_bbox_size:
            continue

        # 바운딩 박스 정보 저장
        mask_bbox = [bbox_x1, bbox_y1, bbox_x2, bbox_y2]
        mask_width = bbox_x2-bbox_x1
        mask_height = bbox_y2-bbox_y1

        # 랜덤한 다각형 생성을 위한 설정
        num_points = random.randint(3,20)  # 3~20개의 꼭지점
        # 다각형 생성 함수 랜덤 선택
        polygon_func = random.choice([
            random_polygon,
            random_star_shaped_polygon,
            random_convex_polygon
        ])

        # 0~1 스케일로 다각형 생성 후 실제 크기로 변환
        polygon = polygon_func(num_points=num_points) #scaled 0~1
        polygon = [(round(r*mask_width), round(c*mask_height)) for r,c in polygon]

        # 다각형 마스크 생성
        polygon_mask = skimage.draw.polygon2mask((mask_width, mask_height), polygon)

        # 다각형 크기가 최소 크기 조건을 만족하면 루프 종료
        if np.sum(polygon_mask)>(min_polygon_bbox_size//2)**2:
            break

    # 전체 이미지 크기의 마스크 생성
    full_image_mask = np.zeros((width, height), dtype=np.uint8)
    full_image_mask[bbox_x1:bbox_x2, bbox_y1:bbox_y2] = polygon_mask

    # 그레이스케일 이미지 생성 및 마스크 적용
    image_gray = image.convert('L')  # RGB를 그레이스케일로 변환
    image_gray_array = np.array(image_gray)
    random_color = random.randint(0, 255)  # 랜덤한 그레이스케일 값 생성
    # 마스크 영역에 랜덤 색상 적용
    image_gray_array[full_image_mask == 1] = random_color
    image_gray_masked = Image.fromarray(image_gray_array)

    # 결과 반환
    return {
        'image_gt': image,               # 원본 이미지
        'mask': full_image_mask,         # 생성된 마스크
        'image_gray': image_gray,        # 그레이스케일 이미지
        'image_gray_masked': image_gray_masked  # 마스크가 적용된 그레이스케일 이미지
    }

In [12]:
def get_ssim_score(true, pred):
    # 전체 RGB 이미지를 사용해 SSIM 계산 (channel_axis=-1)
    ssim_value = ski_ssim(true, pred, channel_axis=-1, data_range=pred.max() - pred.min())
    return ssim_value

def get_masked_ssim_score(true, pred, mask):
    # 손실 영역의 좌표에서만 RGB 채널별 픽셀 값 추출
    true_masked_pixels = true[mask > 0]
    pred_masked_pixels = pred[mask > 0]

    # 손실 영역 픽셀만으로 SSIM 계산 (채널축 사용)
    ssim_value = ski_ssim(
        true_masked_pixels,
        pred_masked_pixels,
        channel_axis=-1,
        data_range=pred.max() - pred.min()
    )
    return ssim_value

def get_histogram_similarity(true, pred, cvt_color=cv2.COLOR_RGB2HSV):
    # BGR 이미지를 HSV로 변환
    true_hsv = cv2.cvtColor(true, cvt_color)
    pred_hsv = cv2.cvtColor(pred, cvt_color)

    # H 채널에서 히스토그램 계산 및 정규화
    hist_true = cv2.calcHist([true_hsv], [0], None, [180], [0, 180])
    hist_pred = cv2.calcHist([pred_hsv], [0], None, [180], [0, 180])
    hist_true = cv2.normalize(hist_true, hist_true).flatten()
    hist_pred = cv2.normalize(hist_pred, hist_pred).flatten()

    # 히스토그램 간 유사도 계산 (상관 계수 사용)
    similarity = cv2.compareHist(hist_true, hist_pred, cv2.HISTCMP_CORREL)
    return similarity

In [13]:
# 실험의 재현성을 위한 랜덤 시드 설정
SEED = 42
# K-fold 교차 검증을 위한 분할 수
N_SPLIT = 5
# 모델 학습시 한 번에 처리할 데이터 개수
BATCH_SIZE = 8
NUM_WORKERS = 8  # Ryzen 5800x의 코어 수를 고려
PIN_MEMORY = True  # GPU 메모리 전송 최적화
# 이미지 전처리를 위한 정규화 파라미터
# 일반적으로 ImageNet 데이터셋의 평균과 표준편차 값을 사용
IMAGE_PREPROC_MEAN = 0.5    # 이미지 픽셀값의 평균
IMAGE_PREPROC_STD = 0.225   # 이미지 픽셀값의 표준편차

# 다각형 마스크 생성시 최소 바운딩 박스 크기
MIN_POLYGON_BBOX_SIZE = 64  # 픽셀 단위

# 학습 관련 파라미터 (새로 추가)
LEARNING_RATE = 1e-4
WEIGHT_DECAY = 1e-5

In [14]:
# 데이터 경로 설정
TRAIN_DATA_DIR = '/home/kwy00/song/lama-with-refiner/extracted_files/train_gt'  # 학습용 원본 이미지 경로
VALID_DATA_DIR = f'/home/kwy00/song/data/valid_input/{SEED=}-{MIN_POLYGON_BBOX_SIZE=}'  # 검증 데이터 경로
TEST_DATA_DIR = '/home/kwy00/song/lama-with-refiner/extracted_files/test_input'  # 테스트 데이터 경로
SUBMISSON_DATA_DIR = '/home/kwy00/song/lama-with-refiner/submission'  # 제출 파일 저장 경로

# 실험 설정
EXPERIMENT_NAME = 'seventh'  # 현재 실험의 이름

In [15]:
L.seed_everything(SEED)

Seed set to 42


42

In [16]:
train_df = pd.read_csv('/home/kwy00/song/train_preproc.csv')
test_df = pd.read_csv('/home/kwy00/song/test_preproc.csv')

In [None]:
# 검증 데이터 저장을 위한 디렉토리 생성
os.makedirs(VALID_DATA_DIR, exist_ok=True)

# 학습 데이터프레임의 모든 이미지에 대해 반복
for idx, row in tqdm(train_df.iterrows(), total=len(train_df)):
    # 원본 이미지 경로 가져오기
    img_path = train_df.iloc[idx, 0]
    img_path = os.path.join(TRAIN_DATA_DIR, img_path)

    # 저장할 파일명 생성 (TRAIN -> VALID, png -> npy로 변환)
    save_image_name = os.path.basename(img_path).replace('TRAIN', 'VALID').replace('png','npy')
    save_image_path = f'{VALID_DATA_DIR}/{save_image_name}'

    # 이미 처리된 파일은 건너뛰기
    if os.path.exists(save_image_path):
        continue

    # 이미지 열기 및 마스크 생성
    image = Image.open(img_path)
    valid_input_image = get_input_image(image, MIN_POLYGON_BBOX_SIZE)

    # 생성된 데이터를 numpy 배열로 저장
    np.save(save_image_path, valid_input_image)

In [18]:
train_df_outlier = train_df[train_df['label']==-1]
train_df = train_df[train_df['label']!=-1]

In [19]:
kf = KFold(n_splits=N_SPLIT, shuffle=True, random_state=SEED)

In [20]:
# K-fold 교차 검증을 위한 데이터 분할
for fold_idx, (train_indices, valid_indices) in enumerate(kf.split(train_df['image'], train_df['label'])):
    # 학습용 데이터와 검증용 데이터 분리
    train_fold_df = train_df.iloc[train_indices].reset_index(drop=True)
    valid_fold_df = train_df.iloc[valid_indices].reset_index(drop=True)

    # 검증 데이터의 파일명 변환 (TRAIN -> VALID, png -> npy)
    valid_fold_df['image'] = valid_fold_df['image'].apply(lambda x: x.replace('TRAIN', 'VALID').replace('png', 'npy'))

    # 검증 속도 향상을 위해 각 레이블당 하나의 샘플만 유지
    valid_fold_df = valid_fold_df.drop_duplicates('label')
    # train_fold_df = pd.concat([train_fold_df,train_df_outlier],axis=0).reset_index(drop=True)
    # 첫 번째 폴드만 사용
    break

### 3. LaMa git clone

In [None]:
!git clone https://github.com/geomagical/lama-with-refiner.git

### 4. train 데이터셋으로 학습시킨 모델로 만든 test 데이터셋 마스크 이미지 다운

In [None]:
import zipfile
import os

def unzip_file(zip_file_path, extract_to_path):
    """
    ZIP 파일을 지정된 폴더에 압축 해제하는 함수.

    Args:
        zip_file_path (str): 압축 해제할 ZIP 파일 경로.
        extract_to_path (str): 파일을 풀어놓을 폴더 경로.

    Returns:
        None
    """
    # ZIP 파일 열기
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        # 압축 해제
        zip_ref.extractall(extract_to_path)
    print(f"압축 해제 완료: {extract_to_path}")

# 사용 예시
zip_file_path = "/home/kwy00/song/predicted_mask3.zip"
extract_to_path ="./lama-with-refiner/predicted_masks3"
unzip_file(zip_file_path, extract_to_path)

### 5. 데이터 클래스 정의

In [24]:

class CustomImageDataset(Dataset):
    def __init__(self, df, data_dir='/home/kwy00/song/content/extracted_files/train_gt', mode='train',mask_dir = "/home/kwy00/song/lama-with-refiner/predicted_masks3" ,min_polygon_bbox_size=MIN_POLYGON_BBOX_SIZE):
        self.df = df
        self.data_dir = data_dir
        self.mode = mode
        self.min_polygon_bbox_size = min_polygon_bbox_size
        self.mask_dir = mask_dir

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Get image path and label
        img_path = self.df.iloc[idx, 0]  # Assuming first column is the path
        img_path = os.path.join(self.data_dir, img_path)

        # Apply augmentation if in training mode
        if self.mode == 'train':
            image = Image.open(img_path)
            image_input = get_input_image(image, self.min_polygon_bbox_size)
            return image_input

        elif self.mode == 'valid':
            image_input = self.load_input_image(img_path)
            return image_input
        elif self.mode == 'test':
            image = Image.open(img_path)
            img_name = os.path.basename(img_path)
            mask_path = os.path.join(self.mask_dir, img_name)
            mask = Image.open(mask_path)
            return {
                'image_gray_masked':image,
                'mask':mask
            }

    def load_input_image(self, img_input_path):
        image_input = np.load(img_input_path, allow_pickle=True)
        return image_input.item()


In [25]:
train_dataset = CustomImageDataset(train_fold_df, data_dir=TRAIN_DATA_DIR, mode='train')
valid_dataset = CustomImageDataset(valid_fold_df, data_dir=VALID_DATA_DIR, mode='valid')
test_dataset = CustomImageDataset(test_df, data_dir=TEST_DATA_DIR, mode='test')

In [26]:
class CollateFn:
    def __init__(self, mean=IMAGE_PREPROC_MEAN, std=IMAGE_PREPROC_STD, mode='train'):
        self.mode = mode
        self.mean = mean
        self.std = std

    def __call__(self, examples):
        if self.mode =='train' or self.mode=='valid':
            # Initialize lists to store each component of the batch
            masks= []
            images_gray = []
            images_gray_masked = []
            images_gt = []

            for example in examples:
                # Assuming each example is a dictionary with keys 'mask', 'image_gray', 'image_gray_masked', 'image_gt'
                masks.append(example['mask'])
                images_gray.append(self.normalize_image(example['image_gray']))
                images_gray_masked.append(self.normalize_image(example['image_gray_masked']))
                images_gt.append(self.normalize_image(np.array(example['image_gt'])))

            return {
                'masks': torch.from_numpy(np.stack(masks)).long(),
                'images_gray': torch.from_numpy(np.stack(images_gray)).unsqueeze(1).float(),
                'images_gray_masked': torch.from_numpy(np.stack(images_gray_masked)).unsqueeze(1).float(),
                'images_gt': torch.from_numpy(np.stack(images_gt)).permute(0,3,1,2).float()
            }

        elif self.mode == 'test':
            images_gray_masked = []
            masks=[]
            for example in examples:
                images_gray_masked.append(self.normalize_image(example['image_gray_masked']))
                masks.append(example['mask'])
            return {
                'images_gray_masked': torch.from_numpy(np.stack(images_gray_masked)).unsqueeze(1).float(),
                'mask': torch.from_numpy(np.stack(masks)).long()
            }

    def normalize_image(self, image):
        return (np.array(image)/255-self.mean)/self.std

In [27]:
%cd lama-with-refiner

/home/kwy00/song/lama-with-refiner


In [28]:
train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=8,  # Ryzen 5800x의 8코어 활용
    pin_memory=True,  # GPU 메모리 전송 최적화
    collate_fn=CollateFn(mode='train'),
    persistent_workers=True  # 워커 재사용으로 성능 향상
)

valid_dataloader = DataLoader(
    valid_dataset,
    batch_size=BATCH_SIZE*2,
    shuffle=False,
    num_workers=8,
    pin_memory=True,
    collate_fn=CollateFn(mode='valid'),
    persistent_workers=True
)

test_dataloader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE*2,
    shuffle=False,
    num_workers=8,
    pin_memory=True,
    collate_fn=CollateFn(mode='test'),
    persistent_workers=True
)

### 6. LaMa pretrained 된 가중치 다운

In [None]:
!curl -LJO https://huggingface.co/smartywu/big-lama/resolve/main/big-lama.zip
!unzip big-lama.zip -d ./weights/

In [None]:

!mkdir -p ade20k/ade20k-resnet50dilated-ppm_deepsup/
!wget -P ade20k/ade20k-resnet50dilated-ppm_deepsup/ http://sceneparsing.csail.mit.edu/model/pytorch/ade20k-resnet50dilated-ppm_deepsup/encoder_epoch_20.pth


In [None]:
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [None]:
!pip install -r requirements.txt

### 7. LaMa에 필요한 라이브러리 import

In [35]:
from saicinpainting.training.trainers import make_training_model
from saicinpainting.training.trainers import load_checkpoint

In [None]:



import logging
import os
import sys
import traceback

os.environ['OMP_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'
os.environ['VECLIB_MAXIMUM_THREADS'] = '1'
os.environ['NUMEXPR_NUM_THREADS'] = '1'

import hydra
from omegaconf import OmegaConf
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.loggers import TensorBoardLogger


from saicinpainting.training.trainers import make_training_model
from saicinpainting.utils import register_debug_signal_handlers, handle_ddp_subprocess, handle_ddp_parent_process, \
    handle_deterministic_config


LOGGER = logging.getLogger(__name__)

In [37]:
config_path='./weights/big-lama'
config_name='config.yaml'

In [38]:
config = OmegaConf.load(os.path.join(config_path, config_name))

In [None]:
import logging
import torch
from saicinpainting.training.trainers.default import DefaultInpaintingTrainingModule

IMAGE_PREPROC_MEAN=0.5
IMAGE_PREPROC_STD=0.225

### 8. 평가지표 정의

In [40]:
from skimage.metrics import structural_similarity as ski_ssim
import cv2
def get_ssim_score(true, pred):
    # 전체 RGB 이미지를 사용해 SSIM 계산 (channel_axis=-1)
    ssim_value = ski_ssim(true, pred, channel_axis=-1, data_range=pred.max() - pred.min())
    return ssim_value

def get_masked_ssim_score(true, pred, mask):
    # 손실 영역의 좌표에서만 RGB 채널별 픽셀 값 추출
    true_masked_pixels = true[mask > 0]
    pred_masked_pixels = pred[mask > 0]

    # 손실 영역 픽셀만으로 SSIM 계산 (채널축 사용)
    ssim_value = ski_ssim(
        true_masked_pixels,
        pred_masked_pixels,
        channel_axis=-1,
        data_range=pred.max() - pred.min()
    )
    return ssim_value

def get_histogram_similarity(true, pred, cvt_color=cv2.COLOR_RGB2HSV):
    # BGR 이미지를 HSV로 변환
    true_hsv = cv2.cvtColor(true, cvt_color)
    pred_hsv = cv2.cvtColor(pred, cvt_color)

    # H 채널에서 히스토그램 계산 및 정규화
    hist_true = cv2.calcHist([true_hsv], [0], None, [180], [0, 180])
    hist_pred = cv2.calcHist([pred_hsv], [0], None, [180], [0, 180])
    hist_true = cv2.normalize(hist_true, hist_true).flatten()
    hist_pred = cv2.normalize(hist_pred, hist_pred).flatten()

    # 히스토그램 간 유사도 계산 (상관 계수 사용)
    similarity = cv2.compareHist(hist_true, hist_pred, cv2.HISTCMP_CORREL)
    return similarity

### 10. LaMa용 로드 함수 정의

In [41]:
import logging
import torch
from saicinpainting.training.trainers.default import DefaultInpaintingTrainingModule


def get_training_model_class(kind):
    if kind == 'default':
        return DefaultInpaintingTrainingModule

    raise ValueError(f'Unknown trainer module {kind}')


def make_training_model(config):
    kind = config.training_model.kind
    kwargs = dict(config.training_model)
    kwargs.pop('kind')
    kwargs['use_ddp'] = config.trainer.kwargs.get('accelerator', None) == 'ddp'

    logging.info(f'Make training model {kind}')

    cls = get_training_model_class(kind)
    return cls(config, **kwargs)


def load_checkpoint(train_config, path, map_location='cuda', strict=True):
    model: torch.nn.Module = make_training_model(train_config)
    state = torch.load(path, map_location=map_location)
    model.load_state_dict(state['state_dict'], strict=strict)
    model.on_load_checkpoint(state)
    return model

### 11. 텐서보드 실행

In [42]:
%load_ext tensorboard
%tensorboard --logdir lightning_logs/ --port=0


Reusing TensorBoard on port 45399 (pid 57988), started 17:04:00 ago. (Use '!kill 57988' to kill it.)

### 12. 체크포인트와 earlystoppig 정의

In [45]:
from pytorch_lightning.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping
# 체크포인트 저장 경로와 규칙 정의
checkpoint_callback = ModelCheckpoint(
    dirpath="./checkpointss/",           # 체크포인트 저장 디렉토리
    filename=f'best2-{fold_idx=}-{SEED=}'+'-{epoch:02d}-{val_score:.4f}',  # 파일 이름 형식
    save_top_k=1,                     # 가장 낮은 검증 손실을 기록한 3개만 저장
    monitor="val_score",               # 모니터링할 메트릭
    mode="max",                       # 손실이 작을수록 좋음
    save_weights_only=False,          # 전체 모델 상태를 저장
    verbose=True                      # 저장 시 메시지 출력
)
earlystopping_callback = EarlyStopping(monitor="val_score",min_delta=1e-4, mode="max", patience=5,verbose=True)

In [46]:
from lightning.pytorch import Trainer
trainer = Trainer(max_epochs=40, precision='32', callbacks=[checkpoint_callback,earlystopping_callback,], detect_anomaly=False)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [47]:
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'  # 메모리 단편화 방지

In [48]:
from saicinpainting.training.trainers import load_checkpoint

### 13. 합친 모델 클래스 정의

In [52]:
import segmentation_models_pytorch as smp
model_2 = smp.Unet(
    encoder_name="efficientnet-b3",
    encoder_weights="imagenet",
    in_channels=1,
    classes=3,
)


In [None]:
import lightning
class placemodel(lightning.pytorch.LightningModule):
  def __init__(self,  image_mean=IMAGE_PREPROC_MEAN, image_std=IMAGE_PREPROC_STD):
    super().__init__()
    self.training = True
    checkpoint_path = "/home/kwy00/song/lama-with-refiner/weights/big-lama/models/best.ckpt"
    self.model_1 = load_checkpoint(config,checkpoint_path,strict=False)
    # 모든 파라미터 고정
    for param in self.model_1.generator.parameters():
        param.requires_grad = False
    for param in self.model_1.generator.model[22].parameters():
        param.requires_grad = True
    for param in self.model_1.discriminator.parameters():
        param.requires_grad = False

    # 특정 레이어만 학습 가능
    
    self.model_2 = model_2
    self.image_mean=image_mean
    self.image_std=image_std

  def forward(self, images_gray_masked):
      if self.training == True:


        images_gray_masked['image'] = torch.cat([images_gray_masked['image'],images_gray_masked['image'],images_gray_masked['image']],dim=1)
        igm = self.model_1(images_gray_masked)

        rgb_images = igm['inpainted']
        rgb_onechannel = rgb_images[:, 0:1, :, :]

        mask_min =rgb_onechannel.min()
        mask_max =rgb_onechannel.max()

        # 정규화 해제하여 원래 값 복원
        images_gray_restored= rgb_onechannel * (mask_max - mask_min + 1e-8) + mask_min



        images_restored = self.model_2(rgb_onechannel)
        #텐서보드에 이미지 볼수있는 코드 
        #self.logger.experiment.add_images('Training/Model2 output', images_restored, self.global_step) 
        return rgb_onechannel, images_restored
      else :
        with torch.no_grad():

            images_gray_masked['image'] = torch.cat([images_gray_masked['image'],images_gray_masked['image'],images_gray_masked['image']],dim=1)
            
            #self.logger.experiment.add_images('Testing/Model1 mask', images_gray_masked['mask'], self.global_step)
            igm = self.model_1(images_gray_masked)
            rgb_images = igm['inpainted']
            #self.logger.experiment.add_images('Testing/Model1 output', rgb_images[:, 0:1, :, :], self.global_step)
            rgb_onechannel = rgb_images[:, 0:1, :, :]
            images_gray_restored = rgb_onechannel

            mask_min =images_gray_restored.min()
            mask_max =images_gray_restored.max()

            # 정규화 해제하여 원래 값 복원
            images_gray_restored= images_gray_restored * (mask_max - mask_min + 1e-8) + mask_min
            images_restored = self.model_2(images_gray_restored)
            #self.logger.experiment.add_images('Testing/Model2 output', images_restored, self.global_step)
        return rgb_onechannel, images_restored


  def unnormalize(self, output, round=False):
        image_restored = ((output*self.image_std+self.image_mean)*255).clamp(0,255)
        if round:
            image_restored = torch.round(image_restored)
        return image_restored

  def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters(), lr=LEARNING_RATE,weight_decay=WEIGHT_DECAY )
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='max',
            factor=0.5,
            patience=3,
            verbose=True
        )
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "monitor": "val_score"
            }
        }


  def training_step(self, batch, batch_idx):
        self.training = True
        opt = self.optimizers()
        opt.zero_grad()
        batch['masks'] = (batch['masks'] -batch['masks'].min()) / (batch['masks'].max() - batch['masks'].min() + 1e-8)
        batch['images_gray_masked'] = (batch['images_gray_masked'] -batch['images_gray_masked'].min()) / (batch['images_gray_masked'].max() - batch['images_gray_masked'].min() + 1e-8)
        batch['images_gray'] = (batch['images_gray'] -batch['images_gray'].min()) / (batch['images_gray'].max() -batch['images_gray'].min() + 1e-8)
        batch['images_gt'] = (batch['images_gt'] -batch['images_gt'].min()) / (batch['images_gt'].max() -batch['images_gt'].min() + 1e-8)

        masks, images_gray_masked, images_gray, images_gt = batch['masks'], batch['images_gray_masked'], batch['images_gray'], batch['images_gt']
        batch2 ={}

        batch2['image'] = batch['images_gray']
        batch2['mask'] = batch['masks']
        batch2['mask'] = batch2['mask'].unsqueeze(1)


        images_gray_restored, images_restored = self(batch2)

        mask_min =images_gray_restored.min()
        mask_max =images_gray_restored.max()

        # 정규화 해제하여 원래 값 복원
        images_gray_restored= images_gray_restored * (mask_max - mask_min + 1e-8) + mask_min


        mask_min =images_gray.min()
        mask_max =images_gray.max()

        # 정규화 해제하여 원래 값 복원
        images_gray = images_gray * (mask_max - mask_min + 1e-8) + mask_min

        loss_pixel_gray = F.l1_loss(images_gray, images_gray_restored, reduction='mean') * 0.3 + F.mse_loss(images_gray, images_gray_restored, reduction='mean') * 0.7
        loss_pixel = F.l1_loss(images_gt, images_restored, reduction='mean') * 0.3 + F.mse_loss(images_gt, images_restored, reduction='mean') * 0.7
        loss = loss_pixel_gray * 0.3 + loss_pixel * 0.7

        self.log("train_loss", loss, on_step=True, on_epoch=False)
        self.log("train_loss_pixel_gray", loss_pixel_gray, on_step=True, on_epoch=False)
        self.log("train_loss_pixel", loss_pixel, on_step=True, on_epoch=False)





        return loss


  def validation_step(self, batch, batch_idx):
      self.training = True
      batch['images_gray_masked'] = (batch['images_gray_masked'] -batch['images_gray_masked'].min()) / (batch['images_gray_masked'].max() - batch['images_gray_masked'].min() + 1e-8)
      batch['images_gt'] = (batch['images_gt'] -batch['images_gt'].min()) / (batch['images_gt'].max() -batch['images_gt'].min() + 1e-8)

      masks, images_gray_masked, images_gt = batch['masks'], batch['images_gray_masked'], batch['images_gt']

      batch2 = {}
      batch2['image'] = batch['images_gray_masked']
      batch2['mask'] = batch['masks']
      batch2['mask'] = batch2['mask'].unsqueeze(1)  # (Batch, 1, Height, Width)

      images_gray_restored, images_restored = self(batch2)

      images_gt, images_restored = self.unnormalize(images_gt, round=True), self.unnormalize(images_restored, round=True)
      masks_np = masks.detach().cpu().numpy()
      images_gt_np = images_gt.detach().cpu().permute(0,2,3,1).float().numpy().astype(np.uint8)
      images_restored_np = images_restored.detach().cpu().permute(0,2,3,1).float().numpy().astype(np.uint8)
      total_ssim_score = 0
      masked_ssim_score = 0
      hist_sim_score = 0
      for image_gt_np, image_restored_np, mask_np in zip(images_gt_np, images_restored_np, masks_np):
          total_ssim_score += get_ssim_score(image_gt_np, image_restored_np) / len(images_gt)
          masked_ssim_score += get_masked_ssim_score(image_gt_np, image_restored_np, mask_np)/ len(images_gt)
          hist_sim_score += get_histogram_similarity(image_gt_np, image_restored_np, cv2.COLOR_RGB2HSV)/ len(images_gt)
      score = total_ssim_score * 0.2 + masked_ssim_score * 0.4 + hist_sim_score * 0.4
      self.log(f"val_score", score, on_step=False, on_epoch=True)
      self.log(f"val_total_ssim_score", total_ssim_score, on_step=False, on_epoch=True)
      self.log(f"val_masked_ssim_score", masked_ssim_score, on_step=False, on_epoch=True)
      self.log(f"val_hist_sim_score", hist_sim_score, on_step=False, on_epoch=True)


      return score





  def predict_step(self, batch, batch_idx):
        self.training = False
        batch['images_gray_masked'] = (batch['images_gray_masked'] -batch['images_gray_masked'].min()) / (batch['images_gray_masked'].max() - batch['images_gray_masked'].min() + 1e-8)
        batch['mask'] = (batch['mask'] -batch['mask'].min()) / (batch['mask'].max() - batch['mask'].min() + 1e-8)
        images_gray_masked = batch['images_gray_masked']
        batch2 ={}
        batch2['image'] = images_gray_masked

  
        batch2['mask'] = batch['mask'].unsqueeze(1)

        images_gray_restored, images_restored = self(batch2)
        #self.logger.experiment.add_images('Training/Model1 images_restored', images_restored, self.global_step)
        images_restored = self.unnormalize(images_restored, round=True)

        #self.logger.experiment.add_images('Training/Model1 unnormalize', images_restored, self.global_step)
        images_restored_np = images_restored.detach().cpu().permute(0,2,3,1).float().numpy().astype(np.uint8)
        #self.logger.experiment.add_images('Training/Model1 images_restored', images_restored_np, self.global_step)
        return images_restored_np


### 14. 모델 학습

In [53]:
trainer.fit(placemodel(), train_dataloader, valid_dataloader)

PermissionError: Unable to create directory at /group-volume/User-Driven-Content-Generation/r.suvorov/inpainting/experiments/r.suvorov_2021-04-30_14-41-12_train_simple_pix2pix2_gap_sdpl_novgg_large_b18_ffc075_batch8x15/samples. Using fallback path.


  self.net.load_state_dict(torch.load(model_path, **kw), strict=False)


Loading weights for net_encoder


  torch.load(weights, map_location=lambda storage, loc: storage), strict=False)
  state = torch.load(path, map_location=map_location)
/home/kwy00/anaconda3/envs/song/lib/python3.8/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /home/kwy00/song/lama-with-refiner/checkpointss exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type                            | Params | Mode 
--------------------------------------------------------------------
0 | model_1 | DefaultInpaintingTrainingModule | 132 M  | train
1 | model_2 | Unet                            | 13.2 M | train
--------------------------------------------------------------------
15.8 M    Trainable params
130 M     Non-trainable params
145 M     Total params
583.786   Total estimated model params size (MB)


Epoch 0: 100%|██████████| 2738/2738 [54:33<00:00,  0.84it/s, v_num=22]     

Metric val_score improved. New best score: 0.513
Epoch 0, global step 2738: 'val_score' reached 0.51333 (best 0.51333), saving model to '/home/kwy00/song/lama-with-refiner/checkpointss/best2-fold_idx=0-SEED=42-epoch=00-val_score=0.5133.ckpt' as top 1


Epoch 1: 100%|██████████| 2738/2738 [54:04<00:00,  0.84it/s, v_num=22]

Metric val_score improved by 0.031 >= min_delta = 0.0001. New best score: 0.544
Epoch 1, global step 5476: 'val_score' reached 0.54442 (best 0.54442), saving model to '/home/kwy00/song/lama-with-refiner/checkpointss/best2-fold_idx=0-SEED=42-epoch=01-val_score=0.5444.ckpt' as top 1


Epoch 2: 100%|██████████| 2738/2738 [54:01<00:00,  0.84it/s, v_num=22]

Metric val_score improved by 0.019 >= min_delta = 0.0001. New best score: 0.563
Epoch 2, global step 8214: 'val_score' reached 0.56333 (best 0.56333), saving model to '/home/kwy00/song/lama-with-refiner/checkpointss/best2-fold_idx=0-SEED=42-epoch=02-val_score=0.5633.ckpt' as top 1


Epoch 3: 100%|██████████| 2738/2738 [54:00<00:00,  0.84it/s, v_num=22]

Metric val_score improved by 0.013 >= min_delta = 0.0001. New best score: 0.577
Epoch 3, global step 10952: 'val_score' reached 0.57651 (best 0.57651), saving model to '/home/kwy00/song/lama-with-refiner/checkpointss/best2-fold_idx=0-SEED=42-epoch=03-val_score=0.5765.ckpt' as top 1


Epoch 4: 100%|██████████| 2738/2738 [54:02<00:00,  0.84it/s, v_num=22]

Metric val_score improved by 0.009 >= min_delta = 0.0001. New best score: 0.586
Epoch 4, global step 13690: 'val_score' reached 0.58566 (best 0.58566), saving model to '/home/kwy00/song/lama-with-refiner/checkpointss/best2-fold_idx=0-SEED=42-epoch=04-val_score=0.5857.ckpt' as top 1


Epoch 5: 100%|██████████| 2738/2738 [54:01<00:00,  0.84it/s, v_num=22]

Metric val_score improved by 0.007 >= min_delta = 0.0001. New best score: 0.593
Epoch 5, global step 16428: 'val_score' reached 0.59303 (best 0.59303), saving model to '/home/kwy00/song/lama-with-refiner/checkpointss/best2-fold_idx=0-SEED=42-epoch=05-val_score=0.5930.ckpt' as top 1


Epoch 6: 100%|██████████| 2738/2738 [54:03<00:00,  0.84it/s, v_num=22]

Metric val_score improved by 0.008 >= min_delta = 0.0001. New best score: 0.601
Epoch 6, global step 19166: 'val_score' reached 0.60092 (best 0.60092), saving model to '/home/kwy00/song/lama-with-refiner/checkpointss/best2-fold_idx=0-SEED=42-epoch=06-val_score=0.6009.ckpt' as top 1


Epoch 7:  20%|█▉        | 545/2738 [15:24<1:01:59,  0.59it/s, v_num=22]

: 

In [54]:
###############################재학습코드#############################################

In [54]:
from lightning.pytorch import Trainer
# Path to the checkpoint file
ckpt_path = '/home/kwy00/song/lama-with-refiner/checkpointss/best2-fold_idx=0-SEED=42-epoch=07-val_score=0.6105.ckpt' #'/home/kwy00/song/lama-with-refiner/checkpointss/best2-fold_idx=0-SEED=42-epoch=06-val_score=0.6009.ckpt'#'/home/kwy00/song/lama-with-refiner/checkpointss/best2-fold_idx=0-SEED=42-epoch=07-val_score=0.6086.ckpt'#"/content/lama-with-refiner/check/epoch=05-val_score=0.5443.ckpt" #"/content/drive/MyDrive/딥러닝심화/이미지복원/epoch=04-val_score=0.5106.ckpt"

# Initialize the trainer and resume from checkpoint
trainer = Trainer(
    max_epochs=40,
    precision='32',
    callbacks=[checkpoint_callback,earlystopping_callback,],
    detect_anomaly=False
)

# Resume training
trainer.fit(placemodel(), train_dataloader, valid_dataloader, ckpt_path=ckpt_path)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


PermissionError: Unable to create directory at /group-volume/User-Driven-Content-Generation/r.suvorov/inpainting/experiments/r.suvorov_2021-04-30_14-41-12_train_simple_pix2pix2_gap_sdpl_novgg_large_b18_ffc075_batch8x15/samples. Using fallback path.


  self.net.load_state_dict(torch.load(model_path, **kw), strict=False)


Loading weights for net_encoder


  torch.load(weights, map_location=lambda storage, loc: storage), strict=False)
  state = torch.load(path, map_location=map_location)
/home/kwy00/anaconda3/envs/song/lib/python3.8/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /home/kwy00/song/lama-with-refiner/checkpointss exists and is not empty.
Restoring states from the checkpoint path at /home/kwy00/song/lama-with-refiner/checkpointss/best2-fold_idx=0-SEED=42-epoch=07-val_score=0.6105.ckpt
/home/kwy00/anaconda3/envs/song/lib/python3.8/site-packages/lightning/fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to

Epoch 8: 100%|██████████| 2738/2738 [49:37<00:00,  0.92it/s, v_num=24]     

: 

In [None]:
########################################################################################3

### 15. 학습된 모델 불러오기

In [54]:
lit_ir_model = placemodel.load_from_checkpoint(
    '/home/kwy00/song/lama-with-refiner/checkpointss/best2-fold_idx=0-SEED=42-epoch=07-val_score=0.6105.ckpt',
    #'/home/kwy00/song/lama-with-refiner/checkpointss/best2-fold_idx=0-SEED=42-epoch=12-val_score=0.6332.ckpt',
    #'/home/kwy00/song/lama-with-refiner/checkpointss/best2-fold_idx=0-SEED=42-epoch=07-val_score=0.6086.ckpt',
    #'/home/kwy00/song/lama-with-refiner/checkpointss/best2-fold_idx=0-SEED=42-epoch=01-val_score=0.5435.ckpt',
    model_2=model_2,
)

/home/kwy00/anaconda3/envs/song/lib/python3.8/site-packages/lightning/fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.


PermissionError: Unable to create directory at /group-volume/User-Driven-Content-Generation/r.suvorov/inpainting/experiments/r.suvorov_2021-04-30_14-41-12_train_simple_pix2pix2_gap_sdpl_novgg_large_b18_ffc075_batch8x15/samples. Using fallback path.


  self.net.load_state_dict(torch.load(model_path, **kw), strict=False)


Loading weights for net_encoder


  torch.load(weights, map_location=lambda storage, loc: storage), strict=False)
  state = torch.load(path, map_location=map_location)


### 16. 추론 및 이미지 저장

In [55]:
predictions = trainer.predict(lit_ir_model, test_dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 7/7 [00:09<00:00,  0.78it/s]


In [56]:
predictions = np.concatenate(predictions)

In [57]:
submission_dir = os.path.join(SUBMISSON_DATA_DIR, EXPERIMENT_NAME)
submission_file = f'{SUBMISSON_DATA_DIR}/{EXPERIMENT_NAME}.zip'
os.makedirs(submission_dir, exist_ok=True)

In [58]:
for idx, row in tqdm(test_df.iterrows(), total=len(test_df)):
    image_pred = Image.fromarray(predictions[idx])
    image_pred.save(os.path.join(submission_dir, row['image']), "PNG")

100%|██████████| 100/100 [00:05<00:00, 19.63it/s]


In [59]:
# Step 3: Compress the directory into a ZIP file using glob
with zipfile.ZipFile(submission_file, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for file_path in glob(f"{submission_dir}/*.png"):
        arcname = os.path.relpath(file_path, submission_dir)
        zipf.write(file_path, arcname)