# Library 설정 및 필요 함수 정의
**이는 수정할 필요 없음**

In [8]:
import h5py
import numpy as np
from torch.utils.data import Dataset
from torchvision import transforms
import torch
from torch import nn
import os
import PIL.Image as pil_image

import argparse
import copy

import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.utils.data.dataloader import DataLoader
from tqdm import tqdm
import csv



def convert_rgb_to_y(img):
    if type(img) == np.ndarray:
        return 16. + (64.738 * img[:, :, 0] + 129.057 * img[:, :, 1] + 25.064 * img[:, :, 2]) / 256.
    elif type(img) == torch.Tensor:
        if len(img.shape) == 4:
            img = img.squeeze(0)
        return 16. + (64.738 * img[0, :, :] + 129.057 * img[1, :, :] + 25.064 * img[2, :, :]) / 256.
    else:
        raise Exception('Unknown Type', type(img))


def convert_rgb_to_ycbcr(img):
    if type(img) == np.ndarray:
        y = 16. + (64.738 * img[:, :, 0] + 129.057 * img[:, :, 1] + 25.064 * img[:, :, 2]) / 256.
        cb = 128. + (-37.945 * img[:, :, 0] - 74.494 * img[:, :, 1] + 112.439 * img[:, :, 2]) / 256.
        cr = 128. + (112.439 * img[:, :, 0] - 94.154 * img[:, :, 1] - 18.285 * img[:, :, 2]) / 256.
        return np.array([y, cb, cr]).transpose([1, 2, 0])
    elif type(img) == torch.Tensor:
        if len(img.shape) == 4:
            img = img.squeeze(0)
        y = 16. + (64.738 * img[0, :, :] + 129.057 * img[1, :, :] + 25.064 * img[2, :, :]) / 256.
        cb = 128. + (-37.945 * img[0, :, :] - 74.494 * img[1, :, :] + 112.439 * img[2, :, :]) / 256.
        cr = 128. + (112.439 * img[0, :, :] - 94.154 * img[1, :, :] - 18.285 * img[2, :, :]) / 256.
        return torch.cat([y, cb, cr], 0).permute(1, 2, 0)
    else:
        raise Exception('Unknown Type', type(img))


def convert_ycbcr_to_rgb(img):
    if type(img) == np.ndarray:
        r = 298.082 * img[:, :, 0] / 256. + 408.583 * img[:, :, 2] / 256. - 222.921
        g = 298.082 * img[:, :, 0] / 256. - 100.291 * img[:, :, 1] / 256. - 208.120 * img[:, :, 2] / 256. + 135.576
        b = 298.082 * img[:, :, 0] / 256. + 516.412 * img[:, :, 1] / 256. - 276.836
        return np.array([r, g, b]).transpose([1, 2, 0])
    elif type(img) == torch.Tensor:
        if len(img.shape) == 4:
            img = img.squeeze(0)
        r = 298.082 * img[0, :, :] / 256. + 408.583 * img[2, :, :] / 256. - 222.921
        g = 298.082 * img[0, :, :] / 256. - 100.291 * img[1, :, :] / 256. - 208.120 * img[2, :, :] / 256. + 135.576
        b = 298.082 * img[0, :, :] / 256. + 516.412 * img[1, :, :] / 256. - 276.836
        return torch.cat([r, g, b], 0).permute(1, 2, 0)
    else:
        raise Exception('Unknown Type', type(img))


def calc_psnr(img1, img2):
    return 10. * torch.log10(1. / torch.mean((img1 - img2) ** 2))


class AverageMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


class TrainDataset(Dataset):
    def __init__(self, image_dir, is_train=1, scale=4):
        super(TrainDataset, self).__init__()
        lr_dir = os.path.join(image_dir, "lr/")
        hr_dir = os.path.join(image_dir, "hr/")
        self.lr_list = sorted([os.path.join(lr_dir, f) for f in os.listdir(lr_dir)])
        self.hr_list = sorted([os.path.join(hr_dir, f) for f in os.listdir(hr_dir)])
        cur_len = len(self.hr_list)
        train_len = round(cur_len*0.9)

        if is_train == 1:
            self.lr_list = self.lr_list[:train_len]
            self.hr_list = self.hr_list[:train_len]
        else:
            self.lr_list = self.lr_list[train_len:]
            self.hr_list = self.hr_list[train_len:]

        self.scale = scale
        self.crop_size = 33
        self.is_train = is_train

    def __getitem__(self, idx):
        image = pil_image.open(self.lr_list[idx]).convert('RGB')
        image = np.array(image).astype(np.float32)
        ycbcr = convert_ycbcr_to_rgb(image)

        lr = ycbcr[..., 0]
        lr /= 255.
        lr = torch.from_numpy(lr)
        h, w = lr.size()

        hr = pil_image.open(self.hr_list[idx]).convert('RGB')
        hr = np.array(hr).astype(np.float32)
        ycbcr = convert_ycbcr_to_rgb(hr)

        hr = ycbcr[..., 0]
        hr /= 255.
        hr = torch.from_numpy(hr)

        # random crop
        if self.is_train:
            rand_h = torch.randint(h - (self.crop_size), [1, 1])
            rand_w = torch.randint(w - (self.crop_size), [1, 1])
            lr = lr[rand_h:rand_h + self.crop_size, rand_w:rand_w + self.crop_size]
            hr = hr[rand_h:rand_h + self.crop_size, rand_w:rand_w + self.crop_size]

        lr = lr.unsqueeze(0)
        hr = hr.unsqueeze(0)
         # lr hr pair
        return lr, hr

    def __len__(self):
       return len(self.hr_list)


class TestDataset(Dataset):
    def __init__(self, image_dir, scale=4):
        super(TestDataset, self).__init__()
        lr_dir = os.path.join(image_dir, "lr/")
        self.lr_list = sorted([os.path.join(lr_dir, f) for f in os.listdir(lr_dir)])

        self.scale = scale
        self.crop_size = 33

    def __getitem__(self, idx):
        image = pil_image.open(self.lr_list[idx]).convert('RGB')
        image = np.array(image).astype(np.float32)
        ycbcr = convert_ycbcr_to_rgb(image)

        lr = ycbcr[..., 0]
        lr /= 255.
        lr = torch.from_numpy(lr)

        lr = lr.unsqueeze(0)

        return lr

    def __len__(self):
       return len(self.lr_list)

# SRCNN 모델

3개의 convolution layer로 구성되어 있음

이를 수정하여 성능을 개선할 수 있음

In [2]:
class SRCNN(nn.Module):
    def __init__(self, num_channels=1):
        super(SRCNN, self).__init__()
        self.conv1 = nn.Conv2d(num_channels, 64, kernel_size=9, padding=9 // 2)
        self.conv2 = nn.Conv2d(64, 32, kernel_size=5, padding=5 // 2)
        self.conv3 = nn.Conv2d(32, num_channels, kernel_size=5, padding=5 // 2)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.conv3(x)
        return x

# 학습 코드 시작

가로, 세로 4배 크기의 영상으로 super-resolution 수행

train dataset과 test dataset의 디렉토리 설정

In [3]:

## fixed options (수정하지 말 것)
scale = 4
train_dir = '/USER/Kaggle/kaggle/jbnu/image/data/train/'
test_dir = '/USER/Kaggle/kaggle/jbnu/image/data/test/'



# Training / Test options 설정

./models/ 에 학습될 모델 파일 저장

learning rate는 1e-4 이용

batch size=16

학습은 100 eopchs 수행

use_pretrain = True인 경우 pretrain된 모델인 pretrained.pth를 불러와 학습 수행

복원 영상의 경로는 ./restored_images/ 로 설정

그리고 각 디렉토리 생성

In [5]:
## Training options
outputs_dir = './models/'
lr = 1e-4
batch_size = 16
num_workers = 4
seed = 123
num_epochs = 100

use_pretrain = False
pretrained = '/kaggle/input/jbnu-bigdathub-super-resolution/pretrained.pth'

## Test options
outimg_dir = './restored_images/'

if not os.path.exists(outputs_dir):
    os.makedirs(outputs_dir)
if not os.path.exists(outimg_dir):
    os.makedirs(outimg_dir)



# Train을 수행하기 위한 전처리 과정

cuda 사용 설정

위에서 정의한 SRCNN 모델 이용

학습 시 loss는 MSE (Mean-Squared Error) loss 사용

train/validation dataset에 대한 dataloader 설정

pretrain된 모델을 불러옴

In [6]:
cudnn.benchmark = True
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(seed)
model = SRCNN().to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam([
    {'params': model.conv1.parameters()},
    {'params': model.conv2.parameters()},
    {'params': model.conv3.parameters(), 'lr': lr * 0.1}
], lr=lr)

train_dataset = TrainDataset(train_dir, is_train=1, scale=scale)
train_dataloader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=num_workers,
                              pin_memory=True,
                              drop_last=True)
eval_dataset = TrainDataset(train_dir, is_train=0, scale=scale)
eval_dataloader = DataLoader(dataset=eval_dataset, batch_size=1)

# load pretrained model
if use_pretrain is True:
    state_dict = model.state_dict()
    for n, p in torch.load(pretrained, map_location=lambda storage, loc: storage).items():
        if n in state_dict.keys():
            state_dict[n].copy_(p)
        else:
            raise KeyError(n)

# Training 및 validation 수행

매 epoch에 대한 모델이 ./models/epoch_{}.pth에 저장

5 epochs 마다 validation 수행하여 PSNR이 가장 높게 나오는 모델을 ./models/best.pth에 저장

In [7]:
best_weights = copy.deepcopy(model.state_dict())
best_epoch = 0
best_psnr = 0.0

for epoch in range(num_epochs):

    model.train()
    epoch_losses = AverageMeter()

    with tqdm(total=(len(train_dataset) - len(train_dataset) % batch_size)) as t:
        t.set_description('epoch: {}/{}'.format(epoch, num_epochs - 1))

        for data in train_dataloader:
            inputs, labels = data

            inputs = inputs.to(device)
            labels = labels.to(device)

            preds = model(inputs)

            loss = criterion(preds, labels)

            epoch_losses.update(loss.item(), len(inputs))

            # set grad to zero
            optimizer.zero_grad()
            # backward
            loss.backward()
            # optimizer next step
            optimizer.step()

            t.set_postfix(loss='{:.6f}'.format(epoch_losses.avg))
            t.update(len(inputs))
    torch.save(model.state_dict(), "{}/epoch_{}.pth".format(outputs_dir, epoch))

    model.eval()
    epoch_psnr = AverageMeter()

    if (epoch + 1) % 5 == 0:
        for data in eval_dataloader:
            inputs, labels = data

            inputs = inputs.to(device)
            labels = labels.to(device)

            with torch.no_grad():
                preds = model(inputs).clamp(0.0, 1.0)

            epoch_psnr.update(calc_psnr(preds, labels), len(inputs))

        print('eval psnr: {:.2f}'.format(epoch_psnr.avg))

        if epoch_psnr.avg > best_psnr:
            best_epoch = epoch
            best_psnr = epoch_psnr.avg
            best_weights = copy.deepcopy(model.state_dict())

print('best epoch: {}, psnr: {:.2f}'.format(best_epoch, best_psnr))
torch.save(best_weights, os.path.join(outputs_dir, 'best.pth'))


epoch: 0/99:   0% 0/5552 [00:01<?, ?it/s]


ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
    data = fetcher.fetch(index)
  File "/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/tmp/ipykernel_112782/2844198570.py", line 116, in __getitem__
    h, w = lr.size()
ValueError: not enough values to unpack (expected 2, got 1)


# Test 수행

학습된 모델로 test 수행

위의 100 epochs 학습된 모델을 이용

성능 평가를 위해 영상의 가운데 20x20 크기의 patch를 이용

20x20 크기의 patch의 YCbCr중 Y 채널에 대한 값을 submission.csv에 저장

In [7]:
## Test 코드
test_dataset = TestDataset(test_dir, scale=scale)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

model.eval()

img_num = len(test_dataset.lr_list)
psnr = torch.zeros(img_num)

f = open('submission.csv', 'w', newline='')
wr = csv.writer(f)
wr.writerow(['Id', 'Predicted'])

n = -1
for data in test_dataloader:
    n = n + 1
    inputs = data
    inputs = inputs.to(device)

    with torch.no_grad():
        preds = model(inputs).clamp(0.0, 1.0)

    # crop image into 20 x 20
    _, _, h, w = preds.size()
    h2 = int(h / 2)
    w2 = int(w / 2)
    preds_crop = preds[:, :, h2:h2 + 20, w2:w2 + 20] * 255

    preds_crop_array = preds_crop.reshape(20 * 20, 1)
    preds_crop_array = preds_crop_array.cpu()
    preds_crop_array = preds_crop_array.numpy()

    idx_list = list(range(n * 400 + 1, (n + 1) * 400 + 1))
    idx_list = np.array(idx_list)
    print("{}".format(n))
    for ii in range(0, 400):
        wr.writerow([idx_list[ii], preds_crop_array[ii, 0]])

    preds = preds.mul(255.0).cpu().numpy().squeeze(0).squeeze(0)

    image = pil_image.open(test_dataset.lr_list[n]).convert('RGB')
    image = np.array(image).astype(np.float32)
    ycbcr = convert_rgb_to_ycbcr(image)

    output = np.array([preds, ycbcr[..., 1], ycbcr[..., 2]]).transpose([1, 2, 0])
    output = np.clip(convert_ycbcr_to_rgb(output), 0.0, 255.0).astype(np.uint8)
    output = pil_image.fromarray(output)

    # split
    _, fname = os.path.split(test_dataset.lr_list[n])
    filename = "{}/{}".format(outimg_dir, fname)
    output.save(filename)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27