## 1. 사용할 패키지 불러오기

In [1]:
import pandas as pd
import cv2
import os
from sklearn.model_selection import train_test_split
from torch.optim import Adam
from torch.nn import L1Loss
from data_gen.data_gen import DatasetGenerator
import torch
import torchvision.models as models
import torch.nn as nn
import numpy as np
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


## 2. 데이터 불러오기

### (1) Table 데이터

In [2]:
table_data = pd.read_excel('data/final_data.xlsx')
table_data.head()

Unnamed: 0,작가생존여부_사망,작가생존여부_생존,작가생존여부_알수없음,판매계절_가을,판매계절_겨울,판매계절_봄,판매계절_여름,재료_견본채색,재료_기타,재료_브론즈,...,판매처_칸옥션,판매처_케이옥션,판매처_헤럴드아트데이,가로,세로,작품 판매 횟수,판매가격,작가명,제목,이미지 고유 번호
0,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0.065372,0.065372,0.0,600000,임상진 Lim SangChin (1935~2013),무제,380410
1,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0.026429,0.026429,0.0,400000,정술원 Jung SulWon (1885~1959),화조,380460
2,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0.018878,0.018878,0.0,100000,정주상 Jeong JuSang (1925~2012),심정흥장 (선면),380491
3,0,1,0,1,0,0,0,0,0,0,...,0,0,0,0.025351,0.025351,0.0,360000,이양원 Lee YangWon (1944~),풍속도,380417
4,0,1,0,1,0,0,0,0,1,0,...,0,0,0,0.019957,0.019957,0.0,240000,이외수 Lee OiSoo (1946~),사람과 사람들,380391


In [3]:
remove_files = pd.read_excel('겹치는애들.xlsx')
table_data = table_data.loc[-table_data['이미지 고유 번호'].isin(remove_files['이미지 고유 번호']), :].reset_index(drop = True)
table_data.shape

(15197, 43)

### (2) 이미지 데이터

In [4]:
folder_list = sorted(os.listdir('data/image'))[3:] + sorted(os.listdir('data/image'))[:3]
folder_list

['1월_files',
 '2월_files',
 '3월_files',
 '4월_files',
 '5월_files',
 '6월_files',
 '7월_files',
 '8월_files',
 '9월_files',
 '10월_files',
 '11월_files',
 '12월_files']

In [5]:
serial = []
image_dir = []

for folder_name in folder_list:
    file_list = os.listdir(os.path.join('data/image', folder_name))
    for fname in file_list:
        if os.path.getsize(os.path.join(os.path.join('data/image', folder_name), fname)) > 3200:
            serial.append(fname[:-4])
            image_dir.append(os.path.join(os.path.join('data/image', folder_name), fname))

image_df = pd.DataFrame({'이미지 고유 번호': serial, '이미지경로': image_dir})
image_df.head()

Unnamed: 0,이미지 고유 번호,이미지경로
0,335298,data/image/1월_files/335298.jpg
1,340721,data/image/1월_files/340721.jpg
2,357485,data/image/1월_files/357485.jpg
3,357663,data/image/1월_files/357663.jpg
4,155,data/image/1월_files/155.jpg


### (3) 합치기

In [6]:
table_data['이미지 고유 번호'] = table_data['이미지 고유 번호'].map(str)
final_data = pd.merge(table_data, image_df)
final_data.head()

Unnamed: 0,작가생존여부_사망,작가생존여부_생존,작가생존여부_알수없음,판매계절_가을,판매계절_겨울,판매계절_봄,판매계절_여름,재료_견본채색,재료_기타,재료_브론즈,...,판매처_케이옥션,판매처_헤럴드아트데이,가로,세로,작품 판매 횟수,판매가격,작가명,제목,이미지 고유 번호,이미지경로
0,1,0,0,1,0,0,0,0,0,0,...,0,0,0.065372,0.065372,0.0,600000,임상진 Lim SangChin (1935~2013),무제,380410,data/image/11월_files/380410.jpg
1,1,0,0,1,0,0,0,0,0,0,...,0,0,0.026429,0.026429,0.0,400000,정술원 Jung SulWon (1885~1959),화조,380460,data/image/11월_files/380460.jpg
2,1,0,0,1,0,0,0,0,0,0,...,0,0,0.018878,0.018878,0.0,100000,정주상 Jeong JuSang (1925~2012),심정흥장 (선면),380491,data/image/11월_files/380491.jpg
3,0,1,0,1,0,0,0,0,0,0,...,0,0,0.025351,0.025351,0.0,360000,이양원 Lee YangWon (1944~),풍속도,380417,data/image/11월_files/380417.jpg
4,0,1,0,1,0,0,0,0,1,0,...,0,0,0.019957,0.019957,0.0,240000,이외수 Lee OiSoo (1946~),사람과 사람들,380391,data/image/11월_files/380391.jpg


### (4) 최종 데이터 저장

In [41]:
final_data.to_excel('final_data.xlsx', index = False, encoding = 'euc-kr')

## 3. Image 모델링

### (1) Dataset 생성

In [4]:
final_data = pd.read_excel('final_data_rgb_hsv.xlsx')
image_dir = final_data['이미지경로']
target = np.log10(final_data['판매가격'])

In [5]:
train_image_dir, test_image_dir, train_target, test_target = train_test_split(image_dir, target, train_size = 0.8, random_state = 1004)

In [6]:
train_image_dataset_generator = DatasetGenerator(list(train_image_dir), list(train_target), batch_size = 16, phase = 'train', train_valid_split = False)
train_dataloader = train_image_dataset_generator.dataloader()

test_image_dataset_generator = DatasetGenerator(list(test_image_dir), list(test_target), batch_size = 1, phase = 'test', train_valid_split = False)
test_dataloader = test_image_dataset_generator.dataloader()

### (2) Model 생성

In [7]:
model_name = 'resnet18_cbam'

In [8]:
import torch
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo


__all__ = ['ResNet', 'resnet18_cbam', 'resnet34_cbam', 'resnet50_cbam', 'resnet101_cbam',
           'resnet152_cbam']


model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)

class ChannelAttention(nn.Module):
    def __init__(self, in_planes, ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
           
        self.fc = nn.Sequential(nn.Conv2d(in_planes, in_planes // 16, 1, bias=False),
                               nn.ReLU(),
                               nn.Conv2d(in_planes // 16, in_planes, 1, bias=False))
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = self.fc(self.avg_pool(x))
        max_out = self.fc(self.max_pool(x))
        out = avg_out + max_out
        return self.sigmoid(out)

class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()

        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=kernel_size//2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)
        return self.sigmoid(x)

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)

        self.ca = ChannelAttention(planes)
        self.sa = SpatialAttention()

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out = self.ca(out) * out
        out = self.sa(out) * out

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)

        self.ca = ChannelAttention(planes * 4)
        self.sa = SpatialAttention()

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out = self.ca(out) * out
        out = self.sa(out) * out

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

In [9]:
def resnet18_cbam(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet18'])
        now_state_dict        = model.state_dict()
        now_state_dict.update(pretrained_state_dict)
        model.load_state_dict(now_state_dict)
    return model

In [10]:
# build model
vision_model = resnet18_cbam(pretrained=True)
num_ftrs = vision_model.fc.in_features
vision_model.fc = nn.Linear(num_ftrs, 1)


Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /home/jongwook95.lee/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth
100%|██████████| 44.7M/44.7M [00:03<00:00, 12.9MB/s]


### (3) 학습 파라미터 지정

In [11]:
epoch = 10
learning_rate = 0.001
weight_decay = 0.0001
result_dir = './result/'

### (4) Loss, Optimizer 생성

In [12]:
# get loss function from LossFactory
loss_fn = L1Loss()

# get optimizer from OptimizerFactory
optimizer = Adam(params = vision_model.parameters(),
                lr=learning_rate,
                weight_decay = weight_decay)

### (5) 학습

In [13]:
print("{} start training!".format(model_name))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vision_model.to(device)
min_valid_loss = np.inf

# training
for e in range(epoch):
    train_loss = 0.0
    vision_model.train()     # Optional when not using Model Specific layer
    for data in tqdm(train_dataloader['train']):
        if torch.cuda.is_available():
            images, labels = data['image'].float().to(device), data['target'].float().to(device)
        
        optimizer.zero_grad()
        target = vision_model(images)
        loss = loss_fn(target,labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() / len(images)
            
    valid_loss = 0.0
    vision_model.eval()     # Optional when not using Model Specific layer
    for data in tqdm(test_dataloader['test']):
        if torch.cuda.is_available():
            data, labels = data['image'].float().to(device), data['target'].float().to(device)
        
        target = vision_model(data)
        loss = loss_fn(target,labels)
        valid_loss = loss.item() * len(data)

    print("Epoch: {}, Training Loss: {}, Test Loss: {}".format(e+1, train_loss / len(train_dataloader['train']), valid_loss))   
    if min_valid_loss > valid_loss:
        print(f'Validation Loss Decreased({min_valid_loss:.6f}--->{valid_loss:.6f}) \t Saving The Model')
        min_valid_loss = valid_loss
        # Saving State Dict
        torch.save(vision_model.state_dict(), result_dir + 'Best_image_resnet18_cbam_model.pth')    

resnet18_cbam start training!


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)
100%|██████████| 752/752 [01:52<00:00,  6.69it/s]
  return F.l1_loss(input, target, reduction=self.reduction)
100%|██████████| 3007/3007 [00:49<00:00, 60.62it/s]


Epoch: 1, Training Loss: 0.03556527331164305, Test Loss: 0.1539602279663086
Validation Loss Decreased(inf--->0.153960) 	 Saving The Model


100%|██████████| 752/752 [01:51<00:00,  6.74it/s]
100%|██████████| 3007/3007 [00:49<00:00, 60.30it/s]


Epoch: 2, Training Loss: 0.03259622950126675, Test Loss: 0.3431525230407715


100%|██████████| 752/752 [01:52<00:00,  6.70it/s]
100%|██████████| 3007/3007 [00:49<00:00, 60.89it/s]


Epoch: 3, Training Loss: 0.03258954741362918, Test Loss: 0.09937429428100586
Validation Loss Decreased(0.153960--->0.099374) 	 Saving The Model


100%|██████████| 752/752 [01:53<00:00,  6.62it/s]
100%|██████████| 3007/3007 [00:49<00:00, 60.67it/s]


Epoch: 4, Training Loss: 0.03255728754720234, Test Loss: 0.22851800918579102


100%|██████████| 752/752 [02:24<00:00,  5.20it/s]
100%|██████████| 3007/3007 [01:09<00:00, 43.01it/s]


Epoch: 5, Training Loss: 0.03244505220469325, Test Loss: 0.3336982727050781


100%|██████████| 752/752 [02:51<00:00,  4.38it/s]
100%|██████████| 3007/3007 [00:49<00:00, 60.40it/s]


Epoch: 6, Training Loss: 0.03255249671142628, Test Loss: 0.48142147064208984


100%|██████████| 752/752 [01:52<00:00,  6.70it/s]
100%|██████████| 3007/3007 [00:49<00:00, 60.86it/s]


Epoch: 7, Training Loss: 0.03253323993536624, Test Loss: 0.5584230422973633


100%|██████████| 752/752 [01:50<00:00,  6.82it/s]
100%|██████████| 3007/3007 [00:48<00:00, 61.47it/s]


Epoch: 8, Training Loss: 0.03239539593155991, Test Loss: 0.14046669006347656


100%|██████████| 752/752 [01:52<00:00,  6.69it/s]
100%|██████████| 3007/3007 [00:49<00:00, 61.07it/s]


Epoch: 9, Training Loss: 0.0324107420442507, Test Loss: 0.02844524383544922
Validation Loss Decreased(0.099374--->0.028445) 	 Saving The Model


100%|██████████| 752/752 [01:51<00:00,  6.74it/s]
100%|██████████| 3007/3007 [00:50<00:00, 59.97it/s]

Epoch: 10, Training Loss: 0.032515031065633325, Test Loss: 0.34773969650268555





### (6) Load Best Model

In [14]:
vision_model.load_state_dict(torch.load('result/Best_image_resnet18_cbam_model.pth'))

<All keys matched successfully>

### (7) 성능 평가

#### - 학습 데이터에 대한 성능

In [15]:
print('start prediction')
predictions = []
vision_model.to(device)

with torch.no_grad():  
    for data in train_dataloader['train']:
        images, labels = data['image'].float().to(device), data['target'].float().to(device)
        images = images.to(device)  
        labels = labels.to(device)  
        vision_model.eval()  
        yhat = vision_model(images)  
        pred = list(yhat.cpu().numpy())
        predictions = predictions + list(np.hstack(pred))

start prediction


In [16]:
from sklearn.metrics import mean_squared_error, r2_score

print("RMSE: {}".format(np.sqrt(mean_squared_error(train_target, predictions))))
print("R2 Score: {}".format(r2_score(train_target, predictions)))

RMSE: 0.685689313259617
R2 Score: -0.016464722102463636


#### - 테스트 데이터에 대한 성능

In [17]:
print('start prediction')
predictions = []
vision_model.to(device)

with torch.no_grad():  
    for data in test_dataloader['test']:
        images, labels = data['image'].float().to(device), data['target'].float().to(device)
        images = images.to(device)  
        labels = labels.to(device)  
        vision_model.eval()  
        yhat = vision_model(images)  
        pred = list(yhat.cpu().numpy())
        predictions.append(pred[0][0])

start prediction


In [18]:
from sklearn.metrics import mean_squared_error, r2_score

print("RMSE: {}".format(np.sqrt(mean_squared_error(test_target, predictions))))
print("R2 Score: {}".format(r2_score(test_target, predictions)))

RMSE: 0.7056034779930921
R2 Score: -0.015133135549176613


: 