## 1. 사용할 패키지 불러오기

In [1]:
import pandas as pd
import cv2
import os
from sklearn.model_selection import train_test_split
from torch.optim import Adam
from torch.nn import L1Loss
import torch.nn.functional as F
from data_gen.data_gen import TotalDatasetGenerator
import torch
import torchvision.models as models
import torch.nn as nn
import numpy as np
from tqdm import tqdm


  from .autonotebook import tqdm as notebook_tqdm


## 2. 데이터 불러오기

In [None]:
final_data = pd.read_excel('final_data.xlsx')
final_data.head()

## 3. 이미지 별 RGB 평균 및 HSV 평균 변수 추가

In [None]:
final_data['R'] = 0
final_data['G'] = 0
final_data['B'] = 0
final_data['H'] = 0
final_data['S'] = 0
final_data['V'] = 0

for i in range(len(final_data['이미지경로'])):
    if i % 500 == 0:
        print(i)
    bgr = cv2.imread(final_data['이미지경로'][i])
    hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
    

    final_data['R'][i] = np.mean(bgr[:, :, 2][bgr[:, :, 2] != 255])
    final_data['G'][i] = np.mean(bgr[:, :, 1][bgr[:, :, 1] != 255])
    final_data['B'][i] = np.mean(bgr[:, :, 0][bgr[:, :, 0] != 255])
    final_data['H'][i] = np.mean(hsv[:, :, 0][hsv[:, :, 0] != 255])
    final_data['S'][i] = np.mean(hsv[:, :, 1][hsv[:, :, 1] != 255])
    final_data['V'][i] = np.mean(hsv[:, :, 2][hsv[:, :, 2] != 255])
    

In [None]:
final_data.to_excel('final_data_rgb_hsv.xlsx', index = False, encoding = 'euc-kr')

## 4. 이미지외 변수 및 이미지 모두활용한 분석

### (1) Dataset 생성

In [2]:
final_data = pd.read_excel('final_data_rgb_hsv.xlsx')

In [3]:
all_variable = ['작가생존여부_사망', '작가생존여부_생존', '작가생존여부_알수없음', '판매계절_가을', '판매계절_겨울', '판매계절_봄',
       '판매계절_여름', '재료_견본채색', '재료_기타', '재료_브론즈', '재료_비단에수묵담채', '재료_석판화',
       '재료_실크스크린', '재료_알수없음', '재료_오프셋석판화', '재료_종에이수묵담채', '재료_종이에먹', '재료_종이에수묵',
       '재료_종이에수묵담채', '재료_종이에수묵채색', '재료_종이에수채', '재료_종이에유채', '재료_지본묵서',
       '재료_지본수묵', '재료_지본채색', '재료_캔버스에아크릴', '재료_캔버스에유채', '재료_캔버스에혼합재료',
       '판매처_꼬모옥션', '판매처_마이아트옥션', '판매처_서울옥션', '판매처_아이옥션', '판매처_에이옥션', '판매처_칸옥션',
       '판매처_케이옥션', '판매처_헤럴드아트데이', '가로', '세로', '작품 판매 횟수', '이미지경로', 'R', 'G', 'B', 'H', 'S', 'V']

table_variable = ['작가생존여부_사망', '작가생존여부_생존', '작가생존여부_알수없음', '판매계절_가을', '판매계절_겨울', '판매계절_봄',
       '판매계절_여름', '재료_견본채색', '재료_기타', '재료_브론즈', '재료_비단에수묵담채', '재료_석판화',
       '재료_실크스크린', '재료_알수없음', '재료_오프셋석판화', '재료_종에이수묵담채', '재료_종이에먹', '재료_종이에수묵',
       '재료_종이에수묵담채', '재료_종이에수묵채색', '재료_종이에수채', '재료_종이에유채', '재료_지본묵서',
       '재료_지본수묵', '재료_지본채색', '재료_캔버스에아크릴', '재료_캔버스에유채', '재료_캔버스에혼합재료',
       '판매처_꼬모옥션', '판매처_마이아트옥션', '판매처_서울옥션', '판매처_아이옥션', '판매처_에이옥션', '판매처_칸옥션',
       '판매처_케이옥션', '판매처_헤럴드아트데이', '가로', '세로', '작품 판매 횟수', 'R', 'G', 'B', 'H', 'S', 'V']

In [4]:
X = final_data.loc[:, all_variable]
from sklearn.preprocessing import MinMaxScaler
min_max_scaler = MinMaxScaler()
X.loc[:, ['R', 'G', 'B', 'H', 'S', 'V']] = min_max_scaler.fit_transform(X.loc[:, ['R', 'G', 'B', 'H', 'S', 'V']] )
X.head()


y = final_data['판매가격']

In [5]:
train_dataset, test_dataset, train_target, test_target = train_test_split(X, y, train_size = 0.8, random_state = 1004)

### (2) 이미지 데이터와 이미지외 변수 데이터로 분할

In [6]:
train_image = train_dataset['이미지경로']
train_table = train_dataset.loc[:, table_variable]
train_table = np.array(train_table)

test_image = test_dataset['이미지경로']
test_table = test_dataset.loc[:, table_variable]
test_table = np.array(test_table)

### (3) Dataloader 생성

In [7]:
train_dataset_generator = TotalDatasetGenerator(list(train_image), train_table, list(train_target), batch_size = 16, phase = 'train')
train_dataloader = train_dataset_generator.dataloader()

test_dataset_generator = TotalDatasetGenerator(list(test_image), test_table, list(test_target), batch_size = 1, phase = 'test')
test_dataloader = test_dataset_generator.dataloader()

### (4) Model 생성

In [19]:
class TotalModel(nn.Module):
    def __init__(self):
        super(TotalModel, self).__init__()
        self.vision_model = models.resnet50(pretrained=True)
        self.num_ftrs = self.vision_model.fc.in_features
        self.vision_model.fc = nn.Linear(self.num_ftrs, 128)
        
        self.fc1 = nn.Linear(45, 128)

        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 16)
        self.fc4 = nn.Linear(16, 1)
        
    def forward(self, image, table):
        x1 = self.vision_model(image)
        x2 = self.fc1(table)
        x = torch.cat((x1, x2), dim=1)
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

In [25]:
total_model = TotalModel()

### (3) 학습 파라미터 지정

In [26]:
epoch = 10
learning_rate = 0.01
weight_decay = 0.0001
result_dir = './result/'

### (4) Loss, Optimizer 생성

In [27]:
# get loss function from LossFactory
loss_fn = L1Loss()

# get optimizer from OptimizerFactory
optimizer = Adam(params = total_model.parameters(),
                lr=learning_rate,
                weight_decay = weight_decay)

### (5) 학습

In [28]:
print("{} start training!".format('resnet50'))
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
total_model.to(device)
min_valid_loss = np.inf

# training
for e in range(epoch):
    train_loss = 0.0
    total_model.train()   
    for data in tqdm(train_dataloader['train']):
        if torch.cuda.is_available():
            images, table, labels = data['image'].float().to(device),  data['table'].float().to(device), data['target'].float().to(device)
        
        optimizer.zero_grad()
        target = total_model(images, table)
        loss = loss_fn(target,labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() / len(images)
            
    valid_loss = 0.0
    total_model.eval()  
    for data in tqdm(test_dataloader['test']):
        if torch.cuda.is_available():
            images, table, labels = data['image'].float().to(device),  data['table'].float().to(device), data['target'].float().to(device)
        
        target = total_model(images, table)
        loss = loss_fn(target,labels)
        valid_loss = loss.item() * len(data)

    print("Epoch: {}, Training Loss: {}, Test Loss: {}".format(e+1, train_loss / len(train_dataloader['train']), valid_loss))   
    if min_valid_loss > valid_loss:
        print(f'Validation Loss Decreased({min_valid_loss:.6f}--->{valid_loss:.6f}) \t Saving The Model')
        min_valid_loss = valid_loss
        # Saving State Dict
        torch.save(total_model.state_dict(), result_dir + 'Best_total_model.pth')    

resnet50 start training!


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)
100%|██████████| 752/752 [02:46<00:00,  4.53it/s]
  return F.l1_loss(input, target, reduction=self.reduction)
100%|██████████| 3007/3007 [01:09<00:00, 43.05it/s]


Epoch: 1, Training Loss: 408118.2570956616, Test Loss: 1644306.0
Validation Loss Decreased(inf--->1644306.000000) 	 Saving The Model


100%|██████████| 752/752 [02:22<00:00,  5.29it/s]
100%|██████████| 3007/3007 [00:47<00:00, 62.88it/s]


Epoch: 2, Training Loss: 407675.2355957031, Test Loss: 2092782.1875


100%|██████████| 752/752 [02:23<00:00,  5.24it/s]
100%|██████████| 3007/3007 [00:47<00:00, 63.80it/s]


Epoch: 3, Training Loss: 407253.7497818318, Test Loss: 1985781.9375


100%|██████████| 752/752 [02:24<00:00,  5.21it/s]
100%|██████████| 3007/3007 [00:48<00:00, 62.04it/s]


Epoch: 4, Training Loss: 407509.0155413688, Test Loss: 1761969.375


100%|██████████| 752/752 [02:24<00:00,  5.21it/s]
100%|██████████| 3007/3007 [00:48<00:00, 62.60it/s]


Epoch: 5, Training Loss: 407056.5029307264, Test Loss: 1416482.8125
Validation Loss Decreased(1644306.000000--->1416482.812500) 	 Saving The Model


100%|██████████| 752/752 [02:26<00:00,  5.12it/s]
100%|██████████| 3007/3007 [00:48<00:00, 62.48it/s]


Epoch: 6, Training Loss: 407246.84811440326, Test Loss: 1421740.5


100%|██████████| 752/752 [02:25<00:00,  5.19it/s]
100%|██████████| 3007/3007 [00:45<00:00, 65.89it/s]


Epoch: 7, Training Loss: 407220.2935037816, Test Loss: 1277477.4375
Validation Loss Decreased(1416482.812500--->1277477.437500) 	 Saving The Model


100%|██████████| 752/752 [02:25<00:00,  5.17it/s]
100%|██████████| 3007/3007 [00:48<00:00, 61.46it/s]


Epoch: 8, Training Loss: 411890.61073907086, Test Loss: 1544281.3125


100%|██████████| 752/752 [02:22<00:00,  5.27it/s]
100%|██████████| 3007/3007 [00:48<00:00, 61.64it/s]


Epoch: 9, Training Loss: 407485.7064349235, Test Loss: 1000955.0625
Validation Loss Decreased(1277477.437500--->1000955.062500) 	 Saving The Model


100%|██████████| 752/752 [02:25<00:00,  5.17it/s]
100%|██████████| 3007/3007 [00:45<00:00, 66.32it/s]

Epoch: 10, Training Loss: 407460.603405502, Test Loss: 1720182.75





### (6) Load Best Model

In [33]:
total_model.load_state_dict(torch.load('result/Best_total_model.pth'))

<All keys matched successfully>

### (7) 성능 평가

In [34]:
print('start prediction')
predictions = []
total_model.to(device)

with torch.no_grad():  
    for data in test_dataloader['test']:
        images, table, labels = data['image'].float().to(device),  data['table'].float().to(device), data['target'].float().to(device)
        total_model.eval()  
        yhat = total_model(images, table)  
        pred = list(yhat.cpu().numpy())
        predictions.append(pred[0][0])

start prediction


In [35]:
from sklearn.metrics import mean_squared_error, r2_score

print("RMSE: {}".format(np.sqrt(mean_squared_error(test_target, predictions))))
print("R2 Score: {}".format(r2_score(test_target, predictions)))

RMSE: 42297489.95510885
R2 Score: -0.02131562515023333
