In [1]:
import torch
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from torch import optim
from torch import nn

from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
from torchvision import transforms

import random
from glob import glob
import pandas as pd
import numpy as np
from PIL import Image

In [2]:
from glob import glob

import pandas as pd
import numpy as np
from PIL import Image
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
from torchvision import transforms

def extract_day(images):
    day = int(images.split('.')[-2][-2:])
    return day

def make_day_array(images):
    day_array = np.array([extract_day(x) for x in images])
    return day_array

def make_combination(length, species, data_frame, direct_name):
    before_file_path = []
    after_file_path = []
    time_delta = []

    for i in range(length):
        
        # 하위 폴더 중에서 랜덤하게 선택을 한다.
        direct = random.randrange(0,len(direct_name))
        # 위에서 결정된 폴더를 선택한다. 
        temp = data_frame[data_frame['version'] == direct_name[direct]]
    
        # 밑은 기존의 코드와 동일합니다.
        sample = temp[temp['species'] == species].sample(2)
        after = sample[sample['day'] == max(sample['day'])].reset_index(drop=True)
        before = sample[sample['day'] == min(sample['day'])].reset_index(drop=True)

        before_file_path.append(before.iloc[0]['file_name'])
        after_file_path.append(after.iloc[0]['file_name'])
        delta = int(after.iloc[0]['day'] - before.iloc[0]['day'])
        time_delta.append(delta)

    combination_df = pd.DataFrame({
        'before_file_path': before_file_path,
        'after_file_path': after_file_path,
        'time_delta': time_delta,
    })

    combination_df['species'] = species

    return combination_df

class KistDataset(Dataset):
    def __init__(self, combination_df, is_test=None):
        self.combination_df = combination_df
        self.transform = transforms.Compose([                            
            transforms.ToTensor(),
        ])
        self.is_test = is_test

    def __getitem__(self, idx):
        before_image = Image.open(self.combination_df.iloc[idx]['before_file_path'])
        after_image = Image.open(self.combination_df.iloc[idx]['after_file_path'])

        before_image = self.transform(before_image)
        after_image = self.transform(after_image)
        if self.is_test:
            return before_image, after_image
        time_delta = self.combination_df.iloc[idx]['time_delta']
        return before_image, after_image, time_delta

    def __len__(self):
        return len(self.combination_df)

In [3]:
import torch
from torch import nn
from torchvision.models import resnet50


class CompareCNN(nn.Module):

    def __init__(self):
        super(CompareCNN, self).__init__()
        self.resnet = resnet50(pretrained=True)
        self.fc_layer = nn.Linear(1000, 1) 

    def forward(self, input):
        x = self.resnet(input)
        output = self.fc_layer(x)
        return output



class CompareNet(nn.Module):

    def __init__(self):
        super(CompareNet, self).__init__()
        self.before_net = CompareCNN()
        self.after_net = CompareCNN()

    def forward(self, before_input, after_input):
        before = self.before_net(before_input)
        after = self.after_net(after_input)
        delta = before - after
        return delta

In [4]:
import gc
gc.collect()
torch.cuda.empty_cache()

In [5]:
def seed_everything(seed): # seed 고정
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)


seed_everything(2048)

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
lr = 1e-5
epochs = 10
batch_size = 32
valid_batch_size = 50

model = CompareNet().to(device)

# 학습 데이터가 있는 폴더 위치
root_path = './drive/MyDrive/open_224/train_dataset/'

# BC 폴더와 LT 폴더에 있는 하위 폴더를 저장한다.
bc_direct = glob(root_path + '/BC/*')
bc_direct_name = [x[-5:] for x in bc_direct]
lt_direct = glob(root_path + '/LT/*')
lt_direct_name = [x[-5:] for x in lt_direct]

# 하위 폴더에 있는 이미지들을 하위 폴더 이름과 매칭시켜서 저장한다.
bc_images = {key : glob(name + '/*.png') for key,name in zip(bc_direct_name, bc_direct)}
lt_images = {key : glob(name + '/*.png') for key,name in zip(lt_direct_name, lt_direct)}

# 하위 폴더에 있는 이미지들에서 날짜 정보만 따로 저장한다.
bc_dayes = {key : make_day_array(bc_images[key]) for key in bc_direct_name}
lt_dayes = {key : make_day_array(lt_images[key]) for key in lt_direct_name}

bc_dfs = []
for i in bc_direct_name:
    bc_df = pd.DataFrame({
        'file_name':bc_images[i],
        'day':bc_dayes[i],
        'species':'bc',
        'version':i
    })
    bc_dfs.append(bc_df)
    
lt_dfs = []
for i in lt_direct_name:
    lt_df = pd.DataFrame({
        'file_name':lt_images[i],
        'day':lt_dayes[i],
        'species':'lt',
        'version':i
    })
    lt_dfs.append(lt_df)

bc_dataframe = pd.concat(bc_dfs).reset_index(drop=True)
lt_dataframe = pd.concat(lt_dfs).reset_index(drop=True)
total_dataframe = pd.concat([bc_dataframe, lt_dataframe]).reset_index(drop=True)

bc_combination = make_combination(5000, 'bc', total_dataframe, bc_direct_name)
lt_combination = make_combination(5000, 'lt', total_dataframe, lt_direct_name)

bc_train = bc_combination.iloc[:4500]
bc_valid = bc_combination.iloc[4500:]

lt_train = lt_combination.iloc[:4500]
lt_valid = lt_combination.iloc[4500:]

train_set = pd.concat([bc_train, lt_train])
valid_set = pd.concat([bc_valid, lt_valid])



train_dataset = KistDataset(train_set)
valid_dataset = KistDataset(valid_set)

optimizer = optim.Adam(model.parameters(), lr=lr)

train_data_loader = DataLoader(train_dataset,
                               batch_size=batch_size,
                               shuffle=True)

valid_data_loader = DataLoader(valid_dataset,
                               batch_size=valid_batch_size)

In [6]:
for epoch in tqdm(range(epochs)):
    for step, (before_image, after_image, time_delta) in tqdm(enumerate(train_data_loader)):
        before_image = before_image.to(device)
        after_image = after_image.to(device)
        time_delta = time_delta.to(device)

        optimizer.zero_grad()
        logit = model(before_image, after_image)
        train_loss = (torch.sum(torch.abs(logit.squeeze(1).float() - time_delta.float())) /
                      torch.LongTensor([batch_size]).squeeze(0).to(device))
        train_loss.backward()
        optimizer.step()

        if step % 15 == 0:
            print('\n=====================loss=======================')
            print(f'\n=====================EPOCH: {epoch}=======================')
            print(f'\n=====================step: {step}=======================')
            print('MAE_loss : ', train_loss.detach().cpu().numpy())

    valid_losses = []
    with torch.no_grad():
        for valid_before, valid_after, time_delta in tqdm(valid_data_loader):
            valid_before = valid_before.to(device)
            valid_after = valid_after.to(device)
            valid_time_delta = time_delta.to(device)


            logit = model(valid_before, valid_after)
            valid_loss = (torch.sum(torch.abs(logit.squeeze(1).float() - valid_time_delta.float())) /
                          torch.LongTensor([valid_batch_size]).squeeze(0).to(device))
            valid_losses.append(valid_loss.detach().cpu())


    print(f'VALIDATION_LOSS MAE : {sum(valid_losses)/len(valid_losses)}')
    checkpoint = {
        'model': model.state_dict(),

    }

    torch.save(checkpoint, 'resnet50_v1.pt')

  0%|          | 0/10 [00:00<?, ?it/s]

0it [00:00, ?it/s]




MAE_loss :  12.737005



MAE_loss :  8.804378



MAE_loss :  4.8351297



MAE_loss :  4.5114164



MAE_loss :  4.475135



MAE_loss :  2.354857



MAE_loss :  2.4400852



MAE_loss :  2.7042553



MAE_loss :  2.2161512



MAE_loss :  3.2609081



MAE_loss :  1.7877272



MAE_loss :  2.251459



MAE_loss :  1.5555574



MAE_loss :  2.0988193



MAE_loss :  1.1987714



MAE_loss :  1.2389364



MAE_loss :  1.3288059



MAE_loss :  1.177437



MAE_loss :  1.8580672


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.8479191064834595


0it [00:00, ?it/s]




MAE_loss :  2.6303794



MAE_loss :  1.9491479



MAE_loss :  2.2543743



MAE_loss :  1.5592685



MAE_loss :  1.257949



MAE_loss :  3.484218



MAE_loss :  1.4661969



MAE_loss :  1.9499276



MAE_loss :  1.39532



MAE_loss :  1.8191592



MAE_loss :  1.2707853



MAE_loss :  2.3665853



MAE_loss :  1.1959652



MAE_loss :  1.8280516



MAE_loss :  2.153027



MAE_loss :  1.194313



MAE_loss :  2.2835858



MAE_loss :  1.9893417



MAE_loss :  1.4566329


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.5385897159576416


0it [00:00, ?it/s]




MAE_loss :  1.5291789



MAE_loss :  3.303574



MAE_loss :  1.1185389



MAE_loss :  1.2093974



MAE_loss :  1.0876429



MAE_loss :  1.7343547



MAE_loss :  1.1132541



MAE_loss :  1.1601634



MAE_loss :  2.4895153



MAE_loss :  1.1386125



MAE_loss :  2.0792742



MAE_loss :  0.988672



MAE_loss :  1.2067461



MAE_loss :  0.75112617



MAE_loss :  1.4292972



MAE_loss :  0.94762117



MAE_loss :  1.0388919



MAE_loss :  1.9262222



MAE_loss :  1.4775077


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.4372442960739136


0it [00:00, ?it/s]




MAE_loss :  2.1626158



MAE_loss :  2.0501902



MAE_loss :  0.9852154



MAE_loss :  1.8699992



MAE_loss :  2.808031



MAE_loss :  0.9963601



MAE_loss :  1.7826288



MAE_loss :  1.3120258



MAE_loss :  1.4139503



MAE_loss :  1.0900121



MAE_loss :  1.0736566



MAE_loss :  1.8581208



MAE_loss :  0.92918354



MAE_loss :  2.9547906



MAE_loss :  0.6434061



MAE_loss :  1.4962317



MAE_loss :  3.642292



MAE_loss :  2.8318865



MAE_loss :  2.9394708


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.5379841327667236


0it [00:00, ?it/s]




MAE_loss :  1.7274668



MAE_loss :  1.7153065



MAE_loss :  1.3933728



MAE_loss :  0.74533033



MAE_loss :  1.5864679



MAE_loss :  0.8373383



MAE_loss :  1.153925



MAE_loss :  1.1136622



MAE_loss :  2.5998168



MAE_loss :  1.6567793



MAE_loss :  0.89639896



MAE_loss :  3.1606786



MAE_loss :  1.4189134



MAE_loss :  1.387059



MAE_loss :  2.54487



MAE_loss :  2.898705



MAE_loss :  0.7205132



MAE_loss :  1.4886703



MAE_loss :  2.6006436


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.2733372449874878


0it [00:00, ?it/s]




MAE_loss :  1.9987934



MAE_loss :  1.2332458



MAE_loss :  0.55608857



MAE_loss :  1.7513101



MAE_loss :  2.868637



MAE_loss :  1.2152091



MAE_loss :  1.254117



MAE_loss :  1.4883685



MAE_loss :  2.2393117



MAE_loss :  1.3041754



MAE_loss :  1.3815371



MAE_loss :  3.459239



MAE_loss :  0.95952034



MAE_loss :  0.942872



MAE_loss :  1.1330253



MAE_loss :  1.3001109



MAE_loss :  1.1612749



MAE_loss :  0.66572046



MAE_loss :  1.2256976


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.157650351524353


0it [00:00, ?it/s]




MAE_loss :  1.5555224



MAE_loss :  0.76997924



MAE_loss :  0.7540852



MAE_loss :  0.8670684



MAE_loss :  1.0572754



MAE_loss :  0.734951



MAE_loss :  0.8213978



MAE_loss :  0.7578014



MAE_loss :  2.5847588



MAE_loss :  1.4003465



MAE_loss :  0.9693128



MAE_loss :  0.5677587



MAE_loss :  2.4609652



MAE_loss :  0.9106597



MAE_loss :  1.2202041



MAE_loss :  1.4477557



MAE_loss :  1.4433119



MAE_loss :  1.1578681



MAE_loss :  1.1975857


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.1541821956634521


0it [00:00, ?it/s]




MAE_loss :  0.9640225



MAE_loss :  2.2777162



MAE_loss :  0.6833091



MAE_loss :  0.97624975



MAE_loss :  1.7779367



MAE_loss :  1.3432493



MAE_loss :  4.7332835



MAE_loss :  2.9354773



MAE_loss :  0.73593444



MAE_loss :  1.4787774



MAE_loss :  0.7556242



MAE_loss :  1.4936895



MAE_loss :  0.7736889



MAE_loss :  0.7629318



MAE_loss :  0.63960826



MAE_loss :  1.0554034



MAE_loss :  1.2104042



MAE_loss :  0.97157425



MAE_loss :  3.3627176


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.115152359008789


0it [00:00, ?it/s]




MAE_loss :  1.9377015



MAE_loss :  0.88023096



MAE_loss :  1.361668



MAE_loss :  2.0524898



MAE_loss :  0.74540234



MAE_loss :  0.6216215



MAE_loss :  1.4690518



MAE_loss :  1.7741163



MAE_loss :  1.3934133



MAE_loss :  1.0282044



MAE_loss :  1.0624764



MAE_loss :  0.59023094



MAE_loss :  2.560965



MAE_loss :  1.6114461



MAE_loss :  1.9883164



MAE_loss :  2.251968



MAE_loss :  0.5274932



MAE_loss :  0.82138085



MAE_loss :  1.1666205


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.2614505290985107


0it [00:00, ?it/s]




MAE_loss :  2.0954604



MAE_loss :  0.8210161



MAE_loss :  0.7116605



MAE_loss :  1.3648894



MAE_loss :  1.9772626



MAE_loss :  0.5420829



MAE_loss :  1.4855727



MAE_loss :  0.64907545



MAE_loss :  0.8268616



MAE_loss :  0.6470954



MAE_loss :  0.83887863



MAE_loss :  1.7558892



MAE_loss :  1.6298952



MAE_loss :  1.3737581



MAE_loss :  1.0944598



MAE_loss :  0.51985097



MAE_loss :  0.7959128



MAE_loss :  0.645032



MAE_loss :  0.6432016


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.0315072536468506


In [7]:
test_set = pd.read_csv('./drive/MyDrive/open_224/test_dataset/test_data.csv')
test_set['l_root'] = test_set['before_file_path'].map(lambda x: './drive/MyDrive/open_224/test_dataset/' + x.split('_')[1] + '/' + x.split('_')[2])
test_set['r_root'] = test_set['after_file_path'].map(lambda x: './drive/MyDrive/open_224/test_dataset/' + x.split('_')[1] + '/' + x.split('_')[2])
test_set['before_file_path'] = test_set['l_root'] + '/' + test_set['before_file_path'] + '.png'
test_set['after_file_path'] = test_set['r_root'] + '/' + test_set['after_file_path'] + '.png'

test_dataset = KistDataset(test_set, is_test=True)
test_data_loader = DataLoader(test_dataset,
                               batch_size=32)

In [8]:
test_value = []
with torch.no_grad():
    for test_before, test_after in tqdm(test_data_loader):
        test_before = test_before.to(device)
        test_after = test_after.to(device)
        logit = model(test_before, test_after)
        value = logit.squeeze(1).detach().cpu().float()
        
        test_value.extend(value)

  0%|          | 0/124 [00:00<?, ?it/s]

In [9]:
# submission 형식을 불러온다.
submission = pd.read_csv('./drive/MyDrive/open_224/sample_submission.csv')

# 예측한 값들은 텐서 형태로 변환 시켜준다.
predict = torch.FloatTensor(test_value)

# 음수의 값을 갖는 모든 값들을 1 Day 차이가 발생하도록 바꿔줌
temp_predict = predict.numpy()
temp_predict[np.where(temp_predict<1)] = 1

# 모델의 예측 값을 저장함
submission['time_delta'] = temp_predict
submission.to_csv('resnet50_v1.csv', index=False)