In [12]:
import torch
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from torch import optim
from torch import nn

from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
from torchvision import transforms

import random
from glob import glob
import pandas as pd
import numpy as np
from PIL import Image

In [13]:
from torchvision.transforms.transforms import Resize
from glob import glob

import pandas as pd
import numpy as np
from PIL import Image
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
from torchvision import transforms

def extract_day(images):
    day = int(images.split('.')[-2][-2:])
    return day

def make_day_array(images):
    day_array = np.array([extract_day(x) for x in images])
    return day_array

def make_combination(length, species, data_frame, direct_name):
    before_file_path = []
    after_file_path = []
    time_delta = []

    for i in range(length):
        
        # 하위 폴더 중에서 랜덤하게 선택을 한다.
        direct = random.randrange(0,len(direct_name))
        # 위에서 결정된 폴더를 선택한다. 
        temp = data_frame[data_frame['version'] == direct_name[direct]]
    
        # 밑은 기존의 코드와 동일합니다.
        sample = temp[temp['species'] == species].sample(2)
        after = sample[sample['day'] == max(sample['day'])].reset_index(drop=True)
        before = sample[sample['day'] == min(sample['day'])].reset_index(drop=True)

        before_file_path.append(before.iloc[0]['file_name'])
        after_file_path.append(after.iloc[0]['file_name'])
        delta = int(after.iloc[0]['day'] - before.iloc[0]['day'])
        time_delta.append(delta)

    combination_df = pd.DataFrame({
        'before_file_path': before_file_path,
        'after_file_path': after_file_path,
        'time_delta': time_delta,
    })

    combination_df['species'] = species

    return combination_df

def randomFromFiveCrops(crops):
    randomIndex = random.randint(0,len(crops)-1)
    return crops[randomIndex]

class TrainDataset(Dataset):
    def __init__(self, combination_df, is_test=None):
        self.combination_df = combination_df
        self.transform = transforms.Compose([
            transforms.FiveCrop(120),
            transforms.Lambda(lambda crops: randomFromFiveCrops(crops)),
            transforms.Resize(224),           
            transforms.RandomHorizontalFlip(p=0.5),  # 수평 뒤집기
            transforms.RandomVerticalFlip(p=0.5), # 수직 뒤집기
            transforms.RandomAffine((-20, 20)), # 아핀 변환: 선형 변환에서 이동 변환까지 포함
            transforms.RandomRotation(90), # 90도 회전                                  
            transforms.ToTensor(),                                 
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        self.is_test = is_test

    def __getitem__(self, idx):
        before_image = Image.open(self.combination_df.iloc[idx]['before_file_path'])
        after_image = Image.open(self.combination_df.iloc[idx]['after_file_path'])

        before_image = self.transform(before_image)
        after_image = self.transform(after_image)
        if self.is_test:
            return before_image, after_image
        time_delta = self.combination_df.iloc[idx]['time_delta']
        return before_image, after_image, time_delta

    def __len__(self):
        return len(self.combination_df)

class TestDataset(Dataset):
    def __init__(self, combination_df, is_test=None):
        self.combination_df = combination_df
        self.transform = transforms.Compose([                                                             
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        self.is_test = is_test

    def __getitem__(self, idx):
        before_image = Image.open(self.combination_df.iloc[idx]['before_file_path'])
        after_image = Image.open(self.combination_df.iloc[idx]['after_file_path'])

        before_image = self.transform(before_image)
        after_image = self.transform(after_image)
        if self.is_test:
            return before_image, after_image
        time_delta = self.combination_df.iloc[idx]['time_delta']
        return before_image, after_image, time_delta

    def __len__(self):
        return len(self.combination_df)

In [14]:
import torch
from torch import nn
from torchvision.models import resnet50


class CompareCNN(nn.Module):

    def __init__(self):
        super(CompareCNN, self).__init__()
        self.resnet = resnet50(pretrained=True)
        self.fc_layer = nn.Linear(1000, 1) 

    def forward(self, input):
        x = self.resnet(input)
        output = self.fc_layer(x)
        return output



class CompareNet(nn.Module):

    def __init__(self):
        super(CompareNet, self).__init__()
        self.before_net = CompareCNN()
        self.after_net = CompareCNN()

    def forward(self, before_input, after_input):
        before = self.before_net(before_input)
        after = self.after_net(after_input)
        delta = before - after
        return delta

In [15]:
import gc
gc.collect()
torch.cuda.empty_cache()

In [16]:
def seed_everything(seed): # seed 고정
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)


seed_everything(2048)

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
lr = 1e-5
epochs = 20
batch_size = 64
valid_batch_size = 50

model = CompareNet().to(device)

# 학습 데이터가 있는 폴더 위치
root_path = './drive/MyDrive/open_224/train_dataset/'

# BC 폴더와 LT 폴더에 있는 하위 폴더를 저장한다.
bc_direct = glob(root_path + '/BC/*')
bc_direct_name = [x[-5:] for x in bc_direct]
lt_direct = glob(root_path + '/LT/*')
lt_direct_name = [x[-5:] for x in lt_direct]

# 하위 폴더에 있는 이미지들을 하위 폴더 이름과 매칭시켜서 저장한다.
bc_images = {key : glob(name + '/*.png') for key,name in zip(bc_direct_name, bc_direct)}
lt_images = {key : glob(name + '/*.png') for key,name in zip(lt_direct_name, lt_direct)}

# 하위 폴더에 있는 이미지들에서 날짜 정보만 따로 저장한다.
bc_dayes = {key : make_day_array(bc_images[key]) for key in bc_direct_name}
lt_dayes = {key : make_day_array(lt_images[key]) for key in lt_direct_name}

bc_dfs = []
for i in bc_direct_name:
    bc_df = pd.DataFrame({
        'file_name':bc_images[i],
        'day':bc_dayes[i],
        'species':'bc',
        'version':i
    })
    bc_dfs.append(bc_df)
    
lt_dfs = []
for i in lt_direct_name:
    lt_df = pd.DataFrame({
        'file_name':lt_images[i],
        'day':lt_dayes[i],
        'species':'lt',
        'version':i
    })
    lt_dfs.append(lt_df)

bc_dataframe = pd.concat(bc_dfs).reset_index(drop=True)
lt_dataframe = pd.concat(lt_dfs).reset_index(drop=True)
total_dataframe = pd.concat([bc_dataframe, lt_dataframe]).reset_index(drop=True)

bc_combination = make_combination(5000, 'bc', total_dataframe, bc_direct_name)
lt_combination = make_combination(5000, 'lt', total_dataframe, lt_direct_name)

bc_train = bc_combination.iloc[:4500]
bc_valid = bc_combination.iloc[4500:]

lt_train = lt_combination.iloc[:4500]
lt_valid = lt_combination.iloc[4500:]

train_set = pd.concat([bc_train, lt_train])
valid_set = pd.concat([bc_valid, lt_valid])



train_dataset = TrainDataset(train_set)
valid_dataset = TestDataset(valid_set)

optimizer = optim.Adam(model.parameters(), lr=lr)

train_data_loader = DataLoader(train_dataset,
                               batch_size=batch_size,
                               shuffle=True)

valid_data_loader = DataLoader(valid_dataset,
                               batch_size=valid_batch_size)

In [17]:
import gc
gc.collect()
torch.cuda.empty_cache()

In [18]:
for epoch in tqdm(range(epochs)):
    for step, (before_image, after_image, time_delta) in tqdm(enumerate(train_data_loader)):
        before_image = before_image.to(device)
        after_image = after_image.to(device)
        time_delta = time_delta.to(device)

        optimizer.zero_grad()
        logit = model(before_image, after_image)
        train_loss = (torch.sum(torch.abs(logit.squeeze(1).float() - time_delta.float())) /
                      torch.LongTensor([batch_size]).squeeze(0).to(device))
        train_loss.backward()
        optimizer.step()

        if step % 15 == 0:
            print('\n=====================loss=======================')
            print(f'\n=====================EPOCH: {epoch}=======================')
            print(f'\n=====================step: {step}=======================')
            print('MAE_loss : ', train_loss.detach().cpu().numpy())

    valid_losses = []
    with torch.no_grad():
        for valid_before, valid_after, time_delta in tqdm(valid_data_loader):
            valid_before = valid_before.to(device)
            valid_after = valid_after.to(device)
            valid_time_delta = time_delta.to(device)


            logit = model(valid_before, valid_after)
            valid_loss = (torch.sum(torch.abs(logit.squeeze(1).float() - valid_time_delta.float())) /
                          torch.LongTensor([valid_batch_size]).squeeze(0).to(device))
            valid_losses.append(valid_loss.detach().cpu())


    print(f'VALIDATION_LOSS MAE : {sum(valid_losses)/len(valid_losses)}')
    checkpoint = {
        'model': model.state_dict(),

    }

    torch.save(checkpoint, 'resnet50_v4.pt')

  0%|          | 0/20 [00:00<?, ?it/s]

0it [00:00, ?it/s]




MAE_loss :  13.14554



MAE_loss :  9.415086



MAE_loss :  7.456052



MAE_loss :  6.1843266



MAE_loss :  4.551488



MAE_loss :  4.1162486



MAE_loss :  3.146556



MAE_loss :  3.2596464



MAE_loss :  3.6637363



MAE_loss :  2.4781544


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 3.1458663940429688


0it [00:00, ?it/s]




MAE_loss :  2.8435616



MAE_loss :  2.3971124



MAE_loss :  3.7287357



MAE_loss :  2.6686754



MAE_loss :  2.236857



MAE_loss :  2.41877



MAE_loss :  2.5017164



MAE_loss :  2.1293018



MAE_loss :  2.2273436



MAE_loss :  2.3805246


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.889057159423828


0it [00:00, ?it/s]




MAE_loss :  2.414884



MAE_loss :  2.3145728



MAE_loss :  2.3380942



MAE_loss :  2.3579106



MAE_loss :  2.5023654



MAE_loss :  2.3491135



MAE_loss :  2.6649103



MAE_loss :  2.1437078



MAE_loss :  2.3205338



MAE_loss :  2.2408247


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.834993839263916


0it [00:00, ?it/s]




MAE_loss :  1.8135569



MAE_loss :  2.0838096



MAE_loss :  1.8151705



MAE_loss :  2.218272



MAE_loss :  2.1255755



MAE_loss :  2.4445167



MAE_loss :  2.1095438



MAE_loss :  1.80214



MAE_loss :  1.7180966



MAE_loss :  2.7937818


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.752601385116577


0it [00:00, ?it/s]




MAE_loss :  3.534999



MAE_loss :  2.5201445



MAE_loss :  1.9423699



MAE_loss :  2.3355372



MAE_loss :  1.7659543



MAE_loss :  1.7783599



MAE_loss :  2.047964



MAE_loss :  2.0356646



MAE_loss :  1.7614788



MAE_loss :  2.263578


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.766361713409424


0it [00:00, ?it/s]




MAE_loss :  1.795354



MAE_loss :  2.4421606



MAE_loss :  1.9990429



MAE_loss :  1.9609354



MAE_loss :  2.4807234



MAE_loss :  1.6343877



MAE_loss :  2.1012552



MAE_loss :  1.9903326



MAE_loss :  2.1873941



MAE_loss :  2.1632903


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.7057645320892334


0it [00:00, ?it/s]




MAE_loss :  2.3778758



MAE_loss :  2.6737661



MAE_loss :  2.151013



MAE_loss :  1.9258876



MAE_loss :  2.1408677



MAE_loss :  2.507719



MAE_loss :  2.2096581



MAE_loss :  2.3612776



MAE_loss :  1.6092343



MAE_loss :  1.849623


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.726930618286133


0it [00:00, ?it/s]




MAE_loss :  2.0248609



MAE_loss :  2.8702207



MAE_loss :  3.7906847



MAE_loss :  1.5080793



MAE_loss :  1.637519



MAE_loss :  2.2283304



MAE_loss :  2.5101721



MAE_loss :  2.1703176



MAE_loss :  1.9942918



MAE_loss :  2.925579


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.677701950073242


0it [00:00, ?it/s]




MAE_loss :  1.5319595



MAE_loss :  2.1948469



MAE_loss :  1.9422777



MAE_loss :  1.9730535



MAE_loss :  1.8623223



MAE_loss :  1.5987649



MAE_loss :  1.8539063



MAE_loss :  1.5929557



MAE_loss :  1.8744762



MAE_loss :  1.6464455


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.598978042602539


0it [00:00, ?it/s]




MAE_loss :  1.5823436



MAE_loss :  2.7366652



MAE_loss :  2.1610956



MAE_loss :  1.8465056



MAE_loss :  1.6758157



MAE_loss :  2.0644627



MAE_loss :  3.2366667



MAE_loss :  1.6963315



MAE_loss :  1.58315



MAE_loss :  1.7716112


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.6502184867858887


0it [00:00, ?it/s]




MAE_loss :  1.4124068



MAE_loss :  1.7596877



MAE_loss :  1.7073061



MAE_loss :  1.6961403



MAE_loss :  1.3319421



MAE_loss :  1.6674086



MAE_loss :  1.6532896



MAE_loss :  3.0076342



MAE_loss :  1.6136488



MAE_loss :  2.072846


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.532172441482544


0it [00:00, ?it/s]




MAE_loss :  1.5073729



MAE_loss :  1.7045947



MAE_loss :  1.7590129



MAE_loss :  1.7590704



MAE_loss :  1.4686786



MAE_loss :  1.3005265



MAE_loss :  2.1045108



MAE_loss :  1.6381503



MAE_loss :  1.6696811



MAE_loss :  1.7735744


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.518051862716675


0it [00:00, ?it/s]




MAE_loss :  2.1285448



MAE_loss :  1.4788959



MAE_loss :  1.4428244



MAE_loss :  2.3532393



MAE_loss :  2.2373238



MAE_loss :  2.0798235



MAE_loss :  2.1903276



MAE_loss :  1.4861441



MAE_loss :  1.8318719



MAE_loss :  2.570578


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.593327045440674


0it [00:00, ?it/s]




MAE_loss :  1.3573285



MAE_loss :  2.6733384



MAE_loss :  1.2994192



MAE_loss :  2.0500448



MAE_loss :  1.7349968



MAE_loss :  1.6936831



MAE_loss :  2.1205935



MAE_loss :  1.9498837



MAE_loss :  1.3452938



MAE_loss :  2.0461824


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.614724636077881


0it [00:00, ?it/s]




MAE_loss :  1.5733972



MAE_loss :  3.414537



MAE_loss :  1.2913121



MAE_loss :  1.7372879



MAE_loss :  2.925085



MAE_loss :  1.7168101



MAE_loss :  1.5557396



MAE_loss :  1.327625



MAE_loss :  1.6305996



MAE_loss :  1.8892376


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.4845564365386963


0it [00:00, ?it/s]




MAE_loss :  1.7746323



MAE_loss :  1.8019705



MAE_loss :  1.2523432



MAE_loss :  1.39656



MAE_loss :  1.3357347



MAE_loss :  1.6003469



MAE_loss :  1.4506454



MAE_loss :  1.5817564



MAE_loss :  1.7027099



MAE_loss :  1.650084


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.476344585418701


0it [00:00, ?it/s]




MAE_loss :  1.4704877



MAE_loss :  1.7229795



MAE_loss :  1.4009919



MAE_loss :  1.6564776



MAE_loss :  1.7672987



MAE_loss :  1.2776407



MAE_loss :  1.4519167



MAE_loss :  2.1753838



MAE_loss :  1.4931593



MAE_loss :  1.6723138


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.4863133430480957


0it [00:00, ?it/s]




MAE_loss :  1.7233554



MAE_loss :  1.8222437



MAE_loss :  2.555937



MAE_loss :  1.993322



MAE_loss :  1.23431



MAE_loss :  1.4362848



MAE_loss :  1.2327646



MAE_loss :  1.794579



MAE_loss :  1.2086343



MAE_loss :  1.2373731


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.6277213096618652


0it [00:00, ?it/s]




MAE_loss :  1.3728039



MAE_loss :  1.2068148



MAE_loss :  1.4198766



MAE_loss :  1.4880128



MAE_loss :  1.2902235



MAE_loss :  1.5664043



MAE_loss :  1.2827032



MAE_loss :  1.1289178



MAE_loss :  1.014091



MAE_loss :  1.6528246


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.536561965942383


0it [00:00, ?it/s]




MAE_loss :  1.1703608



MAE_loss :  1.3296745



MAE_loss :  1.7796493



MAE_loss :  1.5714773



MAE_loss :  1.4159579



MAE_loss :  1.4242806



MAE_loss :  1.1167847



MAE_loss :  1.7065287



MAE_loss :  1.7095108



MAE_loss :  1.4968301


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.503229856491089


In [19]:
test_set = pd.read_csv('./drive/MyDrive/open_224/test_dataset/test_data.csv')
test_set['l_root'] = test_set['before_file_path'].map(lambda x: './drive/MyDrive/open_224/test_dataset/' + x.split('_')[1] + '/' + x.split('_')[2])
test_set['r_root'] = test_set['after_file_path'].map(lambda x: './drive/MyDrive/open_224/test_dataset/' + x.split('_')[1] + '/' + x.split('_')[2])
test_set['before_file_path'] = test_set['l_root'] + '/' + test_set['before_file_path'] + '.png'
test_set['after_file_path'] = test_set['r_root'] + '/' + test_set['after_file_path'] + '.png'

test_dataset = TestDataset(test_set, is_test=True)
test_data_loader = DataLoader(test_dataset,
                               batch_size=64)

In [20]:
test_value = []
with torch.no_grad():
    for test_before, test_after in tqdm(test_data_loader):
        test_before = test_before.to(device)
        test_after = test_after.to(device)
        logit = model(test_before, test_after)
        value = logit.squeeze(1).detach().cpu().float()
        
        test_value.extend(value)

  0%|          | 0/62 [00:00<?, ?it/s]

In [21]:
# submission 형식을 불러온다.
submission = pd.read_csv('./drive/MyDrive/open_224/sample_submission.csv')

# 예측한 값들은 텐서 형태로 변환 시켜준다.
predict = torch.FloatTensor(test_value)

# 음수의 값을 갖는 모든 값들을 1 Day 차이가 발생하도록 바꿔줌
temp_predict = predict.numpy()
temp_predict[np.where(temp_predict<1)] = 1

# 모델의 예측 값을 저장함
submission['time_delta'] = temp_predict
submission.to_csv('resnet50_v4.csv', index=False)