In [1]:
## import library ##
import torch
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from torch import optim
from torch import nn

from torch.utils.data import Dataset
from torch.optim.lr_scheduler import StepLR
from torchvision.transforms import ToTensor
from torchvision import transforms

import random
from glob import glob
import pandas as pd
import numpy as np
from PIL import Image

In [None]:
!pip install timm
import timm

In [3]:
def seed_everything(seed):
    """Fix the seed of every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (the default generator plus the CUDA seeding entry points), then sets the
    cuDNN flags to deterministic with benchmarking turned off.

    Args:
        seed: Integer seed handed to every generator.
    """
    # Pure-Python and NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators; the ``*_all`` variant is the one that matters
    # when several GPUs are in use.
    for seed_fn in (torch.manual_seed,
                    torch.cuda.manual_seed,
                    torch.cuda.manual_seed_all):
        seed_fn(seed)

    # cuDNN flags: deterministic algorithms on, benchmark auto-tuning off.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


seed_everything(2022)

In [4]:
## Load data & Preprocess data ##
def extract_day(file_name):
    """Parse the day number found at the end of an image file name.

    The name is split on ``'.'``; the last two characters of the piece that
    sits just before the final piece are converted to ``int``.

    Args:
        file_name: Path or file name containing at least one ``'.'``.

    Returns:
        The parsed day as an ``int``.
    """
    pieces = file_name.split('.')
    stem = pieces[-2]        # text immediately before the last '.'
    day_digits = stem[-2:]   # trailing two characters of that text
    return int(day_digits)


def make_day_array(image_pathes):
    """Build a NumPy array holding the day number of every given image path.

    Args:
        image_pathes: Iterable of image paths understood by ``extract_day``.

    Returns:
        ``np.ndarray`` with one day value per path, in input order.
    """
    days = list(map(extract_day, image_pathes))
    return np.array(days)


def make_image_path_array(root_path=None):
    """Collect the PNG image paths of both species under a dataset root.

    The expected layout is ``<root>/BC/<sub_dir>/*.png`` and
    ``<root>/LT/<sub_dir>/*.png``.

    Args:
        root_path: Dataset root directory, with or without a trailing path
            separator. ``None`` means the current directory.

    Returns:
        Tuple ``(bc_image_path, lt_image_path)`` of lists of PNG paths. Both
        lists are sorted so that the order does not depend on the file
        system, which keeps seeded random sampling downstream reproducible.
    """
    import os  # local import: the notebook's import cell does not provide it

    base_dir = '.' if root_path is None else root_path

    def collect_png_paths(species_dir):
        """Return the sorted PNG paths one directory level below a species folder."""
        png_paths = []
        # os.path.join inserts the separator only when it is missing, so a
        # root without a trailing slash no longer yields an empty result.
        for sub_dir in sorted(glob(os.path.join(base_dir, species_dir, '*'))):
            png_paths.extend(sorted(glob(os.path.join(sub_dir, '*.png'))))
        return png_paths

    return collect_png_paths('BC'), collect_png_paths('LT')


def make_dataframe(root_path=None):
    """Build one DataFrame listing every image of both species.

    Args:
        root_path: Dataset root forwarded to ``make_image_path_array``.

    Returns:
        DataFrame with columns ``file_name``, ``day`` and ``species``
        (``'bc'`` rows first, then ``'lt'``), indexed from 0.
    """
    bc_paths, lt_paths = make_image_path_array(root_path)

    species_frames = []
    for label, paths in (('bc', bc_paths), ('lt', lt_paths)):
        frame = pd.DataFrame({'file_name': paths,
                              'day': make_day_array(paths)})
        frame['species'] = label
        species_frames.append(frame)

    # Stack both species and discard the per-species indices.
    return pd.concat(species_frames).reset_index(drop=True)


def make_combination(length, species, data_frame):
    """Randomly pair images of one species into (before, after) samples.

    For each of ``length`` draws, two distinct rows of the requested species
    are sampled; the row with the smaller ``day`` becomes "before" and the
    other one "after". When both rows share the same day they are still kept
    as two different images with a delta of 0.

    Args:
        length: Number of pairs to generate.
        species: Value of the ``species`` column to draw from.
        data_frame: DataFrame with ``file_name``, ``day`` and ``species``
            columns.

    Returns:
        DataFrame with columns ``before_file_path``, ``after_file_path``,
        ``time_delta`` (after day minus before day) and ``species``.

    Raises:
        ValueError: Raised by ``DataFrame.sample`` when fewer than two rows
            of the requested species exist.
    """
    # Loop-invariant: filter the species once rather than on every draw.
    species_frame = data_frame[data_frame['species'] == species]

    before_file_path = []
    after_file_path = []
    time_delta = []

    for _ in range(length):
        # A stable sort keeps the sampled order on ties, so equal-day draws
        # still give two different rows instead of one row paired with itself.
        pair = species_frame.sample(2).sort_values('day', kind='mergesort')
        before, after = pair.iloc[0], pair.iloc[1]

        before_file_path.append(before['file_name'])
        after_file_path.append(after['file_name'])
        time_delta.append(int(after['day'] - before['day']))

    combination_df = pd.DataFrame({
        'before_file_path': before_file_path,
        'after_file_path': after_file_path,
        'time_delta': time_delta,
    })

    combination_df['species'] = species

    return combination_df

In [5]:
## Dataset ##
class KistDataset(Dataset):
    """Dataset of (before, after) image pairs described by a DataFrame.

    Each row of ``combination_df`` must provide ``before_file_path`` and
    ``after_file_path``; a ``time_delta`` column is also required unless the
    dataset is used in test mode.

    With any mode flag set, each image is turned into a stack of five 96x96
    crops, i.e. a tensor of shape ``(5, 3, 96, 96)``.

    Args:
        combination_df: DataFrame describing the image pairs.
        is_test: If truthy, items are ``(before, after)`` without a label.
        is_valid: If truthy, items are ``(before, after, time_delta)`` built
            with ``transform_test``.
        is_train: If truthy, items are ``(before, after, time_delta)`` built
            with ``transform``.

    If no flag is set the images are returned untransformed (as RGB PIL
    images) together with ``time_delta``.
    """

    def __init__(self, combination_df, is_test= None, is_valid = None, is_train = None):
        self.combination_df = combination_df
        # Both pipelines are currently identical; they stay separate
        # attributes so that train-only augmentation can be added later.
        self.transform = self._build_five_crop_transform()
        self.transform_test = self._build_five_crop_transform()
        self.is_test = is_test
        self.is_valid = is_valid
        self.is_train = is_train

    @staticmethod
    def _crops_to_tensor(crops):
        """Convert a sequence of PIL crops into one stacked tensor."""
        return torch.stack([ToTensor()(crop) for crop in crops])

    @staticmethod
    def _build_five_crop_transform():
        """Create the FiveCrop(96) -> stacked-tensor pipeline."""
        # A named function instead of a lambda keeps the transform picklable.
        return transforms.Compose([
            transforms.FiveCrop(96),
            transforms.Lambda(KistDataset._crops_to_tensor),
        ])

    @staticmethod
    def _load_rgb(path):
        """Open an image, force three RGB channels and release the file handle."""
        # convert() returns a fully loaded copy, so the file can be closed
        # right away; it also normalises RGBA / palette / grayscale PNGs.
        with Image.open(path) as image:
            return image.convert('RGB')

    def __getitem__(self, idx):
        # Look the row up once instead of once per column.
        row = self.combination_df.iloc[idx]
        before_image = self._load_rgb(row['before_file_path'])
        after_image = self._load_rgb(row['after_file_path'])

        if self.is_test:
            # Test rows carry no label.
            return self.transform_test(before_image), self.transform_test(after_image)

        # elif: never run a second pipeline on an already transformed tensor.
        if self.is_valid:
            before_image = self.transform_test(before_image)
            after_image = self.transform_test(after_image)
        elif self.is_train:
            before_image = self.transform(before_image)
            after_image = self.transform(after_image)

        time_delta = row['time_delta']
        return before_image, after_image, time_delta

    def __len__(self):
        return len(self.combination_df)

In [6]:
## Model ##
class CompareCNN(nn.Module):
    """Image backbone followed by a linear head that emits one scalar per image."""

    def __init__(self):
        super().__init__()
        # NOTE: the attribute is called `swin` although it holds the timm
        # 'tf_efficientnet_b7' model; the name is kept so that saved
        # state_dict keys stay valid.
        self.swin = timm.create_model('tf_efficientnet_b7', pretrained=True)
        # The head consumes a 1000-wide backbone output and reduces it to 1.
        self.fc_layer = nn.Linear(1000, 1)

    def forward(self, input):
        """Return the head's output for a batch of images."""
        return self.fc_layer(self.swin(input))


class CompareNet(nn.Module):
    """Two independent ``CompareCNN`` branches whose outputs are subtracted."""

    def __init__(self):
        super().__init__()
        # Separate (non-shared) weights for the two images.
        self.before_net = CompareCNN()
        self.after_net = CompareCNN()

    def forward(self, before_input, after_input):
        """Return ``before_net(before_input) - after_net(after_input)``."""
        before_score = self.before_net(before_input)
        after_score = self.after_net(after_input)
        return before_score - after_score

In [None]:
## Training ##
# Hyper-parameters and device selection.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
lr = 1e-3
epochs = 12
batch_size = 32
valid_batch_size = 50

model = CompareNet().to(device)

total_dataframe = make_dataframe(root_path = "/content/drive/MyDrive/seculayer/plant/train_dataset/")

# 6000 random (before, after) pairs per species: the first 5500 are used for
# training and the remaining 500 for validation.
bt_combination = make_combination(6000, 'bc', total_dataframe)
lt_combination = make_combination(6000, 'lt', total_dataframe)

bt_train = bt_combination.iloc[:5500]
bt_valid = bt_combination.iloc[5500:]

lt_train = lt_combination.iloc[:5500]
lt_valid = lt_combination.iloc[5500:]

train_set = pd.concat([bt_train, lt_train])
valid_set = pd.concat([bt_valid, lt_valid])

train_dataset = KistDataset(train_set, is_train = True)
valid_dataset = KistDataset(valid_set, is_valid = True)

optimizer = optim.Adam(model.parameters(), lr=lr)
# Learning rate is multiplied by 0.1 every 5 epochs.
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

train_data_loader = DataLoader(train_dataset,
                               batch_size=batch_size,
                               shuffle=True)

valid_data_loader = DataLoader(valid_dataset,
                               batch_size=valid_batch_size)

best_loss = np.inf

for epoch in tqdm(range(epochs)):
    # Validation below switches to eval mode, so re-enable training
    # behaviour (BatchNorm statistics, Dropout) at the start of each epoch.
    model.train()

    # Wrap the loader, not enumerate(), so tqdm knows the number of batches.
    for step, (before_image, after_image, time_delta) in enumerate(tqdm(train_data_loader)):
        # Batches arrive as (batch, ncrops, C, H, W); only the first crop is
        # used. Indexing keeps the batch dimension even when the batch holds
        # a single sample, which chunk() + squeeze() did not.
        before_image = before_image[:, 0].to(device)
        after_image = after_image[:, 0].to(device)
        time_delta = time_delta.to(device)

        optimizer.zero_grad()
        logit = model(before_image, after_image)

        # Mean absolute error over the ACTUAL batch, so a smaller final
        # batch is not under-weighted by the nominal batch_size.
        train_loss = torch.abs(logit.squeeze(1).float() - time_delta.float()).mean()
        train_loss.backward()
        optimizer.step()

        if step % 100 == 0:
            print('Epoch: %d \t Step: %d \tTraining Loss: %.6f' %(epoch +1 , step + 1, train_loss.item()))

    scheduler.step()

    # torch.no_grad() only disables gradient tracking; eval() is what puts
    # BatchNorm / Dropout layers into inference behaviour.
    model.eval()
    valid_losses = []  # per-sample absolute errors, one tensor per batch
    with torch.no_grad():
        for valid_before, valid_after, valid_time_delta in tqdm(valid_data_loader):
            valid_before = valid_before[:, 0].to(device)
            valid_after = valid_after[:, 0].to(device)
            valid_time_delta = valid_time_delta.to(device)

            logit = model(valid_before, valid_after)
            valid_loss = torch.abs(logit.squeeze(1).float() - valid_time_delta.float())
            valid_losses.append(valid_loss.detach().cpu())

    # MAE over every validation sample (exact regardless of batch sizes).
    loss = torch.cat(valid_losses).mean().item()

    print(f'VALIDATION_LOSS MAE : {loss}')
    if best_loss > loss :
        # Keep only the weights of the best-validating epoch on disk.
        best_loss = loss
        torch.save (model.state_dict(), '/content/drive/MyDrive/seculayer/plant/model.pt')


In [None]:
## inference ##
test_set = pd.read_csv('/content/drive/MyDrive/seculayer/plant/test_dataset/test_data.csv')
# Build each image's directory from the 2nd and 3rd '_'-separated tokens of
# its name, then turn the bare names into full '.png' paths.
test_set['l_root'] = test_set['before_file_path'].map(lambda x: '/content/drive/MyDrive/seculayer/plant/test_dataset/' + x.split('_')[1] + '/' + x.split('_')[2])
test_set['r_root'] = test_set['after_file_path'].map(lambda x: '/content/drive/MyDrive/seculayer/plant/test_dataset/' + x.split('_')[1] + '/' + x.split('_')[2])
test_set['before_file_path'] = test_set['l_root'] + '/' + test_set['before_file_path'] + '.png'
test_set['after_file_path'] = test_set['r_root'] + '/' + test_set['after_file_path'] + '.png'

test_dataset = KistDataset(test_set, is_test=True)

test_data_loader = DataLoader(test_dataset,
                               batch_size=64)

# NOTE(review): this predicts with whatever weights `model` currently holds
# (the final epoch), not the best checkpoint written to model.pt during
# training. Load that state_dict first if the best weights are intended.

# torch.no_grad() does not change layer behaviour; eval() is required so
# BatchNorm / Dropout run in inference mode.
model.eval()

test_value = []
with torch.no_grad():
    for test_before, test_after in tqdm(test_data_loader):
        # Batches arrive as (batch, ncrops, C, H, W); take the first crop.
        # Indexing keeps the batch dimension even for a final batch of one
        # sample, where chunk() + squeeze() would drop it.
        test_before = test_before[:, 0].to(device)
        test_after = test_after[:, 0].to(device)

        logit = model(test_before, test_after)
        value = logit.squeeze(1).detach().cpu().float()

        # Store plain Python floats rather than 0-dim tensors.
        test_value.extend(value.tolist())

In [None]:
## Make Submission File ##
submission = pd.read_csv('/content/drive/MyDrive/seculayer/plant/sample_submission.csv')

predict = torch.FloatTensor(test_value)
temp_predict = predict.numpy()
# Raise every prediction below 1 up to 1, modifying the array in place.
below_one = temp_predict < 1
temp_predict[below_one] = 1

submission['time_delta'] = temp_predict
submission.to_csv('/content/drive/MyDrive/seculayer/plant/submission.csv', index=False)