In [None]:
import torch
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from torch import optim
from torch import nn

from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
from torchvision import transforms

import random
from glob import glob
import pandas as pd
import numpy as np
from PIL import Image

In [34]:
from glob import glob

import pandas as pd
import numpy as np
from PIL import Image
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
from torchvision import transforms


def extract_day(file_name):
    day = int(file_name.split('.')[-2][-2:])
    return day


def make_day_array(image_pathes):
    day_array = np.array([extract_day(file_name) for file_name in image_pathes])
    return day_array


def make_image_path_array(root_path=None):
    if root_path is None:
        bc_directories = glob('./BC/*')
        lt_directories = glob('./LT/*')

    else:
        bc_directories = glob(root_path + 'BC/*')
        lt_directories = glob(root_path + 'LT/*')

    bc_image_path = []
    for bc_path in bc_directories:
        images = glob(bc_path + '/*.png')
        bc_image_path.extend(images)

    lt_image_path = []
    for lt_path in lt_directories:
        images = glob(lt_path + '/*.png')
        lt_image_path.extend(images)

    return bc_image_path, lt_image_path


def make_dataframe(root_path=None):
    bc_image_path, lt_image_path = make_image_path_array(root_path)
    bc_day_array = make_day_array(bc_image_path)
    lt_day_array = make_day_array(lt_image_path)

    bc_df = pd.DataFrame({'file_name': bc_image_path,
                          'day': bc_day_array})
    bc_df['species'] = 'bc'

    lt_df = pd.DataFrame({'file_name': lt_image_path,
                          'day': lt_day_array})
    lt_df['species'] = 'lt'

    total_data_frame = pd.concat([bc_df, lt_df]).reset_index(drop=True)

    return total_data_frame


def make_combination(length, species, data_frame):
    before_file_path = []
    after_file_path = []
    time_delta = []

    for i in range(length):
        sample = data_frame[data_frame['species'] == species].sample(2)
        after = sample[sample['day'] == max(sample['day'])].reset_index(drop=True)
        before = sample[sample['day'] == min(sample['day'])].reset_index(drop=True)

        before_file_path.append(before.iloc[0]['file_name'])
        after_file_path.append(after.iloc[0]['file_name'])
        delta = int(after.iloc[0]['day'] - before.iloc[0]['day'])
        time_delta.append(delta)

    combination_df = pd.DataFrame({
        'before_file_path': before_file_path,
        'after_file_path': after_file_path,
        'time_delta': time_delta,
    })

    combination_df['species'] = species

    return combination_df


class KistDataset(Dataset):
    def __init__(self, combination_df, is_test= None):
        self.combination_df = combination_df
        self.transform = transforms.Compose([
            transforms.Resize(224),
            transforms.ToTensor()
        ])
        self.is_test = is_test

    def __getitem__(self, idx):
        if self.is_test:
          before_image = Image.open(self.combination_df.iloc[idx]['l_path'])
          after_image = Image.open(self.combination_df.iloc[idx]['r_path'])
        else:
          before_image = Image.open(self.combination_df.iloc[idx]['before_file_path'])
          after_image = Image.open(self.combination_df.iloc[idx]['after_file_path'])
        before_image = self.transform(before_image)
        after_image = self.transform(after_image)
        if self.is_test:
            return before_image, after_image
        time_delta = self.combination_df.iloc[idx]['time_delta']
        return before_image, after_image, time_delta

    def __len__(self):
        return len(self.combination_df)

In [None]:
import torch
from torch import nn
from torchvision.models import mobilenet_v2


class CompareCNN(nn.Module):

    def __init__(self):
        super(CompareCNN, self).__init__()
        self.mobile_net = mobilenet_v2(pretrained=True)
        self.fc_layer = nn.Linear(1000, 1)

    def forward(self, input):
        x = self.mobile_net(input)
        output = self.fc_layer(x)
        return output


class CompareNet(nn.Module):

    def __init__(self):
        super(CompareNet, self).__init__()
        self.before_net = CompareCNN()
        self.after_net = CompareCNN()

    def forward(self, before_input, after_input):
        before = self.before_net(before_input)
        after = self.after_net(after_input)
        delta = before - after
        return delta

In [4]:
def seed_everything(seed): # seed 고정
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)


seed_everything(2048)

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
lr = 1e-5
epochs = 10
batch_size = 64
valid_batch_size = 50

model = CompareNet().to(device)

total_dataframe = make_dataframe(root_path = "/content/drive/MyDrive/데이콘/생육/Data/open/train_dataset/")
bt_combination = make_combination(5000, 'bc', total_dataframe)
lt_combination = make_combination(5000, 'lt', total_dataframe)

bt_train = bt_combination.iloc[:4500]
bt_valid = bt_combination.iloc[4500:]

lt_train = lt_combination.iloc[:4500]
lt_valid = lt_combination.iloc[4500:]

train_set = pd.concat([bt_train, lt_train])
valid_set = pd.concat([bt_valid, lt_valid])



train_dataset = KistDataset(train_set)
valid_dataset = KistDataset(valid_set)

optimizer = optim.Adam(model.parameters(), lr=lr)

train_data_loader = DataLoader(train_dataset,
                               batch_size=batch_size,
                               shuffle=True)

valid_data_loader = DataLoader(valid_dataset,
                               batch_size=valid_batch_size)


for epoch in tqdm(range(epochs)):
    for step, (before_image, after_image, time_delta) in tqdm(enumerate(train_data_loader)):
        before_image = before_image.to(device)
        after_image = after_image.to(device)
        time_delta = time_delta.to(device)

        optimizer.zero_grad()
        logit = model(before_image, after_image)
        train_loss = (torch.sum(torch.abs(logit.squeeze(1).float() - time_delta.float())) /
                      torch.LongTensor([batch_size]).squeeze(0).to(device))
        train_loss.backward()
        optimizer.step()

        if step % 15 == 0:
            print('\n=====================loss=======================')
            print(f'\n=====================EPOCH: {epoch}=======================')
            print(f'\n=====================step: {step}=======================')
            print('MAE_loss : ', train_loss.detach().cpu().numpy())

    valid_losses = []
    with torch.no_grad():
        for valid_before, valid_after, time_delta in tqdm(valid_data_loader):
            valid_before = valid_before.to(device)
            valid_after = valid_after.to(device)
            valid_time_delta = time_delta.to(device)


            logit = model(valid_before, valid_after)
            valid_loss = (torch.sum(torch.abs(logit.squeeze(1).float() - valid_time_delta.float())) /
                          torch.LongTensor([valid_batch_size]).squeeze(0).to(device))
            valid_losses.append(valid_loss.detach().cpu())


    print(f'VALIDATION_LOSS MAE : {sum(valid_losses)/len(valid_losses)}')
    checkpoiont = {
        'model': model.state_dict(),

    }

    torch.save(checkpoiont, 'checkpoiont_128.pt')

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


  0%|          | 0.00/13.6M [00:00<?, ?B/s]

  0%|          | 0/10 [00:00<?, ?it/s]

0it [00:00, ?it/s]




MAE_loss :  12.446974



MAE_loss :  9.409853



MAE_loss :  6.794766



MAE_loss :  4.364027



MAE_loss :  3.6993418



MAE_loss :  3.9359198



MAE_loss :  3.6253312



MAE_loss :  2.330778



MAE_loss :  1.8619204



MAE_loss :  2.3872585


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.87296199798584


0it [00:00, ?it/s]




MAE_loss :  2.3421154



MAE_loss :  1.9724323



MAE_loss :  2.2298388



MAE_loss :  2.2268686



MAE_loss :  1.4340774



MAE_loss :  1.8669333



MAE_loss :  1.7152755



MAE_loss :  1.6010631



MAE_loss :  1.1485744



MAE_loss :  1.3953809


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.164365530014038


0it [00:00, ?it/s]




MAE_loss :  2.1569471



MAE_loss :  1.7843425



MAE_loss :  1.635805



MAE_loss :  1.8382213



MAE_loss :  1.3540425



MAE_loss :  1.5261364



MAE_loss :  1.3548641



MAE_loss :  1.242995



MAE_loss :  1.2027481



MAE_loss :  1.0362579


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.9737575054168701


0it [00:00, ?it/s]




MAE_loss :  1.2198743



MAE_loss :  1.8402121



MAE_loss :  1.1539421



MAE_loss :  1.4344088



MAE_loss :  1.285527



MAE_loss :  0.955621



MAE_loss :  1.2106426



MAE_loss :  1.0693228



MAE_loss :  1.2941885



MAE_loss :  1.1262528


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.9187085628509521


0it [00:00, ?it/s]




MAE_loss :  1.2123843



MAE_loss :  1.159874



MAE_loss :  1.097887



MAE_loss :  1.0913491



MAE_loss :  0.9116975



MAE_loss :  1.5357025



MAE_loss :  1.1962594



MAE_loss :  1.1490974



MAE_loss :  2.2713828



MAE_loss :  1.2904866


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.9322096109390259


0it [00:00, ?it/s]




MAE_loss :  1.9138247



MAE_loss :  1.7908845



MAE_loss :  1.0219016



MAE_loss :  0.8276797



MAE_loss :  1.0020134



MAE_loss :  1.268506



MAE_loss :  1.5673115



MAE_loss :  1.3235004



MAE_loss :  1.3068259



MAE_loss :  0.81196284


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.6892141103744507


0it [00:00, ?it/s]




MAE_loss :  1.192731



MAE_loss :  0.8953571



MAE_loss :  1.3787502



MAE_loss :  3.223585



MAE_loss :  1.5854905



MAE_loss :  2.3782158



MAE_loss :  0.8781421



MAE_loss :  2.440279



MAE_loss :  1.9896164



MAE_loss :  2.3286743


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.6375236511230469


0it [00:00, ?it/s]




MAE_loss :  1.7908797



MAE_loss :  0.9408463



MAE_loss :  0.9047283



MAE_loss :  0.93395305



MAE_loss :  1.0139062



MAE_loss :  0.7401037



MAE_loss :  0.8075601



MAE_loss :  0.75483716



MAE_loss :  1.1615515



MAE_loss :  1.0694664


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.6092199087142944


0it [00:00, ?it/s]




MAE_loss :  0.95989394



MAE_loss :  1.2383728



MAE_loss :  0.9277306



MAE_loss :  0.8826722



MAE_loss :  0.8534114



MAE_loss :  1.0829704



MAE_loss :  1.0802404



MAE_loss :  0.78259003



MAE_loss :  1.0111921



MAE_loss :  0.9382831


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.6962865591049194


0it [00:00, ?it/s]




MAE_loss :  1.9508907



MAE_loss :  1.6200641



MAE_loss :  0.8519304



MAE_loss :  0.8680218



MAE_loss :  0.5859334



MAE_loss :  1.2859905



MAE_loss :  1.0326861



MAE_loss :  1.1018243



MAE_loss :  0.8696724



MAE_loss :  0.7220999


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.6166328191757202


In [14]:
test_set

Unnamed: 0,idx,before_file_path,after_file_path,l_root,r_root,l_path,r_path
0,0,idx_LT_1003_00341,idx_LT_1003_00154,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...
1,1,idx_LT_1003_00592,idx_LT_1003_00687,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...
2,2,idx_BC_1100_00445,idx_BC_1100_00840,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...
3,3,idx_BC_1112_00229,idx_BC_1112_00105,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...
4,4,idx_LT_1088_00681,idx_LT_1088_00698,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...
...,...,...,...,...,...,...,...
3955,3955,idx_BC_1100_00110,idx_BC_1100_00525,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...
3956,3956,idx_LT_1089_00006,idx_LT_1089_00442,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...
3957,3957,idx_BC_1100_00511,idx_BC_1100_00132,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...
3958,3958,idx_BC_1088_00353,idx_BC_1088_00196,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...,/content/drive/MyDrive/데이콘/생육/Data/ope...


In [20]:
PATH = '/content/drive/MyDrive/데이콘/생육/Data'

In [35]:
test_set = pd.read_csv(PATH + '/open/test_dataset/test_data.csv')
test_set['l_root'] = test_set['before_file_path'].map(lambda x: PATH + '/open/test_dataset/' + x.split('_')[1] + '/' + x.split('_')[2])
test_set['r_root'] = test_set['after_file_path'].map(lambda x: PATH + '/open/test_dataset/' + x.split('_')[1] + '/' + x.split('_')[2])
test_set['l_path'] = test_set['l_root'] + '/' + test_set['before_file_path'] + '.png'
test_set['r_path'] = test_set['r_root'] + '/' + test_set['after_file_path'] + '.png'
test_dataset = KistDataset(test_set, is_test=True)
test_data_loader = DataLoader(test_dataset,
                               batch_size=64)
test_value = []
with torch.no_grad():
    for test_before, test_after in tqdm(test_data_loader):
        test_before = test_before.to(device)
        test_after = test_after.to(device)
        logit = model(test_before, test_after)
        value = logit.squeeze(1).detach().cpu().float()
        test_value.extend(value)

  0%|          | 0/62 [00:00<?, ?it/s]

In [36]:
sample = pd.read_csv('/content/drive/MyDrive/데이콘/생육/Data/open/sample_submission.csv')
sample['time_delta'] = test_value
sample['time_delta'] = sample['time_delta'].apply(lambda x: np.array(x))
sample.to_csv('*.csv', index = False)