In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from easydict import EasyDict

from tqdm import tqdm
from glob import glob
import os
import json 
import timm

import torch
from torch import nn
from torchvision import models
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split, StratifiedKFold
import albumentations as A

# Label

In [2]:
def label_preprocessing(path) :
#     path = os.path.join(path, 'train.csv')
    labels = pd.read_csv(path)

    cnt = 0
    label_encoder = {}
    for i, label in enumerate(tqdm(sorted(labels['label']))) :
        
        if label not in label_encoder.values() :
            label_encoder[cnt] = label
            cnt += 1
        
    label_decoder = {val : key for key, val in label_encoder.items()}
    
    return label_encoder, label_decoder

# enc, dec = label_preprocessing("../data/train.csv")
# display(enc)
# display(dec)

# CSV feature - min, max value 

In [3]:
def csv_feature_dict(path, csv_features) :
    
    csv_files = sorted(glob(os.path.join(path, '*/*.csv')))

    temp_csv = pd.read_csv(csv_files[0])[csv_features]
    max_arr, min_arr = temp_csv.max().to_numpy(), temp_csv.min().to_numpy()

    # feature 별 최대값, 최솟값 계산
    for csv in tqdm(csv_files[1:]):
        temp_csv = pd.read_csv(csv)[csv_features]
        temp_csv = temp_csv.replace('-',np.nan).dropna()
        if len(temp_csv) == 0:
            continue
        temp_csv = temp_csv.astype(float)
        temp_max, temp_min = temp_csv.max().to_numpy(), temp_csv.min().to_numpy()
        max_arr = np.max([max_arr,temp_max], axis=0)
        min_arr = np.min([min_arr,temp_min], axis=0)

    # feature 별 최대값, 최솟값 dictionary return
    return {csv_features[i]:[min_arr[i], max_arr[i]] for i in range(len(csv_features))}

# csv_feature_dict = csv_feature_dict('../data/train', csv_features)
# csv_feature_dict

# Data Split

In [4]:
def data_split(path, label_decoder, kfold=False, test_size=0.2) : 
    imgs = glob(os.path.join(path, '*/*.jpg'))
    json_files = glob(os.path.join(path, '*/*.json'))
    
    label_list = []
    for json_path in tqdm(json_files) :
        json_file = json.load(open(json_path, 'r'))
        
        crop = json_file["annotations"]["crop"]
        disease = json_file["annotations"]["disease"]
        risk = json_file["annotations"]["risk"]
        
        label = f'{crop}_{disease}_{risk}'
        label_list.append(label_decoder[label])
    
    if kfold :
        return imgs, label_list
    else :
        return train_test_split(imgs, test_size=test_size, shuffle=True, stratify=label_list)
# a, b = data_split('../data/train', label_decoder)

# Transforms

In [5]:
def transform(size=224):
    train_transforms = A.Compose([
                A.Resize(size ,size),
                A.OneOf([
                    A.Rotate(),
                    A.HorizontalFlip(),
                    A.VerticalFlip()
                ], p=1)
            ])

    val_transforms = A.Compose([
        A.Resize(size,size)
    ])
    
    return train_transforms, val_transforms

# Custom Dataset 

In [6]:
class CustomDataset(Dataset):
    def __init__(self, 
                 files, 
                 transforms, 
                 label_decoder, 
                 opt,
                 mode='train'):
        
        if opt.use_kfold :
            self.files = self.kfold_files(files, opt)
        else : 
            self.files = files
        
        self.mode = mode
        self.label_decoder = label_decoder #label_encoder
        self.csv_feature_dict = opt.csv_feature_dict
        self.max_len = opt.max_len
        self.transforms = transforms
        
    def __len__(self):
        return len(self.files)
    
    def __getitem__(self, i):
        file = self.files[i]
        
        # CSV
        csv_data, seq_len = self.csv_preprocessing(file)
        
        # image
        img = self.img_preprocessing(file)
        
        if self.mode == 'train':         
            # Label
            label = self.label_preprocessing(file)
            
            return {
                'img' : torch.tensor(img, dtype=torch.float32),
                'label' : torch.tensor(self.label_decoder[label], dtype=torch.long),
                'csv_feature': torch.tensor(csv_data, dtype=torch.float32),
                'seq_len' : seq_len
            }
        
        else:
            return {
                'img' : torch.tensor(img, dtype=torch.float32),
                'csv_feature': torch.tensor(csv_data, dtype=torch.float32),
                'seq_len' : seq_len
            }
        
    def kfold_files(self, data_index, opt) :
        file_list = glob(os.path.join(opt.dataset_path, "*/*.jpg"))
        return [file_list[idx] for idx in data_index]
        
    
    def csv_preprocessing(self, file) :
        # CSV
        csv_path = file.replace("jpg","csv")
        df = pd.read_csv(csv_path)[self.csv_feature_dict.keys()]
        df = df.replace('-', 0)
        
        # MinMax scaling
        for col in df.columns:
            df[col] = df[col].astype(float) - self.csv_feature_dict[col][0]
            df[col] = df[col] / (self.csv_feature_dict[col][1]-self.csv_feature_dict[col][0])

        # pack_padded_sequence 하기 위한 len 추가
        seq_len = len(df)

        df_np = df.to_numpy()
        df_len, df_features = df_np.shape
        
        csv_data = np.zeros([self.max_len, df_features])
        csv_data[0:df_len, :] = df_np
        
        return csv_data, seq_len

    def img_preprocessing(self, file) :
        image_path = file
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = self.transforms(image=img)["image"]
        img = img.transpose(2,0,1)
        
        return img
    
    def label_preprocessing(self, file) :
        json_path = file.replace("jpg","json")
        with open(json_path, 'r') as f:
            json_file = json.load(f)

        crop = json_file['annotations']['crop']
        disease = json_file['annotations']['disease']
        risk = json_file['annotations']['risk']
        
        return f'{crop}_{disease}_{risk}'

# Model - CNN

In [7]:
class CNN_Encoder(nn.Module):
    def __init__(self, model_name, num_classes, pretrained_path=None):
        super(CNN_Encoder, self).__init__()
        
        if pretrained_path :
            # no use pretrained model trained with Public dataset
            self.model = self.create_pretrained_model(model_name, num_classes, pretrained_path)
            
        else :            
            self.model = timm.create_model(model_name, num_classes=num_classes, pretrained=True)
            
    
    def forward(self, inputs):
        output = self.model(inputs)
        return output

    def create_pretrained_model(self, model_name, num_classes, pretrained_path):
        pre_model = torch.load(pretrained_path, map_location="cpu")
        output_size = pre_model[list(pre_model.keys())[-1]].shape[0]
 
        return nn.Sequential(
                    timm.create_model(model_name, num_classes=output_size, pretrained=True),
                    nn.Linear(output_size, num_classes)
                )

# model = CNN_Encoder("efficientnetv2_rw_s", 1000)
# model = CNN_Encoder("efficientnetv2_rw_s", 1000, "../model/k_fold_50k_pretrained_effiv2S/4_f9462_public_vill_50k_pretrain_efficientnetv2S.pt")

# Model - RNN


In [8]:
class SELayer(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1)
        return x * y.expand_as(x)

class MLSTMfcn(nn.Module):
    def __init__(self, *, num_classes, max_seq_len, num_features,
                 num_lstm_out=128, num_lstm_layers=1, 
                 conv1_nf=128, conv2_nf=256, conv3_nf=128,
                 lstm_drop_p=0.8, fc_drop_p=0.3):
        
        super(MLSTMfcn, self).__init__()
        self.num_classes = num_classes
        self.max_seq_len = max_seq_len
        self.num_features = num_features

        self.num_lstm_out = num_lstm_out
        self.num_lstm_layers = num_lstm_layers

        self.conv1_nf = conv1_nf
        self.conv2_nf = conv2_nf
        self.conv3_nf = conv3_nf

        self.lstm_drop_p = lstm_drop_p
        self.fc_drop_p = fc_drop_p

        self.lstm = nn.LSTM(input_size=self.num_features, 
                            hidden_size=self.num_lstm_out,
                            num_layers=self.num_lstm_layers,
                            batch_first=True)
        
        self.conv1 = nn.Conv1d(self.num_features, self.conv1_nf, 8)
        self.conv2 = nn.Conv1d(self.conv1_nf, self.conv2_nf, 5)
        self.conv3 = nn.Conv1d(self.conv2_nf, self.conv3_nf, 3)

        self.bn1 = nn.BatchNorm1d(self.conv1_nf)
        self.bn2 = nn.BatchNorm1d(self.conv2_nf)
        self.bn3 = nn.BatchNorm1d(self.conv3_nf)

        self.se1 = SELayer(self.conv1_nf)  # ex 128
        self.se2 = SELayer(self.conv2_nf)  # ex 256

        self.relu = nn.ReLU()
        self.lstmDrop = nn.Dropout(self.lstm_drop_p)
        self.convDrop = nn.Dropout(self.fc_drop_p)

        self.fc = nn.Linear(self.conv3_nf+self.num_lstm_out, 128)

        self.out_layer = nn.Linear(self.num_classes+128, self.num_classes)
        self.dropout = nn.Dropout(0.1)
    
    def forward(self, enc_out, x, seq_lens):
        ''' input x should be in size [B,T,F], where 
            B = Batch size
            T = Time samples
            F = features
        '''
        x1 = nn.utils.rnn.pack_padded_sequence(x, seq_lens.cpu(), 
                                               batch_first=True, 
                                               enforce_sorted=False)
        x1, (ht,ct) = self.lstm(x1)
        x1, _ = nn.utils.rnn.pad_packed_sequence(x1, batch_first=True, 
                                                 padding_value=0.0)
        x1 = x1[:,-1,:]
        
        x2 = x.transpose(2,1)
        x2 = self.convDrop(self.relu(self.bn1(self.conv1(x2))))
        x2 = self.se1(x2)
        x2 = self.convDrop(self.relu(self.bn2(self.conv2(x2))))
        x2 = self.se2(x2)
        x2 = self.convDrop(self.relu(self.bn3(self.conv3(x2))))
        x2 = torch.mean(x2,2)
        
        x_all = torch.cat((x1,x2),dim=1)
        x_out = self.fc(x_all)
        concat = torch.cat([enc_out, x_out], dim=1)  # enc_out + hidden 
        output = self.dropout(concat)
        x_output = self.out_layer(output)
        x_out = F.log_softmax(x_output, dim=1)

        return x_out
    
# model = MLSTMfcn(num_classes=38, max_seq_len=512, num_features=9)

# Model - CNN + RNN

In [9]:
class CNN2RNN(nn.Module):
    def __init__(self, opt):
        super(CNN2RNN, self).__init__()
        
        self.cnn = CNN_Encoder(opt.model_name, opt.num_classes, opt.pretrained_path)
        self.rnn = MLSTMfcn(num_classes=opt.num_classes, max_seq_len=opt.max_len, num_features=opt.num_features)


    def forward(self, img, seq, seq_len):
        cnn_output = self.cnn(img)
        output = self.rnn(cnn_output, seq, seq_len)
        
        return output
    
# model = CNN2RNN("efficientnetv2_rw_s", 
#                 1000, 
#                 512,
#                 6,
#                 "../model/k_fold_50k_pretrained_effiv2S/4_f9462_public_vill_50k_pretrain_efficientnetv2S.pt")
# model

# CutMix

In [10]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)
 
    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

# Training

In [19]:
def accuracy_function(real, pred):    
    real = real.cpu()
    pred = torch.argmax(pred, dim=1).cpu()
    score = f1_score(real, pred, average='macro')
    return score

def run(train_loader, valid_loader, opt) :
    
    model = CNN2RNN(opt).to(opt.device)
    optimizer = torch.optim.AdamW(model.parameters(), lr= opt.learning_rate)
    criterion = nn.CrossEntropyLoss()
    
    early_stopping_step = 0
    best_loss = 10
    for epoch in range(opt.epochs) : 

        # training
        tqdm_train = tqdm(train_loader)
        train_loss, train_macro_f1 = 0, 0
        for batch, batch_item in enumerate(tqdm_train) :
            model.train()
            
            img = batch_item['img'].to(opt.device)
            label = batch_item['label'].to(opt.device)
            csv_feature = batch_item['csv_feature'].to(opt.device)
            seq_lens = batch_item['seq_len'].to(opt.device)

            lam = np.random.beta(1.0, 1.0)
                        
            optimizer.zero_grad()
            with torch.cuda.amp.autocast():
                # add - cutmix
                rand_index = torch.randperm(img.size()[0])
                target_a = label
                target_b = label[rand_index]
                bbx1, bby1, bbx2, bby2 = rand_bbox(img.size(), lam)
                img[:, :, bbx1:bbx2, bby1:bby2] = img[rand_index, :, bbx1:bbx2, bby1:bby2]
                lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (img.size()[-1] * img.size()[-2]))

                output = model(img, csv_feature, seq_lens)
                loss = criterion(output, target_a) * lam + criterion(output, target_b) * (1. - lam)

            loss.backward()
            optimizer.step()
            score = accuracy_function(label, output)
            
            train_loss += loss
            train_macro_f1 += score
            
            tqdm_train.set_postfix({"Epoch" : epoch+1,
                                    "Mean train loss" : "{:06f}".format(train_loss/(batch+1)),
                                    "Mean train f1" : "{:06f}".format(train_macro_f1/(batch+1))
                                   })
            
#             print(f"Traing Epoch : [{epoch}/{opt.epochs}] loss : {train_loss}  f1 : {train_macro_f1}",end='\r')
            
#         print(f"Traing Epoch : [{epoch}/{opt.epochs}] loss : {train_loss}  f1 : {train_macro_f1}")
        
        # validation
        tqdm_valid = tqdm(valid_loader)
        valid_loss, valid_macro_f1 = 0, 0
        for batch, batch_item in enumerate(tqdm_valid) :
            img = batch_item['img'].to(opt.device)
            label = batch_item['label'].to(opt.device)
            csv_feature = batch_item['csv_feature'].to(opt.device)
            seq_lens = batch_item['seq_len'].to(opt.device)
            
            model.eval()
            with torch.no_grad():
                output = model(img, csv_feature, seq_lens)
                loss = criterion(output, label)
            score = accuracy_function(label, output)
            
            valid_loss += loss
            valid_macro_f1 += score
            
            tqdm_valid.set_postfix({"Mean valid loss" : "{:06f}".format(valid_loss/(batch+1)),
                                    "Mean valid f1" : "{:06f}".format(valid_macro_f1/(batch+1))
                                   })
            
#             print(f"Valid Epoch : [{epoch}/{opt.epochs}] loss : {valid_loss}  f1 : {valid_macro_f1}",end='\r')
            
#         print(f"Valid Epoch : [{epoch}/{opt.epochs}] loss : {valid_loss}  f1 : {valid_macro_f1}")

        if valid_loss < best_loss :
            early_stopping_step = 0
            best_f1 = valid_macro_f1
            os.makedirs(opt.save_path, exist_ok=True)
            torch.save(model.state_dict(), os.path.join(opt.save_path, f'{epoch}E_{round(valid_loss, 4)}_{opt.model_name}.pt'))
        
        elif valid_loss > best_loss and epoch != 0 :
            early_stopping_step += 1
            print(f"Early Stopping Step : [{early_stopping_step} / {opt.early_stopping}]")
            
        if early_stopping_step == opt.early_stopping :
            print("=== Early Stop ===")
            break

In [None]:
# 변수 설명 csv 파일 참조
crop = {'1':'딸기','2':'토마토','3':'파프리카','4':'오이','5':'고추','6':'시설포도'}
disease = {'1':{'a1':'딸기잿빛곰팡이병','a2':'딸기흰가루병','b1':'냉해피해','b6':'다량원소결핍 (N)','b7':'다량원소결핍 (P)','b8':'다량원소결핍 (K)'},
           '2':{'a5':'토마토흰가루병','a6':'토마토잿빛곰팡이병','b2':'열과','b3':'칼슘결핍','b6':'다량원소결핍 (N)','b7':'다량원소결핍 (P)','b8':'다량원소결핍 (K)'},
           '3':{'a9':'파프리카흰가루병','a10':'파프리카잘록병','b3':'칼슘결핍','b6':'다량원소결핍 (N)','b7':'다량원소결핍 (P)','b8':'다량원소결핍 (K)'},
           '4':{'a3':'오이노균병','a4':'오이흰가루병','b1':'냉해피해','b6':'다량원소결핍 (N)','b7':'다량원소결핍 (P)','b8':'다량원소결핍 (K)'},
           '5':{'a7':'고추탄저병','a8':'고추흰가루병','b3':'칼슘결핍','b6':'다량원소결핍 (N)','b7':'다량원소결핍 (P)','b8':'다량원소결핍 (K)'},
           '6':{'a11':'시설포도탄저병','a12':'시설포도노균병','b4':'일소피해','b5':'축과병'}}
risk = {'1':'초기','2':'중기','3':'말기'}

csv_features = ['내부 온도 1 평균', '내부 온도 1 최고', '내부 온도 1 최저', '내부 습도 1 평균', '내부 습도 1 최고', 
                '내부 습도 1 최저', '내부 이슬점 평균', '내부 이슬점 최고', '내부 이슬점 최저']


opt = {"dataset_path" : "../data/train",
       "label_path" : "../data/train.csv",
       "save_path" : "../pretrain",
       "pretrained_path" : None,
       "batch_size" : 16,
       "use_kfold" : False,
       "kfold_splits" : 4,
       "model_name" : 'deit_small_patch16_224',
       "resize" : 224,
       "num_classes" : 38,
       "learning_rate" : 1e-4,
       "early_stopping" : 5,
       "device" : "cuda", 
       "csv_features" : csv_features,
       "max_len" : 590,
       "num_features" : len(csv_features),
       "epochs" : 3}

opt = EasyDict(opt)

# csv_feature_dict 옵션 추가
opt.csv_feature_dict = csv_feature_dict(opt.dataset_path, opt.csv_features)

# label_enc, dec 및 trasnforms 설정
label_encoder, label_decoder = label_preprocessing(opt.label_path)
train_transforms, valid_transforms = transform(size=opt.resize)

 34%|██████████████████████████▍                                                  | 1983/5766 [00:08<00:14, 254.12it/s]

# Train Without kfold training

In [None]:
# option 출력
print("<< option >>")
print(*["{} : {}".format(k, v) for k, v in opt.items()], sep='\n')

# data split with stratify
train, valid = data_split(opt.dataset_path, label_decoder)

train_dataset = CustomDataset(train, train_transforms, label_decoder, opt)
valid_dataset = CustomDataset(valid, valid_transforms, label_decoder, opt)

train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=opt.batch_size, shuffle=False)

run(train_loader, valid_loader, opt)

# Train With kfold training

In [18]:
# option 출력
print("<< option >>")
print(*["{} : {}".format(k, v) for k, v in opt.items()], sep='\n')

img_list, label_list = data_split(opt.dataset_path, label_decoder, kfold=True)

kfold = StratifiedKFold(n_splits=opt.kfold_splits, random_state=13, shuffle=True)
for k, (fold_train, fold_valid) in enumerate(kfold.split(img_list, label_list), 1) :
    
    print(f"\n\n\n===== k_fold : {k} / {opt.kfold_splits} =====")
    train_dataset = CustomDataset(fold_train, train_transforms, label_decoder, opt)
    valid_dataset = CustomDataset(fold_valid, valid_transforms, label_decoder, opt)

    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=opt.batch_size, shuffle=False)

    run(train_loader, valid_loader, opt)

<< option >>
dataset_path : ../data/train
label_path : ../data/train.csv
save_path : ../pretrain
pretrained_path : None
batch_size : 16
use_kfold : True
kfold_splits : 4
model_name : deit_small_patch16_224
resize : 224
num_classes : 38
learning_rate : 0.0001
early_stopping : 5
device : cuda
csv_features : ['내부 온도 1 평균', '내부 온도 1 최고', '내부 온도 1 최저', '내부 습도 1 평균', '내부 습도 1 최고', '내부 습도 1 최저', '내부 이슬점 평균', '내부 이슬점 최고', '내부 이슬점 최저']
max_len : 590
num_features : 9
epochs : 1
csv_feature_dict : {'내부 온도 1 평균': [3.4, 47.3], '내부 온도 1 최고': [3.4, 47.6], '내부 온도 1 최저': [3.3, 47.0], '내부 습도 1 평균': [23.7, 100.0], '내부 습도 1 최고': [25.9, 100.0], '내부 습도 1 최저': [0.0, 100.0], '내부 이슬점 평균': [0.1, 34.5], '내부 이슬점 최고': [0.2, 34.7], '내부 이슬점 최저': [0.0, 34.4]}


100%|███████████████████████████████████████████████████████████████████████████| 5767/5767 [00:00<00:00, 14896.53it/s]





===== k_fold : 1 / 4 =====


100%|█████████████████████| 271/271 [01:17<00:00,  3.49it/s, Epoch=1, Mean train loss=2.348999, Mean train f1=0.250484]
100%|████████████████████████████████| 91/91 [00:22<00:00,  4.01it/s, Mean valid loss=1.650510, Mean valid f1=0.351820]


Early Stopping Step : [1 / 5]



===== k_fold : 2 / 4 =====


100%|█████████████████████| 271/271 [01:16<00:00,  3.53it/s, Epoch=1, Mean train loss=2.339868, Mean train f1=0.237967]
100%|████████████████████████████████| 91/91 [00:22<00:00,  4.03it/s, Mean valid loss=1.611374, Mean valid f1=0.368475]


Early Stopping Step : [1 / 5]



===== k_fold : 3 / 4 =====


100%|█████████████████████| 271/271 [01:16<00:00,  3.53it/s, Epoch=1, Mean train loss=2.326114, Mean train f1=0.249759]
100%|████████████████████████████████| 91/91 [00:21<00:00,  4.24it/s, Mean valid loss=1.560356, Mean valid f1=0.390233]


Early Stopping Step : [1 / 5]



===== k_fold : 4 / 4 =====


100%|█████████████████████| 271/271 [01:16<00:00,  3.55it/s, Epoch=1, Mean train loss=2.357530, Mean train f1=0.244086]
100%|████████████████████████████████| 91/91 [00:22<00:00,  4.07it/s, Mean valid loss=1.667482, Mean valid f1=0.347218]

Early Stopping Step : [1 / 5]



