# 転移学習のベースライン
参考： https://www.kaggle.com/khyeh0719/pytorch-efficientnet-baseline-train-amp-aug/

In [None]:
# timm: 事前学習済みモデルの使用のためのライブラリ
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master') # パスを通す
import timm

In [None]:
# Show the names of every timm model matching the "vit*" pattern.
print("Available Vision Transformer Models: ")
timm.list_models("vit*")

In [None]:
import os
import datetime
import random
import time
from tqdm import tqdm
import cv2

import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import Dataset
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import  log_loss

In [None]:
# Central configuration shared by the training and inference cells.
CFG = {
    "fold_num": 5, # number of StratifiedKFold splits
    "seed": 6, # random seed
    "model_arch": "vit_small_patch16_224", # timm model name to instantiate
    "img_size_h": 224, # image height after resizing
    "img_size_w": 224, # image width after resizing
    "epochs": 10, # number of training epochs (one checkpoint is saved per epoch)
    "train_bs": 64, # bs: batch size
    "valid_bs": 64,
    "T_0": 5, # T_0 passed to CosineAnnealingWarmRestarts
    "lr": 1e-4, # lr: learning rate
    "min_lr": 1e-6, # eta_min passed to CosineAnnealingWarmRestarts
    "weight_decay": 1e-4,
    "num_workers": 6, # DataLoader worker processes
    "accum_iter": 5, # gradient accumulation: the optimizer steps once every accum_iter batches, mimicking a larger batch size
    "verbose_step": 1, # the progress-bar description is refreshed every verbose_step batches
    "device": "cuda:0",
    "tta": 5,  # number of test-time-augmentation passes per checkpoint
    "used_epochs": [ # epoch indices of the checkpoints loaded by infer()
        5,6,7,
    ],
    "weights": [ # ensemble weights, paired by position with used_epochs
        0.5,1,1

    ]
}


In [None]:
# Create the output directories: "output" receives the submission CSV and
# "save" receives the per-epoch model checkpoints.
os.makedirs("output", exist_ok=True)
os.makedirs("save", exist_ok=True)

# train と test

In [None]:
# Build the training DataFrame: one row per image, holding the image path and
# its class name (the class name is the sub-directory the image lives in).
base_train_data_path = '../input/flowers-recognition/train/'

train_data_labels = ['daisy',
                    'dandelion',
                    'rose',
                    'sunflower',
                    'tulip'
                   ]

# os.listdir() returns entries in arbitrary order. The row order feeds the
# seeded StratifiedKFold split, so the file names are sorted to keep the folds
# reproducible from run to run.
per_label_frames = []
for one_label in train_data_labels:
    one_label_paths = os.path.join(base_train_data_path, one_label)
    one_label_df = pd.DataFrame({
        'image_path': [os.path.join(one_label_paths, f) for f in sorted(os.listdir(one_label_paths))],
    })
    one_label_df['label'] = one_label
    per_label_frames.append(one_label_df)

# Concatenate once instead of growing a DataFrame inside the loop.
train_df = pd.concat(per_label_frames).reset_index(drop=True)
print(train_df.shape)
display(train_df.head())

In [None]:
# Encode the class names stored in train_df["label"] as integer ids.
# NOTE: Series.map yields NaN for values that are not keys of the dict, so
# running this cell a second time on already-encoded labels produces NaN.

label_dic = {"daisy":0, "dandelion":1, "rose":2,"sunflower":3, "tulip":4}
train_df["label"]=train_df["label"].map(label_dic)
display(train_df.head())

In [None]:
# Build the test DataFrame: one row per file found in the test directory,
# ordered by image path.
base_test_data_path = '../input/flowers-recognition/test/'
test_df = pd.DataFrame({
    'image_path': [os.path.join(base_test_data_path, f) for f in os.listdir(base_test_data_path)],
})
test_df = test_df.sort_values('image_path').reset_index(drop=True)

display(test_df.head())

# 便利関数

In [None]:
def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and configures cuDNN for deterministic
    behaviour.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run, which
    # works against the deterministic flag above, so it is switched off.
    torch.backends.cudnn.benchmark = False

def get_img(path):
    """Read the image at ``path`` and return it with its channel axis reversed.

    The array returned by ``cv2.imread`` has its last axis flipped
    (BGR -> RGB) before being returned.

    Args:
        path: path of the image file to read.

    Returns:
        The image array with the last (channel) axis reversed.

    Raises:
        OSError: if ``cv2.imread`` returns ``None`` for ``path``.
    """
    im_bgr = cv2.imread(path)
    if im_bgr is None:
        # Fail with the offending path instead of an opaque
        # "'NoneType' object is not subscriptable" on the slice below.
        raise OSError(f"cv2.imread could not read image: {path}")
    return im_bgr[:, :, ::-1]


# データセットクラス

In [None]:
class FlowerDataset(Dataset):
    """Dataset that serves images (and optionally labels) listed in a DataFrame.

    Args:
        df: DataFrame with one row per image. It is copied and re-indexed
            from 0, so the caller's frame is never modified.
        shape: stored on the instance as ``self.shape``; not used by the
            dataset itself.
        transforms: optional callable invoked as ``transforms(image=img)``
            whose result is indexed with ``['image']``.
        output_label: when truthy, ``__getitem__`` returns ``(img, target)``,
            otherwise only ``img``.
        one_hot_label: when truthy (and labels are output), labels are
            converted to one-hot rows with ``max(label) + 1`` columns.
        image_name_col: column holding the image paths.
        label_col: column holding the integer labels.
    """

    def __init__(self, df,
                 shape,
                 transforms=None,
                 output_label=True,
                 one_hot_label=False,
                 image_name_col = "image_path",
                 label_col = "label"
                ):

        super().__init__()
        self.shape = shape
        self.df = df.reset_index(drop=True).copy()
        self.transforms = transforms
        # Normalise the flags once so __init__ and __getitem__ always agree
        # (e.g. numpy booleans are honoured instead of failing an `is True` test).
        self.output_label = bool(output_label)
        self.one_hot_label = bool(one_hot_label)
        self.image_name_col = image_name_col
        self.label_col = label_col

        if self.output_label:
            self.labels = self.df[self.label_col].values
            if self.one_hot_label:
                self.labels = np.eye(self.df[self.label_col].max()+1)[self.labels]

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return self.df.shape[0]

    def __getitem__(self, index: int):
        """Return the transformed image at ``index``, with its label if enabled."""
        # The label is read first so an out-of-range index surfaces as
        # IndexError from the label array, as it did before.
        if self.output_label:
            target = self.labels[index]

        # Single label-based lookup instead of materialising the whole row.
        img = get_img(self.df.loc[index, self.image_name_col])

        if self.transforms:
            img = self.transforms(image=img)['image']

        if self.output_label:
            return img, target
        return img


# 画像のスケーリング&オーグメンテーション

In [None]:

from albumentations import (
    PadIfNeeded, HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout, ShiftScaleRotate, CenterCrop, Resize,ToGray
)

from albumentations.pytorch import ToTensorV2

def get_train_transforms(input_shape):
    """Build the augmentation pipeline applied to training images.

    Steps, in order: resize to ``(input_shape[0], input_shape[1])``, random
    horizontal/vertical flips, shift/rotate with scaling disabled, hue/
    saturation/value and brightness/contrast jitter, normalisation with fixed
    per-channel mean/std, Cutout with 20 holes, and conversion to a tensor.

    Args:
        input_shape: sequence whose first two items are target height and width.

    Returns:
        The composed albumentations pipeline.
    """
    height, width = input_shape[0], input_shape[1]
    steps = [
        Resize(height, width),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        ShiftScaleRotate(scale_limit=0.0, p=0.5),
        HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
        RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        Cutout(num_holes=20,p=0.5),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1.)

def get_valid_transforms(input_shape):
    """Build the augmentation-free pipeline applied to validation images.

    Only resizes to ``(input_shape[0], input_shape[1])``, normalises with
    fixed per-channel mean/std and converts the image to a tensor.

    Args:
        input_shape: sequence whose first two items are target height and width.

    Returns:
        The composed albumentations pipeline.
    """
    height, width = input_shape[0], input_shape[1]
    steps = [
        Resize(height, width),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1.)

def get_inference_transforms(input_shape):
    """Build the randomised pipeline used for test-time augmentation.

    Same steps as the training pipeline (resize, random flips, shift/rotate,
    colour jitter, normalisation, Cutout, tensor conversion), except that
    Cutout runs with its default hole count.

    Args:
        input_shape: sequence whose first two items are target height and width.

    Returns:
        The composed albumentations pipeline.
    """
    height, width = input_shape[0], input_shape[1]
    steps = [
        Resize(height, width),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        ShiftScaleRotate(scale_limit=0.0, p=0.5),
        HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
        RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        Cutout(p=0.5),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1.)


# データローダー作成

In [None]:
def prepare_dataloader(df, input_shape, trn_idx, val_idx, train_bs, valid_bs, num_workers):
    """Create the training and validation DataLoaders for one fold.

    Args:
        df: DataFrame holding every training row.
        input_shape: image shape forwarded to the datasets and transforms.
        trn_idx: index labels of the training rows in ``df``.
        val_idx: index labels of the validation rows in ``df``.
        train_bs: batch size of the training loader.
        valid_bs: batch size of the validation loader.
        num_workers: number of worker processes for both loaders.

    Returns:
        ``(train_loader, val_loader)``; the training loader shuffles and uses
        the training augmentations, the validation loader does neither.
    """
    train_rows = df.loc[trn_idx, :].reset_index(drop=True)
    valid_rows = df.loc[val_idx, :].reset_index(drop=True)

    train_ds = FlowerDataset(
        train_rows,
        input_shape,
        transforms=get_train_transforms(input_shape),
        output_label=True,
        one_hot_label=False,
    )
    valid_ds = FlowerDataset(
        valid_rows,
        input_shape,
        transforms=get_valid_transforms(input_shape),
        output_label=True,
    )

    make_loader = torch.utils.data.DataLoader
    # pin_memory=True on the training loader: faster transfers at the cost of memory.
    train_loader = make_loader(
        train_ds,
        batch_size=train_bs,
        shuffle=True,
        drop_last=False,
        num_workers=num_workers,
        pin_memory=True,
    )
    val_loader = make_loader(
        valid_ds,
        batch_size=valid_bs,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=False,
    )
    return train_loader, val_loader


# モデル

In [None]:
class FlowerImgClassifier(nn.Module):
    """timm model whose ``head`` layer is replaced by an ``n_class``-way linear layer.

    Args:
        model_arch: model name passed to ``timm.create_model``.
        n_class: number of output classes of the new head.
        pretrained: forwarded to ``timm.create_model``.
    """

    def __init__(self, model_arch, n_class, pretrained=True):
        super().__init__()
        backbone = timm.create_model(model_arch, pretrained=pretrained)
        # Keep the input width of the existing head and swap in a fresh classifier.
        backbone.head = nn.Linear(backbone.head.in_features, n_class)
        self.model = backbone

    def forward(self, x):
        """Return the wrapped model's output for the batch ``x``."""
        return self.model(x)


In [None]:
# reference: https://www.kaggle.com/c/siim-isic-melanoma-classification/discussion/173733
class MyCrossEntropyLoss(_WeightedLoss):
    """Cross-entropy loss for targets given as per-class vectors.

    ``targets`` is multiplied element-wise with the log-softmax of ``inputs``,
    so it must have the same trailing (class) dimension as ``inputs``.

    Args:
        weight: optional per-class weights multiplied into the log-probabilities.
        reduction: ``'mean'`` or ``'sum'`` to reduce the per-sample losses;
            any other value returns them unreduced.
    """

    def __init__(self, weight=None, reduction='mean'):
        super().__init__(weight=weight, reduction=reduction)
        self.reduction = reduction
        self.weight = weight

    def forward(self, inputs, targets):
        """Return the (optionally reduced) loss for logits ``inputs``."""
        log_probs = F.log_softmax(inputs, -1)
        if self.weight is not None:
            log_probs = log_probs * self.weight.unsqueeze(0)

        per_sample = -(targets * log_probs).sum(-1)

        if self.reduction == 'mean':
            return per_sample.mean()
        if self.reduction == 'sum':
            return per_sample.sum()
        return per_sample

# 学習用&推論用関数

In [None]:
def train_one_epoch(epoch, model, loss_fn, optimizer, train_loader, device, accum_iter, verbose_step, scheduler=None, schd_batch_update=False):
    """Train ``model`` for one epoch with mixed precision and gradient accumulation.

    Gradients are accumulated (summed, not averaged) over ``accum_iter``
    batches before each optimizer step; the last batch of the epoch always
    triggers a step.

    Args:
        epoch: epoch number, used only in the progress description.
        model: model to train; switched to train mode.
        loss_fn: callable ``loss_fn(predictions, labels)`` returning a scalar loss.
        optimizer: optimizer stepped through the gradient scaler.
        train_loader: iterable of ``(images, labels)`` batches supporting ``len``.
        device: device the batches are moved to.
        accum_iter: number of batches between optimizer steps.
        verbose_step: number of batches between progress-description updates.
        scheduler: optional LR scheduler.
        schd_batch_update: if True the scheduler steps after every optimizer
            step, otherwise once at the end of the epoch.
    """
    model.train()
    scaler = GradScaler()

    running_loss = None
    num_steps = len(train_loader)
    # Fallback so the summary print below still works for an empty loader.
    description = f'epoch {epoch} loss: nan'

    pbar = tqdm(enumerate(train_loader), total=num_steps)
    for step, (imgs, image_labels) in pbar:
        imgs = imgs.to(device).float()
        image_labels = image_labels.to(device).long()

        # Only the forward pass and loss run under autocast; backward and the
        # optimizer step are kept outside the context.
        with autocast():
            image_preds = model(imgs)
            loss = loss_fn(image_preds, image_labels)

        scaler.scale(loss).backward()

        # Exponential moving average of the batch loss, for display only.
        loss_value = loss.item()
        if running_loss is None:
            running_loss = loss_value
        else:
            running_loss = running_loss * .99 + loss_value * .01

        is_last_step = (step + 1) == num_steps

        if ((step + 1) % accum_iter == 0) or is_last_step:
            # may unscale_ here if desired (e.g., to allow clipping unscaled gradients)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

            if scheduler is not None and schd_batch_update:
                scheduler.step()

        if ((step + 1) % verbose_step == 0) or is_last_step:
            description = f'epoch {epoch} loss: {running_loss:.4f}'
            pbar.set_description(description)

    print("train: "+ description)
    if scheduler is not None and not schd_batch_update:
        scheduler.step()

def valid_one_epoch(epoch, model, loss_fn, val_loader, device, accum_iter, verbose_step, scheduler=None, schd_loss_update=False):
    """Evaluate ``model`` on ``val_loader`` and print mean loss and accuracy.

    The function does not disable gradient tracking itself; the caller is
    expected to wrap the call in ``torch.no_grad()`` if that is wanted.

    Args:
        epoch: epoch number, used only in the progress description.
        model: model to evaluate; switched to eval mode.
        loss_fn: callable ``loss_fn(predictions, labels)`` returning a scalar loss.
        val_loader: iterable of ``(images, labels)`` batches supporting ``len``.
        device: device the batches are moved to.
        accum_iter: unused; kept for signature parity with ``train_one_epoch``.
        verbose_step: number of batches between progress-description updates.
        scheduler: optional scheduler stepped after validation.
        schd_loss_update: if True the scheduler receives the mean validation loss.
    """
    model.eval()

    total_loss = 0
    seen = 0
    pred_chunks = []
    target_chunks = []
    n_batches = len(val_loader)

    pbar = tqdm(enumerate(val_loader), total=n_batches)
    for step, (imgs, image_labels) in pbar:
        imgs = imgs.to(device).float()
        image_labels = image_labels.to(device).long()

        logits = model(imgs)
        pred_chunks.append(torch.argmax(logits, 1).detach().cpu().numpy())
        target_chunks.append(image_labels.detach().cpu().numpy())

        # Weight each batch loss by its size so the running value is a per-sample mean.
        batch_size = image_labels.shape[0]
        total_loss += loss_fn(logits, image_labels).item()*batch_size
        seen += batch_size

        batch_no = step + 1
        if batch_no % verbose_step == 0 or batch_no == n_batches:
            description = f'epoch {epoch} loss: {total_loss/seen:.4f}'
            pbar.set_description(description)

    print("valid "+ description)
    all_preds = np.concatenate(pred_chunks)
    all_targets = np.concatenate(target_chunks)
    print('validation multi-class accuracy = {:.4f}'.format((all_preds==all_targets).mean()))

    if scheduler is None:
        return
    if schd_loss_update:
        scheduler.step(total_loss/seen)
    else:
        scheduler.step()


def inference_one_epoch(model, data_loader, device):
    """Run ``model`` over ``data_loader`` and return the softmax outputs.

    Args:
        model: model to run; switched to eval mode.
        data_loader: iterable of image batches (no labels) supporting ``len``.
        device: device the batches are moved to.

    Returns:
        NumPy array with the per-batch softmax outputs concatenated along axis 0.
    """
    model.eval()
    prob_chunks = []
    for _, imgs in tqdm(enumerate(data_loader), total=len(data_loader)):
        batch = imgs.to(device).float()
        logits = model(batch)
        prob_chunks.append(torch.softmax(logits, 1).detach().cpu().numpy())

    return np.concatenate(prob_chunks, axis=0)


# 学習

In [None]:
def train():
    """Train the classifier on the first stratified fold and save a checkpoint per epoch.

    Reads the module-level ``train_df`` and ``CFG``. Checkpoints are written
    to ``save/{model_arch}_fold_{fold}_{epoch}``.
    """
    data = train_df
    seed_everything(CFG['seed'])

    splitter = StratifiedKFold(n_splits=CFG['fold_num'], shuffle=True, random_state=CFG['seed'])
    folds = splitter.split(np.arange(data.shape[0]), data.label.values)

    for fold, (trn_idx, val_idx) in enumerate(folds):
        # Only the first fold is trained.
        if fold > 0:
            break
        print(f'Training with fold {fold} started (train:{len(trn_idx)}, val:{len(val_idx)})')

        train_loader, val_loader = prepare_dataloader(
            data,
            (CFG["img_size_h"], CFG["img_size_w"]),
            trn_idx,
            val_idx,
            train_bs=CFG["train_bs"],
            valid_bs=CFG["valid_bs"],
            num_workers=CFG["num_workers"],
        )

        device = torch.device(CFG['device'])

        model = FlowerImgClassifier(CFG['model_arch'], data.label.nunique(), pretrained=True).to(device)

        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=CFG['lr'],
            weight_decay=CFG['weight_decay'],
        )
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer,
            T_0=CFG['T_0'],
            T_mult=1,
            eta_min=CFG['min_lr'],
            last_epoch=-1,
        )

        loss_tr = nn.CrossEntropyLoss().to(device)
        loss_fn = nn.CrossEntropyLoss().to(device)

        for epoch in range(CFG['epochs']):
            # The scheduler is stepped once per epoch by train_one_epoch.
            train_one_epoch(
                epoch, model, loss_tr, optimizer, train_loader, device,
                CFG['accum_iter'], CFG['verbose_step'],
                scheduler=scheduler, schd_batch_update=False,
            )

            with torch.no_grad():
                valid_one_epoch(
                    epoch, model, loss_fn, val_loader, device,
                    CFG['accum_iter'], CFG['verbose_step'],
                    scheduler=None, schd_loss_update=False,
                )

            torch.save(model.state_dict(), f'save/{CFG["model_arch"]}_fold_{fold}_{epoch}')

        # Drop references before emptying the CUDA cache.
        del model, optimizer, train_loader, val_loader, scheduler
        torch.cuda.empty_cache()
        print("\n")


In [None]:
# Run training; checkpoints are written under save/.
train()

# 推論&提出

In [None]:

def infer():
    """Predict test labels with a weighted checkpoint ensemble and TTA.

    For the first stratified fold, every checkpoint listed in
    ``CFG['used_epochs']`` is loaded and run ``CFG['tta']`` times over the
    validation and test sets with the randomised inference transforms. The
    predictions are combined with the normalised ``CFG['weights']``, so the
    ensembled rows remain probability distributions. Validation log-loss and
    accuracy of the ensemble are printed.

    Returns:
        1-D NumPy array with the predicted class index of every test image.

    Raises:
        ValueError: if ``CFG['weights']`` has fewer entries than
            ``CFG['used_epochs']``.
    """
    print("pred start")
    train = train_df
    seed_everything(CFG['seed'])

    folds = StratifiedKFold(n_splits=CFG['fold_num'], shuffle=True, random_state=CFG['seed']).split(np.arange(train.shape[0]), train.label.values)

    used_epochs = CFG['used_epochs']
    if len(CFG['weights']) < len(used_epochs):
        raise ValueError("CFG['weights'] must provide one weight per entry of CFG['used_epochs']")
    # Normalise over the weights that are actually paired with a checkpoint.
    ckpt_weights = CFG['weights'][:len(used_epochs)]
    weight_total = sum(ckpt_weights)

    input_shape = (CFG["img_size_h"], CFG["img_size_w"])
    device = torch.device(CFG['device'])

    tst_preds = []
    val_loss = []
    val_acc = []
    n_folds_used = 0

    for fold, (trn_idx, val_idx) in enumerate(folds):
        # Only the first fold has trained checkpoints.
        if fold > 0:
            break
        n_folds_used += 1
        print(' fold {} started'.format(fold))

        valid_ = train.loc[val_idx,:].reset_index(drop=True)
        valid_ds = FlowerDataset(valid_, transforms=get_inference_transforms(input_shape), shape=input_shape, output_label=False)

        test_ds = FlowerDataset(test_df, transforms=get_inference_transforms(input_shape), shape=input_shape, output_label=False)

        val_loader = torch.utils.data.DataLoader(
            valid_ds,
            batch_size=CFG['valid_bs'],
            num_workers=CFG['num_workers'],
            shuffle=False,
            pin_memory=False,
        )

        tst_loader = torch.utils.data.DataLoader(
            test_ds,
            batch_size=CFG['valid_bs'],
            num_workers=CFG['num_workers'],
            shuffle=False,
            pin_memory=False,
        )

        # Every parameter is overwritten by load_state_dict below, so loading
        # pretrained weights first would be wasted work.
        model = FlowerImgClassifier(CFG['model_arch'], train.label.nunique(), pretrained=False).to(device)

        val_preds = []

        for i, epoch in enumerate(used_epochs):
            model.load_state_dict(torch.load(f'save/{CFG["model_arch"]}_fold_{fold}_{epoch}'))
            ckpt_weight = ckpt_weights[i] / weight_total

            with torch.no_grad():
                for _ in range(CFG['tta']):
                    val_preds += [ckpt_weight*inference_one_epoch(model, val_loader, device)]
                    tst_preds += [ckpt_weight*inference_one_epoch(model, tst_loader, device)]

        # The weights already sum to 1 across checkpoints, so only the TTA
        # repetitions are averaged; a plain mean over all entries would shrink
        # every row to sum to 1/len(used_epochs).
        val_preds = np.sum(val_preds, axis=0) / CFG['tta']
        val_loss.append(log_loss(valid_.label.values, val_preds))
        val_acc.append((valid_.label.values == np.argmax(val_preds, axis=1)).mean())

        del model
        torch.cuda.empty_cache()

    print('validation loss = {:.5f}'.format(np.mean(val_loss)))
    print('validation accuracy = {:.5f}'.format(np.mean(val_acc)))
    # Average over TTA repetitions and over the folds that contributed.
    tst_preds = np.sum(tst_preds, axis=0) / (CFG['tta'] * n_folds_used)

    return np.argmax(tst_preds, axis=1)


    

In [None]:
# Run inference; the result holds one predicted class index per test image.
tst_preds_label_all = infer()
print(tst_preds_label_all.shape)    

In [None]:

# Save the predictions: convert the class indices back to class names and
# write them into the "class" column of the sample submission.
label_dic = {
    0: "daisy",
    1: "dandelion",
    2: "rose",
    3: "sunflower",
    4: "tulip",
}
sub = pd.read_csv("../input/flowers-recognition/sample_submission.csv")
sub['class'] = tst_preds_label_all
sub["class"] = sub["class"].map(label_dic)
print(sub.value_counts("class"))
sub.to_csv('output/submission.csv', index=False)
