# config

## GPU

In [None]:
# Capture the output lines of the `nvidia-smi` shell command (IPython `!` syntax,
# so this cell only runs inside a notebook/IPython session), join them into one
# string and print it.
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
print(gpu_info)

## CFG

In [None]:
# File name of the stage-2 (stacking) YAML config; it is looked up inside STAGE2_DIR.
CONFIG_NAME = 'stacking12.yml'
# NOTE(review): this flag is not read anywhere else in this file; the code below
# checks CFG['debug'] from the loaded config instead — confirm whether it is still needed.
debug = False

# Directory that holds the stage-2 config and the fold{n}_best.pth checkpoints.
STAGE2_DIR = '../input/train-stacking-2dcnn-ver3/output'

In [None]:
# ====================================================
# CFG
# ====================================================
import yaml

# Load the stage-2 (stacking) configuration written by the training run.
CONFIG_PATH = f'{STAGE2_DIR}/{CONFIG_NAME}'
with open(CONFIG_PATH) as f:
    # yaml.load() without an explicit Loader raises TypeError on PyYAML >= 6
    # (and used the unsafe loader on old versions); safe_load parses plain
    # YAML mappings/lists/scalars, which is all a config file needs.
    config = yaml.safe_load(f)

# Top-level sections of the config file.
INFO = config['info']
TAG = config['tag']
CFG = config['cfg']

# Where submission.csv is written, and where the competition data lives.
OUTPUT_DIR = './'
DATA_PATH = '../input/cassava-leaf-disease-classification'

In [None]:
# # ====================================================
# # Directory settings
# # ====================================================
# import os
# import glob

# OUTPUT_DIR = './'
# NORMAL_MODEL_DIRS = ['../input/06t-efficientnet-b4-ns-512',
#                      '../input/12t-efficientnet-b5-cutout',
#                      '../input/14t-seresnext50']
# TTA_MODEL_DIRS = ['../input/20t-efficientnet-b3-cutmix-tta',
#                   '../input/22t-efficientnet-b4-cutmix-tta']

# MODEL_DIRS = NORMAL_MODEL_DIRS + TTA_MODEL_DIRS

# MODEL_WEIGHTS = [0.13412473, 0.18325853, 0.18344057, 0.30333854, 0.19583763]

# if not os.path.exists(OUTPUT_DIR):
#     os.makedirs(OUTPUT_DIR)

# TRAIN_PATH = '../input/cassava-leaf-disease-classification/train_images'
# TEST_PATH = '../input/cassava-leaf-disease-classification/test_images'

# normal_config_paths = []
# for model_dir in NORMAL_MODEL_DIRS:
#     assert len(glob.glob(f'{model_dir}/*.yml'))==1
#     normal_config_paths.append(glob.glob(f'{model_dir}/*.yml')[0])
    
# tta_config_paths = []
# for model_dir in TTA_MODEL_DIRS:
#     assert len(glob.glob(f'{model_dir}/*.yml'))==1
#     tta_config_paths.append(glob.glob(f'{model_dir}/*.yml')[0])

## Library

In [None]:
# ====================================================
# Library
# ====================================================
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

import datetime
import os
import math
import time
from typing import Any, List, Optional
import random
import glob
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter
import yaml

import scipy as sp
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold

from tqdm.auto import tqdm
from functools import partial

import cv2
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip, 
    RandomBrightness, RandomContrast, RandomBrightnessContrast, Rotate, ShiftScaleRotate, Cutout, 
    IAAAdditiveGaussianNoise, Transpose, CenterCrop
    )
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

import timm

import warnings 
warnings.filterwarnings('ignore')

# Debug runs may fall back to the CPU when no GPU is visible; regular runs
# always request CUDA (without checking availability).
device = torch.device(
    'cuda' if (not CFG['debug'] or torch.cuda.is_available()) else 'cpu'
)

# Wall-clock start of the run and a compact month/day/hour/minute tag for it.
start_time = datetime.datetime.now()
start_time_str = f'{start_time:%m%d%H%M}'

## Directory settings

## Data Loading

In [None]:
# Competition tables: training labels, the sample submission (used below as the
# list of test images to predict) and the label-id -> disease-name mapping.
train = pd.read_csv(f'{DATA_PATH}/train.csv')
test = pd.read_csv(f'{DATA_PATH}/sample_submission.csv')
label_map = pd.read_json(f'{DATA_PATH}/label_num_to_disease_map.json', 
                         orient='index')

# In debug mode work on a fixed random subset of 1000 training rows.
if CFG['debug']:
    train = train.sample(n=1000, random_state=CFG['seed']).reset_index(drop=True)

## Load Outputs

In [None]:
import os
import glob

# Resolve every stage-1 model id listed in the config to its input directory.
# Ids are left-padded to two characters with '0' and must match exactly one
# '../input/<id>*/' directory.
model_dirs = []
for stage1 in CFG['stage1_models']:
    dir_prefix = str(stage1).rjust(2, '0')
    matching_dirs = glob.glob(f'../input/{dir_prefix}*/')
    assert len(matching_dirs) == 1, matching_dirs
    model_dirs += [matching_dirs[0]]
model_dirs

In [None]:
# Split the stage-1 models into "TTA" models (their config's 'tag' section has
# a 'valid_augmentation' entry) and "normal" models (everything else).
normal_configs = []
tta_configs = []
normal_model_dirs = []
tta_model_dirs = []

for model_dir in model_dirs:
    # Each model directory must ship exactly one YAML config.
    config_paths = glob.glob(f'{model_dir}/*.yml')
    assert len(config_paths) == 1, config_paths
    with open(config_paths[0]) as f:
        # yaml.load() without a Loader raises TypeError on PyYAML >= 6;
        # safe_load is sufficient for plain config files.
        # A dedicated name is used so the stage-2 `config` loaded earlier is
        # not overwritten by the last stage-1 config.
        stage1_config = yaml.safe_load(f)
    if 'valid_augmentation' in stage1_config['tag']:
        tta_model_dirs.append(model_dir)
        tta_configs.append(stage1_config)
    else:
        normal_model_dirs.append(model_dir)
        normal_configs.append(stage1_config)

## Utils

In [None]:
# ====================================================
# Utils
# ====================================================
def get_score(y_true, y_pred):
    """Return the competition metric: `accuracy_score(y_true, y_pred)`."""
    score = accuracy_score(y_true, y_pred)
    return score


def remove_glob(pathname, recursive=True):
    """Delete every regular file matched by the glob pattern `pathname`.

    Matches that are not regular files (e.g. directories) are left alone.
    `recursive` is forwarded to `glob.glob`.
    """
    matched_files = filter(os.path.isfile, glob.glob(pathname, recursive=recursive))
    for file_path in matched_files:
        os.remove(file_path)

            
@contextmanager
def timer(name):
    """Context manager that logs the start of a block and its elapsed seconds.

    Messages go to the module-level ``LOGGER`` when one has been defined;
    otherwise a standard-library logger named after this module is used, so
    the helper no longer raises NameError while ``init_logger`` is disabled.
    The "done" message is only emitted when the block finishes without raising.

    Args:
        name: label placed in square brackets in both log messages.
    """
    import logging

    logger = globals().get('LOGGER')
    if logger is None:
        logger = logging.getLogger(__name__)

    t0 = time.time()
    logger.info(f'[{name}] start')
    yield
    logger.info(f'[{name}] done in {time.time() - t0:.0f} s.')


# def init_logger(log_file=OUTPUT_DIR+'inference.log'):
#     from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
#     logger = getLogger(__name__)
#     logger.setLevel(INFO)
#     handler1 = StreamHandler()
#     handler1.setFormatter(Formatter("%(message)s"))
#     handler2 = FileHandler(filename=log_file)
#     handler2.setFormatter(Formatter("%(message)s"))
#     logger.addHandler(handler1)
#     logger.addHandler(handler2)
#     return logger

#LOGGER = init_logger()


def seed_torch(seed=42):
    """Seed Python, NumPy and PyTorch (CPU and current CUDA device) RNGs.

    Also exports PYTHONHASHSEED and switches cuDNN to deterministic mode.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seed_fn in (random.seed, np.random.seed,
                    torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)
    torch.backends.cudnn.deterministic = True

# seed_torch(seed=CFG['seed'])

# stage1

## Dataset

In [None]:
# Image folders of the competition dataset. TEST_PATH is the one read by the
# datasets / main_tta below; TRAIN_PATH is not referenced elsewhere in this file.
TRAIN_PATH = '../input/cassava-leaf-disease-classification/train_images'
TEST_PATH = '../input/cassava-leaf-disease-classification/test_images'

In [None]:
# ====================================================
# Dataset
# ====================================================
class TestDataset(Dataset):
    """Dataset yielding one (optionally transformed) RGB test image per row.

    Args:
        df: table with an 'image_id' column holding file names under TEST_PATH.
        transform: optional callable invoked as ``transform(image=image)`` and
            expected to return a mapping with an 'image' entry.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['image_id'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Load image `idx`, convert BGR -> RGB and apply the transform.

        Raises:
            FileNotFoundError: if the image file is missing or cannot be decoded.
        """
        file_name = self.file_names[idx]
        file_path = f'{TEST_PATH}/{file_name}'
        image = cv2.imread(file_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        return image
    
    
class TTADataset(Dataset):
    """Dataset returning every TTA view of an image stacked into one tensor.

    Args:
        df: table with 'image_id' (file name) and 'label' columns.
        image_path: directory that contains the image files.
        ttas: sequence of callables, each invoked as ``tta(image=image)`` and
            expected to return a mapping whose 'image' entry is a tensor; all
            views must have the same shape so they can be stacked.
    """

    def __init__(self, df, image_path, ttas):
        self.df = df
        self.file_names = df['image_id'].values
        self.labels = df['label'].values
        self.image_path = image_path
        self.ttas = ttas

    def __len__(self) -> int:
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(views, label)`` for row `idx`.

        ``views`` stacks one transformed image per TTA along a new leading
        dimension; ``label`` is the row's label as a long tensor.

        Raises:
            FileNotFoundError: if the image file is missing or cannot be decoded.
        """
        file_name = self.file_names[idx]
        file_path = f'{self.image_path}/{file_name}'
        image = cv2.imread(file_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        views = [tta(image=image)['image'] for tta in self.ttas]

        image = torch.stack(views)
        label = torch.tensor(self.labels[idx]).long()

        return image, label

## transforms

In [None]:
def _get_augmentations(aug_list, cfg):
    process = []
    for aug in aug_list:
        if aug ==  'Resize':
            process.append(Resize(cfg['size'], cfg['size']))
        elif aug == 'RandomResizedCrop':
            process.append(RandomResizedCrop(cfg['size'], cfg['size']))
        elif aug == 'CenterCrop':
            process.append(CenterCrop(CFG['size'], CFG['size']))
        elif aug == 'Transpose':
            process.append(Transpose(p=0.5))
        elif aug == 'HorizontalFlip':
            process.append(HorizontalFlip(p=0.5))
        elif aug == 'VerticalFlip':
            process.append(VerticalFlip(p=0.5))
        elif aug == 'ShiftScaleRotate':
            process.append(ShiftScaleRotate(p=0.5))
        elif aug == 'Normalize':
            process.append(Normalize(
                            mean=[0.485, 0.456, 0.406],
                            std=[0.229, 0.224, 0.225],
                        ))
        else:
            raise ValueError(f'{aug} is not suitable')

    process.append(ToTensorV2())

    return process


# Transforms
# ====================================================
def get_transforms(*, aug_list, cfg):
    """Build a single Compose pipeline from augmentation names.

    Both arguments are keyword-only and are forwarded unchanged to
    `_get_augmentations`, whose resulting list is wrapped in `Compose`.
    """
    pipeline_steps = _get_augmentations(aug_list, cfg)
    return Compose(pipeline_steps)

[TODO] 違うTTAに対応する

In [None]:
def get_ttas(cfg):
    """Return the four test-time-augmentation pipelines, as a flat list.

    Order of the result (one prediction channel per entry downstream):
    CenterCrop, CenterCrop + Transpose, Resize, Resize + Transpose.
    Every pipeline ends with ImageNet-style normalisation and ToTensorV2.
    Both sizes come from cfg['size'].
    """
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    spatial_augs = (
        CenterCrop(cfg['size'], cfg['size']),
        Resize(cfg['size'], cfg['size']),
    )

    # Build the flat list directly: for each spatial op, first the plain
    # variant, then the transposed one (both share the same spatial op object).
    pipelines = []
    for spatial_aug in spatial_augs:
        plain = Compose([
            spatial_aug,
            Normalize(mean=mean, std=std, p=1.),
            ToTensorV2(),
        ])
        pipelines.append(plain)

        transposed = Compose([
            spatial_aug,
            Transpose(p=1),
            Normalize(mean=mean, std=std, p=1.),
            ToTensorV2(),
        ])
        pipelines.append(transposed)

    return pipelines

## Model

In [None]:
# ====================================================
# MODEL
# ====================================================
class CustomModel(nn.Module):
    """timm backbone whose classification layer is resized to `target_size`.

    The first attribute found among 'classifier', 'fc' and 'head' on the timm
    model is replaced by a fresh nn.Linear with the same input width. If the
    backbone exposes none of them, it is left untouched.
    """

    def __init__(self, model_name, target_size, pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        for head_attr in ('classifier', 'fc', 'head'):
            if not hasattr(self.model, head_attr):
                continue
            in_features = getattr(self.model, head_attr).in_features
            setattr(self.model, head_attr, nn.Linear(in_features, target_size))
            break  # only the first matching head is replaced

    def forward(self, x):
        """Run the wrapped backbone on `x` and return its output."""
        return self.model(x)

## helper function

In [None]:
# ====================================================
# Helper functions
# ====================================================
# def inference_normal(model, states, test_loader, device):
#     model.to(device)
#     tk0 = tqdm(enumerate(test_loader), total=len(test_loader))
#     probs = []
#     for i, (images) in tk0:
#         images = images.to(device)
#         avg_preds = []
#         for state in states:
#             model.load_state_dict(state['model'])
#             model.eval()
#             with torch.no_grad():
#                 y_preds = model(images)
#             avg_preds.append(y_preds.softmax(1).to('cpu').numpy())
#         avg_preds = np.mean(avg_preds, axis=0)
#         probs.append(avg_preds)
#     probs = np.concatenate(probs)
#     return probs


def inference_tta(model, states, tta_loader, device):
    """Run TTA inference and return per-view class probabilities.

    Args:
        model: network whose weights are overwritten with each checkpoint.
        states: iterable of checkpoints; each must provide the weights under
            the 'model' key.
        tta_loader: loader yielding ``(images, labels)`` batches where
            ``images`` is 5-D: (batch, n_crops, channels, height, width).
            The labels are ignored.
        device: device the model and image batches are moved to.

    Returns:
        numpy array of softmax probabilities averaged over `states`, with the
        per-batch arrays of shape (batch, n_crops, n_outputs) concatenated
        along the first axis. The TTA views are NOT averaged here.
    """
    model.to(device)
    tk0 = tqdm(enumerate(tta_loader), total=len(tta_loader))
    probs = []
    for i, (images, _) in tk0:
        images = images.to(device)
        batch_size, n_crops, c, h, w = images.size()
        # Fold the TTA views into the batch dimension for a single forward pass.
        images = images.view(-1, c, h, w)
        
        avg_preds = []
        # NOTE: every checkpoint is re-loaded into the model for every batch.
        for state in states:
            model.load_state_dict(state['model'])
            model.eval()
            with torch.no_grad():
                y_preds = model(images).softmax(1)
                # Restore the (batch, view, class) layout.
                y_preds = y_preds.view(batch_size, n_crops,-1)
            avg_preds.append(y_preds.to('cpu').numpy())
        # Average over checkpoints (axis 0 of the stacked list).
        avg_preds = np.mean(avg_preds, axis=0)
        probs.append(avg_preds)
        # Drop references before clearing the CUDA cache to keep memory low.
        # NOTE(review): `y_preds` is only bound when `states` is non-empty.
        del images, _, y_preds, avg_preds
        torch.cuda.empty_cache()
    probs = np.concatenate(probs)
    # Averaging over the TTA views is intentionally disabled; the views are
    # kept as a separate axis in the returned array.
#     return probs.mean(1)
    return probs


## inference

In [None]:
# def main_normal(config, model_dir):
#     # ====================================================
#     # inference
#     # ====================================================
    
#     INFO = config['info']
#     TAG = config['tag']
#     CFG = config['cfg']
#     CFG['train'] = False
#     CFG['inference'] = True
#     inference_batch_size = 64
    
#     seed_torch(seed=CFG['seed'])

#     model = CustomModel(TAG['model_name'], CFG['target_size'], pretrained=False)
#     states = [torch.load(path) for path in glob.glob(f'{model_dir}/*.pth')]
#     test_dataset = TestDataset(test, transform=get_transforms(aug_list=['Resize', 'Normalize'], cfg=CFG))
#     test_loader = DataLoader(test_dataset, batch_size=inference_batch_size, shuffle=False, 
#                              num_workers=CFG['num_workers'], pin_memory=True)
#     predictions = inference_normal(model, states, test_loader, device)
    
#     return predictions



def main_tta(config, model_dir):
    """Run TTA inference on the test set for one stage-1 model.

    Args:
        config: the model's parsed config with 'info', 'tag' and 'cfg'
            sections; its 'cfg' section is modified in place
            ('train' -> False, 'inference' -> True).
        model_dir: directory whose '*.pth' files are all loaded as checkpoints.

    Returns:
        Whatever `inference_tta` returns for the test loader.
    """
    # All three sections are looked up, although 'info' is not used further.
    info, tag, cfg = config['info'], config['tag'], config['cfg']
    cfg['train'], cfg['inference'] = False, True
    inference_batch_size = 8

    seed_torch(seed=cfg['seed'])

    net = CustomModel(tag['model_name'], cfg['target_size'], pretrained=False)
    checkpoint_paths = glob.glob(f'{model_dir}/*.pth')
    fold_states = [torch.load(ckpt_path) for ckpt_path in checkpoint_paths]

    dataset = TTADataset(test, TEST_PATH, ttas=get_ttas(cfg))
    loader = DataLoader(
        dataset,
        batch_size=inference_batch_size,
        shuffle=False,
        num_workers=2,
        pin_memory=True,
    )

    return inference_tta(net, fold_states, loader, device)

In [None]:
data_num = len(test)
model_num = len(model_dirs)
target_num = CFG['target_size']
# Number of TTA views per image; must equal the number of pipelines returned
# by get_ttas() and the in_channels of the stacking CNN.
channel_num = 4

# [Models, N, Channel, Labels]
# np.float (an alias of the builtin float) was removed in NumPy 1.24;
# np.float64 is the same dtype and works on every NumPy version.
stage1_predictions = np.zeros((model_num, data_num, channel_num, target_num), dtype=np.float64)

# NOTE: only TTA models are inferred; rows belonging to models in
# normal_model_dirs stay all-zero because the non-TTA path is disabled.
for config, model_dir in zip(tta_configs, tta_model_dirs):
    # Keep each model at its position in model_dirs (the configured order).
    stage1_predictions[model_dirs.index(model_dir)] = main_tta(config, model_dir)

# [Models, N, Channel, Labels] -> [N, Channel, Models, Labels]
stage1_predictions = stage1_predictions.transpose(1, 2, 0, 3)

# stage2

In [None]:
class StackingDataset(Dataset):
    """Dataset over stacked stage-1 predictions, with optional labels.

    Items are indexed along the first axis of `X`. Without labels an item is
    a float tensor; with labels it is a ``(float tensor, long tensor)`` pair.
    """

    def __init__(self, X: np.ndarray, y: Optional[np.ndarray] = None):
        self.X = X
        self.y = y

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        features = torch.tensor(self.X[idx], dtype=torch.float)
        if self.y is None:
            return features
        target = torch.tensor(self.y[idx], dtype=torch.long)
        return features, target

## model

In [None]:
class CNNStacking(nn.Module):
    """Small CNN that maps stacked stage-1 predictions to `n_labels` logits.

    Two bias-free (3, 1) convolutions (4 -> 8 -> 16 channels) with ReLU are
    followed by a flatten and a two-layer MLP. The first linear layer expects
    16 * n_labels flattened features, so the input must be sized such that
    the conv output has n_labels values per channel.
    The layer order inside `self.sq` fixes the checkpoint key names.
    """

    def __init__(self, n_labels):
        super().__init__()

        conv_kwargs = dict(kernel_size=(3, 1), bias=False)
        layers = [
            nn.Conv2d(in_channels=4, out_channels=8, **conv_kwargs),
            nn.ReLU(),
            nn.Conv2d(in_channels=8, out_channels=16, **conv_kwargs),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(in_features=16 * n_labels, out_features=4 * n_labels),
            nn.ReLU(),
            nn.Linear(in_features=4 * n_labels, out_features=n_labels),
        ]
        self.sq = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the sequential stack to `x`."""
        return self.sq(x)

## helper function

In [None]:
# ====================================================
# Helper functions
# ====================================================
def inference(model, states, test_loader, device):
    """Predict class probabilities, averaged over all checkpoints.

    For every batch of features each checkpoint's weights (``state['model']``)
    are loaded into `model`, the softmax over dim 1 is taken, and the results
    are averaged across checkpoints. Returns the per-batch averages
    concatenated into one numpy array.
    """
    model.to(device)
    batch_probs = []
    progress = tqdm(enumerate(test_loader), total=len(test_loader))
    for _, features in progress:
        features = features.to(device)
        per_state_probs = []
        for state in states:
            model.load_state_dict(state['model'])
            model.eval()
            with torch.no_grad():
                logits = model(features)
            per_state_probs.append(logits.softmax(1).to('cpu').numpy())
        batch_probs.append(np.mean(per_state_probs, axis=0))
    return np.concatenate(batch_probs)

In [None]:
# inference
# Stage-2 inference: one stacking checkpoint per training fold.
model = CNNStacking(CFG['target_size'])
checkpoint_paths = [STAGE2_DIR+f'/fold{fold}_best.pth' for fold in CFG['trn_fold']]
states = list(map(torch.load, checkpoint_paths))

test_dataset = StackingDataset(stage1_predictions)
test_loader = DataLoader(
    test_dataset,
    batch_size=CFG['batch_size'],
    shuffle=False,
    num_workers=CFG['num_workers'],
    pin_memory=True,
)
predictions = inference(model, states, test_loader, device)

# Submission: the most probable class per image.
test['label'] = np.argmax(predictions, axis=1)
submission = test[['image_id', 'label']]
submission.to_csv(OUTPUT_DIR+'submission.csv', index=False)

In [None]:
test