In [None]:
import sys
sys.path.append('../input/panda-cwd/')
sys.path.append('../input/pandamixupbaselineb0seutao/')

In [None]:
# drop efficientnet dependency
!echo "class EfficientNet: pass" > efficientnet_pytorch.py

In [None]:
import os
import time
from pathlib import Path
from tqdm import tqdm
from copy import copy, deepcopy
from pprint import pprint
import random

import skimage.io as io

import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
import torchvision.models as models
import torch.utils.data as D
from torchvision import transforms as T
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau
try:
    from apex import amp
    USE_APEX = True
except:
    USE_APEX = False
    
from albumentations import *
from albumentations.pytorch import ToTensor, ToTensorV2
from kuma_utils.nn.training import TorchTrainer
from kuma_utils.nn.logger import Logger
from kuma_utils.nn.snapshot import *
from kuma_utils.metrics import *

# from configs import *
from panda_models import *
from transforms import *
from metrics import sigmoid, OptimizedRounder
from datasets import PandaDataset
from utils import analyse_results

In [None]:
import collections
import math
import re
from collections import defaultdict, Counter
from functools import partial

########################################################################
############### HELPER FUNCTIONS FOR MODEL ARCHITECTURE ################
########################################################################
USE_PRETRAINED = False

# Model-wide settings shared by the stem, every block and the head.
GlobalParams = collections.namedtuple(
    'GlobalParams',
    [
        'batch_norm_momentum',
        'batch_norm_epsilon',
        'dropout_rate',
        'num_classes',
        'width_coefficient',
        'depth_coefficient',
        'depth_divisor',
        'min_depth',
        'drop_connect_rate',
        'image_size',
    ],
)

# Settings describing a single block (stage) of the network.
BlockArgs = collections.namedtuple(
    'BlockArgs',
    [
        'kernel_size',
        'num_repeat',
        'input_filters',
        'output_filters',
        'expand_ratio',
        'id_skip',
        'stride',
        'se_ratio',
    ],
)

# Give every field a default of None so both tuples can be built from a partial set of keywords.
GlobalParams.__new__.__defaults__ = tuple(None for _ in GlobalParams._fields)
BlockArgs.__new__.__defaults__ = tuple(None for _ in BlockArgs._fields)

class SwishImplementation(torch.autograd.Function):
    """Custom autograd function computing Swish, ``i * sigmoid(i)``.

    Only the input tensor is stored for the backward pass; the sigmoid is
    recomputed there rather than kept from the forward pass.
    """

    @staticmethod
    def forward(ctx, i):
        """Return ``i * sigmoid(i)`` and stash ``i`` for the backward pass."""
        result = i * torch.sigmoid(i)
        ctx.save_for_backward(i)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. the input: ``grad * s * (1 + i * (1 - s))`` with ``s = sigmoid(i)``."""
        # `saved_tensors` is the supported accessor for tensors stored with
        # `save_for_backward`; the old `saved_variables` alias is deprecated.
        i = ctx.saved_tensors[0]
        sigmoid_i = torch.sigmoid(i)
        return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))


class MemoryEfficientSwish(nn.Module):
    """Swish activation module that delegates to the custom ``SwishImplementation`` autograd function."""

    def forward(self, x):
        activated = SwishImplementation.apply(x)
        return activated

class Swish(nn.Module):
    """Plain Swish activation, ``x * sigmoid(x)``, built from standard autograd ops."""

    def forward(self, x):
        gate = torch.sigmoid(x)
        return x * gate


def round_filters(filters, global_params):
    """ Scale a filter count by the width multiplier and snap it to a multiple of the divisor.

    Returns `filters` untouched when `global_params.width_coefficient` is falsy.
    Otherwise the scaled count is rounded to the nearest multiple of
    `global_params.depth_divisor`, never below `min_depth` (or the divisor when
    `min_depth` is unset), and bumped up by one divisor if rounding removed
    more than 10% of the scaled value.
    """
    width_mult = global_params.width_coefficient
    if not width_mult:
        return filters

    divisor = global_params.depth_divisor
    lower_bound = global_params.min_depth or divisor
    scaled = filters * width_mult

    rounded = int(scaled + divisor / 2) // divisor * divisor
    rounded = max(lower_bound, rounded)
    if rounded < 0.9 * scaled:  # rounding down lost more than 10%
        rounded += divisor
    return int(rounded)


def round_repeats(repeats, global_params):
    """ Scale a block repeat count by the depth multiplier, rounding up.

    Returns `repeats` untouched when `global_params.depth_coefficient` is falsy.
    """
    depth_mult = global_params.depth_coefficient
    return int(math.ceil(depth_mult * repeats)) if depth_mult else repeats


def drop_connect(inputs, p, training):
    """ Drop connect: zero whole samples of a batch with probability `p` and rescale the rest.

    Outside training the input is returned unchanged. During training one
    uniform random number is drawn per sample (first dimension); samples are
    kept when `floor((1 - p) + rand)` is 1, and the result is divided by the
    keep probability.
    """
    if not training:
        return inputs

    survival = 1 - p
    per_sample_noise = torch.rand(
        [inputs.shape[0], 1, 1, 1], dtype=inputs.dtype, device=inputs.device)
    keep_mask = torch.floor(survival + per_sample_noise)
    return inputs / survival * keep_mask


def get_same_padding_conv2d(image_size=None):
    """ Pick the "same"-padding convolution class to use.

    With no image size the dynamic-padding class is returned; with an image
    size, the static-padding class is returned pre-bound to that size
    (static padding is necessary for ONNX exporting of models).
    """
    if image_size is not None:
        return partial(Conv2dStaticSamePadding, image_size=image_size)
    return Conv2dDynamicSamePadding


def get_width_and_height_from_size(x):
    """ Normalise a size given as an int or a list/tuple.

    An int is duplicated into a pair; a list or tuple is returned as-is.
    Any other type raises TypeError.
    """
    if isinstance(x, int):
        return x, x
    if isinstance(x, (list, tuple)):
        return x
    raise TypeError()


def calculate_output_image_size(input_image_size, stride):
    """ Compute the spatial size produced by a "same"-padded convolution with the given stride.

    Returns None when no input size is given; otherwise a two-element list
    with each dimension divided by the stride and rounded up. A non-int
    stride is read through its first element.
    """
    if input_image_size is None:
        return None

    height, width = get_width_and_height_from_size(input_image_size)
    step = stride if isinstance(stride, int) else stride[0]
    return [int(math.ceil(extent / step)) for extent in (height, width)]


class Conv2dDynamicSamePadding(nn.Conv2d):
    """ 2D convolution with TensorFlow-style "same" padding computed from each input's size. """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):
        # The built-in padding is fixed at 0; all padding is applied by hand in forward().
        super().__init__(in_channels, out_channels, kernel_size,
                         stride=stride, padding=0, dilation=dilation, groups=groups, bias=bias)
        if len(self.stride) != 2:
            self.stride = [self.stride[0]] * 2

    def forward(self, x):
        in_h, in_w = x.size()[-2:]
        kernel_h, kernel_w = self.weight.size()[-2:]
        stride_h, stride_w = self.stride
        dilation_h, dilation_w = self.dilation[0], self.dilation[1]

        # Target output size is ceil(input / stride), as with TF "SAME".
        out_h = math.ceil(in_h / stride_h)
        out_w = math.ceil(in_w / stride_w)
        extra_h = max((out_h - 1) * stride_h + (kernel_h - 1) * dilation_h + 1 - in_h, 0)
        extra_w = max((out_w - 1) * stride_w + (kernel_w - 1) * dilation_w + 1 - in_w, 0)

        if extra_h > 0 or extra_w > 0:
            # Odd padding puts the extra pixel on the right / bottom.
            left, top = extra_w // 2, extra_h // 2
            x = F.pad(x, [left, extra_w - left, top, extra_h - top])
        return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)


class Conv2dStaticSamePadding(nn.Conv2d):
    """ 2D convolution with TensorFlow-style "same" padding precomputed for one fixed image size. """

    def __init__(self, in_channels, out_channels, kernel_size, image_size=None, **kwargs):
        super().__init__(in_channels, out_channels, kernel_size, **kwargs)
        if len(self.stride) != 2:
            self.stride = [self.stride[0]] * 2

        # The padding is derived once from the declared image size and stored as a module.
        assert image_size is not None
        if isinstance(image_size, int):
            in_h, in_w = image_size, image_size
        else:
            in_h, in_w = image_size
        kernel_h, kernel_w = self.weight.size()[-2:]
        stride_h, stride_w = self.stride
        out_h = math.ceil(in_h / stride_h)
        out_w = math.ceil(in_w / stride_w)
        extra_h = max((out_h - 1) * stride_h + (kernel_h - 1) * self.dilation[0] + 1 - in_h, 0)
        extra_w = max((out_w - 1) * stride_w + (kernel_w - 1) * self.dilation[1] + 1 - in_w, 0)

        if extra_h > 0 or extra_w > 0:
            left, top = extra_w // 2, extra_h // 2
            # Order is (left, right, top, bottom); odd padding goes right / bottom.
            self.static_padding = nn.ZeroPad2d((left, extra_w - left, top, extra_h - top))
        else:
            self.static_padding = Identity()

    def forward(self, x):
        padded = self.static_padding(x)
        return F.conv2d(padded, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)


class Identity(nn.Module):
    """Pass-through module: `forward` returns its argument unchanged."""

    def __init__(self):
        super().__init__()

    def forward(self, input):
        return input

########################################################################
############## HELPER FUNCTIONS FOR LOADING MODEL PARAMS ###############
########################################################################


def efficientnet_params(model_name):
    """ Look up the scaling coefficients for an EfficientNet variant.

    Returns a tuple `(width, depth, resolution, dropout)`; an unknown
    `model_name` raises KeyError.
    """
    # name -> (width coefficient, depth coefficient, resolution, dropout rate)
    coefficients = dict([
        ('efficientnet-b0', (1.0, 1.0, 224, 0.2)),
        ('efficientnet-b1', (1.0, 1.1, 240, 0.2)),
        ('efficientnet-b2', (1.1, 1.2, 260, 0.3)),
        ('efficientnet-b3', (1.2, 1.4, 300, 0.3)),
        ('efficientnet-b4', (1.4, 1.8, 380, 0.4)),
        ('efficientnet-b5', (1.6, 2.2, 456, 0.4)),
        ('efficientnet-b6', (1.8, 2.6, 528, 0.5)),
        ('efficientnet-b7', (2.0, 3.1, 600, 0.5)),
        ('efficientnet-b8', (2.2, 3.6, 672, 0.5)),
        ('efficientnet-l2', (4.3, 5.3, 800, 0.5)),
    ])
    return coefficients[model_name]


class BlockDecoder(object):
    """ Block Decoder for readability, straight from the official TensorFlow repository.

    Converts between the compact string notation of a block
    (e.g. ``'r2_k3_s22_e6_i16_o24_se0.25'``) and `BlockArgs` namedtuples.
    """

    @staticmethod
    def _decode_block_string(block_string):
        """ Gets a block through a string notation of arguments.

        Each '_'-separated token is split into a leading key and the value
        starting at its first digit (``'se0.25'`` -> ``'se'``, ``'0.25'``).
        Tokens without digits (such as ``'noskip'``) carry no value and are
        only detected by substring search.

        :param block_string: a string notation of one block
        :return: a BlockArgs namedtuple; `stride` is a one-element list
        """
        assert isinstance(block_string, str)

        options = {}
        for op in block_string.split('_'):
            splits = re.split(r'(\d.*)', op)
            if len(splits) >= 2:
                key, value = splits[:2]
                options[key] = value

        # Check stride: it must be present, and either a single digit or two equal digits.
        # (Looking it up with .get keeps a missing 's' an assertion failure rather than a KeyError.)
        stride = options.get('s')
        assert stride is not None and (
            len(stride) == 1 or (len(stride) == 2 and stride[0] == stride[1]))

        return BlockArgs(
            kernel_size=int(options['k']),
            num_repeat=int(options['r']),
            input_filters=int(options['i']),
            output_filters=int(options['o']),
            expand_ratio=int(options['e']),
            id_skip=('noskip' not in block_string),
            se_ratio=float(options['se']) if 'se' in options else None,
            stride=[int(stride[0])])

    @staticmethod
    def _encode_block_string(block):
        """Encodes a block to a string.

        :param block: a BlockArgs namedtuple; `stride` may be an int or a
            list/tuple of one or two ints
        :return: the string notation of the block
        """
        # BlockArgs stores the stride under `stride`, as an int or a short sequence;
        # the string form always carries two digits.
        stride = block.stride
        if isinstance(stride, int):
            stride = [stride]
        args = [
            'r%d' % block.num_repeat,
            'k%d' % block.kernel_size,
            's%d%d' % (stride[0], stride[-1]),
            'e%s' % block.expand_ratio,
            'i%d' % block.input_filters,
            'o%d' % block.output_filters
        ]
        # se_ratio is None for blocks without squeeze-and-excitation.
        if block.se_ratio is not None and 0 < block.se_ratio <= 1:
            args.append('se%s' % block.se_ratio)
        if block.id_skip is False:
            args.append('noskip')
        return '_'.join(args)

    @staticmethod
    def decode(string_list):
        """
        Decodes a list of string notations to specify blocks inside the network.
        :param string_list: a list of strings, each string is a notation of block
        :return: a list of BlockArgs namedtuples of block args
        """
        assert isinstance(string_list, list)
        return [BlockDecoder._decode_block_string(block_string) for block_string in string_list]

    @staticmethod
    def encode(blocks_args):
        """
        Encodes a list of BlockArgs to a list of strings.
        :param blocks_args: a list of BlockArgs namedtuples of block args
        :return: a list of strings, each string is a notation of block
        """
        return [BlockDecoder._encode_block_string(block) for block in blocks_args]


def efficientnet(width_coefficient=None, depth_coefficient=None, dropout_rate=0.2,
                 drop_connect_rate=0.2, image_size=None, num_classes=1000):
    """ Build the decoded block list and global parameters for an EfficientNet.

    The seven block specifications are fixed; the arguments only populate
    the returned GlobalParams. Returns `(blocks_args, global_params)`.
    """
    # One string per stage: repeats, kernel, stride, expand ratio, in/out filters, SE ratio.
    block_strings = [
        'r1_k3_s11_e1_i32_o16_se0.25',
        'r2_k3_s22_e6_i16_o24_se0.25',
        'r2_k5_s22_e6_i24_o40_se0.25',
        'r3_k3_s22_e6_i40_o80_se0.25',
        'r3_k5_s11_e6_i80_o112_se0.25',
        'r4_k5_s22_e6_i112_o192_se0.25',
        'r1_k3_s11_e6_i192_o320_se0.25',
    ]
    decoded_blocks = BlockDecoder.decode(block_strings)

    shared_params = GlobalParams(
        batch_norm_momentum=0.99,
        batch_norm_epsilon=1e-3,
        dropout_rate=dropout_rate,
        drop_connect_rate=drop_connect_rate,
        num_classes=num_classes,
        width_coefficient=width_coefficient,
        depth_coefficient=depth_coefficient,
        depth_divisor=8,
        min_depth=None,
        image_size=image_size,
    )
    return decoded_blocks, shared_params


def get_model_params(model_name, override_params):
    """ Return `(blocks_args, global_params)` for a named model.

    Only names starting with 'efficientnet' are supported; anything else
    raises NotImplementedError. A truthy `override_params` mapping is
    applied to the global params with `_replace`.
    """
    if not model_name.startswith('efficientnet'):
        raise NotImplementedError('model name is not pre-defined: %s' % model_name)

    width, depth, resolution, dropout = efficientnet_params(model_name)
    # note: all models have drop connect rate = 0.2
    blocks_args, global_params = efficientnet(
        width_coefficient=width,
        depth_coefficient=depth,
        dropout_rate=dropout,
        image_size=resolution,
    )

    if override_params:
        # ValueError will be raised here if override_params has fields not included in global_params.
        global_params = global_params._replace(**override_params)
    return blocks_args, global_params


# Download locations of the standard pretrained checkpoints, keyed by model name.
# load_pretrained_weights() reads this table when `advprop` is False.
# Note there is no 'efficientnet-b8' entry here (only in url_map_advprop below).
url_map = {
    'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth',
    'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth',
    'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth',
    'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth',
    'efficientnet-b4': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth',
    'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth',
    'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth',
    'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth',
}


# Download locations of the "adv-" (AdvProp) checkpoints, keyed by model name.
# load_pretrained_weights() reads this table when `advprop` is True.
url_map_advprop = {
    'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b0-b64d5a18.pth',
    'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b1-0f3ce85a.pth',
    'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b2-6e9d97e5.pth',
    'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b3-cdd7c0f4.pth',
    'efficientnet-b4': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b4-44fb3a87.pth',
    'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b5-86493f6b.pth',
    'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b6-ac80338e.pth',
    'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b7-4652b6dd.pth',
    'efficientnet-b8': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b8-22a8fe65.pth',
}


def load_pretrained_weights(model, model_name, load_fc=True, advprop=False):
    """ Loads pretrained weights, and downloads if loading for the first time.

    Args:
        model: module whose `load_state_dict` receives the downloaded weights.
        model_name: key into `url_map` / `url_map_advprop`.
        load_fc: when False, the `_fc.weight` / `_fc.bias` entries are dropped
            from the checkpoint and the model keeps its own final layer.
        advprop: select the AdvProp checkpoint table instead of the standard one.

    Raises:
        KeyError: if `model_name` has no entry in the selected URL table.
        AssertionError: if, with `load_fc=False`, anything other than the two
            `_fc` tensors is missing after a non-strict load.
    """
    # Imported here because the notebook never imports `model_zoo` at module
    # level; it is only needed when pretrained weights are actually requested.
    from torch.utils import model_zoo

    # AutoAugment or Advprop (different preprocessing)
    url_map_ = url_map_advprop if advprop else url_map
    state_dict = model_zoo.load_url(url_map_[model_name])
    if load_fc:
        model.load_state_dict(state_dict)
    else:
        state_dict.pop('_fc.weight')
        state_dict.pop('_fc.bias')
        res = model.load_state_dict(state_dict, strict=False)
        assert set(res.missing_keys) == set(['_fc.weight', '_fc.bias']), 'issue loading pretrained weights'
    print('Loaded pretrained weights for {}'.format(model_name))
    
    
class MBConvBlock(nn.Module):
    """
    Mobile Inverted Residual Bottleneck Block

    Layer sequence built in __init__ and applied in forward():
    optional 1x1 expansion conv -> depthwise conv -> optional
    squeeze-and-excitation -> 1x1 projection conv, with a residual
    connection around the whole block when the skip conditions hold.

    Args:
        block_args (namedtuple): BlockArgs, see above
        global_params (namedtuple): GlobalParam, see above
        image_size: forwarded to get_same_padding_conv2d to choose static or
            dynamic "same" padding; None selects dynamic padding.
    Attributes:
        has_se (bool): Whether the block contains a Squeeze and Excitation layer.
    """

    def __init__(self, block_args, global_params, image_size=None):
        super().__init__()
        self._block_args = block_args
        # BatchNorm2d is given the complement of the configured momentum value.
        self._bn_mom = 1 - global_params.batch_norm_momentum
        self._bn_eps = global_params.batch_norm_epsilon
        # Squeeze-and-excitation is enabled only for an se_ratio in (0, 1].
        self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1)
        self.id_skip = block_args.id_skip  # skip connection and drop connect


        # Expansion phase
        inp = self._block_args.input_filters  # number of input channels
        oup = self._block_args.input_filters * self._block_args.expand_ratio  # number of output channels
        # With expand_ratio == 1 no expansion layers are created (oup == inp).
        if self._block_args.expand_ratio != 1:
            Conv2d = get_same_padding_conv2d(image_size=image_size)
            self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False)
            self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)
            # image_size = calculate_output_image_size(image_size, 1) <-- this would do nothing
        
        # Depthwise convolution phase
        k = self._block_args.kernel_size
        s = self._block_args.stride
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._depthwise_conv = Conv2d(
            in_channels=oup, out_channels=oup, groups=oup,  # groups makes it depthwise
            kernel_size=k, stride=s, bias=False)
        self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)
        # The depthwise conv is the only strided layer, so track the new size for the layers below.
        image_size = calculate_output_image_size(image_size, s)

        # Squeeze and Excitation layer, if desired
        if self.has_se:
            # SE convs run on the 1x1 pooled map produced in forward(), hence image_size=(1,1).
            Conv2d = get_same_padding_conv2d(image_size=(1,1))
            # The squeezed width is derived from the block's input filters, not the expanded width.
            num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio))
            self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1)
            self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1)

        # Output phase
        final_oup = self._block_args.output_filters
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False)
        self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps)
        self._swish = MemoryEfficientSwish()

    def forward(self, inputs, drop_connect_rate=None):
        """
        :param inputs: input tensor
        :param drop_connect_rate: drop connect rate (float, between 0 and 1)
        :return: output of block
        """

        # Expansion and Depthwise Convolution
        x = inputs
        if self._block_args.expand_ratio != 1:
            x = self._swish(self._bn0(self._expand_conv(inputs)))
        x = self._swish(self._bn1(self._depthwise_conv(x)))

        # Squeeze and Excitation
        if self.has_se:
            # Global-average-pool to 1x1, pass through the two SE convs, then gate the features.
            x_squeezed = F.adaptive_avg_pool2d(x, 1)
            x_squeezed = self._se_expand(self._swish(self._se_reduce(x_squeezed)))
            x = torch.sigmoid(x_squeezed) * x

        # Projection has no activation after its batch norm.
        x = self._bn2(self._project_conv(x))

        # Skip connection and drop connect
        input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters
        # NOTE(review): BlockDecoder stores stride as a one-element list, and `[1] == 1` is False,
        # so this branch is only reachable for blocks whose stride was replaced by the int 1
        # (the repeated blocks created in EfficientNet.__init__) — confirm this is intended.
        if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters:
            if drop_connect_rate:
                x = drop_connect(x, p=drop_connect_rate, training=self.training)
            x = x + inputs  # skip connection
        return x

    def set_swish(self, memory_efficient=True):
        """Sets swish function as memory efficient (for training) or standard (for export)"""
        self._swish = MemoryEfficientSwish() if memory_efficient else Swish()


class EfficientNet(nn.Module):
    """
    An EfficientNet model. Most easily loaded with the .from_name or .from_pretrained methods

    Structure built in __init__: a stride-2 stem convolution, a ModuleList of
    MBConvBlocks generated from `blocks_args`, a 1x1 head convolution, then
    average pooling, dropout and a linear classifier.

    Args:
        blocks_args (list): A list of BlockArgs to construct blocks
        global_params (namedtuple): A set of GlobalParams shared between blocks
    Example:
        model = EfficientNet.from_pretrained('efficientnet-b0')
    """

    def __init__(self, blocks_args=None, global_params=None):
        super().__init__()
        assert isinstance(blocks_args, list), 'blocks_args should be a list'
        assert len(blocks_args) > 0, 'block args must be greater than 0'
        self._global_params = global_params
        self._blocks_args = blocks_args

        # Batch norm parameters
        # BatchNorm2d is given the complement of the configured momentum value.
        bn_mom = 1 - self._global_params.batch_norm_momentum
        bn_eps = self._global_params.batch_norm_epsilon

        # Get stem static or dynamic convolution depending on image size
        image_size = global_params.image_size
        Conv2d = get_same_padding_conv2d(image_size=global_params.image_size)

        # Stem
        in_channels = 3  # rgb
        out_channels = round_filters(32, self._global_params)  # number of output channels
        self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
        self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
        # `image_size` is updated after every strided layer so later layers get matching static padding.
        image_size = calculate_output_image_size(image_size, 2)

        # Build blocks
        self._blocks = nn.ModuleList([])
        for block_args in self._blocks_args:

            # Update block input and output filters based on depth multiplier.
            block_args = block_args._replace(
                input_filters=round_filters(block_args.input_filters, self._global_params),
                output_filters=round_filters(block_args.output_filters, self._global_params),
                num_repeat=round_repeats(block_args.num_repeat, self._global_params)
            )

            # The first block needs to take care of stride and filter size increase.
            self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size))
            image_size = calculate_output_image_size(image_size, block_args.stride)
            # Remaining repeats keep the channel count and use stride 1 (stored as a plain int here).
            if block_args.num_repeat > 1:
                block_args = block_args._replace(input_filters=block_args.output_filters, stride=1)
            for _ in range(block_args.num_repeat - 1):
                self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size))
                # image_size = calculate_output_image_size(image_size, block_args.stride)  # ?

        # Head
        # `block_args` is the loop variable left over from the last stage above.
        in_channels = block_args.output_filters  # output of final block
        out_channels = round_filters(1280, self._global_params)
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)

        # Final linear layer
        self._avg_pooling = nn.AdaptiveAvgPool2d(1)
        self._dropout = nn.Dropout(self._global_params.dropout_rate)
        self._fc = nn.Linear(out_channels, self._global_params.num_classes)
        self._swish = MemoryEfficientSwish()

    def set_swish(self, memory_efficient=True):
        """Sets swish function as memory efficient (for training) or standard (for export)"""
        self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
        # Propagate the same choice to every block.
        for block in self._blocks:
            block.set_swish(memory_efficient)


    def extract_features(self, inputs):
        """ Returns output of the final convolution layer """

        # Stem
        x = self._swish(self._bn0(self._conv_stem(inputs)))

        # Blocks
        for idx, block in enumerate(self._blocks):
            drop_connect_rate = self._global_params.drop_connect_rate
            if drop_connect_rate:
                # Scale the rate by block position: 0 for the first block, growing with idx.
                drop_connect_rate *= float(idx) / len(self._blocks)
            x = block(x, drop_connect_rate=drop_connect_rate)

        # Head
        x = self._swish(self._bn1(self._conv_head(x)))

        return x

    def forward(self, inputs):
        """ Calls extract_features to extract features, applies final linear layer, and returns logits. """
        bs = inputs.size(0)
        # Convolution layers
        x = self.extract_features(inputs)

        # Pooling and final linear layer
        x = self._avg_pooling(x)
        x = x.view(bs, -1)
        x = self._dropout(x)
        x = self._fc(x)
        return x

    @classmethod
    def from_name(cls, model_name, override_params=None):
        """ Builds an untrained model for `model_name`, optionally overriding GlobalParams fields. """
        cls._check_model_name_is_valid(model_name)
        blocks_args, global_params = get_model_params(model_name, override_params)
        return cls(blocks_args, global_params)

    @classmethod
    def from_pretrained(cls, model_name, advprop=False, num_classes=1000, in_channels=3):
        """ Builds a model and loads pretrained weights into it.

        The final linear layer's weights are loaded only when num_classes == 1000.
        When in_channels != 3 the stem convolution is replaced AFTER the weights
        are loaded, so the new stem is freshly initialised.
        """
        model = cls.from_name(model_name, override_params={'num_classes': num_classes})
        load_pretrained_weights(model, model_name, load_fc=(num_classes == 1000), advprop=advprop)
        model._change_in_channels(in_channels)
        return model
    
    @classmethod
    def get_image_size(cls, model_name):
        """ Returns the resolution entry of efficientnet_params for `model_name`. """
        cls._check_model_name_is_valid(model_name)
        _, _, res, _ = efficientnet_params(model_name)
        return res

    @classmethod
    def _check_model_name_is_valid(cls, model_name):
        """ Validates model name. """ 
        # Accepts 'efficientnet-b0' through 'efficientnet-b8'; anything else raises ValueError.
        valid_models = ['efficientnet-b'+str(i) for i in range(9)]
        if model_name not in valid_models:
            raise ValueError('model_name should be one of: ' + ', '.join(valid_models))

    # Instance method whose first parameter is named `model` instead of `self`.
    def _change_in_channels(model, in_channels):
        """ Replaces the stem convolution with a new one when in_channels is not 3. """
        if in_channels != 3:
            Conv2d = get_same_padding_conv2d(image_size = model._global_params.image_size)
            out_channels = round_filters(32, model._global_params)
            model._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)

            
class FeatureEfficientNet(EfficientNet):
    """EfficientNet variant whose forward pass stops after feature extraction."""

    def forward(self, inputs):
        """ Calls extract_features and returns its output directly.

        Unlike `EfficientNet.forward`, no pooling, dropout or final linear
        layer is applied.
        """
        # Convolution layers only
        return self.extract_features(inputs)
    
    
import skimage.io
class PandaDataset(D.Dataset):
    """Dataset that loads multi-resolution TIFF slides by image id.

    Images are read from ``<root_path>/train_images/<id>.tiff`` (or
    ``test_images`` when `istest` is set) with `skimage.io.MultiImage`, and
    the page at index `img_size` is used.

    Args:
        images: sequence of image ids (file stems).
        labels: sequence of labels, same length as `images`.
        insts: optional sequence of institution names; used only with `cat_insts`.
        img_size: index of the page to read from the multi-page TIFF.
        transform: a callable invoked as ``transform(image=...)['image']``, or,
            with `separate_image`, a mapping with 'tile' and 'augmentation'
            callables. When falsy, the raw image is returned.
        bin_label: encode the label as a length-5 cumulative vector
            (2 -> [1, 1, 0, 0, 0]).
        root_path: directory holding `train_images` / `test_images`
            (str or Path).
        istest: read from `test_images` instead of `train_images`.
        return_index: also return the sample index from `__getitem__`.
        use_cache: keep loaded (or tiled, with `separate_image`) images in memory.
        mixup: pair each sample with a random second sample; the returned image
            is the flat concatenation ``[lam, image, image2]`` and the label is
            the `lam`-weighted mix of the two labels.
        mixup_alpha: parameter of the Beta distribution `lam` is drawn from.
        separate_image: tile the image first and augment each tile separately.
        cat_insts: flatten the output and append an institution code
            (0.0 for 'karolinska', 1.0 for 'radboud').
    """

    def __init__(self, images, labels, insts=None, img_size=2, transform=None, bin_label=False,
                 root_path='', istest=False, return_index=True,
                 use_cache=False, mixup=False, mixup_alpha=1.0, separate_image=False, cat_insts=False):
        assert len(images) == len(labels)
        self.images = images
        self.labels = labels
        self.insts = insts
        self.img_size = img_size
        self.transform = transform
        self.bin_label = bin_label
        self.root = root_path
        self.istest = istest
        self.return_index = return_index
        self.use_cache = use_cache
        self.cache = {}
        self.mixup = mixup
        self.mixup_alpha = mixup_alpha
        self.separate_image = separate_image
        self.cat_insts = cat_insts

    def __len__(self):
        return len(self.images)

    def _image_path(self, idx):
        """Return the TIFF path of sample `idx` as a string."""
        subdir = 'test_images' if self.istest else 'train_images'
        # Wrap in Path so a plain-string root (including the '' default) works with `/`.
        return str(Path(self.root) / subdir / f'{self.images[idx]}.tiff')

    def _load_data(self, idx):
        """Load, optionally cache, and transform sample `idx`; returns ``(output, label)``."""
        cache_loaded = idx in self.cache
        if cache_loaded:
            image = self.cache[idx]
        else:
            image = skimage.io.MultiImage(self._image_path(idx))[self.img_size]
            # With separate_image the tiled result is cached further down instead.
            if self.use_cache and not self.separate_image:
                self.cache[idx] = image

        if self.transform:
            if self.separate_image:
                if not cache_loaded:
                    assert 'tile' in self.transform.keys()
                    image = self.transform['tile'](image=image)['image']  # N x 3 x W x H
                    if self.use_cache:
                        self.cache[idx] = image
                output = torch.stack(
                    [self.transform['augmentation'](image=tile)['image'] for tile in image])
            else:
                output = self.transform(image=image)['image']
        else:
            # No transform configured: hand back the image as loaded.
            output = image

        label = self.labels[idx]

        if self.insts is not None and self.cat_insts:
            output = torch.flatten(output)
            insts = self.insts[idx]
            if insts == 'karolinska':
                insts = torch.tensor([0.0])
            elif insts == 'radboud':
                insts = torch.tensor([1.0])
            output = torch.cat((output, insts))

        return output, label

    def __getitem__(self, idx):
        image, label = self._load_data(idx)

        if self.mixup:
            idx2 = np.random.randint(0, len(self.images))
            lam = np.random.beta(self.mixup_alpha, self.mixup_alpha)
            image2, label2 = self._load_data(idx2)
            # image = lam * image + (1 - lam) * image2
            # The images are not blended here: lam and both flattened images are packed together.
            image = torch.cat([torch.Tensor([lam]), image.view(-1), image2.view(-1)])
            label = lam * label + (1 - lam) * label2

        if self.bin_label:  # 2: [1, 1, 0, 0, 0] / 3: [1, 1, 1, 0, 0]
            target = torch.zeros(5)
            if self.mixup:
                # A mixed label is fractional: fill the whole part with ones and
                # put the fractional remainder in the next slot.
                label_int = int(label)
                label_dec = label - label_int
                target[:label_int] = 1.0
                if label_int < 5:
                    target[label_int] = label_dec
            else:
                target[:label] = 1
        else:
            target = label

        if self.return_index:
            return image, target, idx
        return image, target

In [None]:
def seed_everything(seed=2020):
    """Seed Python's `random`, NumPy and PyTorch (CPU and current CUDA device) and set PYTHONHASHSEED."""
    os.environ['PYTHONHASHSEED'] = str(seed)
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)


def print_config(cfg):
    """Print a fixed list of configuration attributes of `cfg`, one per line.

    Attributes that are missing, or whose lookup or formatting raises, are
    printed as ``<name>: ERROR`` instead of aborting the dump.
    """
    items = [
        'name', 
        # general
        'patch_size', 'patch_dim',
        'resume', 'img_size', 'batch_size', 'lr', 'epochs', 'CV', 'seed',
        # dataset
        'use_cache', 'separate_image', 'return_index', 'bin_label', 'mixup',
        # 
        'model', 'criterion', 'metric', 'log_metrics', 'stopper', 'event', 'transform',
    ]
    print('\n----- Config -----')
    for key in items:
        # getattr replaces eval() on a built string; `except Exception` keeps the
        # best-effort behaviour without also swallowing KeyboardInterrupt/SystemExit.
        try:
            line = f'{key}: {getattr(cfg, key)}'
        except Exception:
            line = f'{key}: ERROR'
        print(line)
    print('----- Config -----\n')

In [None]:
import torch
import os
import cv2
import numpy as np

from torch.utils.data import Dataset, DataLoader, Subset

MAX_GRIDS = 200


# full_folder = r'../input/prostate-cancer-grade-assessment/train_images'
# full_folder_png = r'./png'
# if os.path.exists(full_folder_png):
#     os.makedirs(full_folder_png)

### Task-specific utilities
def encode_gleason(gleason):
    """Encode a Gleason score string as two concatenated 4-slot cumulative vectors.

    'negative' maps both parts to level 0. Otherwise the string is split on
    '+', and each part becomes level `value - 1` when the value is at least 3
    and level 1 when it is lower. A level L is written as L ones followed by
    zeros up to length 4; the first part's vector comes first.
    """
    if gleason == 'negative':
        levels = (0, 0)
    else:
        primary, secondary = (int(part) for part in gleason.split('+'))
        levels = tuple(grade - 1 if grade >= 3 else 1 for grade in (primary, secondary))

    encoded = []
    for level in levels:
        encoded += [1] * level + [0] * (4 - level)
    return encoded

def Rotate(img, degrees=45):
    """Rotate a 3-axis image (rows, columns, channels) about its centre on a white canvas.

    The image is first padded symmetrically with 255 so that the rotated
    content is not cut off, then rotated by ``degrees`` with bilinear
    interpolation; pixels exposed by the rotation are filled with white.

    Args:
        img: array indexed as (row, column, channel).
        degrees: rotation angle handed to ``cv2.getRotationMatrix2D``.

    Returns:
        The padded and rotated image.
    """
    (h, w) = img.shape[:2]
    M = cv2.getRotationMatrix2D((w // 2, h // 2), degrees, 1.0)

    # Push two corner offsets (relative to the centre) through the 2x2 part of
    # the affine matrix; row 0 of the result is compared against the
    # half-width and row 1 against the half-height to size the padding.
    M_ = M[:, :2].T
    corners = np.array([[w // 2, w // 2], [h // 2, -h // 2]])
    new_corners = M_ @ corners
    extra_cols = max(np.abs(new_corners[0])) - w // 2
    extra_rows = max(np.abs(new_corners[1])) - h // 2
    pad_rows, pad_cols = int(max(extra_rows, 0)), int(max(extra_cols, 0))
    img = np.pad(img, ((pad_rows, pad_rows), (pad_cols, pad_cols), (0, 0)), constant_values=255)

    # Recompute the matrix for the padded size so the rotation stays centred.
    (h, w) = img.shape[:2]
    M = cv2.getRotationMatrix2D((w // 2, h // 2), degrees, 1.0)
    # The interpolation mode must be passed as ``flags=``: the fourth
    # positional parameter of warpAffine is the output array ``dst``.
    rotated = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_LINEAR,
                             borderMode=cv2.BORDER_CONSTANT,
                             borderValue=(255, 255, 255))
    return rotated



def get_img(png_path, mode = 0, tile_size = 224, patch_dim = 8):
    """Load a slide and return its darkest tiles as a normalised tensor.

    Args:
        png_path: path string. Paths containing ``'.png'`` are read with
            ``cv2.imread``; anything else is opened with ``io.MultiImage`` and
            the image at index 1 is used.
        mode: augmentation selector. Odd values pad half a tile of white on
            every side before tiling; ``(mode % 3) * 30`` is the rotation
            angle in degrees.
        tile_size: edge length of the square tiles.
        patch_dim: ``patch_dim * patch_dim`` tiles are returned.

    Returns:
        Float tensor of shape ``(patch_dim**2, 3, tile_size, tile_size)``.
        Pixel values are inverted (``255 - x``), scaled to [0, 1] and
        normalised. When fewer tiles than requested have a mean brightness
        below 250 (including none at all), the remainder is filled with
        all-white tiles.
    """
    # NOTE(review): cv2.imread yields BGR while skimage yields RGB, so the
    # channel order differs between the two branches — confirm this is intended.
    if '.png' in png_path:
        img = cv2.imread(png_path)
    else:
        img = io.MultiImage(png_path)[1]

    # The mean is expressed as 1 - m because the pixel values are inverted below.
    mean = 1 - torch.tensor([.485, .456, .406]).float().unsqueeze(1).unsqueeze(1).unsqueeze(0)
    std = torch.tensor([.229, .224, .225]).float().unsqueeze(1).unsqueeze(1).unsqueeze(0)

    if mode % 2 == 1:
        img = np.pad(img, ((tile_size // 2, tile_size // 2),
                           (tile_size // 2, tile_size // 2),
                           (0, 0)), constant_values=255)

    degrees = (mode % 3) * 30
    img = Rotate(img, degrees)

    # Pad the top and left edges with white up to a multiple of the tile size.
    pad_h = -img.shape[0] % tile_size
    pad_w = -img.shape[1] % tile_size
    img = np.pad(img, ((pad_h, 0), (pad_w, 0), (0, 0)), constant_values=255)

    # Cut the image into a flat array of tiles: (n_tiles, tile, tile, 3).
    rows, cols = img.shape[0] // tile_size, img.shape[1] // tile_size
    tiles = (img.reshape(rows, tile_size, cols, tile_size, 3)
                .transpose(0, 2, 1, 3, 4)
                .reshape(-1, tile_size, tile_size, 3))

    # Keep only tiles whose mean brightness is below 250, darkest first.
    brightness = tiles.reshape(tiles.shape[0], -1).mean(1)
    order = np.argsort(brightness)
    num_valid = int((brightness < 250).sum())
    order = order[:num_valid]

    n_tiles = patch_dim * patch_dim
    candidates = tiles[order]
    keep = np.argsort(brightness[order])[:n_tiles]
    selected = [candidates[i].astype(np.uint8) for i in keep]

    # Top up with white tiles; this also covers a slide with no valid tile.
    deficit = n_tiles - len(selected)
    if deficit > 0:
        selected += [np.full((tile_size, tile_size, 3), 255, dtype=np.uint8)] * deficit

    batch = 255 - np.stack(selected, axis=0)
    batch = torch.from_numpy(batch).permute(0, 3, 1, 2).float() / 255.
    batch = (batch - mean) / std
    return batch

# class PandaDataset_valid(Dataset):
#     def __init__(self, df):
#         self.names = df.image_id.values
#         self.labels = df.isup_grade.values
#         self.df = df

#     def __getitem__(self, idx):
#         try:
#             img = get_img(os.path.join(full_folder, self.names[idx] + '.tiff'))
            
#             label = self.labels[idx]
#             isup = [1 for _ in range(label)] + [0 for _ in range(5 - label)]
#             gleason = encode_gleason(self.df.gleason_score.values[idx])
#             label = torch.tensor(isup + gleason)

#             return img , label
#         except:
# #           print(idx, 'error')
#             new_idx = np.random.randint(len(self.labels))
#             return self.__getitem__(new_idx)

#     def __len__(self):
#         return len(self.labels)
    
class PandaDataset_Inference(Dataset):
    """Dataset that turns each ``<image_id>.tiff`` in ``folder`` into a tile tensor via ``get_img``.

    Args:
        df: frame with an ``image_id`` column; one item per row.
        tta_mode: augmentation mode forwarded to ``get_img``. ``None`` draws a
            fresh random mode in ``[0, 7]`` for every item access.
        folder: directory that holds the ``.tiff`` files.
        tile_size: tile edge length forwarded to ``get_img``.
        patch_dim: grid size forwarded to ``get_img``.
    """

    def __init__(self, df, tta_mode, folder,tile_size = 224, patch_dim = 8):
        self.names = df.image_id.values
        self.df = df
        self.tta_mode = tta_mode
        self.folder = folder
        self.tile_size = tile_size
        self.patch_dim = patch_dim

    def __getitem__(self, idx):
        """Return ``(img, img)``; the second element only fills the label slot."""
        # Honour a fixed mode when one was requested, otherwise randomise.
        mode = self.tta_mode
        if mode is None:
            mode = random.randint(0, 7)
        path = os.path.join(self.folder, self.names[idx] + '.tiff')
        img = get_img(path, mode, self.tile_size, self.patch_dim)

        return img, img

    def __len__(self):
        return len(self.names)
    
# img = get_img(os.path.join(full_folder, '12625a6ae522d7d2168049db06b4a86d.tiff'))
# print(img.shape)

# Config

In [None]:
# dirty code for to_mish, only for b0
from utils_enet import Swish, MemoryEfficientSwish
from activation import Mish
from activation import Swish as Swish_timm

def to_mish(model):
    """Swap activation layers of ``model`` for ``Mish``, recursively and in place.

    A direct child is replaced when it is an ``nn.ReLU`` or one of the Swish
    variants, or when it is registered under the name ``'_swish'``; the name
    of every replaced child is printed. All other children are descended into.
    """
    activation_types = (nn.ReLU, Swish, MemoryEfficientSwish, Swish_timm)
    for child_name, child in model.named_children():
        replace = isinstance(child, activation_types) or child_name == '_swish'
        if not replace:
            to_mish(child)
            continue
        print(child_name)
        setattr(model, child_name, Mish())

In [None]:
class Classification:
    """Inference config: PatchPoolModel2 with a 6-output head on se_resnext50_32x4d.

    Uses an 8 x 8 grid of 224-px patches. Everything is a class attribute, so
    ``model`` is instantiated as soon as this class body executes.
    """
    
    # General
    # NOTE(review): forwarded to PandaDataset as ``img_size``; exact meaning not visible here.
    img_size = 1
    patch_size = 224  # patch edge length handed to MakePatches
    patch_dim = 8     # patches per side; patch_total = patch_dim**2
    batch_size = 8    # DataLoader batch size used by the inference functions

    # Dataset
    # Flags forwarded unchanged to PandaDataset by simple_inference.
    separate_image = True
    return_index = False
    bin_label = False
    cat_insts = False
    
    ### Model: Base
    # The commented-out lines are alternative backbones kept for reference.
    model = PatchPoolModel2(
        base_model = senet_mod(se_resnext50_32x4d, pretrained=USE_PRETRAINED),
#         base_model = FeatureEfficientNet.from_name('efficientnet-b2'),
#         base_model = xception_mod(in_channel=3, num_classes=1000, pretrained=USE_PRETRAINED),
#         base_model = densenet_mod(torchvision.models.densenet121, pretrained=USE_PRETRAINED),
        patch_total=patch_dim**2, num_classes=6
    )
    
    # Only a 'test' pipeline is defined. Its 'augmentation' stage keeps random
    # transforms (p=0.5) enabled, so repeated passes see different inputs.
    transform = {
        ### Transform: Base
        'test': {
            'tile': Compose([
                MakePatches(patch_size, patch_dim, concat=False, always_apply=True)
            ]),
            'augmentation': Compose([
                ShiftScaleRotate(scale_limit=0.0625, rotate_limit=15, p=0.5),
                HorizontalFlip(p=0.5), VerticalFlip(p=0.5),
                Normalize([0.910, 0.819, 0.878],
                          [0.363, 0.499, 0.404], always_apply=True),
                ToTensor()
            ])
        },
        ### Transform: Concated image
#         'test': Compose([
#             HorizontalFlip(p=0.5), VerticalFlip(p=0.5),
#             MakePatches(patch_size, patch_dim,
#                         criterion='blue_ratio', always_apply=True),
#             Normalize([0.910, 0.819, 0.878],
#                       [0.363, 0.499, 0.404], always_apply=True),
#             ToTensor()
#         ]),
    }
    

class Classification2:
    """Inference config: PatchPoolModel2 with a 6-output head on an xception_mod backbone.

    Uses an 8 x 8 grid of 224-px patches. Everything is a class attribute, so
    ``model`` is instantiated as soon as this class body executes.
    """
    
    # General
    # NOTE(review): forwarded to PandaDataset as ``img_size``; exact meaning not visible here.
    img_size = 1
    patch_size = 224  # patch edge length handed to MakePatches
    patch_dim = 8     # patches per side; patch_total = patch_dim**2
    batch_size = 8    # DataLoader batch size used by the inference functions

    # Dataset
    # Flags forwarded unchanged to PandaDataset by simple_inference.
    separate_image = True
    return_index = False
    bin_label = False
    cat_insts = False
    
    ### Model: Base
    model = PatchPoolModel2(
        base_model = xception_mod(in_channel=3, num_classes=1000, pretrained=USE_PRETRAINED),
        patch_total=patch_dim**2, num_classes=6
    )
    
    # Only a 'test' pipeline is defined. Its 'augmentation' stage keeps random
    # transforms (p=0.5) enabled, so repeated passes see different inputs.
    transform = {
        ### Transform: Base
        'test': {
            'tile': Compose([
                MakePatches(patch_size, patch_dim, concat=False, always_apply=True)
            ]),
            'augmentation': Compose([
                ShiftScaleRotate(scale_limit=0.0625, rotate_limit=15, p=0.5),
                HorizontalFlip(p=0.5), VerticalFlip(p=0.5),
                Normalize([0.910, 0.819, 0.878],
                          [0.363, 0.499, 0.404], always_apply=True),
                ToTensor()
            ])
        },
    }


class Regression:
    """Inference config: PatchPoolModel2 with a single-output head on se_resnext50_32x4d.

    Uses an 8 x 8 grid of 224-px patches. Everything is a class attribute, so
    ``model`` is instantiated as soon as this class body executes.
    """
    
    # General
    # NOTE(review): forwarded to PandaDataset as ``img_size``; exact meaning not visible here.
    img_size = 1
    patch_size = 224  # patch edge length handed to MakePatches
    patch_dim = 8     # patches per side; patch_total = patch_dim**2
    batch_size = 12   # DataLoader batch size used by the inference functions

    # Dataset
    # Flags forwarded unchanged to PandaDataset by simple_inference.
    separate_image = True
    return_index = False
    bin_label = False
    cat_insts = False
    
    ### Model: Base
    # A 1-wide output is used as-is (flattened) by the inference functions.
    model = PatchPoolModel2(
        base_model=senet_mod(se_resnext50_32x4d, pretrained=USE_PRETRAINED),
        patch_total=patch_dim**2, num_classes=1
    )
    
    # Only a 'test' pipeline is defined. Its 'augmentation' stage keeps random
    # transforms (p=0.5) enabled, so repeated passes see different inputs.
    transform = {
        'test': {
            'tile': Compose([
                MakePatches(patch_size, patch_dim, concat=False, always_apply=True)
            ]),
            'augmentation': Compose([
                ShiftScaleRotate(scale_limit=0.0625, rotate_limit=15, p=0.5),
                HorizontalFlip(p=0.5), VerticalFlip(p=0.5),
                Normalize([0.910, 0.819, 0.878],
                          [0.363, 0.499, 0.404], always_apply=True),
                ToTensor()
            ])
        },
    }
    
    
class OrdinalRegressionBear:
    """Inference config: PatchPoolModel2 with a 5-output ordinal head on se_resnext50_32x4d.

    Uses an 8 x 8 grid of 224-px patches with ``bin_label`` enabled. Everything
    is a class attribute, so ``model`` is instantiated as soon as this class
    body executes.
    """
    
    # General
    # NOTE(review): forwarded to PandaDataset as ``img_size``; exact meaning not visible here.
    img_size = 1
    patch_size = 224  # patch edge length handed to MakePatches
    patch_dim = 8     # patches per side; patch_total = patch_dim**2
    batch_size = 6    # DataLoader batch size used by the inference functions

    # Dataset
    # Flags forwarded unchanged to PandaDataset by simple_inference.
    separate_image = True
    return_index = False
    bin_label = True
    cat_insts = False
    
    ### Model: Bin label
    # A 5-wide output is decoded as sigmoid().sum(1) by the inference functions.
    model = PatchPoolModel2(
        base_model = senet_mod(se_resnext50_32x4d, pretrained=USE_PRETRAINED),
        patch_total=patch_dim**2, num_classes=5
    )

    # Only a 'test' pipeline is defined. Its 'augmentation' stage keeps random
    # transforms (p=0.5) enabled, so repeated passes see different inputs.
    transform = {
        ### Transform: Base
        'test': {
            'tile': Compose([
                MakePatches(patch_size, patch_dim, concat=False, always_apply=True)
            ]),
            'augmentation': Compose([
                ShiftScaleRotate(scale_limit=0.0625, rotate_limit=15, p=0.5),
                HorizontalFlip(p=0.5), VerticalFlip(p=0.5),
                Normalize([0.910, 0.819, 0.878],
                          [0.363, 0.499, 0.404], always_apply=True),
                ToTensor()
            ])
        },
    }
    
class OrdinalRegressionBear101:
    """Inference config: PatchPoolModel2 with a 5-output ordinal head on se_resnext101_32x4d.

    Uses an 8 x 8 grid of 224-px patches with ``bin_label`` enabled. Everything
    is a class attribute, so ``model`` is instantiated as soon as this class
    body executes.
    """
    
    # General
    # NOTE(review): forwarded to PandaDataset as ``img_size``; exact meaning not visible here.
    img_size = 1
    patch_size = 224  # patch edge length handed to MakePatches
    patch_dim = 8     # patches per side; patch_total = patch_dim**2
    batch_size = 6    # DataLoader batch size used by the inference functions

    # Dataset
    # Flags forwarded unchanged to PandaDataset by simple_inference.
    separate_image = True
    return_index = False
    bin_label = True
    cat_insts = False
    
    ### Model: Bin label
    # A 5-wide output is decoded as sigmoid().sum(1) by the inference functions.
    model = PatchPoolModel2(
        base_model = senet_mod(se_resnext101_32x4d, pretrained=USE_PRETRAINED),
        patch_total=patch_dim**2, num_classes=5
    )

    # Only a 'test' pipeline is defined. Its 'augmentation' stage keeps random
    # transforms (p=0.5) enabled, so repeated passes see different inputs.
    transform = {
        ### Transform: Base
        'test': {
            'tile': Compose([
                MakePatches(patch_size, patch_dim, concat=False, always_apply=True)
            ]),
            'augmentation': Compose([
                ShiftScaleRotate(scale_limit=0.0625, rotate_limit=15, p=0.5),
                HorizontalFlip(p=0.5), VerticalFlip(p=0.5),
                Normalize([0.910, 0.819, 0.878],
                          [0.363, 0.499, 0.404], always_apply=True),
                ToTensor()
            ])
        },
    }

class OrdinalRegression0:
    """Inference config: PatchPoolModel2 with a 5-output ordinal head on efficientnet-b0, Mish activations.

    Uses a 6 x 6 grid of 256-px patches with ``bin_label`` enabled. Everything
    is a class attribute, so ``model`` is instantiated (and converted by
    ``to_mish``) as soon as this class body executes.
    """
    
    # General
    # NOTE(review): forwarded to PandaDataset as ``img_size``; exact meaning not visible here.
    img_size = 1
    patch_size = 256  # patch edge length (tile_size for PandaDataset_Inference)
    patch_dim = 6     # patches per side; patch_total = patch_dim**2
    batch_size = 6    # DataLoader batch size used by the inference functions

    # Dataset
    # Flags forwarded unchanged to PandaDataset by simple_inference.
    separate_image = True
    return_index = False
    bin_label = True
    cat_insts = False
    
    ### Model: Bin label
    # A 5-wide output is decoded as sigmoid().sum(1) by the inference functions.
    model = PatchPoolModel2(
        base_model=FeatureEfficientNet.from_name('efficientnet-b0'),
        patch_total=patch_dim**2, num_classes=5
    )
    
    # Replaces the activation modules in place; prints each replaced child name.
    to_mish(model)

    # Only a 'test' pipeline is defined. Its 'augmentation' stage keeps random
    # transforms (p=0.5) enabled, so repeated passes see different inputs.
    transform = {
        ### Transform: Base
        'test': {
            'tile': Compose([
                MakePatches(patch_size, patch_dim, concat=False, always_apply=True)
            ]),
            'augmentation': Compose([
                ShiftScaleRotate(scale_limit=0.0625, rotate_limit=15, p=0.5),
                HorizontalFlip(p=0.5), VerticalFlip(p=0.5),
                Normalize([0.910, 0.819, 0.878],
                          [0.363, 0.499, 0.404], always_apply=True),
                ToTensor()
            ])
        },
    }
    
class OrdinalRegression1:
    """Inference config: PatchPoolModel2 with a 5-output ordinal head on efficientnet-b0, Mish activations.

    Uses an 8 x 8 grid of 224-px patches with ``bin_label`` enabled. This is
    the config referenced by the active entry of ``submission_setting_``.
    Everything is a class attribute, so ``model`` is instantiated (and
    converted by ``to_mish``) as soon as this class body executes.
    """
    
    # General
    # NOTE(review): forwarded to PandaDataset as ``img_size``; exact meaning not visible here.
    img_size = 1
    patch_size = 224  # patch edge length (tile_size for PandaDataset_Inference)
    patch_dim = 8     # patches per side; patch_total = patch_dim**2
    batch_size = 6    # DataLoader batch size used by the inference functions

    # Dataset
    # Flags forwarded unchanged to PandaDataset by simple_inference.
    separate_image = True
    return_index = False
    bin_label = True
    cat_insts = False
    
    ### Model: Bin label
    # A 5-wide output is decoded as sigmoid().sum(1) by the inference functions.
    model = PatchPoolModel2(
        base_model=FeatureEfficientNet.from_name('efficientnet-b0'),
        patch_total=patch_dim**2, num_classes=5
    )
    
    # Replaces the activation modules in place; prints each replaced child name.
    to_mish(model)

    # Only a 'test' pipeline is defined. Its 'augmentation' stage keeps random
    # transforms (p=0.5) enabled, so repeated passes see different inputs.
    transform = {
        ### Transform: Base
        'test': {
            'tile': Compose([
                MakePatches(patch_size, patch_dim, concat=False, always_apply=True)
            ]),
            'augmentation': Compose([
                ShiftScaleRotate(scale_limit=0.0625, rotate_limit=15, p=0.5),
                HorizontalFlip(p=0.5), VerticalFlip(p=0.5),
                Normalize([0.910, 0.819, 0.878],
                          [0.363, 0.499, 0.404], always_apply=True),
                ToTensor()
            ])
        },
    }
    
class OrdinalRegression2:
    """Inference config: PatchPoolModel2 with a 5-output ordinal head on efficientnet-b0, Mish activations.

    Uses an 8 x 8 grid of 192-px patches with ``bin_label`` enabled. Everything
    is a class attribute, so ``model`` is instantiated (and converted by
    ``to_mish``) as soon as this class body executes.
    """
    
    # General
    # NOTE(review): forwarded to PandaDataset as ``img_size``; exact meaning not visible here.
    img_size = 1
    patch_size = 192  # patch edge length (tile_size for PandaDataset_Inference)
    patch_dim = 8     # patches per side; patch_total = patch_dim**2
    batch_size = 6    # DataLoader batch size used by the inference functions

    # Dataset
    # Flags forwarded unchanged to PandaDataset by simple_inference.
    separate_image = True
    return_index = False
    bin_label = True
    cat_insts = False
    
    ### Model: Bin label
    # A 5-wide output is decoded as sigmoid().sum(1) by the inference functions.
    model = PatchPoolModel2(
        base_model=FeatureEfficientNet.from_name('efficientnet-b0'),
        patch_total=patch_dim**2, num_classes=5
    )
    
    # Replaces the activation modules in place; prints each replaced child name.
    to_mish(model)

    # Only a 'test' pipeline is defined. Its 'augmentation' stage keeps random
    # transforms (p=0.5) enabled, so repeated passes see different inputs.
    transform = {
        ### Transform: Base
        'test': {
            'tile': Compose([
                MakePatches(patch_size, patch_dim, concat=False, always_apply=True)
            ]),
            'augmentation': Compose([
                ShiftScaleRotate(scale_limit=0.0625, rotate_limit=15, p=0.5),
                HorizontalFlip(p=0.5), VerticalFlip(p=0.5),
                Normalize([0.910, 0.819, 0.878],
                          [0.363, 0.499, 0.404], always_apply=True),
                ToTensor()
            ])
        },
    }
    

class MixOrdinalRegression:
    """Inference config: MixPatchPoolModel with a 5-output ordinal head on efficientnet-b0.

    Uses a 7 x 7 grid of 224-px patches with ``bin_label`` enabled; unlike the
    PatchPoolModel2 configs, the model also receives ``patch_size``. Everything
    is a class attribute, so ``model`` is instantiated as soon as this class
    body executes.
    """
    
    # General
    # NOTE(review): forwarded to PandaDataset as ``img_size``; exact meaning not visible here.
    img_size = 1
    patch_size = 224  # patch edge length handed to MakePatches and to the model
    patch_dim = 7     # patches per side; patch_total = patch_dim**2
    batch_size = 8    # DataLoader batch size used by the inference functions

    # Dataset
    # Flags forwarded unchanged to PandaDataset by simple_inference.
    separate_image = True
    return_index = False
    bin_label = True
    cat_insts = False
    
    ### Model: Bin label
    # The commented-out lines are alternative backbones kept for reference.
    model = MixPatchPoolModel(
        # base_model=senet_mod(se_resnext50_32x4d, pretrained=USE_PRETRAINED),
        # base_model=resnest_mod(torchvision.models.resnet34, pretrained=USE_PRETRAINED),
        base_model=FeatureEfficientNet.from_name('efficientnet-b0'),
        patch_size=patch_size, patch_total=patch_dim**2, num_classes=5,
    )
    
    # Only a 'test' pipeline is defined. Its 'augmentation' stage keeps random
    # transforms (p=0.5) enabled, so repeated passes see different inputs.
    transform = {
        ### Transform: Base
        'test': {
            'tile': Compose([
                MakePatches(patch_size, patch_dim, concat=False, always_apply=True)
            ]),
            'augmentation': Compose([
                ShiftScaleRotate(scale_limit=0.0625, rotate_limit=15, p=0.5),
                HorizontalFlip(p=0.5), VerticalFlip(p=0.5),
                Normalize([0.910, 0.819, 0.878],
                          [0.363, 0.499, 0.404], always_apply=True),
                ToTensor()
            ])
        },
    }



In [None]:
# submission_setting = [
#      {
#         'config': OrdinalRegressionBear,
#         'snapshot_path': [
#               '../input/pandabest-model-reproduction/fold0.pt',
#              '../input/pandabest-model-reproduction/fold1.pt',
#              '../input/pandabest-model-reproduction/fold2.pt',
#              '../input/pandabest-model-reproduction/fold3.pt',
#              '../input/pandabest-model-reproduction/fold4.pt',
            
#             ],
#         'oof_path':  [
#             '../input/pandabest-model-reproduction/oof0.npy',
#             '../input/pandabest-model-reproduction/oof1.npy',
#             '../input/pandabest-model-reproduction/oof2.npy',
#             '../input/pandabest-model-reproduction/oof3.npy',
#             '../input/pandabest-model-reproduction/oof4.npy',
#         ],
#     }
#     ,
    
#     {
#         'config': OrdinalRegressionBear,
#         'snapshot_path': [
#             '../input/panda-ckpts-seutao/fold0.pt',
#             '../input/panda-ckpts-seutao/fold1.pt',
#             '../input/panda-ckpts-seutao/fold2.pt',
#             '../input/panda-ckpts-seutao/fold3.pt',
#             '../input/panda-ckpts-seutao/fold4.pt',
            
#             ],
#         'oof_path':  [
#             '../input/panda-ckpts-seutao/oof0.npy',
#             '../input/panda-ckpts-seutao/oof1.npy',
#             '../input/panda-ckpts-seutao/oof2.npy',
#             '../input/panda-ckpts-seutao/oof3.npy',
#             '../input/panda-ckpts-seutao/oof4.npy',
#         ],
#     }
#     ,
#         {
#         'config': OrdinalRegressionBear101,
#         'snapshot_path': [
#             '../input/panda-ckpts-seutao/se101_fold0.pt',
#             '../input/panda-ckpts-seutao/se101_fold1.pt',
#             '../input/panda-ckpts-seutao/se101_fold2.pt',
#             '../input/panda-ckpts-seutao/se101_fold3.pt',
#             '../input/panda-ckpts-seutao/se101_fold4.pt',
            
#             ],
#         'oof_path':  [
#             '../input/panda-ckpts-seutao/se101_oof.npy',
#         ],
#     }
# ]

# Ensemble members used by the inference and threshold-optimisation cells.
# Each entry provides:
#   'config'        - config class whose ``model`` matches the snapshots,
#   'snapshot_path' - one checkpoint per fold, loaded with torch.jit.load,
#   'oof_path'      - .npy out-of-fold outputs; the arrays of one entry are
#                     summed element-wise before the rounding thresholds are fitted.
submission_setting_ = [
#     {
#         'config': OrdinalRegression0,
#         'snapshot_path': [
#             '../input/pandamixupbaselineb0seutao/mixup_baseline/fold_0_45.pt',
#             '../input/pandamixupbaselineb0seutao/mixup_baseline/fold_1_38.pt',
#             '../input/pandamixupbaselineb0seutao/mixup_baseline/fold_2_49.pt',
#             '../input/pandamixupbaselineb0seutao/mixup_baseline/fold_3_48.pt',
#             '../input/pandamixupbaselineb0seutao/mixup_baseline/fold_4_46.pt'
#             ],
#         'oof_path':  [
#             '../input/pandamixupbaselineb0seutao/fold_0_45_oof.npy',
#             '../input/pandamixupbaselineb0seutao/fold_1_38_oof.npy',
#             '../input/pandamixupbaselineb0seutao/fold_2_49_oof.npy',
#             '../input/pandamixupbaselineb0seutao/fold_3_48_oof.npy',
#             '../input/pandamixupbaselineb0seutao/fold_4_46_oof.npy',
#         ],
#     },
    
        {
        'config': OrdinalRegression1,
        'snapshot_path': [
            '../input/bigalphamixup/fold_0_46.pt',
            '../input/bigalphamixup/fold_1_45.pt',
            '../input/bigalphamixup/fold_2_40.pt',
            '../input/bigalphamixup/fold_3_45.pt',
            '../input/bigalphamixup/fold_4_40.pt'
            ],
        'oof_path':  [
            '../input/bigalphamixup/mixup_large_alpha_oof_224_-1.npy',
        ],
        },
    
]

In [None]:
# Root of the competition data: the CSV tables and the train_images / test_images folders.
INPUT_PATH = Path('../input/prostate-cancer-grade-assessment/')
# NOTE(review): the config classes above read USE_PRETRAINED while their class
# bodies execute, so re-assigning it here no longer affects them.
USE_PRETRAINED = False

# Inference


In [None]:
import  random

def simple_inference_(snapshots, df, cfg, tta, folder):
    """Predict one score per image with every TorchScript snapshot, repeated ``tta`` times.

    Args:
        snapshots: paths of TorchScript archives; their state dicts are loaded
            into copies of ``cfg.model``.
        df: frame with an ``image_id`` column.
        cfg: config class providing ``model``, ``patch_size``, ``patch_dim``
            and ``batch_size``.
        tta: number of passes over the data. The dataset is created with
            ``tta_mode=None``, so every pass draws fresh random augmentations.
        folder: directory containing the ``.tiff`` images.

    Returns:
        ``np.float16`` array of shape ``(tta, len(snapshots), len(df))``.

    Raises:
        ValueError: if a model's output width is not one of 1, 5, 6 or 7.

    One model per snapshot stays resident on ``device`` so that no checkpoint
    is re-read from disk inside the batch loop; this costs
    ``len(snapshots)`` copies of the model in device memory.
    """
    print(cfg.patch_size, cfg.patch_dim)

    predictions = np.zeros((tta, len(snapshots), len(df)), dtype=np.float16)

    fold_models = []
    for fold_snapshot in snapshots:
        fold_model = deepcopy(cfg.model)
        fold_model.load_state_dict(torch.jit.load(fold_snapshot, map_location='cpu').state_dict())
        fold_model.to(device)
        fold_model.eval()
        fold_models.append(fold_model)

    ds = PandaDataset_Inference(df, None, folder, cfg.patch_size, cfg.patch_dim)
    loader = D.DataLoader(ds, batch_size=cfg.batch_size, shuffle=False, num_workers=2)

    with torch.no_grad():
        for tta_i in range(tta):
            pred_fold = []
            for inputs, _ in tqdm(loader, total=len(loader), desc=f'tta{tta_i}'):
                # Every fold sees the same augmented batch.
                inputs = inputs.to(device)
                pred_batch = np.zeros((len(fold_models), len(inputs)), dtype=np.float16)

                for fold_i, fold_model in enumerate(fold_models):
                    outputs = fold_model(inputs)
                    n_outputs = outputs.shape[1]
                    if n_outputs == 6: # Classification
                        outputs = F.softmax(outputs, dim=1).cpu().detach().numpy()
                        outputs = np.dot(outputs, np.arange(6))
                    elif n_outputs == 7: # DACClassification
                        outputs = F.softmax(outputs[:, :6], dim=1).cpu().detach().numpy()
                        outputs = np.dot(outputs, np.arange(6))
                    elif n_outputs == 5: # OrdinalRegression
                        outputs = outputs.sigmoid().sum(1).cpu().detach().numpy()
                    elif n_outputs == 1: # Regression
                        outputs = outputs.reshape(-1).cpu().detach().numpy()
                    else:
                        raise ValueError(f'Unsupported model output width: {n_outputs}')
                    pred_batch[fold_i] = outputs
                pred_fold.append(pred_batch)

            # An empty loader leaves this pass at its zero initialisation.
            if pred_fold:
                predictions[tta_i] = np.concatenate(pred_fold, axis=1)

    return predictions

def simple_inference(snapshots, df, cfg, tta=1, folder=None):
    """Predict one score per image with every snapshot using ``PandaDataset``, repeated ``tta`` times.

    Args:
        snapshots: checkpoint paths understood by ``load_snapshots_to_model``.
        df: frame with ``image_id`` and ``data_provider`` columns.
        cfg: config class providing ``model``, ``transform['test']``,
            ``batch_size`` and the dataset flags.
        tta: number of passes over the data.
        folder: accepted for signature parity with ``simple_inference_``;
            not used here (images are located through ``INPUT_PATH``).

    Returns:
        ``np.float16`` array of shape ``(tta, len(snapshots), len(df))``.

    Raises:
        ValueError: if a model's output width is not one of 1, 5, 6 or 7.

    One model per snapshot stays resident on ``device`` so that no checkpoint
    is re-read inside the batch loop; this costs ``len(snapshots)`` copies of
    the model in device memory.
    """
    images = df.image_id.values
    insts = df.data_provider.values
    # Dummy labels: the dataset requires them but they are never read back.
    labels = np.zeros(len(df), dtype=int)
    ds = PandaDataset(
        images=images, labels=labels, insts=insts,
        img_size=cfg.img_size, transform=cfg.transform['test'],
        return_index=cfg.return_index, bin_label=cfg.bin_label,
        separate_image=cfg.separate_image, cat_insts=cfg.cat_insts,
        root_path=INPUT_PATH, istest=IS_TEST)
    loader = D.DataLoader(ds, batch_size=cfg.batch_size, shuffle=False, num_workers=2)
    predictions = np.zeros((tta, len(snapshots), len(df)), dtype=np.float16)

    fold_models = []
    for fold_snapshot in snapshots:
        fold_model = deepcopy(cfg.model)
        fold_model.to(device)
        load_snapshots_to_model(fold_snapshot, model=fold_model)
        fold_model.eval()
        fold_models.append(fold_model)

    with torch.no_grad():
        for tta_i in range(tta):
            pred_fold = []
            for inputs, _ in tqdm(loader, total=len(loader), desc=f'tta{tta_i}'):
                # Every fold sees the same batch.
                inputs = inputs.to(device)
                pred_batch = np.zeros((len(fold_models), len(inputs)), dtype=np.float16)
                for fold_i, fold_model in enumerate(fold_models):
                    outputs = fold_model(inputs)
                    n_outputs = outputs.shape[1]
                    if n_outputs == 6: # Classification
                        outputs = F.softmax(outputs, dim=1).cpu().detach().numpy()
                        outputs = np.dot(outputs, np.arange(6))
                    elif n_outputs == 7: # DACClassification
                        outputs = F.softmax(outputs[:, :6], dim=1).cpu().detach().numpy()
                        outputs = np.dot(outputs, np.arange(6))
                    elif n_outputs == 5: # OrdinalRegression
                        outputs = outputs.sigmoid().sum(1).cpu().detach().numpy()
                    elif n_outputs == 1: # Regression
                        outputs = outputs.reshape(-1).cpu().detach().numpy()
                    else:
                        raise ValueError(f'Unsupported model output width: {n_outputs}')
                    pred_batch[fold_i] = outputs
                pred_fold.append(pred_batch)

            # An empty loader leaves this pass at its zero initialisation.
            if pred_fold:
                predictions[tta_i] = np.concatenate(pred_fold, axis=1)

    return predictions
    
 

In [None]:
# Driver cell: pick the target table (real test set or a 10-row debug sample),
# then run every ensemble member over it.
train_df = pd.read_csv(INPUT_PATH/'train.csv')
test_df = pd.read_csv(INPUT_PATH/'test.csv')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
TTA = 2  # number of test-time-augmentation passes (reduced to 1 in debug mode)

# The presence of the test_images folder decides between real inference and debug.
if (INPUT_PATH/'test_images').exists():
    ### Inference
    print('Inference')
    target = test_df
    IS_TEST = True
    folder = INPUT_PATH/'test_images'
else:
    print('Debug')
    target = train_df.head(10)
    IS_TEST = False
    TTA = 1
    folder = INPUT_PATH/'train_images'

seed_everything(2020)
# Not the last expression of the cell, so this displays nothing.
target.head()

# all_predictions = []
# for c in submission_setting:
#     all_predictions.append(simple_inference(c['snapshot_path'], target, c['config'], TTA, folder))
# all_predictions = np.concatenate(all_predictions)

# Each call returns (TTA, n_snapshots, n_images); concatenating along axis 0
# stacks the TTA passes of all settings.
all_predictions_ = []
for c in submission_setting_:
    all_predictions_.append(simple_inference_(c['snapshot_path'], target, c['config'], TTA, folder))
all_predictions = np.concatenate(all_predictions_)

print(all_predictions.shape)
# print(all_predictions_.shape)
# all_predictions = np.concatenate([all_predictions, all_predictions_])

In [None]:
# Sanity check: (settings * TTA passes, snapshots, images).
print(all_predictions.shape)

In [None]:
# submission_setting

In [None]:
# for cfg in submission_setting:
#     print_config(cfg['config'])

In [None]:
import scipy.stats as stats

def logits_to_predictions(pred):
    """Turn rows of 6 class logits into expected grades (softmax-weighted mean of 0..5)."""
    probabilities = F.softmax(torch.Tensor(pred), 1).numpy()
    grades = np.arange(6)
    return np.dot(probabilities, grades)

def soft_vote(pred):
    """Average the predictions over axis 0, round, and clamp to integer grades 0..5."""
    averaged = np.mean(pred, axis=0)
    rounded = averaged.round()
    return np.clip(rounded, 0, 5).astype(int)

def hard_vote(pred):
    """Majority vote over axis 0 of rounded predictions, clamped to integer grades 0..5.

    Args:
        pred: array whose first axis indexes the voters.

    Returns:
        Integer array of shape ``pred.shape[1:]`` holding the most frequent
        rounded value per position.
    """
    votes = pred.round().astype(int)
    # Depending on the SciPy version, ``mode`` either keeps the reduced axis
    # (length 1) or drops it; reshaping gives one result per position in both cases.
    winners = np.asarray(stats.mode(votes, axis=0)[0]).reshape(votes.shape[1:])
    return np.clip(winners, 0, 5).astype(int)

In [None]:
# Baseline prediction without threshold optimisation: average over snapshots
# (axis 1), then soft-vote over the remaining TTA/setting axis.
print(all_predictions)
predictions = soft_vote(all_predictions.mean(1))

In [None]:
# Score the baseline predictions when ground truth is available. The test
# table carries no isup_grade column, so the metric is skipped for it.
if 'isup_grade' in target.columns:
    labels_ = target.isup_grade.values
    qwk = QWK(6)
    print(qwk(labels_, predictions))
print(predictions)

# Optimize threshold for QWK

In [None]:
from scipy.stats import mode
OPTIMIZE_QWK = True
HARD_VOTE = False
# Share of the out-of-fold samples (those with the lowest loss) that is kept
# when fitting the rounding thresholds.
limit_loss = 1 - 1/12

# Training tables the OOF arrays were computed on, keyed by OOF row count:
# (csv path, keep only rows with certain_flag2 == 1).
_OOF_TABLES = {
    9555: ('../input/mixup-baseline-b0-224-8-train2/train2.csv', True),
    8493: ('../input/panda-seed-gacha/gatcha_train_nr20.csv', True),
    10086: ('../input/panda-seed-gacha/gatcha_train_nr5.csv', False),
    9024: ('../input/panda-seed-gacha/gatcha_train_nr15.csv', True),
}
_DEFAULT_TRAIN_CSV = '../input/panda-seed-gacha/train.csv'
# Image removed from train.csv for OOF arrays with 10615 rows.
_EXCLUDED_IMAGE_ID = '3790f55cad63053e956fb73027179707'


def _load_oof_labels(n_rows):
    """Return the isup_grade labels of the table matching an OOF array with ``n_rows`` rows."""
    if n_rows in _OOF_TABLES:
        csv_path, certain_only = _OOF_TABLES[n_rows]
        table = pd.read_csv(csv_path)
        if certain_only:
            table = table.query('certain_flag2 == 1')
    else:
        table = pd.read_csv(_DEFAULT_TRAIN_CSV)
        if n_rows == 10615:
            table = table[table.image_id.values != _EXCLUDED_IMAGE_ID].reset_index(drop=True)
    return table.isup_grade.values


# Average over snapshots: one row per (setting, TTA pass).
mean_predictions = all_predictions.mean(1)
# all_predictions stacks the TTA passes of each setting contiguously along
# axis 0, so grouping the rows per setting and averaging gives one row per
# setting, which is what the per-setting thresholds below are applied to.
n_settings = len(submission_setting_)
setting_predictions = mean_predictions.reshape(n_settings, -1, mean_predictions.shape[-1]).mean(1)
hard_predictions = np.zeros_like(setting_predictions)
print(mean_predictions.shape)
print(hard_predictions.shape)


if OPTIMIZE_QWK:

    # One summed OOF array per setting.
    oof_all_list = []
    for c in submission_setting_:
        oof_list = []
        print('============================================')
        for npy in c['oof_path']:
            print(npy)
            oof_list.append(np.load(npy))
        oof_all_list.append(np.sum(oof_list, axis=0))

    all_coefs = []
    for i_oof, oof_fold in enumerate(oof_all_list):
        print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        print(len(oof_all_list))

        # Rows that sum to exactly zero are excluded from the fit.
        mask_fold = oof_fold.sum(axis=1) != 0
        print(mask_fold.shape)
        print(np.sum(mask_fold))
        output_shape = oof_fold.shape
        labels = _load_oof_labels(output_shape[0])

        if output_shape[1] != 5:
            raise ValueError(
                f'OOF array of setting {i_oof} has {output_shape[1]} columns; '
                'only 5-column ordinal-regression outputs are supported')

        # Ordinal Regression: build the cumulative binary targets.
        oof_fold = torch.from_numpy(oof_fold[mask_fold]).float()
        label_fold = torch.zeros_like(oof_fold)
        for i, t in enumerate(labels[mask_fold]):
            if t == 0:
                continue
            label_fold[i, 0:t] = 1
        criterion = nn.BCEWithLogitsLoss(reduction='none')
        loss_values = criterion(oof_fold, label_fold).mean(1)

        # Keep the limit_loss share of samples with the smallest loss.
        _, certainty_mask = loss_values.topk(round(len(oof_fold)*limit_loss), largest=False)
        oof_to_use = oof_fold[certainty_mask].sigmoid().sum(1).numpy()
        label_to_use = label_fold[certainty_mask].sum(1).numpy()
        analyse_results(oof_to_use.round(), label_to_use)

        optR = OptimizedRounder()
        optR.fit(oof_to_use, label_to_use)
        coefficients = optR.coefficients()
        print(coefficients)
        all_coefs.append(coefficients)
        hard_predictions[i_oof] = optR.predict(setting_predictions[i_oof], coefficients)
        analyse_results(optR.predict(oof_to_use, coefficients), label_to_use)

    all_coefs = np.vstack(all_coefs)
    print(all_coefs)
    print(all_coefs.mean(0))
    soft_predictions = optR.predict(mean_predictions.mean(0), all_coefs.mean(0))

    if HARD_VOTE:
        print('HARD_VOTE')
        print(hard_predictions)
        # ``mode`` keeps or drops the reduced axis depending on the SciPy
        # version; flattening yields one vote result per image either way.
        optimized_predictions = np.asarray(mode(hard_predictions, axis=0)[0]).reshape(-1)
        print(optimized_predictions)
    else:
        print('SOFT_PRED')
        print(mean_predictions)
        print(soft_predictions)
        optimized_predictions = soft_predictions

else:
    print(all_predictions.mean(0).mean(0))
    optimized_predictions = all_predictions.mean(0).mean(0).round()
    print(optimized_predictions)

In [None]:
# Build the submission from the table that was actually predicted on, so ids
# and grades are aligned by construction and the row counts always match
# (the debug sample has a different length than sample_submission.csv).
submission = pd.DataFrame({
    'image_id': target.image_id.values,
    'isup_grade': np.asarray(optimized_predictions).astype(int),
})
submission.head(20)

In [None]:
# Always write the file in debug mode; in real inference write it only when
# at least one predicted grade is non-zero.
if not (INPUT_PATH/'test_images').exists() or submission['isup_grade'].sum() > 0:
    submission.to_csv('submission.csv', index=False)