# Preparation

## mode config

In [None]:
# Run-mode switches for the whole notebook. Exactly one of them must be True;
# the cells below branch on these flags.
DO_SUBMISSION = True
DO_TRAIN_FOR_ENSEMBLE = False
DO_VIRTUAL_SUBMISSION = False
assert sum(
    (DO_SUBMISSION, DO_TRAIN_FOR_ENSEMBLE, DO_VIRTUAL_SUBMISSION)
) == 1, "select `ONE` mode"

## pip install

In [None]:
%%time
# for tabnet: install pytorch-tabnet from the wheel shipped as an input dataset.
# `--no-index` plus `--find-links` make pip resolve from that local file only,
# without contacting a package index.
!pip install --no-index --find-links ../input/pytorchtabnet/pytorch_tabnet-2.0.0-py3-none-any.whl pytorch-tabnet

In [None]:
%%time
# for nn.py: install iterative-stratification from a local source directory.
# Presumably this provides the `iterstrat` package imported further down -- TODO confirm.
!pip install ../input/iterative-stratification/iterative-stratification-master/

## import

In [None]:
import gc
import os
import random
import re
import shutil
import sys
import typing as tp
import warnings
from copy import deepcopy
from pathlib import Path

import numpy as np
import pandas as pd
import yaml
from sklearn.metrics import log_loss

# Clear every existing warning filter, then silence only Future/Deprecation warnings.
warnings.resetwarnings()
warnings.simplefilter('ignore', FutureWarning)
warnings.simplefilter('ignore', DeprecationWarning)
# Restrict CUDA to device 0. This is set before the cell that imports torch.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [None]:
import torch
from torch import nn
from torch.utils import data
# Let cuDNN benchmark its algorithms and pick one per input configuration.
torch.backends.cudnn.benchmark = True

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

# pytorch-pfn-extras is imported from an unpacked source tree in the input
# directory, so that directory has to be on sys.path before the import.
sys.path.append("../input/pytorch-pfn-extras/pytorch-pfn-extras-0.3.1/")
import pytorch_pfn_extras as ppe
from pytorch_pfn_extras.training import extensions as ppe_extensions

In [None]:
def generate_virtual_private(test_feat, smpl_sub, rate_to_pub=4):
    """Build an enlarged test set by stacking shifted copies of the given one.

    The original frames are kept as the first part. Each of the
    ``rate_to_pub - 1`` extra copies gets ``_<i>`` appended to every
    ``sig_id`` and ``i * 10`` added to every column from position 4 onwards.

    Args:
        test_feat: Test feature frame. Must have a ``sig_id`` column; the
            columns from position 4 onwards must be numeric (the original
            comments call them the ``c-*`` and ``g-*`` columns).
        smpl_sub: Sample submission frame with the same ``sig_id`` order.
        rate_to_pub: Size of the result relative to the input.

    Returns:
        Tuple ``(test_feat_concat, smpl_sub_concat)`` with a fresh 0..N-1 index.
    """
    assert (test_feat.sig_id == smpl_sub.sig_id).all()

    # The given (public) rows are part of the result, so keep them as they are.
    feat_parts = [test_feat]
    sub_parts = [smpl_sub]

    for copy_no in range(1, rate_to_pub):
        id_suffix = f"_{copy_no}"
        feat_copy = test_feat.copy()
        sub_copy = smpl_sub.copy()

        # Give the copy its own ids.
        feat_copy.sig_id = feat_copy.sig_id + id_suffix
        sub_copy.sig_id = sub_copy.sig_id + id_suffix
        assert (feat_copy.sig_id == sub_copy.sig_id).all()

        # Shift the `c-*` / `g-*` values by an arbitrary offset: plain
        # duplicates sometimes slipped through unnoticed.
        feat_copy.iloc[:, 4:] += copy_no * 10
        assert (feat_copy.iloc[:, 4:] != test_feat.iloc[:, 4:]).all().all()

        feat_parts.append(feat_copy)
        sub_parts.append(sub_copy)

    # Concatenate all parts row-wise.
    return (
        pd.concat(feat_parts, axis=0, ignore_index=True),
        pd.concat(sub_parts, axis=0, ignore_index=True),
    )

## Inference Test by Stage1 Models

In [None]:
# Display names of the stage-1 models. They are paired with `oof_list` by
# position when the OOF scores are printed, so both must use the same order.
MODEL_NAMES = ["NN(drugCV)", "TabNet", "ResNet", "ThrNN", "ThrNN(drugCV)"]

In [None]:
%%time
# Stage-1 inference, cell 1 of 5 (NN scripts). The selected mode decides which
# external script is run through the shell; the ensemble-training mode runs nothing.
if DO_SUBMISSION:
    !python ../input/moa-nn-tabnet-fix5/nn-inference-0.01833.py

elif DO_VIRTUAL_SUBMISSION:
    !python ../input/moa-nn-tabnet-fix5/virtual-nn-use-train-public-inference.py

elif DO_TRAIN_FOR_ENSEMBLE:
    pass
else:
    # Only reachable when no mode flag is set.
    raise ValueError

In [None]:
%%time
# Stage-1 inference, cell 2 of 5 (TabNet scripts). The selected mode decides which
# external script is run through the shell; the ensemble-training mode runs nothing.
if DO_SUBMISSION:
    !python ../input/moa-nn-tabnet-fix5/tabnet-inference-0.01840.py

elif DO_VIRTUAL_SUBMISSION:
    !python ../input/moa-nn-tabnet-fix5/virtual-tabnet-inference-add-param-n-shared.py

elif DO_TRAIN_FOR_ENSEMBLE:
    pass
else:
    # Only reachable when no mode flag is set.
    raise ValueError

In [None]:
%%time
# Stage-1 inference, cell 3 of 5 (presumably the "ResNet" entry of MODEL_NAMES,
# judging by the script name -- TODO confirm). The selected mode decides which
# external script is run through the shell; the ensemble-training mode runs nothing.
if DO_SUBMISSION:
    !python ../input/moa-takapy-script/tf-rn-transfer-1layerother-selcol100.py  # 0.01862

elif DO_VIRTUAL_SUBMISSION:
    !python ../input/moa-takapy-script/virtual-tf-rn-transfer-1layerother-selcol100.py

elif DO_TRAIN_FOR_ENSEMBLE:
    pass
else:
    # Only reachable when no mode flag is set.
    raise ValueError

In [None]:
%%time
# Stage-1 inference, cell 4 of 5 (ThrNN seed-CV scripts). The selected mode decides
# which external script is run through the shell; the ensemble-training mode runs nothing.
if DO_SUBMISSION:
    !python ../input/moa-tawara-scripts-for-final-submission/moa-for-final-thrnn-seed-cv-0.01836.py

elif DO_VIRTUAL_SUBMISSION:
    !python ../input/moa-tawara-scripts-for-final-submission/virtual-moa-for-final-thrnn-seed-cv-0.01836.py

elif DO_TRAIN_FOR_ENSEMBLE:
    pass
else:
    # Only reachable when no mode flag is set.
    raise ValueError

In [None]:
%%time
# Stage-1 inference, cell 5 of 5 (ThrNN drug-seed-CV scripts). The selected mode decides
# which external script is run through the shell; the ensemble-training mode runs nothing.
if DO_SUBMISSION:
    !python ../input/moa-tawara-scripts-for-final-submission/moa-for-final-thrnn-drug-seed-cv-0.01841.py

elif DO_VIRTUAL_SUBMISSION:
    !python ../input/moa-tawara-scripts-for-final-submission/virtual-moa-for-final-thrnn-drug-seed-cv-0.01841.py

elif DO_TRAIN_FOR_ENSEMBLE:
    pass
else:
    # Only reachable when no mode flag is set.
    raise ValueError

## Load OOF predictions

In [None]:
TARGET_COL = ['5-alpha_reductase_inhibitor', '11-beta-hsd1_inhibitor', 'acat_inhibitor', 'acetylcholine_receptor_agonist', 'acetylcholine_receptor_antagonist', 'acetylcholinesterase_inhibitor', 'adenosine_receptor_agonist', 'adenosine_receptor_antagonist', 'adenylyl_cyclase_activator', 'adrenergic_receptor_agonist', 'adrenergic_receptor_antagonist', 'akt_inhibitor', 'aldehyde_dehydrogenase_inhibitor', 'alk_inhibitor', 'ampk_activator', 'analgesic', 'androgen_receptor_agonist', 'androgen_receptor_antagonist', 'anesthetic_-_local', 'angiogenesis_inhibitor', 'angiotensin_receptor_antagonist', 'anti-inflammatory', 'antiarrhythmic', 'antibiotic', 'anticonvulsant', 'antifungal', 'antihistamine', 'antimalarial', 'antioxidant', 'antiprotozoal', 'antiviral', 'apoptosis_stimulant', 'aromatase_inhibitor', 'atm_kinase_inhibitor', 'atp-sensitive_potassium_channel_antagonist', 'atp_synthase_inhibitor', 'atpase_inhibitor', 'atr_kinase_inhibitor', 'aurora_kinase_inhibitor', 'autotaxin_inhibitor', 'bacterial_30s_ribosomal_subunit_inhibitor', 'bacterial_50s_ribosomal_subunit_inhibitor', 'bacterial_antifolate', 'bacterial_cell_wall_synthesis_inhibitor', 'bacterial_dna_gyrase_inhibitor', 'bacterial_dna_inhibitor', 'bacterial_membrane_integrity_inhibitor', 'bcl_inhibitor', 'bcr-abl_inhibitor', 'benzodiazepine_receptor_agonist', 'beta_amyloid_inhibitor', 'bromodomain_inhibitor', 'btk_inhibitor', 'calcineurin_inhibitor', 'calcium_channel_blocker', 'cannabinoid_receptor_agonist', 'cannabinoid_receptor_antagonist', 'carbonic_anhydrase_inhibitor', 'casein_kinase_inhibitor', 'caspase_activator', 'catechol_o_methyltransferase_inhibitor', 'cc_chemokine_receptor_antagonist', 'cck_receptor_antagonist', 'cdk_inhibitor', 'chelating_agent', 'chk_inhibitor', 'chloride_channel_blocker', 'cholesterol_inhibitor', 'cholinergic_receptor_antagonist', 'coagulation_factor_inhibitor', 'corticosteroid_agonist', 'cyclooxygenase_inhibitor', 'cytochrome_p450_inhibitor', 'dihydrofolate_reductase_inhibitor', 
'dipeptidyl_peptidase_inhibitor', 'diuretic', 'dna_alkylating_agent', 'dna_inhibitor', 'dopamine_receptor_agonist', 'dopamine_receptor_antagonist', 'egfr_inhibitor', 'elastase_inhibitor', 'erbb2_inhibitor', 'estrogen_receptor_agonist', 'estrogen_receptor_antagonist', 'faah_inhibitor', 'farnesyltransferase_inhibitor', 'fatty_acid_receptor_agonist', 'fgfr_inhibitor', 'flt3_inhibitor', 'focal_adhesion_kinase_inhibitor', 'free_radical_scavenger', 'fungal_squalene_epoxidase_inhibitor', 'gaba_receptor_agonist', 'gaba_receptor_antagonist', 'gamma_secretase_inhibitor', 'glucocorticoid_receptor_agonist', 'glutamate_inhibitor', 'glutamate_receptor_agonist', 'glutamate_receptor_antagonist', 'gonadotropin_receptor_agonist', 'gsk_inhibitor', 'hcv_inhibitor', 'hdac_inhibitor', 'histamine_receptor_agonist', 'histamine_receptor_antagonist', 'histone_lysine_demethylase_inhibitor', 'histone_lysine_methyltransferase_inhibitor', 'hiv_inhibitor', 'hmgcr_inhibitor', 'hsp_inhibitor', 'igf-1_inhibitor', 'ikk_inhibitor', 'imidazoline_receptor_agonist', 'immunosuppressant', 'insulin_secretagogue', 'insulin_sensitizer', 'integrin_inhibitor', 'jak_inhibitor', 'kit_inhibitor', 'laxative', 'leukotriene_inhibitor', 'leukotriene_receptor_antagonist', 'lipase_inhibitor', 'lipoxygenase_inhibitor', 'lxr_agonist', 'mdm_inhibitor', 'mek_inhibitor', 'membrane_integrity_inhibitor', 'mineralocorticoid_receptor_antagonist', 'monoacylglycerol_lipase_inhibitor', 'monoamine_oxidase_inhibitor', 'monopolar_spindle_1_kinase_inhibitor', 'mtor_inhibitor', 'mucolytic_agent', 'neuropeptide_receptor_antagonist', 'nfkb_inhibitor', 'nicotinic_receptor_agonist', 'nitric_oxide_donor', 'nitric_oxide_production_inhibitor', 'nitric_oxide_synthase_inhibitor', 'norepinephrine_reuptake_inhibitor', 'nrf2_activator', 'opioid_receptor_agonist', 'opioid_receptor_antagonist', 'orexin_receptor_antagonist', 'p38_mapk_inhibitor', 'p-glycoprotein_inhibitor', 'parp_inhibitor', 'pdgfr_inhibitor', 'pdk_inhibitor', 
'phosphodiesterase_inhibitor', 'phospholipase_inhibitor', 'pi3k_inhibitor', 'pkc_inhibitor', 'potassium_channel_activator', 'potassium_channel_antagonist', 'ppar_receptor_agonist', 'ppar_receptor_antagonist', 'progesterone_receptor_agonist', 'progesterone_receptor_antagonist', 'prostaglandin_inhibitor', 'prostanoid_receptor_antagonist', 'proteasome_inhibitor', 'protein_kinase_inhibitor', 'protein_phosphatase_inhibitor', 'protein_synthesis_inhibitor', 'protein_tyrosine_kinase_inhibitor', 'radiopaque_medium', 'raf_inhibitor', 'ras_gtpase_inhibitor', 'retinoid_receptor_agonist', 'retinoid_receptor_antagonist', 'rho_associated_kinase_inhibitor', 'ribonucleoside_reductase_inhibitor', 'rna_polymerase_inhibitor', 'serotonin_receptor_agonist', 'serotonin_receptor_antagonist', 'serotonin_reuptake_inhibitor', 'sigma_receptor_agonist', 'sigma_receptor_antagonist', 'smoothened_receptor_antagonist', 'sodium_channel_inhibitor', 'sphingosine_receptor_agonist', 'src_inhibitor', 'steroid', 'syk_inhibitor', 'tachykinin_antagonist', 'tgf-beta_receptor_inhibitor', 'thrombin_inhibitor', 'thymidylate_synthase_inhibitor', 'tlr_agonist', 'tlr_antagonist', 'tnf_inhibitor', 'topoisomerase_inhibitor', 'transient_receptor_potential_channel_antagonist', 'tropomyosin_receptor_kinase_inhibitor', 'trpv_agonist', 'trpv_antagonist', 'tubulin_inhibitor', 'tyrosine_kinase_inhibitor', 'ubiquitin_specific_protease_inhibitor', 'vegfr_inhibitor', 'vitamin_b', 'vitamin_d_receptor_agonist', 'wnt_inhibitor']

In [None]:
# # functions for aligning OOF frames: drop control rows and the `sig_id` column

def del_control(
    df: pd.DataFrame,
    train_features_path: str = '../input/lish-moa/train_features.csv',
) -> pd.DataFrame:
    """Keep only the rows whose training ``cp_type`` is ``'trt_cp'``.

    ``cp_type`` is read from the training feature file and aligned with
    ``df`` on the index, so ``df`` is expected to use the same row order and
    index as that file. Rows with no matching ``cp_type`` are dropped too.

    Unlike the previous version, the caller's frame is left untouched (no
    temporary ``cp_type`` column is written into it) and only the needed CSV
    column is read.

    Args:
        df: Frame with one row per training sample.
        train_features_path: CSV file that contains a ``cp_type`` column.

    Returns:
        New frame with the kept rows, a fresh 0..N-1 index and no
        ``cp_type`` column.
    """
    cp_type = pd.read_csv(train_features_path, usecols=['cp_type'])['cp_type']
    # assign() works on a copy, which keeps the input frame unchanged.
    labelled = df.assign(cp_type=cp_type)
    treated = labelled.loc[labelled['cp_type'] == 'trt_cp'].reset_index(drop=True)
    return treated.drop('cp_type', axis=1)

def drop_sig_id(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` without its ``sig_id`` column.

    Raises:
        KeyError: If ``df`` has no ``sig_id`` column.
    """
    return df.drop(columns='sig_id')

def fix_oof(df: pd.DataFrame, n_expected_rows: int = 21948) -> pd.DataFrame:
    """Bring an OOF/target frame into the common "values only" layout.

    Args:
        df: Frame of predictions or targets, one row per training sample.
        n_expected_rows: Row count a frame has once control rows are removed.
            The default 21948 is presumably the number of ``trt_cp`` rows in
            the competition's training data -- TODO confirm.

    Returns:
        ``df`` itself when nothing had to change, otherwise a new frame with
        control rows removed and/or the ``sig_id`` column dropped.
    """
    # Any other row count is taken to mean that control rows are still present.
    if len(df) != n_expected_rows:
        df = del_control(df)
    if 'sig_id' in df.columns:
        df = drop_sig_id(df)
    return df

In [None]:
# Out-of-fold predictions of the stage-1 models, one frame per model, each
# passed through fix_oof.
oof_list = [
    # nn(drugCV)
    fix_oof(pd.read_pickle('../input/nn-use-train-public/oof.pkl')),
    # tabnet (stored as a bare array, so column names are attached here)
    fix_oof(
        pd.DataFrame(
            np.load('../input/tabnet-train-public-add-n-shared-1/oof_tabnet.npy'),
            columns=TARGET_COL,
        )
    ),
    # resnet
    fix_oof(pd.read_csv("../input/moa-takapy-tf-resnet-transfer/oof_prediction_takapy_resnet.csv")),
    # thrnn
    fix_oof(pd.read_csv("../input/moa-weight-thrnn-seed-cv/oof_prediction.csv")),
    # thrnn(drugCV)
    fix_oof(pd.read_csv("../input/moa-weight-thrnn-drug-seed-cv/oof_prediction.csv")),
]

In [None]:
# Ground-truth targets in the same layout as the OOF frames.
y_true = fix_oof(pd.read_csv('../input/lish-moa/train_targets_scored.csv'))

# Report shape and flattened log loss of every stage-1 model.
for i, name in enumerate(MODEL_NAMES):
    oof = oof_list[i]
    print(f"[{name}]")
    print("shape:", oof.shape)
    print("local logloss:", log_loss(y_true.values.ravel(), oof.values.ravel()))

# Training Stacking Model

## definition

### dataset

In [None]:
class MoAStackingDataset(data.Dataset):
    """Dataset that serves (feature, label) pairs from two numpy arrays."""

    def __init__(self, feat: np.ndarray, label: np.ndarray = None):
        """Store the arrays; without labels every sample gets the dummy label ``[-1]``."""
        self.feat = feat
        self.label = np.full((len(feat), 1), -1) if label is None else label
        self.model_order = None

    def __len__(self):
        """Number of samples."""
        return len(self.feat)

    def __getitem__(self, index: int):
        """Return ``[feature, label]`` of one sample as float tensors."""
        feature = torch.from_numpy(self.feat[index]).float()
        target = torch.from_numpy(self.label[index]).float()
        return [feature, target]

    def reset_model_order(self):
        """No-op; present so all dataset classes share the same methods."""

    def shuffle_model_order(self, seed):
        """No-op; present so all dataset classes share the same methods."""


class MoAStackingDatasetForCNN(data.Dataset):
    """Dataset whose last feature axis can be re-ordered.

    ``model_order`` is an index array applied to the last axis of every
    sample's features (the models axis, going by the attribute name).
    """

    def __init__(self, feat: np.ndarray, label: np.ndarray = None):
        """Store the arrays; without labels every sample gets the dummy label ``[-1]``."""
        self.feat = feat
        self.label = np.full((len(feat), 1), -1) if label is None else label
        self.reset_model_order()

    def reset_model_order(self):
        """Restore the natural order ``0..n-1`` of the last feature axis."""
        n_last = self.feat.shape[-1]
        self.model_order = np.arange(n_last)

    def shuffle_model_order(self, seed):
        """Permute the current order. Note that this re-seeds numpy's global RNG."""
        np.random.seed(seed)
        self.model_order = np.random.permutation(self.model_order)

    def __len__(self):
        """Number of samples."""
        return len(self.feat)

    def __getitem__(self, index: int):
        """Return ``[feature, label]`` as float tensors, features in ``model_order``."""
        reordered = self.feat[index][..., self.model_order]
        feature = torch.from_numpy(reordered).float()
        target = torch.from_numpy(self.label[index]).float()
        return [feature, target]
    
    
class MoAStackingDatasetForGCN(data.Dataset):
    """Dataset that serves (feature, label) pairs from two numpy arrays."""

    def __init__(self, feat: np.ndarray, label: np.ndarray = None):
        """Store the arrays; without labels every sample gets the dummy label ``[-1]``."""
        self.feat = feat
        self.label = np.full((len(feat), 1), -1) if label is None else label
        self.model_order = None

    def reset_model_order(self):
        """No-op; present so all dataset classes share the same methods."""

    def shuffle_model_order(self, seed):
        """No-op; present so all dataset classes share the same methods."""

    def __len__(self):
        """Number of samples."""
        return len(self.feat)

    def __getitem__(self, index: int):
        """Return ``[feature, label]`` of one sample as float tensors."""
        feature = torch.from_numpy(self.feat[index]).float()
        target = torch.from_numpy(self.label[index]).float()
        return [feature, target]

### models

In [None]:
def get_activation(activ_name: str="relu"):
    """Create a fresh activation module from its name.

    Args:
        activ_name: One of ``"relu"``, ``"tanh"``, ``"sigmoid"``,
            ``"identity"``, or ``"htanh_NNNN"`` where ``NNNN`` is a four-digit
            number; the latter gives ``nn.Hardtanh`` bounded at
            ``+-NNNN / 1000`` (``"htanh_0500"`` -> ``[-0.5, 0.5]``).

    Returns:
        A new ``nn.Module`` instance on every call.

    Raises:
        NotImplementedError: If the name is not recognised.
    """
    # Classes rather than instances, so only the requested module is built.
    factories = {
        "relu": nn.ReLU,
        "tanh": nn.Tanh,
        "sigmoid": nn.Sigmoid,
        "identity": nn.Identity,
    }
    if activ_name in factories:
        return factories[activ_name]()

    # fullmatch rejects trailing characters, including a final newline.
    if re.fullmatch(r"htanh_\d{4}", activ_name):
        bound = int(activ_name[-4:]) / 1000
        return nn.Hardtanh(-bound, bound)

    raise NotImplementedError(f"unsupported activation name: {activ_name!r}")

class LBAD(nn.Module):
    """Dense block: Linear (-> BatchNorm) -> Activation (-> Dropout)."""

    def __init__(
        self, in_features: int, out_features: int, drop_rate: float=0.0,
        use_bn: bool=False, use_wn: bool=False, activ: str="relu"
    ):
        """Assemble the block.

        Args:
            in_features: Input width of the linear layer.
            out_features: Output width of the linear layer.
            drop_rate: Dropout probability; dropout is only added when > 0.
            use_bn: Insert ``BatchNorm1d`` after the linear layer.
            use_wn: Wrap the linear layer with weight normalisation.
            activ: Activation name understood by ``get_activation``.
        """
        super(LBAD, self).__init__()
        linear = nn.Linear(in_features, out_features)
        if use_wn:
            linear = nn.utils.weight_norm(linear)

        blocks = [linear]
        if use_bn:
            blocks.append(nn.BatchNorm1d(out_features))
        blocks.append(get_activation(activ))
        if drop_rate > 0:
            blocks.append(nn.Dropout(drop_rate))

        self.layers = nn.Sequential(*blocks)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the block."""
        return self.layers(x)
    
    
class BDLA(nn.Module):
    """Dense block: (BatchNorm -> Dropout ->) Linear -> Activation."""

    def __init__(
        self, in_features: int, out_features: int, drop_rate: float=0.0,
        use_bn: bool=False, use_wn: bool=False, activ: str="relu"
    ):
        """Assemble the block.

        Args:
            in_features: Input width of the linear layer (and of BatchNorm).
            out_features: Output width of the linear layer.
            drop_rate: Dropout probability; dropout is only added when > 0.
            use_bn: Put ``BatchNorm1d`` in front of the linear layer.
            use_wn: Wrap the linear layer with weight normalisation.
            activ: Activation name understood by ``get_activation``.
        """
        super(BDLA, self).__init__()
        blocks = []
        if use_bn:
            blocks.append(nn.BatchNorm1d(in_features))
        if drop_rate > 0:
            blocks.append(nn.Dropout(drop_rate))

        linear = nn.Linear(in_features, out_features)
        if use_wn:
            linear = nn.utils.weight_norm(linear)
        blocks.append(linear)
        blocks.append(get_activation(activ))

        self.layers = nn.Sequential(*blocks)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the block."""
        return self.layers(x)
    

class LABD(nn.Module):
    """Dense block: Linear -> Activation (-> BatchNorm -> Dropout)."""

    def __init__(
        self, in_features: int, out_features: int, drop_rate: float=0.0,
        use_bn: bool=False, use_wn: bool=False, activ: str="relu"
    ):
        """Assemble the block.

        Args:
            in_features: Input width of the linear layer.
            out_features: Output width of the linear layer (and of BatchNorm).
            drop_rate: Dropout probability; dropout is only added when > 0.
            use_bn: Append ``BatchNorm1d`` after the activation.
            use_wn: Wrap the linear layer with weight normalisation.
            activ: Activation name understood by ``get_activation``.
        """
        super(LABD, self).__init__()
        linear = nn.Linear(in_features, out_features)
        activation = get_activation(activ)
        if use_wn:
            linear = nn.utils.weight_norm(linear)

        blocks = [linear, activation]
        if use_bn:
            blocks.append(nn.BatchNorm1d(out_features))
        if drop_rate > 0:
            blocks.append(nn.Dropout(drop_rate))

        self.layers = nn.Sequential(*blocks)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the block."""
        return self.layers(x)

In [None]:
# # for GCNs
def vector_wise_matmul(X: torch.Tensor, W: torch.Tensor) -> torch.Tensor:
    """Multiply each vector slot of ``X`` by that slot's own weight matrix.

    ``X`` is treated as a batch of bags of vectors; the vectors at slot ``n``
    of every sample are all multiplied by ``W[n]``.

    Args:
        X: Input tensor, shape: (batch_size, **n_vectors**, in_features)
        W: Weight tensor, shape: (**n_vectors**, out_features, in_features)

    Returns:
        Tensor of shape (batch_size, **n_vectors**, out_features).
    """
    slot_major_inputs = X.transpose(0, 1)    # (n_vectors, batch_size, in_features)
    slot_weights = W.transpose(1, 2)         # (n_vectors, in_features, out_features)
    # Batched matmul over the slot axis, then back to batch-major layout.
    slot_major_outputs = torch.matmul(slot_major_inputs, slot_weights)
    return slot_major_outputs.transpose(0, 1)


def vector_wise_shared_matmul(X: torch.Tensor, W: torch.Tensor) -> torch.Tensor:
    """Multiply every vector of ``X`` by one **shared** weight matrix.

    Args:
        X: Input tensor, shape: (batch_size, **n_vectors**, in_features)
        W: Weight tensor, shape: (out_features, in_features)

    Returns:
        Tensor of shape (batch_size, **n_vectors**, out_features).
    """
    # A bias-free linear map applied to the last axis, i.e. X @ W^T.
    return nn.functional.linear(X, W)

In [None]:
def _calculate_fan_in_and_fan_out_for_vwl(tensor) -> tp.Tuple[int]:
    """
    Input tensor: (n_vectors, out_features, in_features) or (out_features, in_features)
    """
    dimensions = tensor.dim()
    if dimensions < 2:
        raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions")

    fan_in = tensor.size(-1)
    fan_out = tensor.size(-2)

    return fan_in, fan_out
    

def _calculate_correct_fan_for_vwl(tensor, mode) -> int:
    """Pick fan-in or fan-out of ``tensor`` according to ``mode``.

    Args:
        tensor: Weight tensor accepted by
            ``_calculate_fan_in_and_fan_out_for_vwl``.
        mode: ``'fan_in'`` or ``'fan_out'`` (case-insensitive).

    Raises:
        ValueError: If ``mode`` is neither of the two.
    """
    valid_modes = ['fan_in', 'fan_out']
    mode = mode.lower()
    if mode not in valid_modes:
        raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes))

    fan_in, fan_out = _calculate_fan_in_and_fan_out_for_vwl(tensor)
    if mode == 'fan_in':
        return fan_in
    return fan_out


def kaiming_uniform_for_vwl(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu'):
    """Fill ``tensor`` in place with Kaiming-uniform values and return it.

    The fan is computed from the last two axes of ``tensor`` only.

    Args:
        tensor: Weight tensor to initialise.
        a: Extra parameter passed to ``nn.init.calculate_gain``.
        mode: ``'fan_in'`` or ``'fan_out'``.
        nonlinearity: Name of the non-linearity, passed to ``calculate_gain``.
    """
    fan = _calculate_correct_fan_for_vwl(tensor, mode)
    std = nn.init.calculate_gain(nonlinearity, a) / np.sqrt(fan)
    # A uniform distribution on [-b, b] has standard deviation b / sqrt(3).
    bound = np.sqrt(3.0) * std
    with torch.no_grad():
        return tensor.uniform_(-bound, bound)

In [None]:
class VectorWiseLinear(nn.Module):
    """Linear layer applied to every vector of a bag of vectors.

    Each sample of a mini batch is a matrix that is read as ``n_vectors``
    row vectors; every vector is multiplied by a weight matrix.

    input    X: (batch_size, **n_vectors**, in_features)
    weight   W: (**n_vectors**, out_features, in_features), or
                (out_features, in_features) when the weight is shared
    output   Y: (batch_size, **n_vectors**, out_features)

    **Note**: the bias is left out of the formulas below for brevity.

    With
        X = [
            [vec_{ 1, 1}, vec_{ 1, 2}, ... vec_{ 1, n_vectors}],
            [vec_{ 2, 1}, vec_{ 2, 2}, ... vec_{ 2, n_vectors}],
            ...
            [vec_{bs, 1}, vec_{bs, 2}, ... vec_{bs, n_vectors}],
        ]
        W = [Mat_{1}, Mat_{2}, ..., Mat_{n_vectors}]
    the output is
        Y = [
            [Mat_{1} vec_{ 1, 1}, Mat_{2} vec_{ 1, 2}, ... Mat_{n_vectors} vec_{ 1, n_vectors}],
            [Mat_{1} vec_{ 2, 1}, Mat_{2} vec_{ 2, 2}, ... Mat_{n_vectors} vec_{ 2, n_vectors}],
            ...
            [Mat_{1} vec_{bs, 1}, Mat_{2} vec_{bs, 2}, ... Mat_{n_vectors} vec_{bs, n_vectors}],
        ]
    In the shared case all ``Mat_{i}`` are one and the same matrix.
    """

    def __init__(
        self,
        in_features: int, out_features: int, n_vectors: int,
        bias: bool=True, weight_shared: bool=True
    ) -> None:
        """Create the parameters and initialise them.

        Args:
            in_features: Length of each input vector.
            out_features: Length of each output vector.
            n_vectors: Number of vectors per sample.
            bias: Add a learnable bias of length ``out_features``.
            weight_shared: Use one weight matrix for all vector slots
                instead of one matrix per slot.
        """
        super(VectorWiseLinear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.n_vectors = n_vectors
        self.weight_shared = weight_shared

        if weight_shared:
            weight_shape = (out_features, in_features)
            self.matmul_func = vector_wise_shared_matmul
        else:
            weight_shape = (n_vectors, out_features, in_features)
            self.matmul_func = vector_wise_matmul
        # Uninitialised storage; filled by reset_parameters() below.
        self.weight = nn.Parameter(torch.Tensor(*weight_shape))

        if not bias:
            self.register_parameter('bias', None)
        else:
            self.bias = nn.Parameter(torch.Tensor(out_features))

        self.reset_parameters()

    def reset_parameters(self) -> None:
        """Initialise weight (Kaiming uniform) and bias (uniform in +-1/sqrt(fan_in))."""
        kaiming_uniform_for_vwl(self.weight, a=np.sqrt(5))
        if self.bias is None:
            return
        fan_in, _ = _calculate_fan_in_and_fan_out_for_vwl(self.weight)
        limit = 1 / np.sqrt(fan_in)
        nn.init.uniform_(self.bias, -limit, limit)

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Apply the layer to ``X`` of shape (batch_size, n_vectors, in_features)."""
        out = self.matmul_func(X, self.weight)
        return out if self.bias is None else out + self.bias

In [None]:
class GraphConv(nn.Module):
    """Basic graph convolution layer.

    Computes ``A @ msg(X)`` and optionally adds a self term
    ``model_self(X)`` and a bias.
    """

    def __init__(
        self, 
        in_channels: int, out_channels: int, n_nodes: int, shrare_msg: bool=True,
        model_self: bool=True, share_model_self: bool=True,
        bias: bool=True, share_bias: bool=True
    ) -> None:
        """Initialize.

        Args:
            in_channels: Number of input channels per node.
            out_channels: Number of output channels per node.
            n_nodes: Number of graph nodes.
            shrare_msg: Share the message weight across nodes. The misspelt
                name is kept because callers pass it by keyword.
            model_self: Add the self-modeling term.
            share_model_self: Share the self-modeling weight across nodes.
            bias: Add a learnable bias.
            share_bias: Use one bias vector for all nodes instead of one per node.
        """
        # nn.Module must be initialised before attributes are assigned to it.
        super(GraphConv, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.n_nodes = n_nodes

        # # message
        self.msg = VectorWiseLinear(
            in_channels, out_channels, n_nodes, False, shrare_msg)

        # # self-modeling
        # The attribute name is kept so existing state_dict keys stay valid;
        # it is None (instead of the former False) when the term is disabled.
        if model_self:
            self.model_self = VectorWiseLinear(
                in_channels, out_channels, n_nodes, False, share_model_self)
        else:
            self.model_self = None

        # # bias
        if bias:
            if share_bias:
                self.bias = nn.Parameter(torch.Tensor(out_channels))
            else:
                self.bias = nn.Parameter(torch.Tensor(n_nodes, out_channels))
            bound = 1 / np.sqrt(out_channels)
            nn.init.uniform_(self.bias, -bound, bound)
        else:
            self.register_parameter('bias', None)

    def forward(self, X: torch.Tensor, A: torch.Tensor, W: torch.Tensor=None) -> torch.Tensor:
        """Forward.

        Args:
            X: (batch_size, n_nodes, n_channels)
                Array which represents bags of vectors.
                X[:, i, :] corresponds to the feature vectors of node i.
            A: (batch_size, n_nodes, n_nodes)
                Array which represents adjacency matrices.
                A[:, i, j] corresponds to the weight (scalar) of the edge from node j to node i.
            W: (batch_size, n_nodes, n_nodes)
                Optional edge weights, multiplied element-wise into ``A``.

        Returns:
            Tensor of shape (batch_size, n_nodes, out_channels).
        """
        if W is not None:
            A = A * W  # shape: (batch_size, n_nodes, n_nodes)

        # # message passing: transform every node, then aggregate over neighbours
        M = self.msg(X)         # shape: (batch_size, n_nodes, out_channels)
        H = torch.matmul(A, M)  # shape: (batch_size, n_nodes, out_channels)

        # # node update
        if self.model_self is not None:
            H = H + self.model_self(X)
        if self.bias is not None:
            H = H + self.bias

        return H

In [None]:
class MLP(nn.Module):
    """Stack of dense blocks."""

    def __init__(
        self, n_features_list: tp.List[int], use_tail_as_out: bool=False,
        drop_rate: float=0.0, use_bn: bool=False, use_wn: bool=False,
        activ:str="relu", block_name: str="LBAD"
    ):
        """Build one block per consecutive pair of ``n_features_list``.

        Args:
            n_features_list: Layer widths, input width first.
            use_tail_as_out: Treat the last layer as the output layer. For
                ``"BDLA"`` it becomes a block with identity activation, for
                the other block types a plain (optionally weight-normed)
                linear layer.
            drop_rate: Dropout probability handed to every block.
            use_bn: Batch-norm switch handed to every block.
            use_wn: Weight-norm switch handed to every block.
            activ: Activation name for all non-output blocks.
            block_name: ``"LBAD"``, ``"BDLA"`` or ``"LABD"``.
        """
        super(MLP, self).__init__()
        block_class = {"LBAD": LBAD, "BDLA": BDLA, "LABD": LABD}[block_name]
        widths = list(zip(n_features_list[:-1], n_features_list[1:]))
        last_idx = len(widths) - 1

        blocks = []
        for idx, (in_feats, out_feats) in enumerate(widths):
            is_output_layer = use_tail_as_out and idx == last_idx
            if not is_output_layer:
                block = block_class(in_feats, out_feats, drop_rate, use_bn,  use_wn, activ)
            elif block_name in ["BDLA"]:
                block = block_class(in_feats, out_feats, drop_rate, use_bn,  use_wn, "identity")
            else:
                block = nn.Linear(in_feats, out_feats)
                if use_wn:
                    block = nn.utils.weight_norm(block)
            blocks.append(block)

        self.layers = nn.Sequential(*blocks)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through all blocks in order."""
        return self.layers(x)


class CNNStacking1d(nn.Module):
    """1D-CNN for Stacking."""

    def __init__(
        self, n_models: int,
        n_channels_list: tp.List[int], use_bias: bool=False,
        kwargs_head: tp.Optional[tp.Dict]=None,
    ):
        """Build the convolution stack and the MLP head.

        Args:
            n_models: Length of the axis the convolutions slide over.
            n_channels_list: Channel counts, input channels first; one
                ``Conv1d`` (kernel 3, no padding) + ReLU per consecutive pair.
            use_bias: Give the convolutions a bias.
            kwargs_head: Keyword arguments for ``MLP``. Must contain
                ``"n_features_list"``; its first entry is replaced by the
                flattened size of the convolution output. The replacement is
                done on a private copy, so the caller's dict is not modified.
        """
        super(CNNStacking1d, self).__init__()
        self.n_conv_layers = len(n_channels_list) - 1
        for i in range(self.n_conv_layers):
            in_ch = n_channels_list[i]
            out_ch = n_channels_list[i + 1]
            layer = nn.Sequential(
                nn.Conv1d(
                    in_ch, out_ch, kernel_size=3, stride=1, padding=0, bias=use_bias),
                # nn.BatchNorm1d(out_ch),
                nn.ReLU(inplace=True))
            # Registered by name (conv1, conv2, ...) to keep state_dict keys stable.
            setattr(self, "conv{}".format(i + 1), layer)

        head_kwargs = deepcopy(kwargs_head) if kwargs_head is not None else {}
        # Every unpadded kernel-3 convolution shortens the sliding axis by 2.
        head_kwargs["n_features_list"][0] = (n_models - 2 * self.n_conv_layers) * n_channels_list[-1]
        self.head = MLP(**head_kwargs)

    def forward(self, x: torch.FloatTensor) -> torch.Tensor:
        """Apply the convolutions, flatten per sample and apply the head."""
        bs = x.shape[0]
        h = x  # expected shape: (bs, n_classes, n_models)
        for i in range(self.n_conv_layers):
            h = getattr(self, "conv{}".format(i + 1))(h)

        h = torch.reshape(h, (bs, -1))
        return self.head(h)
    
    
    
class CNNStacking2d(nn.Module):
    """2D-CNN for Stacking."""

    def __init__(
        self, n_models: int, n_classes: int,
        n_channels_list: tp.List[int], use_bias: bool=False,
        kwargs_head: tp.Optional[tp.Dict]=None,
    ):
        """Build the convolution stack and the MLP head.

        Args:
            n_models: Length of the last input axis, which the (1, 3)
                kernels slide over.
            n_classes: Length of the second-to-last input axis.
            n_channels_list: Channel counts, input channels first; one
                ``Conv2d`` (kernel (1, 3), no padding) + ReLU per consecutive pair.
            use_bias: Give the convolutions a bias.
            kwargs_head: Keyword arguments for ``MLP``. Must contain
                ``"n_features_list"``; its first entry is replaced by the
                flattened size of the convolution output. The replacement is
                done on a private copy, so the caller's dict is not modified.
        """
        super(CNNStacking2d, self).__init__()
        self.n_conv_layers = len(n_channels_list) - 1
        for i in range(self.n_conv_layers):
            in_ch = n_channels_list[i]
            out_ch = n_channels_list[i + 1]
            layer = nn.Sequential(
                nn.Conv2d(
                    in_ch, out_ch, kernel_size=(1, 3), stride=1, padding=0, bias=use_bias),
                # nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True))
            # Registered by name (conv1, conv2, ...) to keep state_dict keys stable.
            setattr(self, "conv{}".format(i + 1), layer)

        head_kwargs = deepcopy(kwargs_head) if kwargs_head is not None else {}
        # Every unpadded (1, 3) convolution shortens the last axis by 2.
        head_kwargs["n_features_list"][0] = (n_models - 2 * self.n_conv_layers) * n_classes * n_channels_list[-1]
        self.head = MLP(**head_kwargs)

    def forward(self, x: torch.FloatTensor) -> torch.Tensor:
        """Apply the convolutions, flatten per sample and apply the head."""
        bs = x.shape[0]
        h = x  # expected shape: (bs, 1, n_classes, n_models)
        for i in range(self.n_conv_layers):
            h = getattr(self, "conv{}".format(i + 1))(h)

        h = torch.reshape(h, (bs, -1))
        return self.head(h)
    
    
class GCNStacking(nn.Module):
    """GCN for Stacking."""

    def __init__(
        self, n_classes: int,
        n_channels_list: tp.List[int],
        add_self_loop: bool=False,
        kwargs_head: tp.Optional[tp.Dict]=None,
    ):
        """Build the graph-convolution stack, the adjacency buffer and the MLP head.

        Args:
            n_classes: Number of graph nodes (one node per class).
            n_channels_list: Channel counts, input channels first; one
                ``GraphConv`` per consecutive pair, with per-node weights and biases.
            add_self_loop: With self loops the adjacency is uniform
                ``1 / n_classes``; without them the diagonal is zero and all
                other entries are ``1 / (n_classes - 1)``.
            kwargs_head: Keyword arguments for ``MLP``. Must contain
                ``"n_features_list"``; its first entry is replaced by the
                flattened size of the last graph layer's output. The
                replacement is done on a private copy, so the caller's dict
                is not modified.
        """
        super(GCNStacking, self).__init__()
        self.n_conv_layers = len(n_channels_list) - 1
        for i in range(self.n_conv_layers):
            in_ch = n_channels_list[i]
            out_ch = n_channels_list[i + 1]
            layer = GraphConv(
                in_ch, out_ch, n_classes,
                shrare_msg=False, share_model_self=False, share_bias=False)
            # Registered by name (conv1, conv2, ...) to keep state_dict keys stable.
            setattr(self, "conv{}".format(i + 1), layer)

        self.relu = nn.ReLU(inplace=True)
        if add_self_loop:
            adj_mat = torch.ones(n_classes, n_classes) / n_classes
        else:
            adj_mat = (1 - torch.eye(n_classes, n_classes)) / (n_classes - 1)
        # A buffer follows the module across devices and is stored in the state_dict.
        self.register_buffer("A", adj_mat.float())

        head_kwargs = deepcopy(kwargs_head) if kwargs_head is not None else {}
        head_kwargs["n_features_list"][0] = n_classes * n_channels_list[-1]
        self.head = MLP(**head_kwargs)

    def forward(self, X: torch.FloatTensor) -> torch.Tensor:
        """Apply the graph convolutions (each followed by ReLU), flatten and apply the head."""
        bs = X.shape[0]
        H = X  # expected shape: (bs, n_classes, n_models)
        for i in range(self.n_conv_layers):
            # A[None, ...] adds a leading batch axis of size 1 to the adjacency.
            H = getattr(self, "conv{}".format(i + 1))(H, self.A[None, ...])
            H = self.relu(H)

        h = torch.reshape(H, (bs, -1))
        return self.head(h)

### utils for training

In [None]:
class EvalFuncManager(nn.Module):
    """Accumulates metrics over an evaluation epoch and reports them at its end."""

    def __init__(
        self, iters_per_epoch: int,
        evalfunc_dict: tp.Dict[str, nn.Module], prefix: str = "val"
    ) -> None:
        """Register the metric modules.

        Args:
            iters_per_epoch: Number of calls that make up one evaluation epoch.
            evalfunc_dict: Metric name -> metric module. Each module must
                offer ``reset()``, ``update(y, t)`` and ``compute()``.
            prefix: Prefix of the reported keys (``"<prefix>/<name>"``).
        """
        super(EvalFuncManager, self).__init__()
        self.tmp_iter = 0
        self.iters_per_epoch = iters_per_epoch
        self.prefix = prefix
        self.metric_names = []
        for metric_name, metric in evalfunc_dict.items():
            # Each metric becomes an attribute (and thereby a sub-module).
            setattr(self, metric_name, metric)
            self.metric_names.append(metric_name)
        self.reset()

    def reset(self) -> None:
        """Zero the iteration counter and reset every metric."""
        self.tmp_iter = 0
        for metric_name in self.metric_names:
            getattr(self, metric_name).reset()

    def __call__(self, y: torch.Tensor, t: torch.Tensor) -> None:
        """Feed one mini batch to all metrics; report and reset after the last batch."""
        for metric_name in self.metric_names:
            getattr(self, metric_name).update(y, t)
        self.tmp_iter += 1

        if self.tmp_iter != self.iters_per_epoch:
            return

        results = {}
        for metric_name in self.metric_names:
            key = "{}/{}".format(self.prefix, metric_name)
            results[key] = getattr(self, metric_name).compute()
        ppe.reporting.report(results)
        self.reset()


class MeanLoss(nn.Module):
    """Base class for a sample-weighted running mean of a batch loss.

    Subclasses must set ``self.loss_func``.
    """

    def __init__(self):
        super(MeanLoss, self).__init__()
        self.loss_sum = 0
        self.n_examples = 0

    def forward(self, y: torch.Tensor, t: torch.Tensor):
        """Loss of a single batch."""
        return self.loss_func(y, t)

    def reset(self):
        """Forget everything accumulated so far."""
        self.loss_sum = 0
        self.n_examples = 0

    def update(self, y: torch.Tensor, t: torch.Tensor):
        """Add one mini batch, weighting its loss by the batch size."""
        batch_size = y.shape[0]
        batch_loss = self(y, t).item()
        self.loss_sum += batch_loss * batch_size
        self.n_examples += batch_size

    def compute(self):
        """Mean loss over all examples seen since the last reset."""
        return self.loss_sum / self.n_examples
    

class MyLogLoss(MeanLoss):
    """Running mean of ``nn.BCEWithLogitsLoss``."""

    def __init__(self, **params):
        """``params`` are forwarded to ``nn.BCEWithLogitsLoss``."""
        super().__init__()
        self.loss_func = nn.BCEWithLogitsLoss(**params)


class LSBCEWithLogitsLoss(nn.Module):
    """Binary cross entropy on logits with label smoothing.

    Targets are smoothed to ``t * (1 - alpha) + alpha / k`` before the loss
    is computed.
    """

    def __init__(self, k: int, alpha: float=0.01):
        """Store the smoothing settings.

        Args:
            k: Divisor of the smoothing mass ``alpha``.
            alpha: Smoothing strength.
        """
        super(LSBCEWithLogitsLoss, self).__init__()
        self.k = k
        self.alpha = alpha
        self.loss_func = nn.BCEWithLogitsLoss()

    def forward(self, y, t):
        """Loss of logits ``y`` against the smoothed version of targets ``t``."""
        smoothed_targets = t * (1 - self.alpha) + self.alpha / self.k
        return self.loss_func(y, smoothed_targets)


class MyLSLogLoss(MeanLoss):
    """``MeanLoss`` whose per-batch loss is the label-smoothed ``LSBCEWithLogitsLoss``."""

    def __init__(self, **params):
        """Forward ``params`` unchanged to ``LSBCEWithLogitsLoss``."""
        super().__init__()
        smoothed_bce = LSBCEWithLogitsLoss(**params)
        self.loss_func = smoothed_bce

In [None]:
def run_train_loop(
    manager, args, model, device,
    train_loader, optimizer, scheduler, loss_func
):
    """Train ``model`` on ``train_loader`` until ``manager.stop_trigger`` is true.

    Every mini-batch is processed inside ``manager.run_iteration()``: gradients
    are cleared, the loss is computed and reported as ``train/loss``, then
    back-propagated and applied. If a scheduler is given it is stepped once per
    mini-batch, after the iteration context has exited. ``args`` is not used.
    """
    use_scheduler = scheduler is not None
    while not manager.stop_trigger:
        model.train()
        for inputs, targets in train_loader:
            with manager.run_iteration():
                optimizer.zero_grad()
                outputs = model(inputs.to(device))
                batch_loss = loss_func(outputs, targets.to(device))
                ppe.reporting.report({'train/loss': batch_loss.item()})
                batch_loss.backward()
                optimizer.step()
            if use_scheduler:
                scheduler.step()
            
def run_eval(args, model, device, batch, eval_manager):
    """Evaluate one validation/test batch and hand the result to ``eval_manager``.

    The model is switched to eval mode, run on the batch inputs moved to
    ``device``, and ``eval_manager(prediction, target)`` is called with the
    target moved to the same device. ``args`` is not used. Returns ``None``.
    """
    model.eval()
    inputs, targets = batch
    predictions = model(inputs.to(device))
    eval_manager(predictions, targets.to(device))

In [None]:
def get_optimizer(settings, model):
    """Instantiate the ``torch.optim`` optimizer named in ``settings["optimizer"]``.

    ``settings["optimizer"]["name"]`` selects the class and
    ``settings["optimizer"]["params"]`` is passed as keyword arguments; the
    optimizer is built over ``model.parameters()``.
    """
    optimizer_cfg = settings["optimizer"]
    optimizer_cls = getattr(torch.optim, optimizer_cfg["name"])
    return optimizer_cls(model.parameters(), **optimizer_cfg["params"])

def get_scheduler(settings, train_loader, optimizer):
    """Build the LR scheduler named in ``settings["scheduler"]``, or return ``None``.

    Args:
        settings: config dict; reads ``settings["scheduler"]["name"]`` and
            ``["params"]``, plus ``settings["globals"]["max_epoch"]`` for OneCycleLR.
        train_loader: sized iterable; ``len(train_loader)`` is used as
            ``steps_per_epoch`` for OneCycleLR.
        optimizer: optimizer the scheduler is attached to.

    Returns:
        An instance of the ``torch.optim.lr_scheduler`` class with the configured
        name, or ``None`` when the configured name is ``None``.

    The caller's ``settings`` dict is left untouched: the OneCycleLR-specific
    ``epochs`` / ``steps_per_epoch`` values are added to a local copy of the
    params instead of being written back into the shared config.
    """
    scheduler_name = settings["scheduler"]["name"]
    if scheduler_name is None:
        return None

    # Copy so that fold-specific values never leak into the shared settings.
    params = dict(settings["scheduler"]["params"] or {})
    if scheduler_name == "OneCycleLR":
        # OneCycleLR needs the total schedule length, which depends on the loader.
        params["epochs"] = settings["globals"]["max_epoch"]
        params["steps_per_epoch"] = len(train_loader)

    scheduler_cls = getattr(torch.optim.lr_scheduler, scheduler_name)
    return scheduler_cls(optimizer, **params)
    
def get_loss_function(settings):
    """Instantiate the loss named in ``settings["loss"]``.

    The name is looked up in ``torch.nn`` first and, failing that, among the
    module-level names of this notebook (e.g. the custom ``MeanLoss``
    subclasses). ``settings["loss"]["params"]`` is passed as keyword arguments.

    The lookup is a plain name lookup rather than ``eval()``, so a config value
    can only select an existing callable and cannot execute an expression.

    Raises:
        ValueError: if the name is neither a ``torch.nn`` attribute nor a
            callable defined at module level.
    """
    loss_cfg = settings["loss"]
    loss_name = loss_cfg["name"]
    if hasattr(nn, loss_name):
        loss_factory = getattr(nn, loss_name)
    else:
        loss_factory = globals().get(loss_name)
        if not callable(loss_factory):
            raise ValueError("unknown loss function: {!r}".format(loss_name))
    return loss_factory(**loss_cfg["params"])

def get_manager(
    settings, model, device, train_loader, val_loader, optimizer, eval_manager, output_path
):
    """Create the ``ppe`` ExtensionsManager used for one training run.

    The manager stops on an early-stopping trigger that monitors ``val/metric``
    (minimised, checked every epoch) with the patience and epoch cap taken from
    ``settings["globals"]``. It is extended, in this order, with: LR observation,
    log/plot/print reporting, a per-epoch evaluator that calls ``run_eval`` on
    ``val_loader``, and a model snapshot taken whenever ``val/metric`` reaches a
    new minimum.
    """
    global_cfg = settings["globals"]
    max_epoch = global_cfg["max_epoch"]

    stop_trigger = ppe.training.triggers.EarlyStoppingTrigger(
        check_trigger=(1, 'epoch'), monitor='val/metric', mode="min",
        patience=global_cfg["patience"], verbose=True,
        max_trigger=(max_epoch, 'epoch'))

    manager = ppe.training.ExtensionsManager(
        model, optimizer, max_epoch,
        iters_per_epoch=len(train_loader), stop_trigger=stop_trigger, out_dir=output_path)

    # Reporting extensions, registered in the same order as they are listed.
    printed_columns = [
        "epoch", "iteration", "lr", "train/loss", "val/loss", "val/metric", "elapsed_time"]
    manager.extend(ppe_extensions.observe_lr(optimizer=optimizer))
    manager.extend(ppe_extensions.LogReport())
    manager.extend(
        ppe_extensions.PlotReport(["train/loss", "val/loss"], 'epoch', filename='loss.png'))
    manager.extend(ppe_extensions.PlotReport(["lr"], 'epoch', filename='lr.png'))
    manager.extend(ppe_extensions.PrintReport(printed_columns))

    def evaluate_batch(*batch):
        """Adapter: the Evaluator passes batch elements positionally."""
        return run_eval(settings, model, device, batch, eval_manager)

    # Validation pass once per epoch.
    manager.extend(
        ppe_extensions.Evaluator(val_loader, model, eval_func=evaluate_batch),
        trigger=(1, "epoch"))

    # Snapshot the model each time the validation metric improves.
    best_metric_trigger = ppe.training.triggers.MinValueTrigger(
        key="val/metric", trigger=(1, 'epoch'))
    manager.extend(
        ppe_extensions.snapshot(target=model, filename="snapshot_epoch_{.epoch}.pth"),
        trigger=best_metric_trigger)

    return manager

In [None]:
def set_random_seed(seed: int = 42, deterministic: bool = False):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: value used for every generator.
        deterministic: when true, additionally put cuDNN into deterministic
            mode and switch off its benchmark autotuner.
    """
    random.seed(seed)
    np.random.seed(seed)
    # Hash randomisation is fixed at interpreter start-up, so this only affects
    # Python processes launched after this point, not the running one.
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    # Seed every visible CUDA device, not only the current one.
    torch.cuda.manual_seed_all(seed)  # type: ignore
    if deterministic:
        torch.backends.cudnn.deterministic = True  # type: ignore
        # The benchmark autotuner may pick different kernels from run to run,
        # which defeats the deterministic flag, so it has to be disabled too.
        torch.backends.cudnn.benchmark = False  # type: ignore

In [None]:
def run_train_one_fold(
    settings, model, train_all_dataset, train_val_index, device, output_path):
    """Train ``model`` on one CV fold.

    ``train_val_index`` holds the train indices at position 0 and the validation
    indices at position 1; both index into ``train_all_dataset``. Loaders,
    optimizer, scheduler and loss are built from ``settings``. The training loss
    object is also registered as the validation ``loss``, next to a plain
    ``MyLogLoss`` registered as ``metric``. Training output goes to
    ``output_path`` through the manager.
    """
    train_index, val_index = train_val_index[0], train_val_index[1]
    train_dataset = data.Subset(train_all_dataset, train_index)
    val_dataset = data.Subset(train_all_dataset, val_index)

    loader_cfg = settings["loader"]
    train_loader = data.DataLoader(train_dataset, **loader_cfg["train"])
    val_loader = data.DataLoader(val_dataset, **loader_cfg["val"])
    print("train: {}, val: {}".format(len(train_dataset), len(val_dataset)))

    model.to(device)
    optimizer = get_optimizer(settings, model)
    scheduler = get_scheduler(settings, train_loader, optimizer)
    loss_func = get_loss_function(settings)
    loss_func.to(device)

    # Validation reports both the training objective and the unsmoothed log loss.
    validation_metrics = {"loss": loss_func, "metric": MyLogLoss()}
    eval_mgr = EvalFuncManager(len(val_loader), validation_metrics)

    manager = get_manager(
        settings, model, device, train_loader, val_loader,
        optimizer, eval_mgr, output_path)

    run_train_loop(
        manager, settings, model, device, train_loader, optimizer, scheduler, loss_func)

In [None]:
def get_drug_multilabel_index(target_in_drug_df, target_cols_list, vc_num, n_folds, seed):
    """Assign a CV fold to every row, keeping each rare drug inside a single fold.

    Reference: https://www.kaggle.com/c/lish-moa/discussion/195195

    Drugs with at most ``vc_num`` rows are stratified at drug level, on the
    per-drug mean of the target columns, so all rows of such a drug share one
    fold. Rows of drugs with more than ``vc_num`` rows are stratified row by row.

    Args:
        target_in_drug_df: frame with ``sig_id``, ``drug_id`` and the target
            columns. It is only read; no column is added to it.
        target_cols_list: names of the target columns used for stratification.
        vc_num: row-count threshold separating the two groups of drugs.
        n_folds: number of folds.
        seed: random state given to both stratified splitters.

    Returns:
        ``numpy`` ``int8`` array with one fold id per row, in row order.
    """
    # LOCATE DRUGS
    drug_counts = target_in_drug_df.drug_id.value_counts()
    rare_drugs = drug_counts.loc[drug_counts <= vc_num].index
    frequent_drugs = drug_counts.loc[drug_counts > vc_num].index

    drug_to_fold = {}
    sig_to_fold = {}

    # STRATIFY DRUGS vc_num OR LESS (one row per drug, so a drug never spans folds)
    skf = MultilabelStratifiedKFold(n_splits=n_folds, random_state=seed, shuffle=True)
    tmp = target_in_drug_df.groupby('drug_id')[target_cols_list].mean().loc[rare_drugs]
    if tmp.shape[0] != 0:
        for fold, (_, idxV) in enumerate(skf.split(tmp, tmp[target_cols_list])):
            drug_to_fold.update({k: fold for k in tmp.index[idxV].values})

    # STRATIFY DRUGS MORE THAN vc_num (row by row, keyed by sig_id)
    skf = MultilabelStratifiedKFold(n_splits=n_folds, random_state=seed, shuffle=True)
    tmp = target_in_drug_df.loc[target_in_drug_df.drug_id.isin(frequent_drugs)].reset_index(drop=True)
    if tmp.shape[0] != 0:
        for fold, (_, idxV) in enumerate(skf.split(tmp, tmp[target_cols_list])):
            sig_to_fold.update({k: fold for k in tmp.sig_id[idxV].values})

    # ASSIGN FOLDS on a standalone Series so the caller's frame is not modified
    fold = target_in_drug_df.drug_id.map(drug_to_fold)
    unassigned = fold.isna()
    fold.loc[unassigned] = target_in_drug_df.loc[unassigned, 'sig_id'].map(sig_to_fold)

    return fold.astype('int8').values


def drug_multilabel_stratified_kfold(k, target_in_drug_df, target_cols_list, n_folds, seed, vc_num=18):
    """Return the train/validation row positions of fold ``k`` for a drug-aware CV split.

    Args:
        k: id of the fold used for validation.
        target_in_drug_df: frame passed on to ``get_drug_multilabel_index``.
        target_cols_list: target column names used for stratification.
        n_folds: number of folds.
        seed: random state of the split.
        vc_num: row-count threshold passed on to ``get_drug_multilabel_index``;
            the default of 18 is the value that used to be hard-coded here.

    Returns:
        ``(train_idx, validation_idx)``: two lists of integer row positions.
    """
    folds_array = get_drug_multilabel_index(
        target_in_drug_df, target_cols_list, vc_num, n_folds, seed)
    train_idx = np.flatnonzero(folds_array != k).tolist()
    validation_idx = np.flatnonzero(folds_array == k).tolist()

    return train_idx, validation_idx

## preparation

In [None]:
# Join the scored training targets with the drug table on `sig_id`
# (left join, so every row of the targets file is kept).
target_in_drug = pd.merge(
    pd.read_csv("../input/lish-moa/train_targets_scored.csv"),
    pd.read_csv("../input/lish-moa/train_drug.csv"),
    how='left', on='sig_id')

print(target_in_drug.shape)
# `del_control` is defined elsewhere in the notebook; presumably it removes the
# control rows -- TODO confirm against its definition.
target_in_drug_trt = del_control(target_in_drug)
print(target_in_drug_trt.shape)

In [None]:
# Stacking inputs built from the frames in `oof_list` (`oof_list` and `y_true`
# are defined earlier in the notebook).

# X: the frames' values concatenated side by side (axis 1).
X = np.concatenate([df.values for df in oof_list], axis=1)
print(X.shape)

y = y_true.values
print(y.shape)

# X_2d: the frames' values stacked along a new axis 2 (one slice per frame).
X_2d = np.stack([df.values for df in oof_list], axis=2)
print(X_2d.shape)

# X_3d: X_2d with a length-1 axis inserted at position 1.
X_3d = X_2d[:, None, ...]
print(X_3d.shape)

# X_node: built with the same stack call as X_2d, as a separate array.
X_node = np.stack([df.values for df in oof_list], axis=2)

In [None]:
# Dataset wrapper for the stacking model. The GCN variant fed with `X_node` is
# the active one; the commented lines are the alternatives for the other input
# layouts built in the previous cell.
# train_all_dataset = MoAStackingDataset(X, y)
# train_all_dataset = MoAStackingDatasetFor1DCNN(X_2d, y)
# train_all_dataset = MoAStackingDatasetForCNN(X_3d, y)
train_all_dataset = MoAStackingDatasetForGCN(X_node, y)

In [None]:
# Per-fold training output goes to a `tmp` directory next to the current
# working directory; it is created here if it does not exist yet.
TRAIN_OUTPUT = Path.cwd().parent / "tmp"
TRAIN_OUTPUT.mkdir(exist_ok=True)

# The best model of every fold is copied into the working directory itself.
WORK_DIR = Path.cwd()

In [None]:
# Run a garbage-collection pass before training starts.
gc.collect()

## training

In [None]:
# Training configuration, kept as a YAML string and parsed into `settings`.
# `globals.seed` is replaced per model by each entry of `globals.seeds_for_avg`
# in the next cell. The `loss`, `optimizer` and `scheduler` sections are
# consumed by get_loss_function / get_optimizer / get_scheduler.
settings_str = """
globals:
  seed: 1990
  seeds_for_avg: [
    1990,
    42, 0, 1086, 39
  ]
  max_epoch: 20
  n_folds_split: 7
  patience: 10
  cuda_visible_devices: 0
  device: cuda
  fast_commit: False

loader:
  train: {
    batch_size: 128, shuffle: True, num_workers: 2,
    pin_memory: True, drop_last: True}
  val: {
    batch_size: 256, shuffle: False, num_workers: 2,
    pin_memory: True, drop_last: False}

model:
  name: GCNStacking
  params:
    n_classes: 206
    n_channels_list: [5, 16, 16, 16, 16]
    add_self_loop: True
    kwargs_head:
        n_features_list: [-1, 2048, 206]
        use_tail_as_out: True
        drop_rate: 0.8
        use_bn: False
        use_wn: True
        block_name: LABD
        
loss:
  name: MyLSLogLoss
  params: {k: 2, alpha: 1.0e-03}

optimizer:
  name: Adam
  params: {lr: 1.0e-03}

scheduler:
  name: OneCycleLR
  params: {pct_start: 0.1, div_factor: 1.0e+3, max_lr: 1.0e-02}
"""

settings = yaml.safe_load(settings_str)

In [None]:
device = torch.device(settings["globals"]["device"])
# One CV-score frame per seed model is collected here by the training cell.
cv_score_df_list = []

# Fast-commit mode: train for a single epoch only.
if settings["globals"]["fast_commit"]:
    settings["globals"]["max_epoch"] = 1

stgs_list = []

# Seed averaging: one independent deep copy of the settings per seed, differing
# only in `globals.seed`.
for tmp_seed in settings["globals"]["seeds_for_avg"]:
    stgs_list.append(deepcopy(settings))
    stgs_list[-1]["globals"]["seed"] = tmp_seed

In [None]:
# Train one stacking model per seed; each model is trained on every CV fold.
for m_id, tmp_stgs in enumerate(stgs_list):
    # Every per-model value is read from this model's own settings copy, the
    # same way the inference cell does.
    n_folds = tmp_stgs["globals"]["n_folds_split"]
    tmp_seed = tmp_stgs["globals"]["seed"]
    train_val_indexs = [
        drug_multilabel_stratified_kfold(
            fold_id, target_in_drug_trt, TARGET_COL, n_folds, tmp_seed)
        for fold_id in range(n_folds)]
    # Shuffle the order of the stage-1 models, seeded per stacking model.
    train_all_dataset.reset_model_order()
    train_all_dataset.shuffle_model_order(tmp_seed)
    print("[model {}'s order]: {}".format(m_id, train_all_dataset.model_order))

    for fold_id in range(n_folds):
        print("[fold: {} - model: {}]".format(fold_id, m_id))
        set_random_seed(tmp_seed, True)

        # Build a fresh model for this fold.
        # NOTE(review): eval() resolves the model class from the configured
        # name; it is only acceptable because the settings are defined in this
        # notebook and not read from an external source.
        model = eval(tmp_stgs["model"]["name"])(**tmp_stgs["model"]["params"])
        run_train_one_fold(
            tmp_stgs, model, train_all_dataset, train_val_indexs[fold_id], device,
            TRAIN_OUTPUT / "fold{}_model{}".format(fold_id, m_id))

        del model
        gc.collect()

    best_eval_list = []
    for fold_id in range(n_folds):
        output_dir = TRAIN_OUTPUT / "fold{}_model{}".format(fold_id, m_id)
        log_df = pd.read_json(output_dir / "log")
        # idxmin() returns an index label, so the row is selected with .loc.
        best_log = log_df.loc[
            log_df["val/metric"].idxmin(),
            ["epoch", "train/loss", "val/loss", "val/metric"]]
        best_eval_list.append(best_log)

        # Copy the snapshot of the best epoch into the working directory.
        best_epoch = int(best_log["epoch"])
        model_path = output_dir / "snapshot_epoch_{}.pth".format(best_epoch)
        cp_path = WORK_DIR / "best_model_fold{}_model{}.pth".format(fold_id, m_id)
        shutil.copy(model_path, cp_path)

        # Remove every snapshot left in the fold's output directory.
        for model_path in output_dir.glob("*.pth"):
            model_path.unlink()

    cv_score_df = pd.DataFrame(best_eval_list)
    cv_score_df.insert(0, "fold", range(n_folds))
    cv_score_df.insert(0, "model", m_id)
    del best_eval_list
    cv_score_df_list.append(cv_score_df)

# Inference by Stacking Model

## load test prediction by Stage1 Models

In [None]:
def order_sub(sub):
    """Return ``sub`` sorted by ``sig_id`` with a fresh 0..n-1 index."""
    ordered = sub.sort_values(by='sig_id')
    ordered = ordered.reset_index(drop=True)
    return ordered

In [None]:
# Load the test predictions of the five stage-1 models. In the two submission
# modes they are read from CSV files in the working directory; otherwise from
# the saved outputs under ../input.
# The two lists are in the same model order, which is also the order of the
# names printed below.
if DO_SUBMISSION or DO_VIRTUAL_SUBMISSION:
    sub_list = [
        pd.read_csv('./submission-sinchir0-nn.csv'),
        pd.read_csv('./submission_sinchir0_tabnet.csv'),
        pd.read_csv("./submission_takapy_tf-resnet.csv"),
        pd.read_csv('./submission_tawara_thrnn_seed_cv.csv'),
        pd.read_csv('./submission_tawara_thrnn_drug_seed_cv.csv'),
    ]
else:
    sub_list = [
        pd.read_csv('../input/nn-use-train-public/submission.csv'),
        pd.read_csv('../input/tabnet-train-public-add-n-shared-1/submission.csv'),
        pd.read_csv("../input/moa-takapy-tf-resnet-transfer/submission.csv"),
        pd.read_csv('../input/moa-weight-thrnn-seed-cv/submission.csv'),
        pd.read_csv('../input/moa-weight-thrnn-drug-seed-cv/submission.csv'),
    ]

# Print the shape of every loaded prediction frame as a quick sanity check.
for i, name in enumerate(["NN(drugCV)", "TabNet", "ResNet", "ThrNN", "ThrNN(drugCV)"]):
    print(f"[{name}]:", sub_list[i].shape)

In [None]:
# Sort every prediction frame by `sig_id` so that their rows line up.
sub_list = [order_sub(sub_df) for sub_df in sub_list]

## inference test data

In [None]:
def inference_function(settings, model, loader, device):
    """Return the sigmoid outputs of ``model`` for every batch of ``loader``.

    The model is moved to ``device`` and put in eval mode. Each batch is a pair
    whose first element is the input; the second element is ignored. The
    per-batch outputs are concatenated into one NumPy array. ``settings`` is
    not used.
    """
    model.to(device)
    model.eval()
    with torch.no_grad():
        batch_probs = [
            model(x.to(device)).sigmoid().detach().cpu().numpy()
            for x, _ in loader]
        pred_arr = np.concatenate(batch_probs)
    return pred_arr

In [None]:
# Test-time stacking inputs, built from the stage-1 prediction frames with the
# same concatenate / stack calls as the training inputs. The first column of
# every frame is dropped by `iloc[:, 1:]`.

# X_test: prediction columns of all frames concatenated side by side (axis 1).
X_test = np.concatenate([
    sub_df.iloc[:, 1:].values for sub_df in sub_list], axis=1)
print(X_test.shape)

# X_test_2d: prediction columns stacked along a new axis 2 (one slice per frame).
X_test_2d = np.stack([
    sub_df.iloc[:, 1:].values for sub_df in sub_list], axis=2)
print(X_test_2d.shape)

# X_test_3d: X_test_2d with a length-1 axis inserted at position 1.
X_test_3d = X_test_2d[:, None, ...]
print(X_test_3d.shape)

# X_test_node: built with the same stack call as X_test_2d, as a separate array.
X_test_node = np.stack([
    sub_df.iloc[:, 1:].values for sub_df in sub_list], axis=2)

# The GCN dataset variant is the active one; no targets exist for the test set.
# test_dataset = MoAStackingDataset(X_test, None)
# test_dataset = MoAStackingDatasetFor1DCNN(X_test_2d, None)
# test_dataset = MoAStackingDatasetForCNN(X_test_3d, None)
test_dataset = MoAStackingDatasetForGCN(X_test_node, None)

In [None]:
# Out-of-fold and test predictions, averaged over all seed models.
oof_pred_arr_avg = np.zeros((len(X), 206))
test_pred_arr_avg = np.zeros((len(X_test), 206))

for m_id, tmp_stgs in enumerate(stgs_list):
    n_folds = tmp_stgs["globals"]["n_folds_split"]
    tmp_seed = tmp_stgs["globals"]["seed"]
    # Rebuild the same folds that were used when this model was trained.
    train_val_indexs = [
        drug_multilabel_stratified_kfold(
            fold_id, target_in_drug_trt, TARGET_COL, n_folds, tmp_seed)
        for fold_id in range(n_folds)]

    oof_pred_arr = np.zeros((len(X), 206))
    test_preds_arr = np.zeros((n_folds, len(X_test), 206))
    # Restore the stage-1 model order used at training time and apply it to the
    # test dataset as well.
    train_all_dataset.reset_model_order()
    train_all_dataset.shuffle_model_order(tmp_seed)
    test_dataset.model_order = train_all_dataset.model_order
    print("[model {}'s order]: {}".format(m_id, train_all_dataset.model_order))

    for fold_id in range(n_folds):
        print("[fold: {} - model: {}]".format(fold_id, m_id))
        model_path = WORK_DIR / "best_model_fold{}_model{}.pth".format(fold_id, m_id)
        model = GCNStacking(**tmp_stgs["model"]["params"])
        model.load_state_dict(torch.load(model_path))

        # Out-of-fold prediction on this fold's validation rows.
        val_index = train_val_indexs[fold_id][1]
        val_dataset = data.Subset(train_all_dataset, val_index)
        val_loader = data.DataLoader(val_dataset, **tmp_stgs["loader"]["val"])

        val_pred = inference_function(tmp_stgs, model, val_loader, device)
        oof_pred_arr[val_index] = val_pred
        del val_dataset; del val_loader; del val_pred

        # Test prediction of this fold's model.
        test_loader = data.DataLoader(test_dataset, **tmp_stgs["loader"]["val"])
        test_pred = inference_function(tmp_stgs, model, test_loader, device)
        test_preds_arr[fold_id] = test_pred

        del test_loader; del test_pred;
        gc.collect()

    oof_pred_arr_avg += oof_pred_arr
    test_pred_arr_avg += test_preds_arr.mean(axis=0)

    oof_score = log_loss(y.ravel(), oof_pred_arr.ravel())

    # Add the out-of-fold score as an extra row. pd.concat is used because
    # DataFrame.append is deprecated and no longer exists in pandas 2.x.
    oof_row = pd.DataFrame(
        [{"model": m_id, "fold": "oof",  "val/metric": oof_score,}])
    cv_score_df_list[m_id] = pd.concat(
        [cv_score_df_list[m_id], oof_row], ignore_index=True)

oof_pred_arr_avg /= len(stgs_list)
test_pred_arr_avg /= len(stgs_list)

In [None]:
# All per-model CV score frames stacked into one table (shown as cell output).
cv_score_all = pd.concat(cv_score_df_list, axis=0)
cv_score_all

In [None]:
# `epoch != epoch` is true only for NaN, so this selects the rows without an
# epoch value, i.e. the "oof" rows added by the inference cell.
cv_score_all.query("epoch != epoch")

In [None]:
# Log loss of the seed-averaged out-of-fold predictions over all flattened entries.
print(log_loss(y.ravel(), oof_pred_arr_avg.ravel()))

In [None]:
# Reuse the first stage-1 frame as a template and overwrite every column after
# the first with the seed-averaged test predictions.
BLEND = sub_list[0].copy()
BLEND.iloc[:, 1:] = test_pred_arr_avg

print("shape:", BLEND.shape)
# `display` is the IPython rich-display helper available in notebooks.
display(BLEND.head())

## Make Submission

In [None]:
# At least one mode flag has to be set; refuse to continue otherwise.
if not (DO_SUBMISSION or DO_TRAIN_FOR_ENSEMBLE or DO_VIRTUAL_SUBMISSION):
    raise ValueError

# Every mode starts from the competition's test features and sample submission.
df_test = pd.read_csv("../input/lish-moa/test_features.csv")
submission = pd.read_csv("../input/lish-moa/sample_submission.csv")

if not (DO_SUBMISSION or DO_TRAIN_FOR_ENSEMBLE):
    # Only the virtual-submission mode reaches this branch: replace both frames
    # with the output of generate_virtual_private and print the shapes before
    # and after.
    print(df_test.shape, submission.shape)
    df_test, submission = generate_virtual_private(df_test, submission)
    print(df_test.shape, submission.shape)

In [None]:
# Build the final submission: one row per sig_id, one column per target.
df = submission.copy()

public_id = list(df['sig_id'].values)
test_id = list(df_test['sig_id'].values)

# Ids present in the test features but missing from the sample submission.
# Sorted so that the row order of the output file does not depend on set
# iteration order.
private_id = sorted(set(test_id) - set(public_id))

# Start from an all-zero table, then fill in the blended predictions.
df_submit = pd.DataFrame(index = public_id+private_id, columns=TARGET_COL)
df_submit.index.name = 'sig_id'
df_submit[:] = 0
df_predict = BLEND.copy()
df_submit.loc[df_predict.sig_id,:] = df_predict[TARGET_COL].values
# Rows whose cp_type is 'ctl_vehicle' are forced to zero.
df_submit.loc[df_test[df_test.cp_type =='ctl_vehicle'].sig_id] = 0
df_submit.to_csv('submission.csv',index=True)

In [None]:
# Final sanity check: shape and first rows of the written submission table.
print(df_submit.shape)
df_submit.head()