In [1]:
import sys

#sys.path.append("../input/adabeliefoptimizer/pypi_packages/adabelief_tf0.1.0")

In [2]:
import sys

#sys.path.append("../input/iterative-stratification/iterative-stratification-master")

In [3]:
## Do not use the GPU
#import os
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [4]:
import sys

#sys.path.append("../input/tabnet")

sys.path.append(r'C:\Users\81908\jupyter_notebook\poetry_work\tfgpu\01_MoA_compe\code')
from tabnet_tf import *

Tensorflow version 2.3.1


In [5]:
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
from adabelief_tf import AdaBeliefOptimizer

In [6]:
import tensorflow as tf


def build_callbacks(
    model_path, factor=0.1, mode="auto", monitor="val_loss", patience=0, verbose=0
):
    """Assemble the three Keras callbacks used for one training run.

    Parameters
    ----------
    model_path : str
        Destination handed to ``ModelCheckpoint``.
    factor : float, default 0.1
        ``factor`` forwarded to ``ReduceLROnPlateau``.
    mode, monitor, verbose
        Forwarded unchanged to all three callbacks.
    patience : int, default 0
        Forwarded to ``EarlyStopping`` only; ``ReduceLROnPlateau`` is created
        without a ``patience`` argument and therefore uses its own default.

    Returns
    -------
    list
        ``[EarlyStopping, ModelCheckpoint, ReduceLROnPlateau]`` in that order.
    """
    callback_api = tf.keras.callbacks
    # Settings every callback receives.
    shared = dict(mode=mode, monitor=monitor, verbose=verbose)

    stopper = callback_api.EarlyStopping(patience=patience, **shared)
    # NOTE(review): no save_weights_only is passed, so the checkpoint uses the
    # Keras default; confirm that this works for the subclassed model that is
    # checkpointed to an ".h5" path later in this notebook.
    checkpointer = callback_api.ModelCheckpoint(
        model_path, save_best_only=True, **shared
    )
    lr_reducer = callback_api.ReduceLROnPlateau(factor=factor, **shared)

    return [stopper, checkpointer, lr_reducer]

In [7]:
from sklearn.metrics import log_loss


def score(Y, Y_pred):
    """Return the mean of the per-column binary log losses.

    Parameters
    ----------
    Y : DataFrame-like with ``.shape`` and ``.iloc``
        True labels, one column per class.
    Y_pred : DataFrame-like with ``.iloc``
        Predictions, column-aligned with ``Y`` by position.

    Returns
    -------
    float
        ``np.mean`` over the log loss of every column, each computed with
        ``labels=[0, 1]`` so columns containing a single class still work.
    """
    column_losses = [
        log_loss(Y.iloc[:, col], Y_pred.iloc[:, col], labels=[0, 1])
        for col in range(Y.shape[1])
    ]

    return np.mean(column_losses)

In [8]:
import os
import random as rn

import tensorflow as tf
import numpy as np


def set_seed(seed=0):
    """Seed every RNG in use and install a single-threaded TF1-compat session.

    Sets ``PYTHONHASHSEED``, then seeds ``random``, NumPy and TensorFlow with
    ``seed``. Finally a ``tf.compat.v1.Session`` on the default graph, limited
    to one inter-op and one intra-op thread, is registered as the Keras
    backend session.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    for seeder in (rn.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)

    tf_v1 = tf.compat.v1
    default_graph = tf_v1.get_default_graph()
    single_thread = tf_v1.ConfigProto(
        inter_op_parallelism_threads=1, intra_op_parallelism_threads=1
    )

    tf_v1.keras.backend.set_session(
        tf_v1.Session(graph=default_graph, config=single_thread)
    )

In [9]:
from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin


class ClippedFeatures(BaseEstimator, TransformerMixin):
    """Clip every column to quantile bounds learned in ``fit``.

    Parameters
    ----------
    copy : bool, default True
        When true, ``transform`` works on a copy; otherwise the frame passed
        in is clipped in place.
    high, low : float
        Quantiles used as the upper and lower clipping bounds.
    """

    def __init__(self, copy=True, high=0.99, low=0.01):
        self.copy, self.high, self.low = copy, high, low

    def fit(self, X, y=None):
        """Store the per-column ``high`` and ``low`` quantiles of ``X``."""
        upper = X.quantile(q=self.high)
        lower = X.quantile(q=self.low)
        self.data_max_, self.data_min_ = upper, lower

        return self

    def transform(self, X):
        """Return ``X`` clipped column-wise to the fitted bounds."""
        target = X.copy() if self.copy else X
        target.clip(self.data_min_, self.data_max_, axis=1, inplace=True)

        return target

In [10]:
# https://arxiv.org/abs/1905.04899

import numpy as np
import tensorflow as tf


class Cutmix(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields feature-wise mixed batches.

    For each batch a mixing ratio is drawn from ``Beta(alpha, alpha)`` and a
    random 0/1 mask over the feature columns decides, per column, whether the
    value comes from the batch row or from a randomly chosen partner row.
    Targets (when given) are blended linearly with the same ratio.
    """

    def __init__(self, X, y=None, batch_size=32, alpha=1.0):
        self.X = np.asarray(X)
        self.y = None if y is None else np.asarray(y)
        self.batch_size = batch_size
        self.alpha = alpha

    def __getitem__(self, i):
        """Build batch ``i`` as ``(X_batch, y_batch)``; ``y_batch`` is ``None`` without targets."""
        rows = slice(i * self.batch_size, (i + 1) * self.batch_size)
        X_own = self.X[rows]

        n_samples, n_features = self.X.shape

        # Random draws happen in this fixed order: partner rows, mixing
        # ratio, feature mask.
        partner_rows = np.random.choice(n_samples, X_own.shape[0])
        ratio = np.random.beta(self.alpha, self.alpha)
        keep = np.random.choice([0.0, 1.0], size=n_features, p=[1.0 - ratio, ratio])

        X_partner = self.X[partner_rows]
        X_mixed = keep * X_own + (1.0 - keep) * X_partner

        if self.y is None:
            return X_mixed, None

        y_own = self.y[rows]
        y_partner = self.y[partner_rows]

        return X_mixed, ratio * y_own + (1.0 - ratio) * y_partner

    def __len__(self):
        """Number of batches, counting a final partial batch."""
        return int(np.ceil(self.X.shape[0] / self.batch_size))

In [11]:
import numpy as np
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from sklearn.model_selection._split import _BaseKFold


class MultilabelStratifiedGroupKFold(_BaseKFold):
    """Multilabel-stratified K-fold that keeps regular-sized groups together.

    Groups whose size is one of ``_REGULAR_GROUP_SIZES`` are assigned to a
    fold as a unit, stratified on the group's mean label vector. Samples that
    belong to any other group are stratified individually on their own labels.

    Parameters
    ----------
    n_splits : int, default 5
    random_state : int or None
    shuffle : bool, default False
        All three are forwarded to ``_BaseKFold`` and to the inner
        ``MultilabelStratifiedKFold``.
    """

    # Group sizes that are kept together in one fold.
    _REGULAR_GROUP_SIZES = (6, 12, 18)

    def __init__(self, n_splits=5, random_state=None, shuffle=False):
        super().__init__(n_splits=n_splits, random_state=random_state, shuffle=shuffle)

    def _iter_test_indices(self, X=None, y=None, groups=None):
        """Yield the positional test indices of each fold.

        Parameters
        ----------
        X : ignored
        y : pandas.DataFrame
            Label matrix; must share its index with ``groups``.
        groups : pandas.Series
            Group label of every sample.

        Raises
        ------
        ValueError
            If ``y`` or ``groups`` is missing.
        """
        if y is None or groups is None:
            raise ValueError(
                "MultilabelStratifiedGroupKFold requires both 'y' and 'groups'."
            )

        cv = MultilabelStratifiedKFold(
            n_splits=self.n_splits,
            random_state=self.random_state,
            shuffle=self.shuffle,
        )

        group_sizes = groups.value_counts()
        is_regular = group_sizes.isin(self._REGULAR_GROUP_SIZES)
        regular_groups = group_sizes.loc[is_regular].index.sort_values()
        irregular_groups = group_sizes.loc[~is_regular].index.sort_values()

        # Regular groups: one row per group (mean label vector), so that the
        # whole group lands in the same fold.
        # Uses the `y` argument rather than a notebook-level global.
        group_to_fold = {}
        group_targets = y.groupby(groups).mean().loc[regular_groups]

        for fold, (_, test) in enumerate(cv.split(group_targets, group_targets)):
            group_to_fold.update({group: fold for group in group_targets.index[test]})

        # Irregular groups: stratify their samples one by one.
        sample_to_fold = {}
        sample_targets = y.loc[groups.isin(irregular_groups)]

        for fold, (_, test) in enumerate(cv.split(sample_targets, sample_targets)):
            sample_to_fold.update(
                {sample: fold for sample in sample_targets.index[test]}
            )

        # Samples of irregular groups are still NaN after the group mapping;
        # fill them from the per-sample assignment.
        folds = groups.map(group_to_fold)
        is_na = folds.isna()
        folds[is_na] = folds[is_na].index.map(sample_to_fold).values

        for i in range(self.n_splits):
            yield np.where(folds == i)[0]

In [12]:
import tensorflow as tf
# from tabnet import StackedTabNet


class StackedTabNetClassifier(tf.keras.Model):
    """``StackedTabNet`` encoder followed by a bias-free sigmoid dense head.

    ``num_classes`` sets the width of the output layer. ``feature_columns`` is
    passed positionally to ``StackedTabNet``; every other named argument is
    forwarded to it by keyword. Remaining ``**kwargs`` go to
    ``tf.keras.Model``.
    """

    def __init__(
        self,
        num_classes,
        batch_momentum=0.98,
        epsilon=1e-05,
        feature_columns=None,
        feature_dim=64,
        norm_type="group",
        num_decision_steps=5,
        num_features=None,
        num_groups=2,
        num_layers=1,
        output_dim=64,
        relaxation_factor=1.5,
        sparsity_coefficient=1e-05,
        virtual_batch_size=None,
        **kwargs
    ):
        super().__init__(**kwargs)

        # Keyword options handed straight through to the TabNet encoder.
        encoder_options = dict(
            batch_momentum=batch_momentum,
            epsilon=epsilon,
            feature_dim=feature_dim,
            norm_type=norm_type,
            num_decision_steps=num_decision_steps,
            num_features=num_features,
            num_groups=num_groups,
            num_layers=num_layers,
            output_dim=output_dim,
            relaxation_factor=relaxation_factor,
            sparsity_coefficient=sparsity_coefficient,
            virtual_batch_size=virtual_batch_size,
        )

        self.stacked_tabnet = StackedTabNet(feature_columns, **encoder_options)
        self.classifier = tf.keras.layers.Dense(
            num_classes, activation="sigmoid", use_bias=False
        )

    def call(self, inputs, training=None):
        """Encode ``inputs`` with the TabNet stack and apply the sigmoid head."""
        encoded = self.stacked_tabnet(inputs, training=training)

        return self.classifier(encoded)

In [13]:
#dtype = {"cp_type": "category", "cp_dose": "category"}
#index_col = "sig_id"
#
#train_features = pd.read_csv(
#    "../input/lish-moa/train_features.csv", dtype=dtype, index_col=index_col
#)
#X = train_features.select_dtypes("number")
#Y_nonscored = pd.read_csv(
#    "../input/lish-moa/train_targets_nonscored.csv", index_col=index_col
#)
#Y = pd.read_csv("../input/lish-moa/train_targets_scored.csv", index_col=index_col)
#groups = pd.read_csv(
#    "../input/lish-moa/train_drug.csv", index_col=index_col, squeeze=True
#)
#
#columns = Y.columns

In [14]:
def load_data():
    """Load the competition tables from the local checkout or the Kaggle input folder.

    When the current working directory contains ``"jupyter_notebook"`` the
    data directory is taken from the project's ``datasets`` module; otherwise
    ``../input/lish-moa`` is used. Both locations are read the same way.

    Returns
    -------
    tuple
        ``(X, Y, Y_nonscored, train_features, columns, groups, X_test)`` where
        ``X`` holds the numeric columns of ``train_features``, ``columns`` is
        ``Y.columns`` and ``groups`` is the single column of
        ``train_drug.csv`` as a Series.
    """
    if "jupyter_notebook" in os.getcwd():
        # Local run: make the project's code directory importable (once) and
        # ask it where the data lives.
        code_dir = r"C:\Users\81908\jupyter_notebook\poetry_work\tfgpu\01_MoA_compe\code"
        if code_dir not in sys.path:
            sys.path.append(code_dir)
        import datasets

        data_dir = datasets.DATADIR
    else:
        data_dir = "../input/lish-moa"

    dtype = {"cp_type": "category", "cp_dose": "category"}
    index_col = "sig_id"

    def read_table(name, **kwargs):
        # All tables are indexed by the same id column.
        return pd.read_csv(f"{data_dir}/{name}", index_col=index_col, **kwargs)

    # `.squeeze("columns")` replaces read_csv's `squeeze=True`, which newer
    # pandas releases no longer accept.
    groups = read_table("train_drug.csv", dtype=dtype).squeeze("columns")
    train_features = read_table("train_features.csv", dtype=dtype)
    X_test = read_table("test_features.csv", dtype=dtype)
    X = train_features.select_dtypes("number")
    Y_nonscored = read_table("train_targets_nonscored.csv")
    Y = read_table("train_targets_scored.csv")

    columns = Y.columns

    return X, Y, Y_nonscored, train_features, columns, groups, X_test

# Load every table once; the following cells read these notebook-level names.
X, Y, Y_nonscored, train_features, columns, groups, X_test = load_data()

In [15]:
# Disabled alternative: load an already-fitted clipper instead of fitting one here.
#with open("../input/mlpclassifierfit/clipped_features.pkl", "rb") as f:
#    clipped_features = pickle.load(f)
#X = clipped_features.transform(X)
# Fit the quantile clipper on the training features and clip them.
# NOTE: X is rebound to the clipped frame, so re-running this cell fits on
# already-clipped data.
clipped_features = ClippedFeatures()
X = clipped_features.fit_transform(X)

# Persist the fitted clipper in the working directory.
with open("clipped_features.pkl", "wb") as f:
    pickle.dump(clipped_features, f)

# Disabled: append the non-scored targets that have at least one positive to Y.
#Y_nonscored = Y_nonscored.loc[:, Y_nonscored.sum(axis=0) > 0]
#Y = pd.concat([Y, Y_nonscored], axis=1)

In [16]:
# Number of training rows and feature columns after clipping.
train_size, n_features = X.shape
# _, n_classes_nonscored = Y_nonscored.shape
# Number of target columns; used as the classifier's output width.
_, n_classes = Y.shape

In [17]:
# Hyperparameters read by the training cell.
batch_size = 8  # batch size of the Cutmix generator
factor = 0.5  # handed to build_callbacks as the ReduceLROnPlateau factor
label_smoothing = 1e-03  # label_smoothing of the BinaryCrossentropy loss
lr = 0.001  # learning rate of AdaBeliefOptimizer
n_seeds = 5  # how many seeded repetitions of the full CV are averaged
n_splits = 5  # folds per repetition
patience = 30  # handed to build_callbacks as the EarlyStopping patience
shuffle = True  # shuffle flag of the CV splitter
# Keyword arguments forwarded to StackedTabNetClassifier.
params = {
    "batch_momentum": 0.95,
    "feature_dim": 512,
    "norm_type": "batch",
    "num_decision_steps": 1,
    "num_layers": 2,
}
# Keyword arguments forwarded to the Keras training call.
fit_params = {"epochs": 1_000, "verbose": 0}
#fit_params = {"epochs": 80, "verbose": 0}

In [None]:
%%time
Y_pred = np.zeros((train_size, n_classes))
Y_pred = pd.DataFrame(Y_pred, columns=Y.columns, index=Y.index)

for i in range(n_seeds):
    set_seed(seed=i)

    cv = MultilabelStratifiedGroupKFold(
        n_splits=n_splits, random_state=i, shuffle=shuffle
    )

    for j, (train, valid) in enumerate(cv.split(X, Y, groups)):
        model_path = f"model_seed_{i}_fold_{j}.h5"

        K.clear_session()
        model = StackedTabNetClassifier(
                num_classes=n_classes, num_features=n_features, **params
        )
        loss = tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing)
        optimizer = AdaBeliefOptimizer(learning_rate=lr)

        model.compile(loss=loss, optimizer=optimizer)

        generator = Cutmix(X.iloc[train], Y.iloc[train], batch_size=batch_size)
        callbacks = build_callbacks(model_path, factor=factor, patience=patience)
        history = model.fit_generator(
            generator,
            callbacks=callbacks,
            validation_data=(X.iloc[valid], Y.iloc[valid]),
            **fit_params,
        )

        model.load_weights(model_path)

        Y_pred.iloc[valid] += model.predict(X.iloc[valid]) / n_seeds

Y_pred[train_features["cp_type"] == "ctl_vehicle"] = 0.0

with open("Y_pred.pkl", "wb") as f:
    pickle.dump(Y_pred[columns], f)



[31mPlease check your arguments if you have upgraded adabelief-tf from version 0.0.1.
[31mModifications to default arguments:
[31m                           eps  weight_decouple    rectify
-----------------------  -----  -----------------  -------------
adabelief-tf=0.0.1       1e-08  Not supported      Not supported
Current version (0.1.0)  1e-14  supported          default: True
[31mFor a complete table of recommended hyperparameters, see
[31mhttps://github.com/juntang-zhuang/Adabelief-Optimizer
[0m
Instructions for updating:
Please use Model.fit, which supports generators.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

[31mPlease check your arguments if you have upgraded adabelief-tf from version 0.0.1.
[31mModifications to d



[31mPlease check your arguments if you have upgraded adabelief-tf from version 0.0.1.
[31mModifications to default arguments:
[31m                           eps  weight_decouple    rectify
-----------------------  -----  -----------------  -------------
adabelief-tf=0.0.1       1e-08  Not supported      Not supported
Current version (0.1.0)  1e-14  supported          default: True
[31mFor a complete table of recommended hyperparameters, see
[31mhttps://github.com/juntang-zhuang/Adabelief-Optimizer
[0m


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

[31mPlease check your arguments if you have upgraded adabelief-tf from version 0.0.1.
[31mModifications to default arguments:
[31m                           eps  weight_decouple    re



[31mPlease check your arguments if you have upgraded adabelief-tf from version 0.0.1.
[31mModifications to default arguments:
[31m                           eps  weight_decouple    rectify
-----------------------  -----  -----------------  -------------
adabelief-tf=0.0.1       1e-08  Not supported      Not supported
Current version (0.1.0)  1e-14  supported          default: True
[31mFor a complete table of recommended hyperparameters, see
[31mhttps://github.com/juntang-zhuang/Adabelief-Optimizer
[0m


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

[31mPlease check your arguments if you have upgraded adabelief-tf from version 0.0.1.
[31mModifications to default arguments:
[31m                           eps  weight_decouple    re



[31mPlease check your arguments if you have upgraded adabelief-tf from version 0.0.1.
[31mModifications to default arguments:
[31m                           eps  weight_decouple    rectify
-----------------------  -----  -----------------  -------------
adabelief-tf=0.0.1       1e-08  Not supported      Not supported
Current version (0.1.0)  1e-14  supported          default: True
[31mFor a complete table of recommended hyperparameters, see
[31mhttps://github.com/juntang-zhuang/Adabelief-Optimizer
[0m


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

[31mPlease check your arguments if you have upgraded adabelief-tf from version 0.0.1.
[31mModifications to default arguments:
[31m                           eps  weight_decouple    re

In [None]:
# Mean column-wise log loss of the out-of-fold predictions.
score(Y[columns], Y_pred[columns])

# predict test

In [None]:
# Rebuild the architecture with the same hyperparameters used for training.
model = StackedTabNetClassifier(num_classes=n_classes, num_features=n_features, **params)
# One dummy forward pass before loading weights — presumably so the subclassed
# model's variables exist; confirm.
model(np.zeros((1, n_features)))
model.load_weights(f"./model_seed_0_fold_0.h5")
# NOTE(review): the heading says "predict test", but this predicts on the
# training features X rather than X_test — confirm which is intended.
model.predict(X)