In [None]:
import sys
sys.path.append('../input/iterative-stratification')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L
import tensorflow.keras.models as M
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
import tensorflow_addons as tfa
from sklearn.model_selection import KFold
from sklearn.metrics import log_loss
from tqdm.notebook import tqdm

In [None]:
# Feature tables are read from the 'prep-lishmoa' dataset (presumably a
# preprocessed copy of the competition features — confirm), while the scored
# targets and the submission template come from the 'lish-moa' dataset.
train = pd.read_csv('../input/prep-lishmoa/data/train_features.csv')
train_targets = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
test = pd.read_csv('../input/prep-lishmoa/data/test_features.csv')

# Submission template; its target columns are overwritten with predictions later.
ss = pd.read_csv('../input/lish-moa/sample_submission.csv')

In [None]:
# (rows, columns) of the training feature table.
train.shape

In [None]:
# (rows, columns) of the scored target table.
train_targets.shape

In [None]:
# (rows, columns) of the test feature table.
test.shape

In [None]:
# Preview the first ten training rows.
train.head(10)

In [None]:
# Remove the 'sig_id' column so that `.values` of each frame can be passed
# straight to the models. The membership check keeps this cell idempotent:
# a plain `del` raises KeyError when the cell is run a second time.
for frame in (train_targets, train, test):
    if 'sig_id' in frame.columns:
        del frame['sig_id']

In [None]:
def create_model(num_columns):
    """Build and compile the network used for the permutation-importance pass.

    Architecture: input of width ``num_columns`` -> BatchNorm -> two blocks of
    (weight-normalised Dense(2048, relu) -> BatchNorm -> Dropout(0.5)) ->
    weight-normalised Dense(206, sigmoid).

    Args:
        num_columns: number of input features.

    Returns:
        A compiled ``tf.keras.Sequential`` model trained with binary
        cross-entropy and an Adam optimizer wrapped in Lookahead
        (``sync_period=10``).
    """
    keras_layers = tf.keras.layers
    weight_norm = tfa.layers.WeightNormalization

    stack = [
        keras_layers.Input(num_columns),
        keras_layers.BatchNormalization(),
    ]
    # Two identical hidden blocks.
    for _ in range(2):
        stack.append(weight_norm(keras_layers.Dense(2048, activation="relu")))
        stack.append(keras_layers.BatchNormalization())
        stack.append(keras_layers.Dropout(0.5))
    stack.append(weight_norm(keras_layers.Dense(206, activation="sigmoid")))

    network = tf.keras.Sequential(stack)

    base_optimizer = tf.optimizers.Adam()
    network.compile(
        optimizer=tfa.optimizers.Lookahead(base_optimizer, sync_period=10),
        loss='binary_crossentropy',
    )
    return network

In [None]:
def build_model(num_columns):
    """Build and compile the network used for the final cross-validated training.

    Architecture: input of width ``num_columns`` -> BatchNorm -> Dropout(0.2),
    followed by three hidden blocks of (weight-normalised Dense -> BatchNorm ->
    Dropout) with (units, activation, dropout) of (1400, relu, 0.4),
    (840, sigmoid, 0.3) and (588, relu, 0.3), and a weight-normalised
    Dense(206, sigmoid) output layer.

    Args:
        num_columns: number of input features.

    Returns:
        A compiled ``tf.keras.Sequential`` model using binary cross-entropy,
        an Adam optimizer wrapped in Lookahead (``sync_period=5``) and the
        "accuracy" metric.
    """
    keras_layers = tf.keras.layers
    weight_norm = tfa.layers.WeightNormalization

    # (units, activation, dropout rate applied after the block's BatchNorm)
    hidden_spec = [
        (1400, "relu", 0.4),
        (840, "sigmoid", 0.3),
        (588, "relu", 0.3),
    ]

    stack = [
        keras_layers.Input(num_columns),
        keras_layers.BatchNormalization(),
        keras_layers.Dropout(0.2),
    ]
    for units, activation, drop_rate in hidden_spec:
        stack.append(weight_norm(keras_layers.Dense(units, activation=activation)))
        stack.append(keras_layers.BatchNormalization())
        stack.append(keras_layers.Dropout(drop_rate))
    stack.append(weight_norm(keras_layers.Dense(206, activation="sigmoid")))

    network = tf.keras.Sequential(stack)

    base_optimizer = tf.optimizers.Adam()
    network.compile(
        optimizer=tfa.optimizers.Lookahead(base_optimizer, sync_period=5),
        loss='binary_crossentropy',
        metrics=["accuracy"],
    )
    return network

In [None]:
### Permutation importance ~20 minutes
from typing import Tuple, List, Callable, Any

from sklearn.utils import check_random_state  # type: ignore

# tf.random.set_seed() returns None, so the integer seed is kept in its own
# variable; it is reused further down as the KFold `random_state`, which would
# otherwise be None (i.e. a different split on every run).
seed = 42
tf.random.set_seed(seed)
### from eli5
def iter_shuffled(X, columns_to_shuffle=None, pre_shuffle=False,
                  random_state=None):
    """Yield copies of ``X`` in which one column at a time has been shuffled.

    The same working array is yielded on every iteration and is restored to
    the original column values after the consumer resumes the generator, so
    each yielded array must be used before advancing to the next one.

    Args:
        X: 2-D array indexable as ``X[:, column]``; it is never modified.
        columns_to_shuffle: iterable of column indices; defaults to every
            column of ``X``.
        pre_shuffle: if true, rows of a copy of ``X`` are shuffled once up
            front and columns are taken from that copy; otherwise each column
            is shuffled independently when it is visited.
        random_state: anything accepted by ``check_random_state``.

    Yields:
        The working array with the current column shuffled.
    """
    rng = check_random_state(random_state)

    column_ids = range(X.shape[1]) if columns_to_shuffle is None else columns_to_shuffle

    # Optional donor array whose rows are permuted once for all columns.
    donor = None
    if pre_shuffle:
        donor = X.copy()
        rng.shuffle(donor)

    work = X.copy()
    for col in tqdm(column_ids):
        if donor is not None:
            work[:, col] = donor[:, col]
        else:
            # Shuffles the column view in place inside the working copy.
            rng.shuffle(work[:, col])
        yield work
        # Undo the shuffle before moving on to the next column.
        work[:, col] = X[:, col]



def get_score_importances(score_func, X, y, n_iter=5,
                          columns_to_shuffle=None, random_state=None):
    """Compute permutation-based score changes for the columns of ``X``.

    Args:
        score_func: callable ``score_func(X, y) -> float``.
        X: feature array passed to ``score_func``.
        y: target array passed to ``score_func``.
        n_iter: number of independent shuffling rounds.
        columns_to_shuffle: optional iterable of column indices; all columns
            when ``None``.
        random_state: anything accepted by ``check_random_state``; the
            resulting generator is shared across all rounds.

    Returns:
        ``(base_score, scores_decreases)`` where ``base_score`` is the score
        on the unshuffled data and ``scores_decreases`` is a list with one
        entry per round, each a list of ``base_score - shuffled_score`` values
        (one per shuffled column).
    """
    rng = check_random_state(random_state)
    base_score = score_func(X, y)

    scores_decreases = [
        _get_scores_shufled(
            score_func, X, y,
            columns_to_shuffle=columns_to_shuffle,
            random_state=rng,
            base_score=base_score,
        )
        for _ in range(n_iter)
    ]

    return base_score, scores_decreases



def _get_scores_shufled(score_func, X, y, base_score, columns_to_shuffle=None,
                        random_state=None):
    """Return ``base_score - score`` for each single-column shuffle of ``X``.

    Each shuffled array produced by ``iter_shuffled`` is scored immediately,
    before the generator is advanced to the next column.
    """
    shuffled_views = iter_shuffled(X, columns_to_shuffle, random_state=random_state)
    return [
        -score_func(shuffled, y) + base_score
        for shuffled in shuffled_views
    ]

def metric(y_true, y_pred):
    """Mean column-wise log loss for 2-D arrays of targets and predictions.

    Columns of ``y_true`` whose sum is not greater than 1 are skipped.
    """
    per_target = [
        log_loss(y_true[:, col], y_pred[:, col])
        for col in range(y_pred.shape[1])
        if y_true[:, col].sum() > 1
    ]
    return np.mean(per_target)

# Accumulated per-feature score change (base score minus shuffled score).
perm_imp = np.zeros(train.shape[1])
for n, (tr, te) in enumerate(KFold(n_splits=2, random_state=seed, shuffle=True).split(train_targets)):
    print(f'Fold {n}')

    model = create_model(train.shape[1])
    # `min_delta` is the supported argument name; `epsilon` is only a
    # deprecated alias for it in tf.keras and triggers a warning.
    reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, min_delta=1e-4, mode='min')

    model.fit(train.values[tr],
              train_targets.values[tr],
              validation_data=(train.values[te], train_targets.values[te]),
              epochs=40, batch_size=128,
              callbacks=[reduce_lr_loss], verbose=2
             )

    def _score(X, y):
        """Score the current fold's model on (X, y) with the log-loss metric."""
        pred = model.predict(X)
        return metric(y, pred)

    # A single shuffling round on the held-out half of the data.
    base_score, local_imp = get_score_importances(_score, train.values[te], train_targets.values[te], n_iter=1, random_state=0)
    perm_imp += np.mean(local_imp, axis=0)
    print('')
    # Only the first fold is evaluated.
    break

# The score is a log loss (lower is better), so a negative
# `base_score - shuffled_score` means shuffling the feature made the loss
# worse; those features are kept.
top_feats = np.argwhere(perm_imp < 0).flatten()

In [None]:
# Column indices selected by the permutation-importance step.
top_feats

In [None]:
def metric(y_true, y_pred):
    """Mean per-target log loss for DataFrames of targets and predictions.

    This definition replaces the array-based ``metric`` defined earlier in the
    notebook. Targets are taken from the columns of the global
    ``train_targets``; ``labels=[0,1]`` is passed so that targets containing a
    single class in ``y_true`` can still be scored.
    """
    per_target = [
        log_loss(y_true.loc[:, _target], y_pred.loc[:, _target].astype(float), labels=[0,1])
        for _target in train_targets.columns
    ]
    return np.mean(per_target)

# def metric(y_true, y_pred): 
#     return tf.keras.losses.binary_crossentropy(y_true, y_pred).numpy().mean()

In [None]:
N_STARTS = 5   # number of repeated CV runs, each with a different split seed
N_FOLDS = 7    # folds per run
tf.random.set_seed(42)

# Out-of-fold accumulator. It is created as float up front: the targets are
# integer-typed, and adding float predictions into integer columns relies on
# implicit upcasting that newer pandas versions deprecate.
res = pd.DataFrame(0.0, index=train_targets.index, columns=train_targets.columns)
ss.loc[:, train_targets.columns] = 0.0

for seed in range(N_STARTS):
    for n, (tr, te) in enumerate(MultilabelStratifiedKFold(n_splits=N_FOLDS, random_state=seed, shuffle=True).split(train_targets, train_targets)):
        print(f'Fold {n}')

        model = build_model(len(top_feats))
        # checkpoint_path = f'repeat:{seed}_Fold:{n}.hdf5'
        # `min_delta` is the supported argument name; `epsilon` is only a
        # deprecated alias for it in tf.keras and triggers a warning.
        reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.15, patience=3, verbose=1, min_delta=1e-4, mode='min')
        early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, mode= 'min')
        # cb_checkpt = ModelCheckpoint(checkpoint_path, monitor = 'val_loss', verbose = 0, save_best_only = True,
        #                              save_weights_only = True, mode = 'min')

        model.fit(train.values[tr][:, top_feats],
                  train_targets.values[tr],
                  validation_data=(train.values[te][:, top_feats], train_targets.values[te]),
                  epochs=40, batch_size=128,
                  callbacks=[reduce_lr_loss, early_stop], verbose=2
                 )

        # model.load_weights(checkpoint_path)
        test_predict = model.predict(test.values[:, top_feats])
        val_predict = model.predict(train.values[te][:, top_feats])

        # Test predictions are summed over every fold of every run; each
        # training row receives exactly one OOF prediction per run.
        ss.loc[:, train_targets.columns] += test_predict
        res.loc[te, train_targets.columns] += val_predict
        print('')

# Average: test predictions over all fold models, OOF predictions over runs.
# The fold count comes from N_FOLDS rather than the leaked loop variable `n`.
ss.loc[:, train_targets.columns] /= (N_FOLDS * N_STARTS)
res.loc[:, train_targets.columns] /= N_STARTS

In [None]:
# Mean per-target log loss of the run-averaged out-of-fold predictions.
print(f'OOF Metric: {metric(train_targets, res)}')

In [None]:
# Force all target predictions to 0 for test rows whose cp_type equals 1.
# NOTE(review): presumably 1 encodes the control-vehicle rows in the
# preprocessed test file — confirm against the preprocessing step.
ss.loc[test['cp_type']==1, train_targets.columns] = 0

In [None]:
# Write the submission file without the DataFrame index.
ss.to_csv('submission.csv', index=False)