<center><h2 style='color:red'>MoA | Keras Multilabel Classifier NN | Starter </h2></center><hr>

## What is new in this Kernel?
1. Process all 'cp' columns.
2. Training is done from scratch, controlled using (ReduceLROnPlateau, EarlyStopping, LearningRateScheduler).
3. Add WeightNormalization.
4. Model checkpoints (from https://www.kaggle.com/ravy101/drug-moa-tf-keras-starter kernel)
5. Used AdamW as the optimizer with an initial LR and **weight decay**
6. Used 5-fold MultilabelStratifiedKFold, averaged over 8 seeds per activation.
7. Put zeros for `ctl_vehicle` predictions.

**Updates:**<br>
**V3:** (0.01888) public score.<br>
**V4:** Add **elu** activation to Dense layer<br>
**V7:** Use a different model architecture with 100 epochs<br>
**V9:** New baseline model<br>
**V10:** V9 + different data process.<br>
**V11:** Ensemble 2 layer and 3 layer model results.<br>
**V12:** Use 2 baseline models with different activation functions.<br>
**V16:** Apply RankGauss (from https://www.kaggle.com/kushal1506/moa-pytorch-0-01859-rankgauss-pca-nn/ kernel)<br>


<hr><h4>Please <span style='color:red'>UPVOTE</span> if you find it useful. Feedback is also very much appreciated.</h4>

In [None]:
import sys
sys.path.append('../input/iterative-stratification/iterative-stratification-master')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

In [None]:
import warnings
# Silence every warning for the rest of the run; note that this also hides
# library deprecation notices (e.g. from pandas or TensorFlow).
warnings.filterwarnings("ignore")

In [None]:
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow.keras.layers as L
import tensorflow.keras.models as M
import tensorflow.keras.backend as K

from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from tensorflow.keras.losses import BinaryCrossentropy
import tensorflow_addons as tfa

from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import QuantileTransformer
from sklearn.model_selection import KFold
from sklearn.metrics import log_loss
from sklearn import preprocessing

from sklearn.decomposition import PCA

from tqdm.notebook import tqdm

import math

In [None]:
# Load the raw competition tables from the Kaggle input directory.
train_features = pd.read_csv('../input/lish-moa/train_features.csv')
train_targets = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
test_features = pd.read_csv('../input/lish-moa/test_features.csv')

# Stack the train and test feature rows. pd.concat is used instead of
# DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0;
# like append, it keeps each frame's original index labels.
data = pd.concat([train_features, test_features])

# Submission template; its columns are reused for the prediction frames below.
ss = pd.read_csv('../input/lish-moa/sample_submission.csv')

In [None]:
# Only rows whose cp_type is not 'ctl_vehicle' are kept for training; the
# same row mask is applied to the features and to the targets so they stay
# aligned, and both get a fresh 0..n-1 index.
treated_rows = train_features['cp_type'] != 'ctl_vehicle'

target = (
    train_targets[treated_rows]
    .reset_index(drop=True)
    .drop(columns=['sig_id'])
)
train = (
    train_features[treated_rows]
    .reset_index(drop=True)
    .drop(columns=['sig_id', 'cp_type'])
)

# Every test row is kept; only the id and cp_type columns are removed.
test = test_features.drop(columns=['sig_id', 'cp_type'])

In [None]:
def preprocess(df):
    """Return a copy of ``df`` with the ``cp_dose``/``cp_time`` columns encoded as numbers.

    ``cp_dose`` is mapped 'D1' -> 0, 'D2' -> 1 and ``cp_time`` is mapped
    24 -> 0, 48 -> 1, 72 -> 2. Values outside these mappings become NaN
    (``Series.map`` semantics). The input frame is not modified.
    """
    df = df.copy()
    # Replace the whole column (df[col] = ...) rather than writing through
    # df.loc[:, col] = ...: since pandas 2.0 the .loc form sets values in
    # place, which leaves 'cp_dose' with object dtype and turns df.values
    # into an object array that cannot be fed to the network.
    df['cp_dose'] = df['cp_dose'].map({'D1': 0, 'D2': 1})
    df['cp_time'] = df['cp_time'].map({24: 0, 48: 1, 72: 2})
    return df

# Encode the cp_* columns of both splits with the same mapping.
train = preprocess(train)
test = preprocess(test)
# Stacked train + test rows, used further down to fit the per-column quantile
# transformers. pd.concat replaces DataFrame.append, which was removed in
# pandas 2.0; the original index labels of both frames are kept.
data = pd.concat([train, test])

In [None]:
# Feature columns whose names start with 'g-' and 'c-' respectively, in the
# order they appear in the raw training table.
g_cols = list(filter(lambda name: name.startswith('g-'), train_features.columns))
c_cols = list(filter(lambda name: name.startswith('c-'), train_features.columns))

In [None]:
# Rank-Gauss every 'g-' and 'c-' column: one quantile transformer with a
# normal output distribution is fitted per column on the stacked train+test
# values (`data`) and then applied to the train and test columns separately.
for feature in g_cols + c_cols:
    rank_gauss = QuantileTransformer(
        n_quantiles=250, random_state=321, output_distribution="normal"
    )
    # scikit-learn expects a 2-D (n_samples, 1) array for a single feature.
    rank_gauss.fit(data[feature].values.reshape(-1, 1))

    for frame in (train, test):
        as_column = frame[feature].values.reshape(-1, 1)
        frame[feature] = rank_gauss.transform(as_column).ravel()

In [None]:
# Smoothing amount: it bounds the clipped predictions below and is also the
# label_smoothing value passed to the training loss in create_model.
somthing_rate = 5e-4
P_MIN, P_MAX = somthing_rate, 1 - somthing_rate


def loss_fn(yt, yp):
    """Return the binary log loss of predictions ``yp`` against labels ``yt``.

    Predictions are first clipped into [P_MIN, P_MAX]; the label set is fixed
    to [0, 1].
    """
    clipped = np.clip(yp, P_MIN, P_MAX)
    return log_loss(yt, clipped, labels=[0, 1])
    

In [None]:
def create_model(num_columns, actv='relu'):
    """Build and compile the dense multilabel classifier.

    The network is ``Input(num_columns)`` followed by three stages of
    BatchNormalization -> dropout -> weight-normalised Dense with 1024, 512
    and 206 units; the last Dense uses a sigmoid activation.

    Args:
        num_columns: Number of input features.
        actv: Activation of the hidden Dense layers. When it equals 'elu',
            the 512-unit stage uses AlphaDropout and a 'selu' Dense layer
            initialised with 'lecun_normal'; for any other value that stage
            uses plain Dropout and ``actv`` itself.

    Returns:
        A ``tf.keras.Sequential`` model compiled with AdamW and binary
        cross-entropy using ``somthing_rate`` as label smoothing.
    """
    layers = tf.keras.layers
    model = tf.keras.Sequential([layers.Input(num_columns)])

    def add_stage(make_dropout, make_dense):
        # One BatchNorm -> dropout -> WeightNormalization(Dense) stage. The
        # factories are called here so layers are created in the order they
        # are added to the model.
        model.add(layers.BatchNormalization())
        model.add(make_dropout())
        model.add(tfa.layers.WeightNormalization(make_dense()))

    add_stage(
        lambda: layers.Dropout(0.2),
        lambda: layers.Dense(1024, activation=actv),
    )

    if actv == 'elu':
        add_stage(
            lambda: layers.AlphaDropout(0.25),
            lambda: layers.Dense(512, kernel_initializer='lecun_normal', activation='selu'),
        )
    else:
        add_stage(
            lambda: layers.Dropout(0.25),
            lambda: layers.Dense(512, activation=actv),
        )

    # Output stage: one sigmoid unit per target column.
    add_stage(
        lambda: layers.Dropout(0.3),
        lambda: layers.Dense(206, activation="sigmoid"),
    )

    optimizer = tfa.optimizers.AdamW(lr=1e-3, weight_decay=1e-5, clipvalue=900)
    model.compile(
        optimizer=optimizer,
        loss=BinaryCrossentropy(label_smoothing=somthing_rate),
    )
    return model

In [None]:
# No feature selection: top_feats holds every column position of `train`.
top_feats = list(range(train.shape[1]))
print("Top feats length:", len(top_feats))

In [None]:
# Build one model with the default 'relu' activation just to print its layer summary.
mod = create_model(len(top_feats))
mod.summary()

In [None]:
def metric(y_true, y_pred):
    """Return the mean of the per-target log losses.

    For every column of the global ``train_targets`` frame, ``loss_fn`` is
    evaluated on that column of ``y_true`` and of ``y_pred`` (cast to float);
    the resulting column losses are averaged.
    """
    per_target = [
        loss_fn(y_true.loc[:, name], y_pred.loc[:, name].astype(float))
        for name in train_targets.columns
    ]
    return np.mean(per_target)

In [None]:
# Repeated cross-validated training. The first S_STARTS seeds train the
# 'relu' variant and the remaining S_STARTS seeds train the 'elu' variant,
# so each variant is trained for S_STARTS (= 8) seeds x N_FOLDS folds.
N_STARTS = 16
S_STARTS = N_STARTS // 2
N_FOLDS = 5
train_targets = target

# Out-of-fold accumulators, created as float frames. Copying the integer
# target frame and adding float predictions into it in place relies on an
# implicit int -> float upcast that pandas has deprecated since 2.1.
res_relu = pd.DataFrame(0.0, index=train_targets.index, columns=train_targets.columns)
res_elu = res_relu.copy()

# Test-set accumulators: the submission template with every target zeroed.
ss_relu = ss.copy()
ss_relu.loc[:, train_targets.columns] = 0.0
ss_elu = ss_relu.copy()

ss_dict = {}

historys = dict()

# Materialise the feature/label matrices once instead of rebuilding
# DataFrame.values for every fold.
train_matrix = train.values[:, top_feats]
test_matrix = test.values[:, top_feats]
label_matrix = train_targets.values

tf.random.set_seed(42)
for seed in range(N_STARTS):
    use_relu = seed < S_STARTS
    splitter = MultilabelStratifiedKFold(n_splits=N_FOLDS, random_state=seed, shuffle=True)
    for n, (tr, te) in enumerate(splitter.split(train_targets, train_targets)):
        print(f"======{label_matrix[tr].shape}========{label_matrix[te].shape}=====")

        if use_relu:
            print(f'Seed: {seed} => Fold: {n} ==> (RELU MODEL)')
            model = create_model(len(top_feats), actv='relu')
        else:
            print(f'Seed: {seed} => Fold: {n} ==> (ELU MODEL)')
            model = create_model(len(top_feats), actv='elu')

        checkpoint_path = f'repeat:{seed}_Fold:{n}.hdf5'
        reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.2, min_lr=1e-6, patience=4, verbose=1, mode='min')
        cb_checkpt = ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True,
                                     save_weights_only=True, mode='min')
        early = EarlyStopping(monitor="val_loss", mode="min", restore_best_weights=True, patience=10, verbose=1)

        history = model.fit(train_matrix[tr],
                            label_matrix[tr],
                            validation_data=(train_matrix[te], label_matrix[te]),
                            epochs=60, batch_size=128,
                            callbacks=[reduce_lr_loss, cb_checkpt, early], verbose=2
                            )

        # NOTE: the key depends on the seed only, so each fold overwrites the
        # previous one and only the last fold's history per seed is retained.
        historys[f'history_{seed+1}'] = history
        print("Model History Saved.")

        # Predict with the best checkpoint (lowest val_loss) of this fold.
        model.load_weights(checkpoint_path)

        test_predict = model.predict(test_matrix)
        val_predict = model.predict(train_matrix[te])

        if use_relu:
            ss_relu.loc[:, train_targets.columns] += test_predict
            res_relu.loc[te, train_targets.columns] += val_predict
        else:
            ss_elu.loc[:, train_targets.columns] += test_predict
            res_elu.loc[te, train_targets.columns] += val_predict

        fold_score = metric(train_targets.loc[te, train_targets.columns],
                            pd.DataFrame(val_predict, columns=train_targets.columns))
        print(f'OOF Metric For SEED {seed} => FOLD {n} : {fold_score}')
        print('+-' * 10)

# Test predictions were summed over every fold of every seed of a variant;
# each training row is in the validation fold exactly once per seed, so the
# OOF sums are divided by the number of seeds only. N_FOLDS is used here
# instead of the loop variable `n` left over from the last iteration.
ss_relu.loc[:, train_targets.columns] /= (N_FOLDS * S_STARTS)
res_relu.loc[:, train_targets.columns] /= S_STARTS

ss_elu.loc[:, train_targets.columns] /= (N_FOLDS * S_STARTS)
res_elu.loc[:, train_targets.columns] /= S_STARTS

In [None]:
# Show Model loss in plots
import matplotlib.pyplot as plt

# Average the loss curves of every stored run over (at most) the first 40
# epochs. The accumulators are built once, outside the loop, so all runs
# contribute; runs that stopped early are NaN-padded and nanmean skips the
# missing epochs instead of failing on rows of unequal length.
MAX_PLOT_EPOCHS = 40
loss = np.full((len(historys), MAX_PLOT_EPOCHS), np.nan)
val_loss = np.full((len(historys), MAX_PLOT_EPOCHS), np.nan)
for row, run in enumerate(historys.values()):
    train_curve = run.history['loss'][:MAX_PLOT_EPOCHS]
    val_curve = run.history['val_loss'][:MAX_PLOT_EPOCHS]
    loss[row, :len(train_curve)] = train_curve
    val_loss[row, :len(val_curve)] = val_curve

plt.figure(figsize = (15, 6))
plt.plot(np.nanmean(loss, axis=0))
plt.plot(np.nanmean(val_loss, axis=0))
plt.yscale('log')
plt.yticks(ticks=[1,1e-1,1e-2])
plt.xlabel('Epochs')
plt.ylabel('Average Logloss')
plt.legend(['Training','Validation'])

In [None]:
# Overall out-of-fold score of each activation variant, computed from the
# seed-averaged OOF predictions against the training targets.
print(f'OOF Metric (relu): {metric(train_targets, res_relu)}')
print(f'OOF Metric (elu): {metric(train_targets, res_elu)}')

In [None]:
# Write the averaged test predictions of each variant to its own CSV (no index column).
ss_relu.to_csv('submission_relu.csv', index=False)
ss_elu.to_csv('submission_elu.csv', index=False)

In [None]:
# Blend the two variants with equal weights.
target_cols = list(ss_relu.columns[1:])
preds = [ss_relu, ss_elu]

# Average all target columns in one vectorised step instead of inserting the
# columns one at a time into an initially empty frame.
blended = sum(frame[target_cols] for frame in preds) / len(preds)
avr_sub = pd.concat([ss_relu[['sig_id']], blended], axis=1)

# Zero the predictions of 'ctl_vehicle' test samples, as the notebook's
# change list states. Such rows were excluded from training, so the models
# never saw them. Rows are matched by sig_id rather than by position.
ctl_ids = test_features.loc[test_features['cp_type'] == 'ctl_vehicle', 'sig_id']
avr_sub.loc[avr_sub['sig_id'].isin(ctl_ids), target_cols] = 0

preds.append(avr_sub)
avr_sub.head()

In [None]:
# Final submission file: the blended predictions, written without the index column.
avr_sub.to_csv('submission.csv', index=False)

Kernel is still under modification. **<span style='color:red'>Feedback</span>** is also very much appreciated.
Please **<span style='color:red'>UPVOTE</span>** if you find it useful. 
