<div>
    <img src="https://storage.googleapis.com/kaggle-datasets-images/88836/204662/70c8793a1e85f544086a1d4b19951b51/dataset-cover.jpg"/>
</div>

In [None]:
!pip install -qq tensorflow_decision_forests

from scipy.optimize import minimize
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn import preprocessing
from sklearn.metrics import log_loss
from sklearn.utils import shuffle

import math
import numpy as np
import pandas as pd
import glob

import gc

import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import activations,callbacks
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import initializers
import tensorflow_decision_forests as tfdf

from keras.models import Model

import warnings
warnings.filterwarnings('ignore')

<h1 id="dataset" style="color:#131315; background:#cec6c4; border:0.5px dotted #b1a9c0;"> 
    <center>Dataset
        <a class="anchor-link" href="#dataset" target="_self">¶</a>
    </center>
</h1>

In [None]:
# Read every file in the dataset folder (the CSVs have no header row) and stack
# them into a single frame. The paths are sorted because glob returns them in an
# arbitrary, filesystem-dependent order.
dfs = [pd.read_csv(path, header=None)
       for path in sorted(glob.glob('../input/emg-4/*'))]

# Shuffle the rows with a fixed seed so the notebook gives the same result on
# every "Restart & Run All"; ignore_index avoids duplicated index labels.
df = shuffle(pd.concat(dfs, ignore_index=True), random_state=42)

In [None]:
# Shift every feature column (all columns except the last one) so that its
# minimum becomes 0, then downcast it to the smallest integer dtype that fits.
# The columns are addressed by their label rather than by enumeration position.
for col in df.columns[:-1]:
    df[col] = pd.to_numeric(df[col] - df[col].min(), downcast='integer')

In [None]:
# Preview the first rows of the combined frame.
df.head()

In [None]:
# Hold out 30% of the rows as the test split (fixed seed for reproducibility)
train, test = train_test_split(
    df,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Class labels (column 64) as-is, used by the boosted trees
target = train[64]

# One-hot encoding of the same labels, used by the ANN
targets = pd.get_dummies(target)

<h1 id="adds" style="color:#131315; background:#cec6c4; border:0.5px dotted #b1a9c0;"> 
    <center>Additionals
        <a class="anchor-link" href="#adds" target="_self">¶</a>
    </center>
</h1>

In [None]:
# Shared categorical cross-entropy loss object used by the metric below
cce = tf.keras.losses.CategoricalCrossentropy()


def custom_metric(y_true, y_pred):
    """Categorical cross-entropy computed on clipped predictions.

    The predictions are clipped to [1e-15, 1 - 1e-15] before they are passed
    to the cross-entropy. Keras reports this metric on validation data as
    'val_custom_metric'.
    """
    clipped = K.clip(y_pred, 1e-15, 1-1e-15)
    return K.mean(cce(y_true, clipped))

# Stop once 'val_custom_metric' has not improved by at least 1e-05 for
# 5 epochs, and roll the model back to its best weights.
es = tf.keras.callbacks.EarlyStopping(
    monitor='val_custom_metric',
    mode='min',
    min_delta=1e-05,
    patience=5,
    baseline=None,
    restore_best_weights=True,
    verbose=0,
)

# Multiply the learning rate by 0.7 after 2 epochs without improvement
# of 'val_custom_metric'.
plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_custom_metric',
    mode='min',
    factor=0.7,
    patience=2,
    verbose=0,
)

<h1 id="model" style="color:#131315; background:#cec6c4; border:0.5px dotted #b1a9c0;"> 
    <center>Model
        <a class="anchor-link" href="#model" target="_self">¶</a>
    </center>
</h1>

In [None]:
def conv_model():
    """Build the embedding + 1x1 convolution network with skip connections.

    Each of the 64 integer inputs is embedded into 7 dimensions, passed through
    a kernel-size-1 Conv1D with 12 filters and flattened. Three weight-normalised
    32-unit dense layers follow; the second and third also receive the
    concatenated outputs of the earlier stages.

    Returns:
        tuple: ``(model_conv, output1)`` where ``model_conv`` is the Keras model
        ending in a 4-unit softmax and ``output1`` is the tensor produced by the
        last 32-unit hidden layer (the input of the softmax layer), which lets
        callers build a head-less feature extractor.
    """
    # The shape has to be a tuple: `(64)` is just the integer 64.
    conv_inputs = layers.Input(shape=(64,))

    # Embedding indices must lie in [0, 256).
    embed = layers.Embedding(input_dim=256,
                             output_dim=7,
                             embeddings_regularizer='l2')(conv_inputs)
    embed = layers.Conv1D(12, 1, activation='relu')(embed)
    embed = layers.Flatten()(embed)
    hidden = layers.Dropout(0.3)(embed)

    hidden = tfa.layers.WeightNormalization(
        layers.Dense(
            units=32,
            activation='selu',
            kernel_initializer="lecun_normal"))(hidden)

    # Second dense stage sees the flattened embedding and the first hidden layer.
    output = layers.Dropout(0.3)(layers.Concatenate()([embed, hidden]))
    output = tfa.layers.WeightNormalization(
        layers.Dense(
            units=32,
            activation='relu',
            kernel_initializer="lecun_normal"))(output)

    # Third dense stage sees every previous stage.
    output = layers.Dropout(0.4)(layers.Concatenate()([embed, hidden, output]))
    output1 = tfa.layers.WeightNormalization(
        layers.Dense(
            units=32,
            activation='relu',
            kernel_initializer="lecun_normal"))(output)

    # Classification head: 4-way softmax.
    conv_outputs = layers.Dense(
        units=4,
        activation='softmax',
        kernel_initializer="lecun_normal")(output1)

    model_conv = Model(conv_inputs, conv_outputs)

    return model_conv, output1

<h1 id="boosted" style="color:#131315; background:#cec6c4; border:0.5px dotted #b1a9c0;"> 
    <center>Boosted Trees Parameters
        <a class="anchor-link" href="#boosted" target="_self">¶</a>
    </center>
</h1>

In [None]:
def get_params(model_conv, output1):
    """Build the keyword arguments for the gradient boosted trees model.

    Args:
        model_conv: Keras model whose inputs feed the feature extractor.
        output1: Tensor inside ``model_conv`` that becomes the extractor's output.

    Returns:
        dict: Hyper-parameters, including a ``'preprocessing'`` entry holding a
        Keras model that maps ``model_conv.inputs`` to ``output1`` (the network
        without its softmax head).
    """
    nn_model_without_head = tf.keras.models.Model(inputs=model_conv.inputs,
                                                  outputs=output1)

    # 'selective_gradient_boosting_ratio' used to be listed twice with the same
    # value; it is kept once.
    param = {
        'preprocessing': nn_model_without_head,
        'use_hessian_gain': True,
        'selective_gradient_boosting_ratio': 0.2,
        'categorical_algorithm': 'RANDOM',
        'num_trees': 300,
        'subsample': 0.7870499728626467,
        'shrinkage': 0.018653897565237845,
        'max_depth': 3,
        'min_examples': 11,
        'l1_regularization': 3.5480988121992953,
        'l2_categorical_regularization': 0.11074398839677566
    }

    return param

<h1 id="train" style="color:#131315; background:#cec6c4; border:0.5px dotted #b1a9c0;"> 
    <center>Training
        <a class="anchor-link" href="#train" target="_self">¶</a>
    </center>
</h1>

In [None]:
# Cross-validation settings
N_FOLDS = 3
SEED = 41
EPOCH = 60

skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)

# Out-of-fold (train) and test probability buffers, 4 classes each.
# "_a" buffers belong to the ANN, "_g" buffers to the gradient boosted trees.
oof_NN_a, pred_NN_a = np.zeros((len(train), 4)), np.zeros((len(test), 4))
oof_NN_g, pred_NN_g = np.zeros((len(train), 4)), np.zeros((len(test), 4))

# Per-fold validation labels and predictions, collected for the weight optimization
y_valids = []
NN_a_train_preds, NN_a_test_preds = [], []
NN_g_train_preds, NN_g_test_preds = [], []

In [None]:
# Feature columns (everything except the last column) of both splits; they do
# not change between folds, so they are sliced once.
X_all = train.iloc[:, :-1]
X_holdout = test.iloc[:, :-1]

for fold, (tr_idx, ts_idx) in enumerate(skf.split(train, target)):
    print(f"\n - - - - Training fold: {fold + 1} - - - -\n")

    # One-hot labels (y_*) feed the ANN, plain labels (y_*1) feed the GBT.
    X_train = X_all.iloc[tr_idx]
    y_train = targets.iloc[tr_idx]
    y_train1 = target.iloc[tr_idx]
    X_test = X_all.iloc[ts_idx]
    y_test = targets.iloc[ts_idx]
    y_test1 = target.iloc[ts_idx]

    K.clear_session()

    # ANN
    model_conv, output1 = conv_model()

    print("\nTraining of ANN model\n")
    model_conv.compile(loss='categorical_crossentropy',
                       optimizer=keras.optimizers.Adam(learning_rate=2e-4),
                       metrics=[custom_metric])
    model_conv.fit(X_train, y_train,
                   batch_size=256, epochs=EPOCH,
                   validation_data=(X_test, y_test),
                   callbacks=[es, plateau],
                   verbose=0)

    # Out-of-fold and test predictions of the ANN. The test prediction is
    # computed once and reused for both the running average and the per-fold list.
    pred_a = model_conv.predict(X_test)
    oof_NN_a[ts_idx] += pred_a
    score_NN_a = log_loss(y_test, pred_a)
    test_pred_a = model_conv.predict(X_holdout)
    pred_NN_a += test_pred_a / N_FOLDS

    # GBT stacked on the ANN without its softmax head
    param = get_params(model_conv, output1)

    model_Gradient_with_NN = tfdf.keras.GradientBoostedTreesModel(**param)

    print("\nTraining of GBT with ANN model\n")
    metrics = [tf.keras.metrics.CategoricalCrossentropy()]
    model_Gradient_with_NN.compile(metrics=metrics)
    model_Gradient_with_NN.fit(np.array(X_train), np.array(y_train1))

    pred_g = model_Gradient_with_NN.predict(X_test)
    oof_NN_g[ts_idx] += pred_g
    score_GBT_NN = log_loss(y_test1, pred_g)
    # Accumulate the fold-averaged GBT test prediction as well, so that
    # pred_NN_g does not stay all zeros.
    test_pred_g = model_Gradient_with_NN.predict(X_holdout)
    pred_NN_g += test_pred_g / N_FOLDS

    print(f"\nFOLD {fold + 1} Score of ANN Model: {score_NN_a}")
    print(f"\nFOLD {fold + 1} Score of GBT after ANN: {score_GBT_NN}\n")

    # prepare weight optimization
    y_valids.append(y_test1)

    NN_a_train_preds.append(pred_a)
    NN_a_test_preds.append(test_pred_a)

    NN_g_train_preds.append(pred_g)
    NN_g_test_preds.append(test_pred_g)

<h1 id="optimization" style="color:#131315; background:#cec6c4; border:0.5px dotted #b1a9c0;"> 
    <center>Optimization
        <a class="anchor-link" href="#optimization" target="_self">¶</a>
    </center>
</h1>

In [None]:
scores = []
weights = []

# For every fold, find the blend weights of the two models that minimise the
# log loss on that fold's validation part.
for y, NN_a_pred, NN_g_pred in zip(y_valids, NN_a_train_preds, NN_g_train_preds):
    preds = [NN_a_pred, NN_g_pred]

    # Start from an equal blend; an all-zero start would make the first
    # blended prediction all zeros.
    starting_values = [1.0 / len(preds)] * len(preds)

    # Each weight stays within [0, 1] and the weights sum to 1.
    bounds = [(0.0, 1.0)] * len(preds)
    cons = ({'type': 'eq', 'fun': lambda w: 1 - sum(w)})

    def log_weight_loss(blend_weights):
        """Log loss of the weighted sum of the fold's predictions."""
        weighted_pred = sum(w * p for w, p in zip(blend_weights, preds))
        return log_loss(y, weighted_pred)

    # SLSQP is used because Nelder-Mead cannot handle constraints, so the
    # sum-to-one condition would be ignored.
    res = minimize(log_weight_loss,
                   starting_values,
                   method='SLSQP',
                   bounds=bounds,
                   constraints=cons)

    weights.append(res['x'])
    scores.append(res['fun'])

In [None]:
# One row per fold: the optimised weight of each model, which model got the
# larger weight, that weight, and the blended log loss.
weight_columns = ['Model ANN', 'Model GBT']
results = pd.DataFrame(weights, columns=weight_columns)

# Both reductions are restricted to the numeric weight columns; taking the
# row-wise max after the string column 'better model' was added would mix
# strings and floats.
results['better model'] = results[weight_columns].idxmax(axis=1, skipna=True)
results['max_value'] = results[weight_columns].max(axis=1)
results['scores'] = scores
display(results)

<h1 id="ensemble" style="color:#131315; background:#cec6c4; border:0.5px dotted #b1a9c0;"> 
    <center>Ensemble
        <a class="anchor-link" href="#ensemble" target="_self">¶</a>
    </center>
</h1>

In [None]:
folds = N_FOLDS

# Average the per-fold optimised weights
final_weights = sum(weights)/(folds)

# Fold-averaged test predictions of each model, taken directly from the
# per-fold prediction lists
mean_ann_test = np.mean(NN_a_test_preds, axis=0)
mean_gbt_test = np.mean(NN_g_test_preds, axis=0)

# weighted preds
weighted_preds = (final_weights[0] * mean_ann_test
                  + final_weights[1] * mean_gbt_test)

# blends preds: plain 50/50 average of the two models
blends = (mean_gbt_test + mean_ann_test)/2

<h1 id="accuracy" style="color:#131315; background:#cec6c4; border:0.5px dotted #b1a9c0;"> 
    <center>Accuracy
        <a class="anchor-link" href="#accuracy" target="_self">¶</a>
    </center>
</h1>

In [None]:
def get_accuracy(df, preds):
    """Fraction of rows whose last-column value equals the arg-max of ``preds``.

    Args:
        df: DataFrame whose last column holds the true class index.
        preds: 2-D array of per-class scores, one row per row of ``df``.

    Returns:
        float: number of matching rows divided by ``len(df)``.
    """
    true_labels = df.iloc[:, -1].values
    predicted_labels = np.argmax(preds, axis=1)
    n_correct = int(np.count_nonzero(true_labels == predicted_labels))
    return n_correct / len(df)

In [None]:
# Test-split accuracy of the optimizer-weighted ensemble
weighted_acc = get_accuracy(df=test, preds=weighted_preds)
print("Weighted Accuracy:%1.3f" % weighted_acc)

In [None]:
# Test-split accuracy of the plain average of both models
blends_acc = get_accuracy(df=test, preds=blends)
print("Blends Accuracy:%1.3f" % blends_acc)

<h1 id="reference" style="color:#131315; background:#cec6c4; border:0.5px dotted #b1a9c0;"> 
    <center>Reference
        <a class="anchor-link" href="#reference" target="_self">¶</a>
    </center>
</h1>

The models are based on the Tabular June 2021 notebooks and on [Laurent Pourchot's Gradient Boosting Notebook](https://www.kaggle.com/pourchot/a-neural-network-improved-by-a-gradient-boosting).