In [None]:
# https://www.kaggle.com/c/tabular-playground-series-jun-2021/discussion/248846

In [7]:
!pip install keras-tuner



In [37]:
import pandas as pd
import numpy as np

from sklearn.model_selection import StratifiedKFold
from sklearn import preprocessing
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split

from scipy.optimize import minimize

import gc

import tensorflow as tf
import tensorflow_addons as tfa

from tensorflow.keras import layers
from tensorflow.keras import activations,callbacks
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import initializers
from tensorflow.keras.utils import plot_model

import tensorflow.keras.backend as K
from tensorflow import keras
from tensorflow.keras.models import Model
from kerastuner import RandomSearch, BayesianOptimization

import warnings
warnings.filterwarnings("ignore")

from tqdm.notebook import tqdm

In [38]:
# Base seed: used for the hold-out split, the tuner, and (offset per repetition) the CV splitter.
RANDOM_STATE = 2021

# OPTIM MODES
# 1 - Only Keras Tuner
# 2 - Keras Tuner + TOP models cross-validation and submission
# 3 - TOP models (NN configuration in a dictionary - params from local experiments) cross-validation and submission

OPTIM_MODE = 3

In [39]:
# Competition tables: train/test are indexed by `id`, the sample submission keeps its default index.
train = pd.read_csv("train.csv", index_col="id")
test = pd.read_csv("test.csv", index_col="id")
submission = pd.read_csv("sample_submission.csv")

# Three views of the label: raw string, one-hot frame, and zero-based integer
# (parsed from the number after the last underscore).
target = train["target"]
targets = pd.get_dummies(train["target"])
target_optim = train["target"].map(lambda label: int(label.split("_")[-1]) - 1)

# Pre-computed additional features stored as NumPy arrays
# (presumably KNN-derived, going by the column names used later - confirm).
train_knn = np.load("add_feat_train.npy")
test_knn = np.load("add_feat_test.npy")

from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training features only, then apply the same mapping to both sets.
scaler = MinMaxScaler()
scaler.fit(train_knn)
train_knn = scaler.transform(train_knn)
test_knn = scaler.transform(test_knn)

In [40]:
# Preview the first rows of the scaled additional-feature matrix.
pd.DataFrame(train_knn).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.105567,0.080686,0.108759,0.082797,0.081172,0.078104,0.080682,0.098372,0.089345
1,0.029184,0.016157,0.020174,0.026179,0.023858,0.01239,0.027862,0.01693,0.017309
2,0.086819,0.070034,0.084438,0.069198,0.073911,0.08272,0.074481,0.08546,0.085939
3,0.233975,0.222624,0.25622,0.227895,0.203315,0.250494,0.199114,0.231126,0.226965
4,0.029336,0.019184,0.023764,0.021166,0.021726,0.027041,0.019323,0.031685,0.028023


In [41]:
# Column labels for the nine additional features appended to both frames.
KNN_COLUMNS = [f"knn_{i}" for i in range(1, 10)]

# Build the feature frame on train's own index so the column-wise concat pairs rows
# one-to-one. Without this, alignment depends on the `id` index happening to equal
# 0..N-1; any mismatch would silently yield NaN-padded rows. A row-count mismatch
# between `train` and `train_knn` now raises immediately instead.
train = pd.concat(
    [
        train.drop("target", axis=1),
        pd.DataFrame(train_knn, columns=KNN_COLUMNS, index=train.index),
    ],
    axis=1,
)

# `test` is re-indexed to 0..N-1 first, so the default index of the feature frame lines up.
test = pd.concat(
    [
        test.reset_index().drop("id", axis=1),
        pd.DataFrame(test_knn, columns=KNN_COLUMNS),
    ],
    axis=1,
    ignore_index=False,
)

# Re-attach the label as the LAST column: the CV loop selects features with `75:-1`.
train["target"] = target

In [42]:
# 80/20 hold-out split, stratified on the one-hot labels; consumed by the tuner search below.
X_train, X_valid, y_train, y_valid = train_test_split(
    train.drop("target", axis=1),
    targets,
    test_size=0.2,
    stratify=targets,
    random_state=RANDOM_STATE,
)

In [43]:
def custom_metric(y_true, y_pred):
    """Categorical cross-entropy on predictions clipped to [1e-15, 1 - 1e-15].

    Relies on the module-level `cce` loss object; its output is averaged with `K.mean`.
    """
    clipped_pred = K.clip(y_pred, 1e-15, 1 - 1e-15)
    return K.mean(cce(y_true, clipped_pred))

# Loss object used inside `custom_metric`. It is looked up when the metric is called,
# so defining it after the function is fine.
cce = tf.keras.losses.CategoricalCrossentropy()

# Early stopping on the validation value of `custom_metric`: patience of 6 epochs,
# minimum improvement 1e-5, best weights restored at the end.
es = tf.keras.callbacks.EarlyStopping(
    monitor='val_custom_metric', min_delta=0.00001, patience=6, verbose=0,
    mode='min', baseline=None, restore_best_weights=True)

# Learning-rate reduction (factor 0.04) on the same monitored metric, patience of 5 epochs.
# Both callback objects are shared by every model fitted in the CV loop.
plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_custom_metric', factor=0.04, patience=5, verbose=0,
    mode='min')

In [44]:
def model_builder(hp):
    """Build and compile the embedding + Conv1D network for one Keras Tuner trial.

    Args:
        hp: Keras Tuner hyperparameter container; every tunable value below is
            registered on it through ``hp.Int`` / ``hp.Choice`` / ``hp.Float``.

    Returns:
        A compiled Keras ``Model`` with two inputs (75 values for the embedding
        branch, 9 additional features) and a 9-way softmax output.
    """
    #--------- List of hyperparameters --------
    # This is an example to illustrate how it works.
    # Feel free to change the list of parameters; be aware that the more parameters
    # you specify, the more resources (time) the search will take.

    emb_units = hp.Int('emb_units', min_value = 7, max_value = 8, step = 1)

    dropout_rates = [0.2, 0.4] #[0.2, 0.3, 0.4]
    dropout1 = hp.Choice("drop_out1", values = dropout_rates)
    dropout2 = hp.Choice("drop_out2", values = dropout_rates)
    dropout3 = hp.Float("drop_out3", min_value = 0.0,
                        max_value = 0.5,
                        default = 0.25,
                        step = 0.05,)

    lin_nodes = [16, 64] #[16, 32, 64]
    l1_nodes = hp.Choice("l1_units", values = lin_nodes)
    l2_nodes = hp.Choice("l2_units", values = lin_nodes)
    l3_nodes = hp.Choice("l3_units", values = lin_nodes)

    learning_rate = hp.Choice("learning_rate", [1e-2]) #[1e-2, 1e-3]

    non_linears = ['relu', 'elu'] #['relu', 'selu', 'elu']
    act1 = hp.Choice('dense_act1', values = non_linears, default='relu')
    act2 = hp.Choice('dense_act2', values = non_linears, default='relu')
    act3 = hp.Choice('dense_act3', values = non_linears, default='relu')

    ker_inits = ['lecun_normal', 'he_uniform']
    ker_init1 = hp.Choice('kern_init1', values = ker_inits, default = 'lecun_normal')
    ker_init2 = hp.Choice('kern_init2', values = ker_inits, default = 'lecun_normal')
    ker_init3 = hp.Choice('kern_init3', values = ker_inits, default = 'lecun_normal')
    ker_init4 = hp.Choice('kern_init4', values = ker_inits, default = 'lecun_normal')

    # Despite its name, this value is used as the NUMBER OF FILTERS of the Conv1D
    # layer below (the kernel size is fixed at 1).
    conv_kernel = hp.Int('conv_kernel', min_value = 5, max_value = 20, step = 1)
    #--------------------------------------

    # Shapes are given as real tuples: `(75)` is just the int 75.
    conv_inputs = layers.Input(shape = (75,))
    knn_inputs = layers.Input(shape = (9,))

    #----------- Embedding layers ----------------------
    # input_dim = 353: presumably the feature values are integers in [0, 352] - TODO confirm.
    embed = layers.Embedding(input_dim = 353,
                             output_dim = emb_units,
                             embeddings_regularizer = 'l2')(conv_inputs)

    #----------- Convolution layers ----------------------
    embed = layers.Conv1D(filters = conv_kernel, kernel_size = 1, activation = 'relu')(embed)
    embed = layers.Flatten()(embed)
    hidden = layers.Dropout(dropout1)(embed)

    #----------- Residual blocks layers ----------------------
    hidden = tfa.layers.WeightNormalization(
        layers.Dense(
            units = l1_nodes,
            activation = act1,
            kernel_initializer = ker_init1))(hidden)

    # Skip connections: each later block sees the flattened conv output again.
    output = layers.Dropout(dropout2)(layers.Concatenate()([embed, hidden, knn_inputs]))
    output = tfa.layers.WeightNormalization(
        layers.Dense(
            units = l2_nodes,
            activation = act2,
            kernel_initializer = ker_init2))(output)

    output = layers.Dropout(dropout3)(layers.Concatenate()([embed, hidden, output]))
    output = tfa.layers.WeightNormalization(
        layers.Dense(
            units = l3_nodes,
            activation = act3,
            kernel_initializer = ker_init3))(output)

    #----------- Final layer -----------------------
    conv_outputs = layers.Dense(
        units = 9,
        activation = 'softmax',
        kernel_initializer = ker_init4)(output)

    #----------- Model instantiation  ---------------
    model = Model([conv_inputs, knn_inputs], conv_outputs)
    # The sampled learning rate is now actually passed to the optimizer; previously
    # the hyperparameter was registered but RMSprop ran with its default rate.
    model.compile(loss = 'categorical_crossentropy',
                  optimizer = keras.optimizers.RMSprop(learning_rate = learning_rate),
                  metrics = [custom_metric])

    return model

In [45]:
# Bayesian-optimization tuner over `model_builder`, minimizing validation loss.
# Note: this object is created in every OPTIM_MODE, including mode 3 where no search is run.
tuner = BayesianOptimization(
    model_builder,
    objective = "val_loss",
    max_trials = 100, # This is only demo - you can play with more trials on local machine (Kaggle is resource limited). I usually run from 100-1000 trials for best params.
    executions_per_trial = 2,
    overwrite = True,
    seed = RANDOM_STATE,
    directory = "tps-06",
    project_name = "nn-embeddings",
)

In [46]:
# Print the hyperparameters registered by `model_builder` together with their ranges.
tuner.search_space_summary()

Search space summary
Default search space size: 16
emb_units (Int)
{'default': None, 'conditions': [], 'min_value': 7, 'max_value': 8, 'step': 1, 'sampling': None}
drop_out1 (Choice)
{'default': 0.2, 'conditions': [], 'values': [0.2, 0.4], 'ordered': True}
drop_out2 (Choice)
{'default': 0.2, 'conditions': [], 'values': [0.2, 0.4], 'ordered': True}
drop_out3 (Float)
{'default': 0.25, 'conditions': [], 'min_value': 0.0, 'max_value': 0.5, 'step': 0.05, 'sampling': None}
l1_units (Choice)
{'default': 16, 'conditions': [], 'values': [16, 64], 'ordered': True}
l2_units (Choice)
{'default': 16, 'conditions': [], 'values': [16, 64], 'ordered': True}
l3_units (Choice)
{'default': 16, 'conditions': [], 'values': [16, 64], 'ordered': True}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01], 'ordered': True}
dense_act1 (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'elu'], 'ordered': False}
dense_act2 (Choice)
{'default': 'relu', 'conditions': [], 'value

In [47]:
# Epochs 2-5 is the best in this competition
# The first 75 columns feed the embedding branch, the remaining ones the extra-feature input.
if OPTIM_MODE != 3:
    tuner.search(
        [X_train.iloc[:, :75], X_train.iloc[:, 75:]],
        y_train,
        epochs=3,
        validation_data=([X_valid.iloc[:, :75], X_valid.iloc[:, 75:]], y_valid),
    )

In [48]:
# Show the five best trials (skipped when no search was run, i.e. in mode 3).
if OPTIM_MODE != 3:
    tuner.results_summary(num_trials=5)

In [49]:
# Rebuild the network from the single best hyperparameter set and print its layers.
if OPTIM_MODE != 3:
    best_hp = tuner.get_best_hyperparameters()[0]
    model = tuner.hypermodel.build(best_hp)
    model.summary()

In [50]:
# Render the architecture diagram of the model built in the previous cell.
if OPTIM_MODE != 3:
    plot_model(model)

In [51]:
# List of the TOP-3 parameter sets found during my research (1000 trials).
# Each dict is consumed by `model_builder_optimized`; `conv1d_units` is used there as the Conv1D kernel size.
# NOTE(review): the first two entries are identical - confirm whether that is intended.
net_params = [{'emb_units': 8, 'conv1d_units': 1, 
               'drop_out1': 0.3, 'drop_out2': 0.4, 'drop_out3': 0.2, 
               'l1_units': 16, 'l2_units': 64, 'l3_units': 16, 
               'learning_rate': 0.001, 
               'dense_act1': 'elu', 'dense_act2': 'relu', 'dense_act3': 'relu',
              'kern_init1': 'he_uniform', 'kern_init2': 'he_uniform', 'kern_init3': 'he_uniform', 'kern_init4': 'lecun_normal'},
              {'emb_units': 8, 'conv1d_units': 1, 
               'drop_out1': 0.3, 'drop_out2': 0.4, 'drop_out3': 0.2, 
               'l1_units': 16, 'l2_units': 64, 'l3_units': 16, 
               'learning_rate': 0.001, 
               'dense_act1': 'elu', 'dense_act2': 'relu', 'dense_act3': 'relu',
              'kern_init1': 'he_uniform', 'kern_init2': 'he_uniform', 'kern_init3': 'he_uniform', 'kern_init4': 'lecun_normal'},
              {'emb_units': 7, 'conv1d_units': 1, 
               'drop_out1': 0.3, 'drop_out2': 0.2, 'drop_out3': 0.2, 
               'l1_units': 16, 'l2_units': 128, 'l3_units': 32, 
               'learning_rate': 0.001, 
               'dense_act1': 'elu', 'dense_act2': 'relu', 'dense_act3': 'relu',
              'kern_init1': 'he_uniform', 'kern_init2': 'he_uniform', 'kern_init3': 'he_uniform', 'kern_init4': 'lecun_normal'}
              ]

In [52]:
def model_builder_optimized(net_config):
    """Build and compile the embedding + Conv1D network from a fixed configuration.

    Args:
        net_config: dict with the keys ``emb_units``, ``conv1d_units`` (used as the
            Conv1D kernel size), ``drop_out1..3``, ``l1_units..l3_units``,
            ``learning_rate``, ``dense_act1..3`` and ``kern_init1..4``.

    Returns:
        A Keras ``Model`` compiled with Adam at ``net_config['learning_rate']``,
        taking two inputs (75 values for the embedding branch, 9 additional
        features) and producing a 9-way softmax.

    Raises:
        KeyError: if any of the keys listed above is missing from ``net_config``.
    """
    emb_units = net_config['emb_units']
    # `conv1d_units` is passed as the kernel size; the filter count is fixed at 10.
    conv_kernel_size = net_config['conv1d_units']

    dropout1 = net_config["drop_out1"]
    dropout2 = net_config["drop_out2"]
    dropout3 = net_config["drop_out3"]

    l1_nodes = net_config["l1_units"]
    l2_nodes = net_config["l2_units"]
    l3_nodes = net_config["l3_units"]

    learning_rate = net_config["learning_rate"]

    act1 = net_config['dense_act1']
    act2 = net_config['dense_act2']
    act3 = net_config['dense_act3']

    ker_init1 = net_config['kern_init1']
    ker_init2 = net_config['kern_init2']
    ker_init3 = net_config['kern_init3']
    ker_init4 = net_config['kern_init4']
    #--------------------------------------

    # Shapes are given as real tuples: `(75)` is just the int 75.
    conv_inputs = layers.Input(shape = (75,))
    knn_inputs = layers.Input(shape = (9,))

    #----------- Embedding layers ----------------------
    # input_dim = 353: presumably the feature values are integers in [0, 352] - TODO confirm.
    embed = layers.Embedding(input_dim = 353,
                             output_dim = emb_units,
                             embeddings_regularizer = 'l2')(conv_inputs)

    #----------- Convolution layers ----------------------
    embed = layers.Conv1D(filters = 10, kernel_size = conv_kernel_size, activation = 'relu')(embed)
    embed = layers.Flatten()(embed)
    hidden = layers.Dropout(dropout1)(embed)

    #----------- Residual blocks layers ----------------------
    hidden = tfa.layers.WeightNormalization(
        layers.Dense(
            units = l1_nodes,
            activation = act1,
            kernel_initializer = ker_init1))(hidden)

    # Skip connections: each later block sees the flattened conv output again.
    output = layers.Dropout(dropout2)(layers.Concatenate()([embed, hidden, knn_inputs]))
    output = tfa.layers.WeightNormalization(
        layers.Dense(
            units = l2_nodes,
            activation = act2,
            kernel_initializer = ker_init2))(output)

    output = layers.Dropout(dropout3)(layers.Concatenate()([embed, hidden, output]))
    output = tfa.layers.WeightNormalization(
        layers.Dense(
            units = l3_nodes,
            activation = act3,
            kernel_initializer = ker_init3))(output)

    #----------- Final layer -----------------------
    conv_outputs = layers.Dense(
        units = 9,
        activation = 'softmax',
        kernel_initializer = ker_init4)(output)

    #----------- Model instantiation  ---------------
    model = Model([conv_inputs, knn_inputs], conv_outputs)
    model.compile(loss = 'categorical_crossentropy',
                  optimizer = keras.optimizers.Adam(learning_rate),
                  metrics = [custom_metric])

    return model

In [53]:
def inter_class_optimizer(weights, a0, a1, a2, a3, a4, a5, a6, a7, a8):
    """Objective for the per-class reweighting: log loss after scaling each class column.

    Each of the nine columns ``a0..a8`` is multiplied by its entry in ``weights``,
    the rows are renormalized to sum to one, and the result is scored against the
    module-level ``y_val``.
    """
    class_columns = (a0, a1, a2, a3, a4, a5, a6, a7, a8)
    scaled = np.array([weights[k] * column for k, column in enumerate(class_columns)]).transpose()

    row_totals = np.sum(scaled, axis=1).reshape(-1, 1)
    return log_loss(y_val, scaled / row_totals)


def pred_fold_optimizer(oof_preds, test_preds):
    """Fit nine per-class weights on the fold's predictions and apply them.

    The weights minimize ``inter_class_optimizer`` (log loss against the
    module-level ``y_val``) with Nelder-Mead, starting from equal weights.
    Both prediction matrices are then column-scaled by the fitted weights and
    row-normalized.

    Args:
        oof_preds: array of shape (n_samples, 9) with validation-fold probabilities.
        test_preds: array of shape (n_test, 9) with test-set probabilities.

    Returns:
        Tuple ``(best_loss, reweighted_test_preds, reweighted_oof_preds)``.
        Note that ``best_loss`` is measured on the same rows the weights were
        fitted on, so it is an optimistic estimate.
    """
    # Fixed at 9 because inter_class_optimizer takes exactly nine class columns.
    n_classes = 9

    # One (0, 1) bound per class weight. The bounds list was previously sized from an
    # unrelated global, which gives the wrong length for the 9-element x0.
    # No `constraints` argument: Nelder-Mead cannot handle constraints, and the row
    # normalization below makes the overall scale of the weights irrelevant anyway.
    res = minimize(fun=inter_class_optimizer,
                   x0=[1 / n_classes] * n_classes,
                   args=tuple(oof_preds[:, i] for i in range(n_classes)),
                   method='Nelder-Mead',
                   options={'maxiter': 300},
                   bounds=[(0.0, 1.0)] * n_classes)

    oof_preds = np.array([res.x[i] * oof_preds[:, i] for i in range(n_classes)]).transpose()
    oof_preds = oof_preds / np.sum(oof_preds, axis=1).reshape(-1, 1)

    test_preds = np.array([res.x[i] * test_preds[:, i] for i in range(n_classes)]).transpose()
    test_preds = test_preds / np.sum(test_preds, axis=1).reshape(-1, 1)

    return res["fun"], test_preds, oof_preds

In [54]:
def inter_model_optimizer(weights):
    """Objective for model blending: log loss of the weighted sum of model predictions.

    Reads the module-level ``oof_class_preds`` (one prediction matrix per model)
    and ``y_val`` (true labels of the current fold).
    """
    blended = sum(w * preds for w, preds in zip(weights, oof_class_preds))
    return log_loss(y_val, blended)

def pred_model_optimizer(oof_class_preds, test_class_preds):
    """Fit one blending weight per model and return the blended predictions.

    Weights are found with Nelder-Mead on ``inter_model_optimizer``, starting from
    an equal split. They are normalized to sum to one before blending, so a blend
    of row-normalized inputs is itself row-normalized.

    Args:
        oof_class_preds: list of validation-fold prediction matrices, one per model.
            NOTE: ``inter_model_optimizer`` reads the module-level variable of the
            same name, so this must be the same list object as that global.
        test_class_preds: list of test prediction matrices, in the same model order.

    Returns:
        Tuple ``(blended_oof_preds, blended_test_preds)``.
    """
    n_models = len(oof_class_preds)
    starting_values = [1 / n_models] * n_models

    # No `constraints` argument: Nelder-Mead cannot handle constraints, so the
    # sum-to-one condition is enforced explicitly after the search instead.
    res = minimize(inter_model_optimizer,
                   starting_values,
                   method='Nelder-Mead',
                   bounds=[(0.0, 1.0)] * n_models)

    # Reports the raw optimizer output; the blend below uses the normalized weights.
    print(f'--- Inter model optimized logloss: {(res["fun"]):.5f} using {res["x"]} weights (sum:{np.sum(res["x"])}) ---\n')

    blend_weights = np.asarray(res["x"], dtype=float)
    weight_total = blend_weights.sum()
    if np.isfinite(weight_total) and weight_total > 0:
        blend_weights = blend_weights / weight_total
    else:
        # Degenerate optimizer result: fall back to a plain average.
        blend_weights = np.asarray(starting_values, dtype=float)

    optmized_oof_nn_preds = 0
    optmized_test_nn_preds = 0

    for weight, prediction in zip(blend_weights, oof_class_preds):
        optmized_oof_nn_preds += weight * prediction

    for weight, prediction in zip(blend_weights, test_class_preds):
        optmized_test_nn_preds += weight * prediction

    return optmized_oof_nn_preds, optmized_test_nn_preds

In [55]:
# Upper bound on epochs per fit; early stopping usually ends training sooner.
EPOCH = 70

N_FOLDS = 10            # folds per cross-validation run
RANDOM_STATES_NUM = 3   # number of CV repetitions, each with a different shuffle seed
NUM_TOP_MODELS = 3      # configurations trained and blended per fold

# Labels of the current validation fold. Read as a GLOBAL by inter_class_optimizer
# and inter_model_optimizer, so it must be set before either optimizer runs.
y_val = []
# Test-set accumulators: per-class-optimized predictions of every single model, and
# the per-fold model blends.
pred_NN_a = np.zeros((test.shape[0], 9))
pred_NN_a_optimized = np.zeros((test.shape[0], 9))


if not (OPTIM_MODE == 1):
    # Tuner results only exist, and are only needed, in mode 2.
    tuners = tuner.get_best_hyperparameters(num_trials = NUM_TOP_MODELS) if OPTIM_MODE == 2 else []
    print(f'----- Training and blending {N_FOLDS * RANDOM_STATES_NUM * NUM_TOP_MODELS} models -----')

    # Loop-invariant feature views: the first 75 columns feed the embedding branch,
    # the rest (minus the trailing `target` column in train) the extra-feature input.
    train_conv_features = train.iloc[:, :75]
    train_knn_features = train.iloc[:, 75:-1]
    test_conv_features = test.iloc[:, :75]
    test_knn_features = test.iloc[:, 75:]

    for rs_n in range(RANDOM_STATES_NUM):
        print(F"\n- RANDOM STATE {RANDOM_STATE + rs_n} -")
        skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state = (RANDOM_STATE + rs_n))

        oof_NN_a = np.zeros((train.shape[0], 9))
        oof_NN_a_optim = np.zeros((train.shape[0], 9))
        oof_NN_fold_optimized = np.zeros((train.shape[0], 9))

        for fold, (tr_idx, ts_idx) in enumerate(skf.split(train, train.target)):

            # Fold-specific names keep the hold-out X_train / y_train used by the
            # tuner search intact if that cell is re-run later.
            X_fold_train = train_conv_features.iloc[tr_idx]
            X_fold_train_knn = train_knn_features.iloc[tr_idx]
            y_fold_train = targets.iloc[tr_idx]

            X_fold_valid = train_conv_features.iloc[ts_idx]
            X_fold_valid_knn = train_knn_features.iloc[ts_idx]
            y_fold_valid = targets.iloc[ts_idx]

            # Integer labels of this fold for the two weight optimizers (see note on y_val above).
            y_val = target_optim.iloc[ts_idx]

            # Also read as a global by inter_model_optimizer.
            oof_class_preds = []
            test_class_preds = []

            for n_models in range(NUM_TOP_MODELS):

                K.clear_session()

                if OPTIM_MODE == 2:
                    params = tuners[n_models]
                    model_conv = tuner.hypermodel.build(params)
                    # Use THIS configuration's learning rate, not the rate of the
                    # single best trial (`best_hp`) for every model.
                    l_rate = params.get('learning_rate')
                else:
                    model_conv = model_builder_optimized(net_params[n_models])
                    l_rate = net_params[n_models]["learning_rate"]

                # Recompile so both modes train with Adam at the chosen learning rate.
                model_conv.compile(loss='categorical_crossentropy',
                                   optimizer = keras.optimizers.Adam(learning_rate = l_rate),
                                   metrics=[custom_metric])

                model_conv.fit([X_fold_train, X_fold_train_knn], y_fold_train,
                               batch_size = 128, epochs = EPOCH,
                               validation_data=([X_fold_valid, X_fold_valid_knn], y_fold_valid),
                               callbacks=[es, plateau],
                               verbose = 0)

                pred_a = model_conv.predict([X_fold_valid, X_fold_valid_knn])
                score_NN_a = log_loss(y_fold_valid, pred_a)

                test_NN_preds = model_conv.predict([test_conv_features, test_knn_features])

                # Class weights are fitted on this same validation fold, so the
                # "optimized" score below is optimistic.
                optim_score, test_preds_optim, oof_preds_optim = pred_fold_optimizer(pred_a, test_NN_preds)

                print(f"  * FOLD {fold + 1} -> MODEL {n_models + 1} -> SCORE: {(score_NN_a):.5f} -> OPTIMIZED SCORE: {optim_score:.5f} (GAIN: {(optim_score-score_NN_a):.5f})")

                pred_NN_a += test_preds_optim
                oof_NN_a[ts_idx] += pred_a
                oof_NN_a_optim[ts_idx] += oof_preds_optim

                # ---
                oof_class_preds.append(oof_preds_optim)
                test_class_preds.append(test_preds_optim)
                # ---

            # Blend the fold's models with weights fitted on the validation fold.
            oof_NN_fold_optimized[ts_idx], pred_NN_optimized = pred_model_optimizer(oof_class_preds, test_class_preds)
            pred_NN_a_optimized += pred_NN_optimized

        score_a = log_loss(targets, (oof_NN_a / NUM_TOP_MODELS))
        score_o = log_loss(targets, oof_NN_fold_optimized)
        print(f"- FINAL SCORE FOR {NUM_TOP_MODELS} MODELS IN RANDOM STATE {RANDOM_STATE + rs_n}: {score_a:.5f} - OPTIMIZED (inter class and model): {score_o:.5f} (GAIN: {(score_o-score_a):.5f})")

    # Average the accumulated test predictions over everything that was summed into them.
    pred_NN_a = pred_NN_a / (N_FOLDS * RANDOM_STATES_NUM * NUM_TOP_MODELS)
    pred_NN_a_optimized = pred_NN_a_optimized /  (N_FOLDS * RANDOM_STATES_NUM)

----- Training and blending 90 models -----

- RANDOM STATE 2021 -
  * FOLD 1 -> MODEL 1 -> SCORE: 1.74064 -> OPTIMIZED SCORE: 1.74062 (GAIN: -0.00003)
  * FOLD 1 -> MODEL 2 -> SCORE: 1.74106 -> OPTIMIZED SCORE: 1.74103 (GAIN: -0.00003)
  * FOLD 1 -> MODEL 3 -> SCORE: 1.74007 -> OPTIMIZED SCORE: 1.74006 (GAIN: -0.00001)
--- Inter model optimized logloss: 1.73918 using [0.29519231 0.23384731 0.46391193] weights (sum:0.9929515598267288) ---

  * FOLD 2 -> MODEL 1 -> SCORE: 1.73752 -> OPTIMIZED SCORE: 1.73745 (GAIN: -0.00007)
  * FOLD 2 -> MODEL 2 -> SCORE: 1.73834 -> OPTIMIZED SCORE: 1.73831 (GAIN: -0.00004)
  * FOLD 2 -> MODEL 3 -> SCORE: 1.73861 -> OPTIMIZED SCORE: 1.73854 (GAIN: -0.00008)
--- Inter model optimized logloss: 1.73685 using [0.51239596 0.24313905 0.24758235] weights (sum:1.0031173562413775) ---

  * FOLD 3 -> MODEL 1 -> SCORE: 1.74459 -> OPTIMIZED SCORE: 1.74382 (GAIN: -0.00077)
  * FOLD 3 -> MODEL 2 -> SCORE: 1.74368 -> OPTIMIZED SCORE: 1.74361 (GAIN: -0.00008)
  * FOLD 

  * FOLD 2 -> MODEL 1 -> SCORE: 1.74250 -> OPTIMIZED SCORE: 1.74194 (GAIN: -0.00056)
  * FOLD 2 -> MODEL 2 -> SCORE: 1.74125 -> OPTIMIZED SCORE: 1.74121 (GAIN: -0.00003)
  * FOLD 2 -> MODEL 3 -> SCORE: 1.74222 -> OPTIMIZED SCORE: 1.74220 (GAIN: -0.00002)
--- Inter model optimized logloss: 1.74002 using [0.29390235 0.37564475 0.3386028 ] weights (sum:1.0081498911036517) ---

  * FOLD 3 -> MODEL 1 -> SCORE: 1.73191 -> OPTIMIZED SCORE: 1.73187 (GAIN: -0.00004)
  * FOLD 3 -> MODEL 2 -> SCORE: 1.73216 -> OPTIMIZED SCORE: 1.73212 (GAIN: -0.00004)
  * FOLD 3 -> MODEL 3 -> SCORE: 1.73167 -> OPTIMIZED SCORE: 1.73167 (GAIN: -0.00001)
--- Inter model optimized logloss: 1.73077 using [0.29045546 0.30307765 0.40543768] weights (sum:0.998970787654125) ---

  * FOLD 4 -> MODEL 1 -> SCORE: 1.74405 -> OPTIMIZED SCORE: 1.74400 (GAIN: -0.00004)
  * FOLD 4 -> MODEL 2 -> SCORE: 1.74421 -> OPTIMIZED SCORE: 1.74414 (GAIN: -0.00008)
  * FOLD 4 -> MODEL 3 -> SCORE: 1.74552 -> OPTIMIZED SCORE: 1.74547 (GAIN: -0

In [56]:
# Write the blended test predictions to the submission file (skipped in tuner-only mode).
if OPTIM_MODE != 1:
    pred_embedding = pred_NN_a_optimized
    # Column k of the prediction matrix fills the submission column `Class_{k+1}`.
    for class_idx in range(9):
        submission[f'Class_{class_idx + 1}'] = pred_embedding[:, class_idx]

    submission.to_csv("26-tps06-keras-tuner.csv", index=False)