In [None]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn import preprocessing
from sklearn.metrics import log_loss

import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import activations,callbacks
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy

from keras.layers import Dense, Flatten, Conv1D,MaxPooling1D, Dropout,BatchNormalization,Embedding,Concatenate, Input
from keras.models import Model

In [None]:
# Read the competition's training and test tables (same files, same order).
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tabular-playground-series-may-2021/train.csv',
        '../input/tabular-playground-series-may-2021/test.csv',
    )
)

<h2> Basic data cooking

<h3> Data for OneHot Models

In [None]:
!pip install git+https://github.com/Lpourchot/dfencoding.git

In [None]:
from dfencoding import utilities

In [None]:
# Work on copies so the raw frames stay untouched; the values are cast to
# strings in the next step.
train_dum, test_dum = train.copy(), test.copy()

In [None]:
# Drop the first column (positional slice 1:) and cast every remaining column
# to string.  The frames are derived from `train` / `test` instead of from
# `train_dum` / `test_dum` themselves, so running this cell twice in a row does
# not strip one more column on each run.
train_dum = train.iloc[:,1:].astype('str')
test_dum = test.iloc[:,1:].astype('str')

In [None]:
# Build the encoder from the string-typed train and test frames.
# 'target' is presumably the name of the label column, and the keyword
# settings are passed through unchanged -- see the dfencoding package for
# their exact meaning.
dfe = utilities.dfencoding(
    train_dum,
    'target',
    test_dum,
    missing_value = 'Y',
    cat_limit = 150,
    dummies_limit = 150,
)

In [None]:
# Split the encoder's frame back into its train part (first len(train) rows)
# and test part (remaining rows), keeping every column except the last one
# and casting to float.
X, test = (
    dfe.data.iloc[row_span, :-1].astype('float')
    for row_span in (slice(None, len(train)), slice(len(train), None))
)
X.shape, test.shape

In [None]:
# Run the encoder's get_dummies() step.  The next cell reads `dfe.data` again,
# so this presumably replaces it with the one-hot encoded frame in place
# -- TODO confirm against the dfencoding package.
dfe.get_dummies()

In [None]:
# One-hot inputs: every column of the encoder's frame except the first one,
# split at len(train_dum) into the train rows and the test rows.
X_Onehot = dfe.data.iloc[:, 1:].iloc[:len(train_dum)]
test_Onehot = dfe.data.iloc[:, 1:].iloc[len(train_dum):]
print(X_Onehot.shape, test_Onehot.shape, sep='\n')

<h3> Data for the other Models</h3>

In [None]:
# `y` keeps the raw class labels; `target` is the same column one-hot encoded
# as floats.
y = train['target']
target = pd.get_dummies(y).astype('float')

In [None]:
# Embedding lookups need non-negative indices, so every encoded category is
# shifted up by 8.
# NOTE: X and test are rebound from themselves, so running this cell a second
# time shifts the values by another 8.
X, test = X + 8, test + 8
X.shape, test.shape, y.shape, target.shape

<h3> Parameters for the training

In [None]:
# Both callbacks monitor 'val_categorical_crossentropy', i.e. the validation
# value of the CategoricalCrossentropy metric declared in `metrics` below.

# Stop training after 3 epochs without improvement and restore the weights of
# the best epoch.
es = callbacks.EarlyStopping(
                monitor = 'val_categorical_crossentropy', 
                min_delta = 0.0000001, 
                patience = 3,
                mode = 'min',
                baseline = None, 
                restore_best_weights = True,
                verbose = 1)

# Halve the learning rate after 2 epochs without improvement, never going
# below 1e-7.
plateau  = callbacks.ReduceLROnPlateau(
                monitor = 'val_categorical_crossentropy',
                factor = 0.5, 
                patience = 2, 
                mode = 'min', 
                # Keyword must be spelled `min_delta`; same threshold as for
                # early stopping.
                min_delta = 0.0000001,
                cooldown = 0, 
                min_lr = 1e-7,
                verbose = 1) 

# Metric and loss handed to compile(); the model ends in a softmax, hence
# from_logits=False.
metrics = [tf.keras.metrics.CategoricalCrossentropy()]
loss = tf.keras.losses.CategoricalCrossentropy(
                from_logits=False,
                label_smoothing=0,
                reduction="auto",
                name="categorical_crossentropy")


<h2> Base models : Row Embedding + Column Embedding + Conv1D + Onehot</h2>

<h3> OneHot Model

In [None]:
def api_onehot(n_features=1285):
    """Build the one-hot branch of the merged network.

    The branch is Input -> Dense(40, relu) -> Dropout(0.3) -> Dense(20, relu).

    Parameters
    ----------
    n_features : int, default 1285
        Width of the one-hot encoded input.  The default keeps the value that
        used to be hard-coded; pass the column count of the one-hot frame
        when the encoding produces a different width.

    Returns
    -------
    tuple
        ``(outputs_Onehot, inputs_Onehot)``: the branch's output tensor and
        its Input tensor, in that order.
    """
    inputs_Onehot = layers.Input(shape = (n_features,))
    w = layers.Dense(40, activation="relu")(inputs_Onehot)
    w = layers.Dropout(0.3)(w)
    
    outputs_Onehot = layers.Dense(20, activation = "relu")(w)
    
    return  outputs_Onehot,inputs_Onehot

<h3> Row Embedding Model

In [None]:
def api_embedding_row():
    """Build the row-embedding branch of the merged network.

    A 50-wide input is embedded (Embedding(80, 10)), flattened, passed through
    Dense(40, relu) and Dropout(0.3), and ends in a 20-unit ReLU Dense layer.

    Returns
    -------
    tuple
        ``(outputs_Embedding_row, inputs_Embedding_row)``: the branch's output
        tensor and its Input tensor, in that order.
    """
    inputs_Embedding_row = layers.Input(shape = (50,))

    # Layers are created in the order they are applied.
    pipeline = (
        layers.Embedding(80, 10, input_length = 50),
        layers.Flatten(),
        layers.Dense(40, activation = 'relu'),
        layers.Dropout(0.3),
        layers.Dense(20, activation='relu'),
    )

    outputs_Embedding_row = inputs_Embedding_row
    for layer in pipeline:
        outputs_Embedding_row = layer(outputs_Embedding_row)

    return outputs_Embedding_row, inputs_Embedding_row

<h3> Conv1D Model

In [None]:
def api_conv1D():
    """Build the Conv1D branch of the merged network.

    A (50, 1) input goes through a 256-filter convolution (kernel size 4,
    'same' padding, ReLU), max pooling of size 3, flattening and Dropout(0.3),
    and ends in a 20-unit ReLU Dense layer.

    Returns
    -------
    tuple
        ``(outputs_Conv1D, inputs_Conv1D)``: the branch's output tensor and
        its Input tensor, in that order.
    """
    inputs_Conv1D = layers.Input(shape=(50,1))

    convolved = layers.Conv1D(
        filters = 256,
        kernel_size = 4,
        padding = 'same',
        activation = 'relu',
    )(inputs_Conv1D)
    pooled = layers.MaxPooling1D(pool_size = 3)(convolved)
    flattened = layers.Flatten()(pooled)
    regularised = layers.Dropout(0.3)(flattened)

    outputs_Conv1D = layers.Dense(20, activation = 'relu')(regularised)
    return outputs_Conv1D, inputs_Conv1D

<h3> Column Embedding Model

In [None]:
def api_embedding_col():    
    """Build the column-embedding branch of the merged network.

    A 50-wide input is reshaped to (50, 1), embedded (Embedding(80, 10)),
    flattened, passed through Dense(40, relu) and Dropout(0.3), and ends in a
    20-unit ReLU Dense layer.

    Returns
    -------
    tuple
        ``(outputs_Embedding_col, inputs_Embedding_col)``: the branch's output
        tensor and its Input tensor, in that order.
    """
    inputs_Embedding_col = layers.Input(shape = (50,))
    a = layers.Reshape((-1,1))(inputs_Embedding_col)
    # No `input_length` here: after the Reshape the tensor is (50, 1), so a
    # declared length of 128 would describe a shape this layer never receives.
    a = layers.Embedding(80, 10)(a)
    a = layers.Flatten()(a)
    a = layers.Dense(40, activation='relu')(a)
    a = layers.Dropout(0.3)(a)


    outputs_Embedding_col = layers.Dense(20, activation = 'relu')(a)

    return outputs_Embedding_col,inputs_Embedding_col

<h3> Sequential Model

In [None]:
def api_seq():    
    """Build the plain dense branch of the merged network.

    The branch is Input(50) -> Dense(40, relu) -> Dropout(0.3) -> Dense(20, relu).

    Returns
    -------
    tuple
        ``(outputs_seq, inputs_seq)``: the branch's output tensor and its
        Input tensor, in that order.
    """
    def relu_dense(units):
        # Fully connected layer with ReLU activation.
        return layers.Dense(units, activation="relu")

    inputs_seq = layers.Input(shape = (50,))

    # Layers are created in the order they are applied.
    stack = [relu_dense(40), layers.Dropout(0.3), relu_dense(20)]

    outputs_seq = inputs_seq
    for layer in stack:
        outputs_seq = layer(outputs_seq)

    return outputs_seq, inputs_seq

In [None]:
N_FOLDS = 10
SEED = 2021
# One output unit / prediction column per class of the one-hot target.
N_CLASSES = target.shape[1]

# The fold split is seeded through StratifiedKFold below; seed TensorFlow's
# global generator too so its random operations start from a fixed seed.
tf.random.set_seed(SEED)

# Out-of-fold predictions for the training rows and fold-averaged predictions
# for the test rows.
oof = np.zeros((X.shape[0], N_CLASSES))
pred = np.zeros((test.shape[0], N_CLASSES))


def _model_inputs(onehot_part, encoded_part):
    """Return the five model inputs in the order the merged model declares them.

    The one-hot frame feeds the first input; the encoded frame is passed to
    each of the four remaining inputs.
    """
    return [onehot_part, encoded_part, encoded_part, encoded_part, encoded_part]


def _build_merged_model(n_classes):
    """Assemble the five-branch network and return it uncompiled.

    Each ``api_*`` builder returns ``(output_tensor, input_tensor)``.  The
    branch outputs are concatenated, passed through a 20-unit sigmoid Dense
    layer and finished with an ``n_classes``-way softmax layer named ``out``.
    Inputs are declared in the order one-hot, row embedding, Conv1D, column
    embedding, dense.
    """
    branches = [
        api_onehot(),
        api_embedding_row(),
        api_conv1D(),
        api_embedding_col(),
        api_seq(),
    ]
    branch_outputs = [out_tensor for out_tensor, _ in branches]
    branch_inputs = [in_tensor for _, in_tensor in branches]

    z = layers.Concatenate(axis=1)(branch_outputs)
    z = layers.Dense(20, activation = 'sigmoid')(z)
    out = layers.Dense(n_classes, activation = 'softmax', name = 'out')(z)

    # tf.keras.Model keeps the model class in the same package as `layers`
    # (imported from tensorflow.keras).
    return tf.keras.Model(inputs=branch_inputs, outputs=out, name="model_merged")


skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)

for fold, (tr_idx, ts_idx) in enumerate(skf.split(X, y)):
    print(f"===== FOLD {fold} =====")

    # Training / validation views of the encoded features, the one-hot
    # features and the one-hot target for this fold.
    x_tr, x_ts = X.iloc[tr_idx], X.iloc[ts_idx]
    x_Onehot_tr, x_Onehot_ts = X_Onehot.iloc[tr_idx], X_Onehot.iloc[ts_idx]
    y_tr, y_ts = target.iloc[tr_idx], target.iloc[ts_idx]

    #---------- Model creation and compile ---------------------
    # A fresh model per fold, so no weights carry over between folds.
    model_merged = _build_merged_model(N_CLASSES)
    model_merged.compile(tf.keras.optimizers.Adam(learning_rate=0.0001),
                    loss = loss ,
                    metrics = metrics)

    #---------- Model fit ---------------------------------------
    model_merged.fit(
                    _model_inputs(x_Onehot_tr, x_tr),
                    y_tr,
                    validation_data = (_model_inputs(x_Onehot_ts, x_ts), y_ts),
                    batch_size = 256,
                    epochs = 40,
                    verbose = 1,
                    callbacks = [es,plateau]
                    )

    #---------- Model prediction --------------------------------
    # Out-of-fold predictions for the validation rows of this fold.
    oof[ts_idx] = model_merged.predict(_model_inputs(x_Onehot_ts, x_ts))

    score = log_loss(y_ts, oof[ts_idx])
    print(f"FOLD {fold} Score {score}\n")

    # Each fold contributes 1/N_FOLDS of the final test prediction.
    pred += model_merged.predict(_model_inputs(test_Onehot, test)) / N_FOLDS

score = log_loss(target, oof)
print(f"Score total {score}\n")   

In [None]:
# Load the competition's sample submission; its `id` column is reused when the
# submission frame is assembled in the next cell.
submission = pd.read_csv('../input/tabular-playground-series-may-2021/sample_submission.csv')

In [None]:
# Assemble the submission: one probability column per class, with the sample
# submission's `id` placed in front, then write it out and preview it.
submission_df = pd.DataFrame(pred, columns=['Class_1', 'Class_2', 'Class_3', 'Class_4'])
submission_df.insert(0, 'id', submission['id'])
submission_df.to_csv("submission_Keras_18.csv", index=False)
display(submission_df.head())