In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import roc_curve, auc
from tensorflow.keras import backend as K
import warnings
from scipy.optimize import differential_evolution

In [None]:
# Silence every Python warning for the rest of the kernel session.
# NOTE(review): this blanket filter also hides deprecation and numerical
# warnings raised by any imported library; consider narrowing it by category.
warnings.filterwarnings("ignore")

In [None]:
# Load the training set; dtype='float32' is applied to every column, so 'id'
# and 'target' are parsed as float32 as well.
df = pd.read_csv('../input/tabular-playground-series-oct-2021/train.csv',dtype='float32')
df.head()

In [None]:
# (rows, columns) of the freshly loaded training frame.
df.shape

In [None]:
# Number of stratified cross-validation folds; one model is trained and
# checkpointed per fold further down.
FOLDS = 5

In [None]:
# Assign every row to exactly one of FOLDS stratified validation folds (1-based)
# and store that fold id in a new "KFold" column.
strf = StratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=1234)

# StratifiedKFold derives the splits from the labels alone, so a zero placeholder
# stands in for X instead of materialising a copy of the whole feature matrix.
fold_ids = np.zeros(len(df), dtype=np.int32)

for fold, (train_indices, val_indices) in enumerate(strf.split(fold_ids, df['target'].values), start=1):
    # split() yields *positional* indices, so they are written into a positional
    # array; label-based df.loc would only be correct for a default RangeIndex.
    fold_ids[val_indices] = fold

# Single assignment keeps the cell idempotent on re-run.
df["KFold"] = fold_ids

In [None]:
# Preview the frame again to check the "KFold" column added by the previous cell.
df.head()

In [None]:
def dense_block(x, out_feats=64):
    """Residual block: three Dense(relu) + BatchNormalization stages plus a skip connection.

    Args:
        x: Input Keras tensor. It is added element-wise to the output of the
            stacked layers, so its last dimension has to equal ``out_feats``.
        out_feats: Number of units of each Dense layer inside the block.

    Returns:
        The tensor ``relu(x + stacked_layers(x))``.
    """
    hidden = x
    # Three identical Dense -> BatchNorm stages applied back to back.
    for _ in range(3):
        hidden = keras.layers.Dense(out_feats, activation='relu')(hidden)
        hidden = keras.layers.BatchNormalization()(hidden)

    # Skip connection around the stack, followed by a final non-linearity.
    skip_sum = keras.layers.Add()([x, hidden])
    return keras.layers.Activation('relu')(skip_sum)

In [None]:
def get_model(n_features=285):
    """Build the (uncompiled) residual MLP used as the binary classifier.

    Architecture:
        input -> BatchNorm -> Dense(256)
              -> dense_block(256) -> [skip add] -> Dense(128) -> dense_block(128)
        The 128-wide outputs of (a) a direct projection of the normalised input,
        (b) a projection of the first block and (c) the second block are summed
        and passed through a Dropout/Dense(L2) head ending in a sigmoid unit.

    Args:
        n_features: Number of input features per row. Defaults to 285, the
            value that was previously hard-coded.

    Returns:
        A ``keras.Model`` mapping ``(batch, n_features)`` to ``(batch, 1)``
        probabilities.
    """
    # `shape` must be a tuple: `(285)` is just the int 285, `(285,)` is a 1-tuple.
    input_layer_1 = keras.layers.Input(shape=(n_features,))
    input_layer = keras.layers.BatchNormalization()(input_layer_1)
    input_256 = keras.layers.Dense(256, activation='relu')(input_layer)

    # First residual stage (256 wide) and its 128-wide projection for the merge.
    # The extra 64/32-unit heads that used to be created here and after the
    # second stage never reached the output, so Keras dropped them from the
    # model anyway; they are no longer built.
    dense_out_1 = dense_block(input_256, 256)
    dense_out_1_128 = keras.layers.Dense(128, activation='relu')(dense_out_1)

    # Extra skip connection around the first stage, then a linear reduction to 128.
    dense_out_1_add = keras.layers.Add()([input_256, dense_out_1])
    dense_out_1_add = keras.layers.Dense(128)(dense_out_1_add)

    # Second residual stage (128 wide).
    dense_out_2 = dense_block(dense_out_1_add, 128)

    # Merge three 128-wide views: raw-input projection, stage 1 and stage 2.
    input_128 = keras.layers.Dense(128, activation='relu')(input_layer)
    dense_out_2_add = keras.layers.Add()([input_128, dense_out_1_128, dense_out_2])

    # Regularised classification head.
    output = keras.layers.Dropout(0.4)(dense_out_2_add)
    output = keras.layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.L2(0.001))(output)
    output = keras.layers.Dropout(0.4)(output)
    output = keras.layers.Dense(32, activation='relu', kernel_regularizer=keras.regularizers.L2(0.001))(output)
    output = keras.layers.Dropout(0.3)(output)
    output = keras.layers.Dense(16, activation='relu', kernel_regularizer=keras.regularizers.L2(0.001))(output)
    output = keras.layers.Dense(1, activation='sigmoid', bias_regularizer=keras.regularizers.L2(0.001))(output)

    return keras.Model(inputs=input_layer_1, outputs=output)

In [None]:
#K.clear_session()

In [None]:
# Train one model per fold. For fold k the rows with KFold == k are the validation
# set and the weights with the lowest val_loss are saved to model_checkpoint_{k}.hdf5.
BATCH_SIZE = 128
SHUFFLE_BUFFER = 100_000  # rows kept in the shuffle buffer (bounded to limit memory)
non_feature_cols = ['id', 'target', 'KFold']

for fold in range(1, FOLDS + 1):
    print(f"\n\nFold - {fold}\n\n")
    train = df[df['KFold'] != fold]
    val = df[df['KFold'] == fold]

    x_train = train.drop(columns=non_feature_cols).values
    y_train = train['target'].values
    x_val = val.drop(columns=non_feature_cols).values
    y_val = val['target'].values

    # Shuffle individual rows *before* batching. The previous batch(...).shuffle(100)
    # only reordered already-formed batches, so every epoch saw the same batch
    # contents; shuffling first gives freshly mixed batches each epoch.
    ds_train = (
        tf.data.Dataset.from_tensor_slices((x_train, y_train))
        .shuffle(SHUFFLE_BUFFER)
        .batch(BATCH_SIZE)
        .prefetch(tf.data.AUTOTUNE)
    )
    ds_val = (
        tf.data.Dataset.from_tensor_slices((x_val, y_val))
        .batch(BATCH_SIZE)
        .prefetch(tf.data.AUTOTUNE)
    )

    model = get_model()

    # Multiply the learning rate by 0.8 after one epoch without val_loss improvement.
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=1, verbose=1, factor=0.8, mode='min')

    # Keep only the weights of the best epoch (lowest val_loss) for this fold.
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=f'model_checkpoint_{fold}.hdf5',
        save_weights_only=True,
        monitor='val_loss',
        mode='min',
        save_best_only=True,
        verbose=1)

    # Stop after two consecutive epochs without val_loss improvement.
    early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=2, verbose=1)

    model.compile(optimizer=keras.optimizers.Adam(0.0003), loss='binary_crossentropy', metrics=['acc', keras.metrics.AUC()])
    with tf.device('/gpu:0'):
        model.fit(ds_train, epochs=10, validation_data=ds_val, callbacks=[early_stopping, reduce_lr, model_checkpoint_callback])

    # Release the graph/state of this fold before building the next model.
    K.clear_session()

In [None]:
import gc

In [None]:
# Drop the frames, arrays and datasets that the training loop left behind as
# notebook globals (they belong to the last fold) and trigger a garbage collection.
del train, val, x_train, x_val, y_train, y_val, ds_train, ds_val
gc.collect()

In [None]:
# Score the whole training frame with every fold checkpoint and store the result in
# columns fold_1 .. fold_{FOLDS}; these columns feed the blend-weight search below.
# NOTE(review): each fold model was trained on the rows with KFold != fold, so most of
# these predictions are in-sample rather than out-of-fold; the blend weights tuned on
# them may be optimistic.

# Exclude the prediction columns this cell writes, so it can be re-run safely:
# otherwise a second run would feed 285 + FOLDS columns into the 285-input model.
non_feature_cols = {'id', 'KFold', 'target'} | {f'fold_{k}' for k in range(1, FOLDS + 1)}
feature_cols = [col for col in df.columns if col not in non_feature_cols]
X = df[feature_cols].values

for fold in range(1, FOLDS + 1):
    print(f"Fold {fold} ...")
    model = get_model()
    model.load_weights(f'./model_checkpoint_{fold}.hdf5')
    # Inference only: a large batch avoids the very slow default of 32 rows per step.
    preds = model.predict(X, batch_size=4096)
    K.clear_session()
    df[f'fold_{fold}'] = preds.flatten()

In [None]:
# The dense feature matrix is no longer needed once the per-fold prediction
# columns are stored in df; release it.
del X
gc.collect()

In [None]:
# Preview the frame, now including the fold_1 .. fold_5 prediction columns.
df.head()

In [None]:
def objective(x):
    """Loss for the blend-weight search: ``1 - AUC`` of the weighted fold predictions.

    Args:
        x: 1-D sequence of raw blend weights, one per fold model. Entry ``k``
            weights column ``fold_{k+1}`` of the global ``df``, so the number of
            columns used follows ``len(x)``. The vector is rescaled to unit L1
            norm before use, so only the relative weights and signs matter.

    Returns:
        ``1 - ROC AUC`` of the blended scores against ``df['target']``
        (lower is better).
    """
    x = np.asarray(x, dtype=float)
    l1_norm = np.linalg.norm(x, 1)
    if l1_norm == 0:
        # All-zero weights would give 0/0 = NaN scores. A constant score has
        # AUC 0.5, so return the matching loss instead.
        return 0.5
    x = x / l1_norm

    # (n_rows, n_folds) matrix of per-fold predictions, in fold order.
    fold_cols = [f'fold_{k}' for k in range(1, len(x) + 1)]
    fold_preds = df[fold_cols].to_numpy()

    preds = fold_preds @ x
    target = df['target'].values

    fpr, tpr, thresholds = roc_curve(target, preds, pos_label=1)
    res_auc = auc(fpr, tpr)
    return 1 - res_auc

In [None]:
# Search for blend weights in [-1, 1] (one per fold model) that minimise 1 - AUC.
# Differential evolution is stochastic; the fixed seed makes the weights, and with
# them the submission, reproducible across runs.
bounds = [(-1, 1)] * FOLDS
result = differential_evolution(objective, bounds, seed=1234)
result

In [None]:
# Load the test set with the same float32 dtype used for the training frame.
test = pd.read_csv('../input/tabular-playground-series-oct-2021/test.csv',dtype='float32')
test.head()

In [None]:
# Predict the test set with every fold checkpoint; predictions[k] holds the
# flattened scores of the fold-(k+1) model.
X = test.drop(columns=['id']).values
predictions = []
for fold in range(1, FOLDS + 1):
    print(f"Fold {fold} ...")
    model = get_model()
    model.load_weights(f'./model_checkpoint_{fold}.hdf5')
    # Inference only: a large batch avoids the very slow default of 32 rows per step.
    preds = model.predict(X, batch_size=4096)
    K.clear_session()
    predictions.append(preds.flatten())

In [None]:
# Rescale the optimised weights to unit L1 norm (the same normalisation the
# objective applies during the search) and blend the per-fold test predictions.
weights = result.x / np.linalg.norm(result.x, 1)
weigths = weights  # misspelled legacy name, kept as an alias for any cell still using it
w_predictions = np.tensordot(weights, predictions, axes=(0, 0))

In [None]:
# Use the competition's sample submission as the template for the output file.
submission = pd.read_csv('../input/tabular-playground-series-oct-2021/sample_submission.csv')

In [None]:
# Overwrite the template's target column with the blended predictions.
# NOTE(review): the assignment is positional; it assumes sample_submission.csv
# lists rows in the same order as test.csv. Verify, or align on 'id'.
submission['target'] = w_predictions

In [None]:
# Quick visual check of the filled-in submission.
submission.head()

In [None]:
# Write the submission to the working directory without the pandas index column.
submission.to_csv('submission.csv', index=False)

In [None]:
# Render a clickable download link for the generated file in the notebook output.
from IPython.display import FileLink
FileLink('submission.csv')