In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

import pandas as pd
import numpy as np
import gc

from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import StratifiedKFold

from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.models import *
import tensorflow as tf
from tensorflow import keras

In [None]:
# Number of cross-validation folds: passed as `n_splits` to StratifiedKFold and used
# as the divisor when averaging the per-fold test predictions.
fold = 5

# Memory Reduction

### Thanks a lot to the author of this notebook for sharing
- https://www.kaggle.com/hrshuvo/tps-oct-21-lgbm-kfold?scriptVersionId=76104876

In [None]:
def reduce_memory_usage(df, verbose=True):
    """Downcast numeric columns of ``df`` to the narrowest dtype covering their range.

    Integer columns are cast to the first of int8/int16/int32/int64 whose
    inclusive [min, max] range contains every value in the column. Float columns
    are cast to float16 or float32 when the column's min and max fit, otherwise
    to float64. Columns whose dtype is not one of the listed numeric dtypes are
    left untouched.

    NOTE: the float branch selects a dtype by value *range* only. Casting to
    float16/float32 can round the stored values; callers that need full
    precision should upcast before doing arithmetic.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. It is modified in place (columns are reassigned).
    verbose : bool, default True
        When true, print the final memory footprint and the percentage saved.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    numerics = ["int8", "int16", "int32", "int64", "float16", "float32", "float64"]
    # Candidates are ordered narrowest first; the first one that fits wins.
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    start_mem = df.memory_usage().sum() / 1024 ** 2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if str(col_type)[:3] == "int":
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                # Inclusive bounds: a value equal to the dtype's limit still fits.
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            # No candidate matches only when min/max are not comparable
            # (e.g. NaN from an empty column); the column is then left as is.
        else:
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                # Out of float32 range, or min/max is NaN/inf: keep full width.
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024 ** 2
    if verbose:
        # Guard the percentage against a zero-byte starting footprint.
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print(
            "Mem. usage decreased to {:.2f} Mb ({:.1f}% reduction)".format(
                end_mem, reduction
            )
        )
    return df

# Load Data

In [None]:
# Read the three competition CSVs in order (train, test, sample submission) and
# shrink each frame's dtypes immediately after it is loaded.
train, test, ss = (
    reduce_memory_usage(pd.read_csv(csv_path))
    for csv_path in (
        '../input/tabular-playground-series-oct-2021/train.csv',
        '../input/tabular-playground-series-oct-2021/test.csv',
        '../input/tabular-playground-series-oct-2021/sample_submission.csv',
    )
)
train

# Simple Feature Generation

In [None]:
def add_row_stats(frame, exclude=('id', 'target')):
    """Append per-row 'std', 'min' and 'max' columns computed from feature columns only.

    The columns named in ``exclude`` are left out of the statistics so that the
    identifier and the label do not leak into the features, and so that train
    (which has 'target') and test (which does not) are summarised over the same
    set of columns. Previously generated 'std'/'min'/'max' columns are also left
    out, which keeps each statistic independent of the others and makes
    re-running the cell produce the same values.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame to extend; the three columns are added in place.
    exclude : iterable of str, default ('id', 'target')
        Column names that must not contribute to the row statistics. Names that
        are absent from ``frame`` are simply ignored.

    Returns
    -------
    pandas.DataFrame
        The same ``frame`` object, now carrying 'std', 'min' and 'max'.
    """
    stat_names = ('std', 'min', 'max')
    skipped = set(exclude) | set(stat_names)
    feature_cols = [col for col in frame.columns if col not in skipped]

    # reduce_memory_usage may have stored features as float16; upcast the working
    # copy so the statistics are not computed in half precision.
    features = frame[feature_cols].astype(np.float32)

    # Compute all three from the same snapshot before assigning any of them.
    row_std = features.std(axis=1)
    row_min = features.min(axis=1)
    row_max = features.max(axis=1)

    frame['std'] = row_std
    frame['min'] = row_min
    frame['max'] = row_max
    return frame


train = add_row_stats(train)
test = add_row_stats(test)

In [None]:
# Keep the label aside, then drop the 'id' / 'target' columns to obtain the model inputs.
y = train['target']
train2 = train.drop(['id', 'target'], axis = 1)
test2 = test.drop('id', axis = 1)
train2

# Scaling

In [None]:
# Fit the scaler on the training features only, then apply it to both sets.
rs = RobustScaler().fit(train2)

# transform() yields NumPy arrays; store them as float32.
train2 = rs.transform(train2).astype(np.float32)
test2 = rs.transform(test2).astype(np.float32)

gc.collect()

# Reshape for LSTM

In [None]:
# Insert a length-1 middle axis: (rows, features) -> (rows, 1, features).
train2, test2 = (
    arr.reshape((-1, 1, arr.shape[-1]))
    for arr in (train2, test2)
)

# Build Model

In [None]:
def get_model():
    """Build a fresh, uncompiled recurrent binary classifier.

    The input shape is read from the last two dimensions of the module-level
    ``train2`` array at call time. The network is four bidirectional recurrent
    layers (two LSTM, two GRU, all returning sequences), followed by two
    Dense -> BatchNormalization -> Dropout stages and a single sigmoid unit.

    Returns
    -------
    Sequential
        A new model instance; the caller is responsible for compiling it.
    """
    stack = [
        Input(shape = train2.shape[-2:]),
        # Recurrent encoder, widest layer first.
        Bidirectional(LSTM(512, return_sequences = True)),
        Bidirectional(LSTM(256, return_sequences = True)),
        Bidirectional(GRU(128, return_sequences = True)),
        Bidirectional(GRU(64, return_sequences = True)),
        # Dense head.
        Dense(100, activation = 'selu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(50, activation = 'swish'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(1, activation = 'sigmoid'),
    ]
    return Sequential(stack)

### Plot Model

In [None]:
# Draw the architecture; get_model() is called here only to obtain an instance to plot.
tf.keras.utils.plot_model(get_model())

# Train & Predict

In [None]:
# Stratified K-fold training: one fresh model per fold, early-stopped on the fold's
# validation split; the test prediction is the mean of the per-fold predictions.
stk = StratifiedKFold(n_splits = fold, random_state = 42, shuffle = True)
results = 0  # running mean of test predictions (becomes an ndarray after fold 1)

for counter, (train_index, valid_index) in enumerate(stk.split(train2, y)):
    # split() yields row positions, so index the label Series positionally
    # instead of relying on its index labels happening to equal positions.
    x_train, y_train = train2[train_index], y.iloc[train_index]
    x_valid, y_valid = train2[valid_index], y.iloc[valid_index]

    model = get_model()

    model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['AUC'])

    # Stop after 3 epochs without improvement and roll back to the best weights.
    es = EarlyStopping(patience = 3,
                       verbose = 1,
                       restore_best_weights = True)

    print('------------ Fold', counter+1, 'Start! ------------')

    model.fit(x_train, y_train,
              validation_data = (x_valid, y_valid),
              callbacks = [es],
              batch_size = 256,
              epochs = 50)

    # Each fold contributes 1/fold of the final averaged prediction.
    results += model.predict(test2) / fold

    print('------------ Fold', counter+1, 'Done! ------------')

    # Release this fold's objects. The callback is deleted too because it keeps a
    # reference to the model, and clear_session() drops the global Keras state
    # that otherwise accumulates when models are created in a loop.
    del x_train, y_train, x_valid, y_valid, model, es
    tf.keras.backend.clear_session()
    gc.collect()

# Submit

In [None]:
# Start from the sample submission and overwrite its 'target' column with the
# fold-averaged predictions, flattened to one value per row.
sub = pd.read_csv(
    '../input/tabular-playground-series-oct-2021/sample_submission.csv'
).assign(target = results.squeeze())
sub.to_csv('sub.csv', index = False)
sub