In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time 
import gc

from sklearn.model_selection import GroupKFold, KFold, cross_val_score, train_test_split
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from sklearn.preprocessing import StandardScaler
from scipy.stats import reciprocal
from sklearn.model_selection import RandomizedSearchCV

import lightgbm as lgb
import xgboost as xgb
import tensorflow as tf
from tensorflow import keras

# ROC-AUC wrapped as a scikit-learn scorer. With make_scorer's default
# arguments the metric is computed from the estimator's `predict()` output.
# NOTE(review): `scorer` is not referenced in the cells visible here — confirm
# it is still needed.
scorer = make_scorer(metrics.roc_auc_score)

In [None]:
# Read the three competition CSVs (train, test, sample submission) in order.
train, test, sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/tabular-playground-series-nov-2021/train.csv",
        "../input/tabular-playground-series-nov-2021/test.csv",
        "../input/tabular-playground-series-nov-2021/sample_submission.csv",
    )
)

In [None]:
# Hold out 10% of the training rows for validation; the seed is fixed so the
# split is the same on every run.
X_train, X_val, y_train, y_val = train_test_split(
    train.drop(columns=['id', 'target']),
    train['target'],
    test_size=0.1,
    random_state=0,
)

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same transform to the training, validation and test features.
sc = StandardScaler().fit(X_train)

X_train, X_val, X_test = (
    sc.transform(features)
    for features in (X_train, X_val, test.drop(columns='id'))
)

In [None]:
def build_model(n_hidden=2, n_neurons=16, learning_rate=1e-4, decay=1e-4, input_shape=X_train.shape[1:]):
    """Assemble and compile a dense network with a single sigmoid output.

    Layer stack: input -> Dense(n_neurons, elu) -> Dropout(0.1) ->
    n_hidden x Dense(n_neurons, relu) -> Dense(1, sigmoid).

    Args:
        n_hidden: number of ReLU dense layers placed after the dropout layer.
        n_neurons: width of every dense layer except the output layer.
        learning_rate: learning rate handed to the Adam optimizer.
        decay: accepted for interface compatibility; not used in the body.
        input_shape: shape of one input sample. The default is read from the
            module-level `X_train` at the moment this function is defined.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, AUC metric).
    """
    stack = [
        keras.layers.InputLayer(input_shape=input_shape),
        keras.layers.Dense(n_neurons, activation='elu'),
        keras.layers.Dropout(rate=0.1),
    ]
    stack.extend(
        keras.layers.Dense(n_neurons, activation="relu") for _ in range(n_hidden)
    )
    stack.append(keras.layers.Dense(1, activation="sigmoid"))

    tf_model = keras.models.Sequential(stack)
    tf_model.compile(
        loss="binary_crossentropy",
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        metrics=[tf.keras.metrics.AUC()],
    )
    return tf_model
# Wrap the builder so scikit-learn's search utilities can clone and fit it.
# NOTE(review): the network ends in a sigmoid but is wrapped as a regressor,
# presumably so that `predict` returns probabilities rather than class labels
# — confirm this is intentional.
keras_model = keras.wrappers.scikit_learn.KerasRegressor(build_model)

# Search space: depth, layer width, and a log-uniform learning rate.
param_distribs = {
    "n_hidden": [2,3,4],
    "n_neurons": np.arange(8, 32),
    "learning_rate": reciprocal(1e-4, 5e-2),
}

# `random_state` pins which 5 candidates are sampled so the search can be
# repeated; network weight initialisation is still unseeded, so scores may
# vary slightly between runs.
rnd_search_cv = RandomizedSearchCV(keras_model, param_distribs, n_iter=5, cv=5, random_state=0)
# Extra keyword arguments are forwarded to the Keras `fit` call; early stopping
# watches the held-out validation split.
rnd_search_cv.fit(X_train, y_train,
                  verbose=0, 
                  epochs=1000, 
                  batch_size=512,
                  validation_data=(X_val, y_val),
                  callbacks=[keras.callbacks.EarlyStopping(patience=10)])

In [None]:
# Hyperparameter combination selected by the randomized search.
rnd_search_cv.best_params_

In [None]:
# Mean cross-validated score of the selected combination. No `scoring` was
# passed to the search, so this is the wrapped estimator's own `score`.
rnd_search_cv.best_score_

In [None]:
# K-fold training with the tuned hyperparameters, followed by an averaged
# prediction on the test set that is written out as the submission file.
scores = []
feature_importance = pd.DataFrame()
models = []
scalers = []  # one fitted StandardScaler per fold, aligned with `models`
columns = [col for col in train.columns if col not in ['id', 'target']]
X = train[columns]
y = train['target']
k_split = 5

folds = KFold(n_splits=k_split)
for fold_n, (train_index, valid_index) in enumerate(folds.split(X, y)):
    print(f'Fold {fold_n} started at {time.ctime()}')
    X_train, X_valid = X.iloc[train_index], X.iloc[valid_index]
    y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]

    # The hyperparameters were tuned on standardized inputs, so standardize
    # here as well. The scaler is fitted on this fold's training rows only to
    # keep validation rows out of the statistics.
    fold_scaler = StandardScaler().fit(X_train)
    X_train_scaled = fold_scaler.transform(X_train)
    X_valid_scaled = fold_scaler.transform(X_valid)

    keras_model = keras.wrappers.scikit_learn.KerasRegressor(build_model, **rnd_search_cv.best_params_)
    keras_model.fit(X_train_scaled, y_train, validation_data=(X_valid_scaled, y_valid),
                    verbose=2, epochs=1000, batch_size=512,
                    callbacks=[keras.callbacks.EarlyStopping(patience=5)])
    score = metrics.roc_auc_score(y_valid, keras_model.predict(X_valid_scaled))

    scalers.append(fold_scaler)
    models.append(keras_model)
    scores.append(score)

print(f'Mean CV AUC: {np.mean(scores):.5f}')

# Average the test predictions of every fold model. Start from zeros rather
# than from the values already present in the sample submission, and use each
# fold's own model and scaler (not just the last one trained).
test_preds = np.zeros(len(test))
for fold_scaler, model in zip(scalers, models):
    test_preds += np.ravel(model.predict(fold_scaler.transform(test[columns])))

# Work on a copy so the loaded sample submission is left untouched.
keras_sub = sub.copy()
keras_sub['target'] = test_preds / len(models)

keras_sub.to_csv('keras_submission.csv',index=False)