![](https://storage.googleapis.com/kaggle-competitions/kaggle/28009/logos/header.png?)

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from sklearn.pipeline import Pipeline


import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Embedding,  Flatten
from tensorflow.keras.models import Model, Sequential
from keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers import RMSprop

from tensorflow.data import Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import QuantileTransformer,  KBinsDiscretizer
from tensorflow import keras
from sklearn import metrics
from sklearn.impute import SimpleImputer

from sklearn.model_selection import train_test_split

from sklearn.metrics import roc_auc_score
import warnings
warnings.filterwarnings('ignore')

# Load Dataset

In [None]:
%%time
train = pd.read_csv('../input/tabular-playground-series-nov-2021/train.csv')
test  = pd.read_csv('../input/tabular-playground-series-nov-2021/test.csv')
sub   = pd.read_csv('../input/tabular-playground-series-nov-2021/sample_submission.csv')

# Preprocessing

In [None]:
%%time
train['n_missing'] = train.isna().sum(axis=1)
test['n_missing'] = test.isna().sum(axis=1)
train['target'] = train['target'].astype(str)

features = [col for col in train.columns if col not in ['target', 'id']]
pipe = Pipeline([
        ('imputer', SimpleImputer(strategy='median',missing_values=np.nan)),
        ("scaler", QuantileTransformer(n_quantiles=64,output_distribution='uniform')),
        ('bin', KBinsDiscretizer(n_bins=64, encode='ordinal',strategy='uniform'))
        ])
train[features] = pipe.fit_transform(train[features])
test[features] = pipe.transform(test[features])

# Modeling

In [None]:
# Each input column is an ordinal bin id, so it is looked up in a shared
# 64-row embedding table (4 dims per feature), flattened, and fed to a small
# MLP that ends in a single sigmoid unit.
model = Sequential([
    Input(train[features].shape[1:]),
    Embedding(input_dim=64, output_dim=4),
    Flatten(),
    Dense(64,  activation='relu'),
    Dropout(0.5),
    Dense(32,  activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

auc = tf.keras.metrics.AUC(name='aucroc')

INITIAL_LR = 5e-4
BATCH_SIZE = 1024  # keep in sync with the batch_size passed to model.fit
STEPS_PER_EPOCH = (len(train) + BATCH_SIZE - 1) // BATCH_SIZE  # ceil division

# ExponentialDecay is indexed by optimizer *step*:
#   lr(step) = INITIAL_LR * decay_rate ** (step / decay_steps)
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=INITIAL_LR,
        decay_steps = 900,
        decay_rate= 0.9)


def epoch_learning_rate(epoch, lr=None):
    """Return the learning rate to use for `epoch` as a plain float.

    LearningRateScheduler calls its schedule with the epoch index (and the
    current rate), whereas `lr_schedule` expects an optimizer step. Passing the
    schedule object straight to the callback would therefore count
    `decay_steps` in epochs. The epoch is converted to the step at which it
    starts so the decay follows the step-based curve.

    `lr` (the current rate supplied by the callback) is accepted but unused.
    """
    return float(lr_schedule(epoch * STEPS_PER_EPOCH))


callback = tf.keras.callbacks.LearningRateScheduler(epoch_learning_rate)

# `learning_rate` replaces the deprecated `lr` keyword; the former `decay=0.0`
# was a no-op and is dropped because newer optimizers reject the argument.
optimizer = RMSprop(learning_rate=INITIAL_LR, rho=0.9, epsilon=1e-08)

model.compile(loss='binary_crossentropy', optimizer = optimizer, metrics=[auc])


In [None]:
# Cast the binned features and the label to float32 arrays for Keras.
train_inputs = np.float32(train[features])
train_labels = np.float32(train.target)

# Train on the full training set (no validation split), reshuffling each epoch;
# `callback` adjusts the learning rate at the start of every epoch.
model.fit(
    x=train_inputs,
    y=train_labels,
    epochs=100,
    batch_size=1024,
    shuffle=True,
    callbacks=[callback],
)

In [None]:
# The network ends in a single sigmoid unit, so predict() yields one column;
# take that column explicitly so a 1-D vector is assigned to `target`.
sub['target'] = model.predict(np.float32(test[features]))[:, 0]

# Write the frame without its positional index rather than re-indexing `sub`
# by 'id' in place: the file has the same columns, and the cell stays
# re-runnable (set_index('id') raises once 'id' is no longer a column).
sub.to_csv('submission.csv', index=False)