In [None]:
import os

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import QuantileTransformer,  KBinsDiscretizer
from sklearn.model_selection import train_test_split
import warnings

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Read the training and test CSV files into DataFrames, then show the first
# training rows as the cell's output.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tabular-playground-series-sep-2021/train.csv',
        '../input/tabular-playground-series-sep-2021/test.csv',
    )
)
train.head()

In [None]:
# Per-row count of missing values, added as an extra feature before the
# imputer below fills the missing cells in.
train['nan_count'] = train.isna().sum(axis=1)
test['nan_count'] = test.isna().sum(axis=1)

# Every column except the row id and the target is a model input.
features = [c for c in train.columns if c not in ('id', 'claim')]

# Choose the train/validation rows by position *before* fitting any
# preprocessing. The split depends only on the number of rows and on
# random_state, so it selects the same rows as splitting the transformed frame.
train_pos, valid_pos = train_test_split(np.arange(len(train)),
                                        test_size=0.25,
                                        random_state=0)

# Median imputation -> rank-based mapping to a uniform distribution ->
# 128 equal-width ordinal bins.
# random_state pins the QuantileTransformer's internal subsampling so the
# learned quantiles are identical from run to run.
pipe = Pipeline([
    ('imputer', SimpleImputer(strategy='median', missing_values=np.nan)),
    ('scaler', QuantileTransformer(n_quantiles=128,
                                   output_distribution='uniform',
                                   random_state=0)),
    ('bin', KBinsDiscretizer(n_bins=128, encode='ordinal', strategy='uniform'))
])

# Fit on the training rows only, so validation rows do not influence the
# medians, quantiles or bin edges and the validation score stays an honest
# estimate.
pipe.fit(train.iloc[train_pos][features])
train[features] = pipe.transform(train[features])
test[features] = pipe.transform(test[features])

xtrain = train[features]
ytrain = train['claim']
xtest = test[features]

# Splitting the dataset into train and validation data.
X_train, X_valid = xtrain.iloc[train_pos], xtrain.iloc[valid_pos]
y_train, y_valid = ytrain.iloc[train_pos], ytrain.iloc[valid_pos]

In [None]:
# Standardise the features with StandardScaler. The scaling statistics are
# learned from the training split only and then reused for the validation and
# test sets.
scaler = StandardScaler()
scaler.fit(X_train)
xtrain, xvalid = (scaler.transform(part) for part in (X_train, X_valid))
# `test` is rebound here: from this point on it names the scaled feature
# matrix derived from `xtest`, not the DataFrame it held before.
test = scaler.transform(xtest)

In [None]:
# Seed TensorFlow's global random generator so that weight initialisation and
# dropout are repeatable between runs (some GPU ops may still add small
# run-to-run differences).
tf.random.set_seed(0)

# Number of input features; also used as the width of the first hidden layer.
input_shape = xtrain.shape[1]

# Small fully connected binary classifier: three ReLU hidden layers with layer
# normalisation, dropout before the output, and a sigmoid unit that emits the
# predicted probability.
model = keras.Sequential([
    layers.Dense(input_shape, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.LayerNormalization(axis=-1),
    layers.Dense(16, activation='relu'),
    layers.LayerNormalization(axis=-1),
    layers.Dropout(0.2),
    layers.Dense(1, activation='sigmoid'),
])

auc = keras.metrics.AUC(name='auc')

# The `lr` alias and the `decay` keyword are legacy optimizer arguments that
# current Keras optimizers no longer accept. InverseTimeDecay with
# decay_steps=1 reproduces the legacy rule
#     lr_t = lr / (1 + decay * step)
# and works on both old and new TensorFlow versions.
lr_schedule = keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=1e-2,
    decay_steps=1,
    decay_rate=0.001,
)
optimizer = keras.optimizers.RMSprop(learning_rate=lr_schedule, epsilon=1e-9)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[auc])

# Stop once validation loss has not improved for 5 epochs and roll the model
# back to the best epoch's weights.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True
)

# Training and validation arrays are both cast to float32 so the model sees
# one consistent dtype.
history = model.fit(
    x=np.asarray(xtrain, dtype=np.float32),
    y=np.asarray(y_train, dtype=np.float32),
    batch_size=1024,
    shuffle=True,
    epochs=20,
    validation_data=(
        np.asarray(xvalid, dtype=np.float32),
        np.asarray(y_valid, dtype=np.float32),
    ),
    callbacks=[callback],
)

In [None]:
# Fill the sample submission's `claim` column with the model's predicted
# probabilities and write it out.
# NOTE(review): this assumes the rows of sample_solution.csv are in the same
# order as the rows of the scaled test matrix -- confirm against test.csv ids.
submission_sample = pd.read_csv('../input/tabular-playground-series-sep-2021/sample_solution.csv')

# model.predict returns one column per output unit, i.e. shape (n_rows, 1)
# here; flatten it to 1-D so the column assignment does not rely on pandas
# accepting a 2-D single-column array.
test_pred = model.predict(np.asarray(test, dtype=np.float32))
submission_sample['claim'] = np.ravel(test_pred)
submission_sample.to_csv('submission.csv', index=False)