In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from tensorflow import keras
import tensorflow
import matplotlib.pyplot as plt
import gc

# Fix TensorFlow's global random seed. Only TensorFlow is seeded here; the
# split further down passes its own `random_state`.
tensorflow.random.set_seed(3)
# Clear Keras' global session state before any model is built in this kernel.
keras.backend.clear_session()

In [None]:
# Read the train, test and sample-submission tables, in that order.
train_full, X_test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tabular-playground-series-mar-2021/train.csv',
        '../input/tabular-playground-series-mar-2021/test.csv',
        '../input/tabular-playground-series-mar-2021/sample_submission.csv',
    )
)

In [None]:
# Show the (rows, columns) shape of both tables, one per output line.
print(
    f'Train shape:{train_full.shape}',
    f'Test shape:{X_test.shape}',
    sep='\n',
)

In [None]:
# Separate the label column from everything else.
y = train_full['target']
X = train_full.drop(columns=['target']).copy()

# The combined frame is not needed past this point: drop the name and ask the
# garbage collector to reclaim the memory.
del train_full
gc.collect()

In [None]:
# Group feature columns by a substring of their name: names containing 'cont'
# go to one list, names containing 'cat' to the other. Columns matching
# neither substring end up in neither list.
continues_cols = [col for col in X.columns if 'cont' in col]
category_cols = [col for col in X.columns if 'cat' in col]

In [None]:
# Hold out 10% of the rows for validation, stratified on the target, with a
# fixed random_state so the split is the same on every run.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.1,
    stratify=y,
    random_state=3,
)

In [None]:
# Categorical columns left out of the encoding because they have many
# categories. Kept in a single list so the train and validation frames are
# guaranteed to drop exactly the same columns.
high_cardinality_cols = ['cat3', 'cat5', 'cat6', 'cat7', 'cat8', 'cat10']

# 'cont' columns are passed through unchanged.
X_train_cont = X_train[continues_cols]
X_valid_cont = X_valid[continues_cols]

# 'cat' columns, minus the high-cardinality ones.
X_train_cat = X_train[category_cols].drop(columns=high_cardinality_cols)
X_valid_cat = X_valid[category_cols].drop(columns=high_cardinality_cols)

In [None]:
import seaborn as sns

# Pass the column via the `x=` keyword. From seaborn 0.12 onward the only
# positional argument of countplot is `data`, so a positionally-passed Series
# is no longer interpreted as the variable to count.
sns.countplot(x=X_train_cat['cat0'])
plt.show()

In [None]:
from sklearn.preprocessing import OneHotEncoder

# handle_unknown='ignore': a category that appears in validation/test data but
# was absent from the training split is encoded as an all-zero group instead
# of making transform() raise (the encoder's default is 'error').
onehot = OneHotEncoder(handle_unknown='ignore')

# Fit on the training split only, then reuse the fitted encoder for validation.
# The encoder output is sparse; .toarray() densifies it so it can be
# concatenated with the other feature columns.
X_train_cat_tr = onehot.fit_transform(X_train_cat).toarray()
X_valid_cat_tr = onehot.transform(X_valid_cat).toarray()

In [None]:
# Place the 'cont' columns and the one-hot columns side by side.
# NOTE: this rebinds X_train / X_valid from DataFrames to NumPy arrays.
X_train = np.hstack((X_train_cont, X_train_cat_tr))
X_valid = np.hstack((X_valid_cont, X_valid_cat_tr))

In [None]:
def _elu_dense(units):
    """Return a Dense layer of `units` neurons with ELU activation and he_normal init."""
    return keras.layers.Dense(units, activation='elu', kernel_initializer='he_normal')


# Fully-connected stack: six hidden layers, dropout, a 10-unit layer and a
# single sigmoid output. The input width is the number of feature columns.
model = keras.models.Sequential([
    keras.layers.Input(shape=[X_train.shape[1]]),
    *[_elu_dense(width) for width in (200, 300, 500, 300, 100, 50)],
    keras.layers.Dropout(0.2),
    _elu_dense(10),
    keras.layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Binary cross-entropy loss with the Adam optimizer; no extra metrics tracked.
model.compile(
    optimizer='adam',
    loss=keras.losses.binary_crossentropy,
)

In [None]:
# Keep only the best model seen so far on disk.
checkpoint_cb = keras.callbacks.ModelCheckpoint('best_nn.h5', save_best_only=True)
# Stop training after 30 epochs without improvement.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=30)

callbacks = [checkpoint_cb, early_stopping_cb]

In [None]:
# Train for at most 100 epochs, evaluating on the held-out split each epoch;
# the callbacks handle checkpointing and early stopping.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=callbacks,
)

In [None]:
# Apply to the test table the same preparation used for train/validation:
# select the 'cont' and 'cat' columns, drop the same categorical columns, and
# encode with the already-fitted encoder.
X_test_cont = X_test[continues_cols]
X_test_cat = X_test[category_cols].drop(
    columns=['cat3', 'cat5', 'cat6', 'cat7', 'cat8', 'cat10']
)

X_test_cat_tr = onehot.transform(X_test_cat).toarray()

# NOTE: this rebinds X_test from a DataFrame to a NumPy array, so the cell
# cannot be re-run without reloading the test CSV first.
X_test = np.hstack((X_test_cont, X_test_cat_tr))

In [None]:
# Reload the checkpoint written during training rather than using the weights
# from the last epoch.
model = keras.models.load_model('best_nn.h5')

# predict() returns a 2-D array (one column per output unit). Flatten it to
# 1-D explicitly instead of relying on pandas to coerce an (n, 1) array into a
# single column.
submission['target'] = model.predict(X_test).ravel()

In [None]:
# Write every column of the submission frame, without the row index.
# `index` is documented as a boolean, so pass False rather than None.
submission.to_csv('submission.csv', index=False)

In [None]:
# Read the written submission back from disk as a sanity check.
pred = pd.read_csv('./submission.csv')

# Histogram of the values in the 'target' column.
pred['target'].hist()