In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
import tensorflow as tf
from tensorflow.keras.callbacks import ReduceLROnPlateau, LearningRateScheduler, EarlyStopping
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read the competition's train and test tables (train first, then test)
# from the Kaggle input directory.
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/tabular-playground-series-may-2022/train.csv",
        "../input/tabular-playground-series-may-2022/test.csv",
    )
)

In [None]:
# Report the (rows, columns) shape of each loaded table.
for split_name, frame in (("train", train_data), ("test", test_data)):
    print(f"the shape of {split_name} data is : {frame.shape}")

In [None]:
# Feature engineering adapted from the notebook created by AMBROSM
# Link of the notebook : https://www.kaggle.com/code/ambrosm/tpsmay22-keras-quickstart


def add_engineered_features(df):
    """Add the f_27-derived and interaction columns to ``df`` in place.

    Columns written:
      * ``ch0`` .. ``ch9``: offset from ``'A'`` of each of the first ten
        characters of the string column ``f_27``.
      * ``unique_characters``: number of distinct characters in ``f_27``.
      * ``i_02_21``, ``i_05_22``, ``i_00_01_26``: ternary (-1 / 0 / +1)
        indicators of whether a sum of float columns exceeds an upper
        threshold or falls below a lower one.

    Returns the same frame so calls can be chained. Re-applying the function
    overwrites the columns with identical values.
    """
    # Extract the 10 letters of f_27 into individual features
    for i in range(10):
        df[f'ch{i}'] = df.f_27.str.get(i).apply(ord) - ord('A')

    # unique_characters feature is from https://www.kaggle.com/code/cabaxiom/tps-may-22-eda-lgbm-model
    df["unique_characters"] = df.f_27.apply(lambda s: len(set(s)))

    # Feature interactions: create three ternary features
    # Every ternary feature can have the values -1, 0 and +1
    df['i_02_21'] = (df.f_21 + df.f_02 > 5.2).astype(int) - (df.f_21 + df.f_02 < -5.3).astype(int)
    df['i_05_22'] = (df.f_22 + df.f_05 > 5.1).astype(int) - (df.f_22 + df.f_05 < -5.4).astype(int)
    i_00_01_26 = df.f_00 + df.f_01 + df.f_26
    df['i_00_01_26'] = (i_00_01_26 > 5.0).astype(int) - (i_00_01_26 < -5.0).astype(int)
    return df


for df in [train_data, test_data]:
    add_engineered_features(df)

# Column groups, computed once after the engineered columns exist.
# The pandas dtype predicates are used instead of `dtype == int` / `== float`
# because NumPy's default integer width is platform dependent, which can make
# the equality test silently miss int64 columns.
features = [f for f in test_data.columns if f != 'id' and f != 'f_27']
float_features = [f for f in features if pd.api.types.is_float_dtype(test_data[f])]
int_features = [f for f in features
                if pd.api.types.is_integer_dtype(test_data[f]) and f.startswith('f')]
ch_features = [f for f in features if f.startswith('ch')]

In [None]:
# Preview the first five training rows, including the engineered columns.
train_data.head()

In [None]:
# Preview the first five test rows, including the engineered columns.
test_data.head()

In [None]:
# Model inputs: every training column except the row id, the label and the
# raw f_27 string (already expanded into the ch*/unique_characters columns).
useful_features = train_data.columns.drop(["id", "target", "f_27"]).tolist()

In [None]:
# Feature matrix and label vector taken from the training table.
X = train_data.loc[:, useful_features]
y = train_data.loc[:, "target"]

# Hold out 30% of the rows for validation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    random_state=30,
    test_size=0.3,
)

In [None]:
# Fully connected binary classifier: five ReLU hidden layers, each with an
# L2 kernel penalty of 30e-6, followed by a single sigmoid output unit.
hidden_units = (128, 64, 64, 64, 16)

model = tf.keras.Sequential()
for units in hidden_units:
    model.add(
        tf.keras.layers.Dense(
            units,
            activation="relu",
            kernel_regularizer=tf.keras.regularizers.l2(30e-6),
        )
    )
model.add(tf.keras.layers.Dense(1, activation="sigmoid"))

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and AUC
# reported alongside the loss for both the training and validation data.
model.compile(optimizer="adam",
              loss=tf.keras.losses.BinaryCrossentropy(),
              # `metrics` is documented as a list of metrics; passing a bare
              # metric object is not accepted by every Keras version.
              metrics=[tf.keras.metrics.AUC()])

In [None]:
# Multiply the learning rate by 0.7 whenever val_loss has not improved for
# 4 consecutive epochs.
lr = ReduceLROnPlateau(
    monitor="val_loss",
    patience=4,
    factor=0.7,
    verbose=0,
)

# Stop once val_loss has not improved for 12 epochs and roll the model back
# to the weights of the best epoch seen.
es = EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=12,
    restore_best_weights=True,
    verbose=1,
)

# TerminateOnNaN aborts training as soon as the loss becomes NaN.
CALLBACKS = [lr, es, tf.keras.callbacks.TerminateOnNaN()]

In [None]:
# Scaling the features using Standard Scaler
#
# The raw partitions are rebuilt from the untouched feature matrix `X` using
# the row index that train_test_split kept on y_train / y_valid. Because this
# cell overwrites X_train / X_valid with their scaled versions, reading them
# back as input would make a second execution fit the scaler on data that is
# already standardised; deriving from `X` keeps the cell safe to re-run.
train_rows = X.loc[y_train.index]
valid_rows = X.loc[y_valid.index]

# Statistics come from the training partition only, then are applied to both.
scaler = StandardScaler().fit(train_rows)
X_train = scaler.transform(train_rows)
X_valid = scaler.transform(valid_rows)

In [None]:
# Train for at most 200 epochs; the callbacks adjust the learning rate and
# may stop the run early based on the validation loss.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_valid, y_valid),
    epochs=200,
    batch_size=2048,
    callbacks=CALLBACKS,
)

In [None]:
# One column per quantity Keras logged during training, one row per epoch.
histoy_df = pd.DataFrame.from_dict(model.history.history)

# Draw every logged column as a line on a single set of axes.
histoy_df.plot(figsize=(15, 10))

In [None]:
# Score the test set with the scaler that was fitted on the training split.
# Calling fit_transform here would re-estimate the mean/variance from the
# test rows, so the network would receive inputs standardised differently
# from the ones it was trained on. Selecting columns via useful_features
# guarantees the same columns, in the same order, as the training matrix.
model_predictions = model.predict(scaler.transform(test_data[useful_features]))

In [None]:
# Row identifiers for the submission file.
ids = test_data.loc[:, "id"]

# Collapse the prediction array into a flat 1-D copy.
target = model_predictions.flatten()

In [None]:
# Assemble the submission table: the id column first, then the predicted value.
nn_output = pd.DataFrame({"id": ids}).assign(target=target)
nn_output

In [None]:
# Write the submission file to the working directory, omitting the index column.
nn_output.to_csv(path_or_buf="nn_output_submission5.csv", index=False)