## Exploratory Data Analysis

In [None]:
import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn

In [None]:
# Load the raw training CSV with the default integer index (`id` stays a regular column).
df = pd.read_csv('/kaggle/input/tabular-playground-series-may-2022/train.csv')

In [None]:
# Summary statistics of the raw frame.
df.describe()

In [None]:
# (rows, columns) of the raw frame.
df.shape

In [None]:
# Read the same CSV again, this time using the `id` column as the index.
df_train = pd.read_csv('/kaggle/input/tabular-playground-series-may-2022/train.csv', index_col='id')
df_train.shape

In [None]:
# Summary statistics; `id` is now the index, so it is no longer summarised as a column.
df_train.describe()

In [None]:
# First six values of feature f_05.
df_train.f_05.head(6)

In [None]:
# Pairwise plots of features f_00 .. f_05, coloured by the value of `target`.
variables = [f'f_{idx:02d}' for idx in range(6)]
sns.pairplot(data=df_train, vars=variables, hue="target")

## Binary Classification

In [None]:
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

In [None]:
# Last row of the correlation matrix of the numeric columns
# (presumably the `target` row, if `target` is the last column of the CSV).
# Numeric columns are selected explicitly: df_train also holds the string
# column f_27, and pandas >= 2.0 raises in corr() on non-numeric data instead
# of silently skipping it.
df_train.select_dtypes(['number']).corr().tail(1)

In [None]:
# Heat-map of the correlation matrix of the numeric columns.
# The matrix is computed once from the numeric sub-frame and its own column
# labels are used for the ticks, so matrix and labels always agree. Calling
# corr() on the full frame would raise on the string column f_27 under
# pandas >= 2.0.
corr_matrix = df_train.select_dtypes(['number']).corr()
tick_positions = range(len(corr_matrix.columns))

f = plt.figure(figsize=(20, 20))
plt.matshow(corr_matrix, fignum=f.number)
plt.xticks(tick_positions, corr_matrix.columns)
plt.yticks(tick_positions, corr_matrix.columns)
cb = plt.colorbar()
plt.title('Correlation Matrix');

In [None]:
# Load the test CSV; no index_col is given, so `id` is kept as a regular column.
df_test = pd.read_csv("../input/tabular-playground-series-may-2022/test.csv")


In [None]:
# Summary statistics of the test frame.
df_test.describe()

In [None]:
# First six rows of the test frame.
df_test.head(6)

In [None]:
# Turn the string column f_27 into numeric features on both frames (in place):
#   ch0 .. ch9         - alphabet offset of the character at each of the first
#                        ten positions ('A' -> 0)
#   unique_characters  - number of distinct characters in the string
# Note: the loop variable rebinds the notebook-level name `df`.
base = ord('A')
for df in (df_train, df_test):
    f27 = df['f_27']
    for pos in range(10):
        df[f'ch{pos}'] = f27.str[pos].apply(ord) - base
    df["unique_characters"] = f27.apply(lambda text: len(set(text)))

# Model inputs: every column of the test frame except the row id and the raw string.
features = [col for col in df_test.columns if col not in ('id', 'f_27')]

In [None]:
# Labels as a NumPy array; inputs restricted to (and ordered by) `features`.
Y_train = df_train['target'].to_numpy()
X_train = df_train.drop(columns=['target'])[features]
X_test = df_test[features].copy()

In [None]:
# Standardise the features: the scaler is fitted on the training inputs only
# and the fitted scaler is then applied to both train and test.
StSc = StandardScaler()
StSc.fit(X_train)
X_train = StSc.transform(X_train)
X_test = StSc.transform(X_test)

In [None]:
# Report the shapes of the scaled train / test matrices.
for label, matrix in (("X_Train Shape : ", X_train), ("X_test shape : ", X_test)):
    print(label, matrix.shape)

In [None]:
# Fully connected binary classifier: four swish hidden layers (82, 82, 82, 41
# units), each with an L2 kernel penalty, followed by one sigmoid output unit.
L2 = 0.000003

# The input width is read from the prepared matrix instead of being hard-coded,
# and the shape is passed as a tuple through `shape=`: a bare positional integer
# is not accepted as a shape by newer Keras versions.
n_features = X_train.shape[1]
hidden_units = (82, 82, 82, 41)

model_class = tf.keras.models.Sequential(
    [tf.keras.layers.Input(shape=(n_features,))]
    + [
        tf.keras.layers.Dense(
            units,
            kernel_regularizer=tf.keras.regularizers.l2(L2),
            activation='swish',
        )
        for units in hidden_units
    ]
    + [tf.keras.layers.Dense(1, activation='sigmoid')]
)

In [None]:
# Compile the classifier: binary cross-entropy loss, Adam with its default
# settings, and accuracy / precision / recall tracked during training.
loss = tf.keras.losses.BinaryCrossentropy()
opt = tf.keras.optimizers.Adam()
tracked_metrics = [
    tf.keras.metrics.BinaryAccuracy(),
    tf.keras.metrics.Precision(),
    tf.keras.metrics.Recall(),
]
model_class.compile(loss=loss, optimizer=opt, metrics=tracked_metrics)

In [None]:
# Stop training once val_loss has gone 6 epochs without improving, and roll the
# model back to the best weights seen.
earlystopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='auto',
    patience=6,
    baseline=None,
    restore_best_weights=True,
    verbose=0,
)

# Halve the learning rate whenever val_loss has gone 2 epochs without improving.
LR = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    mode='auto',
    factor=0.5,
    patience=2,
    verbose=0,
)

In [None]:
# Train with 10% of the rows held out for validation (validation_split).
#
# steps_per_epoch is deliberately left at its default: X_train is an in-memory
# array, so Keras derives the number of batches per epoch from its length and
# batch_size. A hard-coded step count only matches one particular dataset size;
# otherwise an "epoch" (and with it the patience of both callbacks) no longer
# corresponds to one pass over the training split.
# max_queue_size (a generator-only option that newer Keras versions no longer
# accept in fit) and the default validation_data=None are dropped as well.
history = model_class.fit(
    x=X_train,
    y=Y_train,
    batch_size=500,
    epochs=100,
    verbose=1,
    callbacks=[LR, earlystopping],
    validation_split=0.1,
    validation_freq=1,
)

In [None]:
# Per-epoch accuracy recorded by fit() for the training and validation splits.
acc_train = history.history['binary_accuracy']
acc_val = history.history['val_binary_accuracy']

epochs = range(len(acc_train))

# Open the figure *before* drawing. Calling plt.figure() after the plot calls
# (as before) created a second, empty figure that was shown with the curves.
plt.figure()
plt.plot(epochs, acc_train, 'r', label='Training')
plt.plot(epochs, acc_val, 'b', label='Validation')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Binary accuracy')
plt.legend(loc=0)
plt.show()

In [None]:
# Score the scaled test matrix with the sigmoid model, attach the scores to the
# test frame, and write the id/target pairs as the submission file.
pred = model_class.predict(X_test)
df_test['target'] = pred
submit = df_test.loc[:, ['id', 'target']]
submit.to_csv("Binary_Subsission.csv", index=False)

## Neural Network

In [None]:
import keras 
from keras.models import Sequential
from keras import layers
from tensorflow.keras.utils import to_categorical
from keras import models
from sklearn.preprocessing import StandardScaler
from sklearn import preprocessing
from tensorflow.keras import regularizers

In [None]:
# Reload the training data (`id` kept as a column) and add three ternary
# interaction flags: +1 when a feature sum is above the upper threshold,
# -1 when it is below the lower threshold, 0 otherwise.
df_train = pd.read_csv('/kaggle/input/tabular-playground-series-may-2022/train.csv')

sum_02_21 = df_train['f_21'] + df_train['f_02']
df_train['i_02_21'] = (sum_02_21 > 5.2).astype(int) - (sum_02_21 < -5.3).astype(int)

sum_05_22 = df_train['f_22'] + df_train['f_05']
df_train['i_05_22'] = (sum_05_22 > 5.1).astype(int) - (sum_05_22 < -5.4).astype(int)

i_00_01_26 = df_train['f_00'] + df_train['f_01'] + df_train['f_26']
df_train['i_00_01_26'] = (i_00_01_26 > 5.0).astype(int) - (i_00_01_26 < -5.0).astype(int)

In [None]:
# Reload the test data (`id` kept as a column) and add the same three ternary
# interaction flags as on the training frame: +1 above the upper threshold,
# -1 below the lower threshold, 0 otherwise.
df_test = pd.read_csv('/kaggle/input/tabular-playground-series-may-2022/test.csv')

sum_02_21 = df_test['f_21'] + df_test['f_02']
df_test['i_02_21'] = (sum_02_21 > 5.2).astype(int) - (sum_02_21 < -5.3).astype(int)

sum_05_22 = df_test['f_22'] + df_test['f_05']
df_test['i_05_22'] = (sum_05_22 > 5.1).astype(int) - (sum_05_22 < -5.4).astype(int)

i_00_01_26 = df_test['f_00'] + df_test['f_01'] + df_test['f_26']
df_test['i_00_01_26'] = (i_00_01_26 > 5.0).astype(int) - (i_00_01_26 < -5.0).astype(int)

In [None]:
# Load the sample submission; its `target` column is overwritten with predictions later.
df_sample_sub = pd.read_csv('/kaggle/input/tabular-playground-series-may-2022/sample_submission.csv')

In [None]:
# First six training rows, including the new interaction columns.
df_train.head(6)

In [None]:
# First six test rows, including the new interaction columns.
df_test.head(6)

In [None]:
# Column labels of the test frame.
df_test.columns

In [None]:
# Keep the label as its own Series and remove it from the feature frame.
y_train = df_train['target']
df_train = df_train.drop(columns=['target'])

In [None]:
def latter_counter(data):
    """Add one count column per capital letter A-Z based on the 'f_27' strings.

    For each letter, a column named after that letter is written to ``data``
    holding how many times the letter occurs in the row's 'f_27' value.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a string column 'f_27'. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with columns 'A' .. 'Z' added (in that order).
    """
    text = data['f_27']
    for code in range(ord('A'), ord('Z') + 1):
        letter = chr(code)
        data[letter] = text.str.count(letter)

    return data

In [None]:
def zero_remover(data):
    """Drop those of the columns 'U' .. 'Z' whose values sum to zero.

    Only the six columns U, V, W, X, Y and Z are inspected; each must be
    present in ``data``. The decision is made from ``data`` alone, so two
    frames passed separately can end up with different columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the columns 'U' .. 'Z'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        ``data`` itself when nothing is dropped, otherwise a new frame
        without the all-zero columns.
    """
    all_zero = [letter for letter in 'UVWXYZ' if data[letter].sum() == 0]
    if not all_zero:
        return data

    return data.drop(columns=all_zero)

In [None]:
def position_adder(data):
    """Add columns 'pos0' .. 'pos9' encoding the first ten characters of 'f_27'.

    Each new column holds the code point of the character at that position
    minus 75 (75 is the code point of 'K', so 'K' maps to 0 and 'A' to -10).

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a string column 'f_27'. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with the ten position columns added.
    """
    text = data['f_27']
    for position in range(10):
        data[f'pos{position}'] = text.str.get(position).apply(ord) - 75

    return data

In [None]:
# Add the per-letter count columns (A-Z) derived from f_27 to both frames.
df_train = latter_counter(df_train)
df_test = latter_counter(df_test)
df_train.head(6)

In [None]:
# Drop the U-Z count columns that are all zero; the check is done per frame.
# NOTE(review): train and test are filtered independently, so this assumes the
# same letters are absent from both — confirm the resulting columns match.
df_train = zero_remover(df_train)
df_test = zero_remover(df_test)
df_train.head(6)

In [None]:
# Add the pos0-pos9 character-code columns derived from f_27 to both frames.
df_train = position_adder(df_train)
df_test = position_adder(df_test)
df_train.head(6)

In [None]:
# Column labels after feature engineering.
df_train.columns

In [None]:
# First six raw f_27 strings.
df_train['f_27'].head(6)

In [None]:
# The raw string column is no longer needed once the derived columns exist.
df_train = df_train.drop('f_27',axis=1)
df_test = df_test.drop('f_27',axis=1)

In [None]:
# Column labels after removing f_27.
df_train.columns

In [None]:
# Move `id` from the columns into the index on both frames, then remember the
# remaining column labels of each frame.
df_train, df_test = (frame.set_index('id') for frame in (df_train, df_test))
train_columns, test_columns = df_train.columns, df_test.columns

In [None]:
scaler = StandardScaler()
# NOTE(review): `le` is not used anywhere else in the visible notebook.
le = preprocessing.LabelEncoder()

# Fit the scaler on the training features only, then apply the *same* fitted
# statistics to the test features. Re-fitting on the test set (fit_transform)
# would standardise it with different means and variances than the data the
# model is trained on.
df_train = scaler.fit_transform(df_train)
df_test = scaler.transform(df_test)

In [None]:
# Wrap the scaled values in DataFrames again, re-attaching the column labels
# saved earlier (the frames get a fresh default index).
df_train = pd.DataFrame(df_train, columns=train_columns)
df_test = pd.DataFrame(df_test, columns=test_columns)

In [None]:
# Column labels of the scaled test frame.
df_test.columns

In [None]:
# Summary statistics of the scaled test frame.
df_test.describe()

In [None]:
# Deep MLP for binary classification: five ReLU hidden layers (750, 512, 200,
# 60, 16 units), each with the same L1+L2 kernel penalty, batch normalisation
# after the first layer, dropout of 0.1 / 0.2 / 0.3 after the second to fourth
# layers, and a single sigmoid output unit.
#
# The input width is taken from the prepared training frame instead of a
# hard-coded 63: the number of letter-count columns that survive zero_remover
# depends on the data, so a fixed width can silently go stale.
n_inputs = df_train.shape[1]

model = models.Sequential()
model.add(layers.Dense(750, kernel_regularizer=regularizers.L1L2(l1=1e-5, l2=1e-4), activation="relu", input_shape=(n_inputs,)))
model.add(layers.BatchNormalization())
model.add(layers.Dense(512, kernel_regularizer=regularizers.L1L2(l1=1e-5, l2=1e-4), activation="relu"))
model.add(layers.Dropout(0.1))
model.add(layers.Dense(200, kernel_regularizer=regularizers.L1L2(l1=1e-5, l2=1e-4), activation="relu"))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(60, kernel_regularizer=regularizers.L1L2(l1=1e-5, l2=1e-4), activation="relu"))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(16, kernel_regularizer=regularizers.L1L2(l1=1e-5, l2=1e-4), activation="relu"))
model.add(layers.Dense(1, activation='sigmoid'))

model.summary()

In [None]:
# Binary cross-entropy loss with the Adam optimizer; AUC is reported while training.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['AUC'],
)

In [None]:
# Early stopping watches the validation loss (validation_split holds out 35% of
# the rows) and restores the best weights when it triggers. Monitoring the
# training loss would ignore the held-out data and leave the last-epoch weights
# in place for prediction. This also matches the early-stopping setup used for
# the first model in this notebook.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model.fit(df_train, y_train, validation_split=0.35, shuffle=True, epochs=250, batch_size=2000, callbacks=[callback])

In [None]:
# Model outputs (sigmoid scores) for the scaled test features.
predis = model.predict(df_test)

In [None]:
# Overwrite the sample submission's target column with the predictions.
# NOTE(review): assumes sample_submission.csv lists rows in the same order as test.csv — confirm.
df_sample_sub['target'] = predis

In [None]:
# Write the submission file without the DataFrame index.
df_sample_sub.to_csv('NN_submission.csv', index=False)

## Thank You
