# Libraries

In [None]:
# Core
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
%matplotlib inline
from itertools import combinations

# Sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import StandardScaler 

# Tensorflow
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import callbacks

# Data

In [None]:
# Read the competition CSVs, using each file's `id` column as the row index.
train_data = pd.read_csv(
    '../input/tabular-playground-series-nov-2021/train.csv',
    index_col='id',
)
test_data = pd.read_csv(
    '../input/tabular-playground-series-nov-2021/test.csv',
    index_col='id',
)

# Keep the test ids now: they are needed later to build the submission frame.
test_index = test_data.index

# Quick sanity check of the training frame: dimensions, then the first rows.
print(train_data.shape)
train_data.head()

**Check for null values**

In [None]:
# Total count of missing cells in each frame (summed over rows, then columns).
train_null_count = train_data.isnull().sum().sum()
test_null_count = test_data.isnull().sum().sum()

print('Number of null values in training set:', train_null_count)
print('')
print('Number of null values in test set:', test_null_count)

**Labels and features**

In [None]:
# Target column to predict
y = train_data['target']

# Everything except the target is used as model input
X = train_data.drop(columns='target')

**Feature correlations**

In [None]:
# Pairwise correlation between all features, drawn as a heatmap.
feature_corr = X.corr()
sns.heatmap(feature_corr)

**Inspect feature distributions (to spot low variance columns to remove)**

In [None]:
# Draw one 100-bin histogram per feature on a 4-column grid of panels.
n_grid_cols = 4
# Ceiling division so a trailing partial row of features is still plotted;
# max(1, ...) keeps plt.subplots valid even if there are no columns at all.
n_grid_rows = max(1, -(-len(X.columns) // n_grid_cols))

# squeeze=False guarantees a 2-D axes array, even when there is a single row.
fig, axes = plt.subplots(n_grid_rows, n_grid_cols, figsize=(14, 52), squeeze=False)
panels = axes.ravel()

for axis, column_name in zip(panels, X.columns):
    X.hist(column=column_name, bins=100, ax=axis)
    # Title the panel that was just drawn. plt.title() would instead write to
    # pyplot's "current" axes, which is not the panel being filled here.
    axis.set_title(column_name + '\n')

# Hide panels left over when the feature count is not a multiple of 4.
for axis in panels[len(X.columns):]:
    axis.set_visible(False)

In [None]:
# Disabled experiment: dropping the 'spiky' columns listed below doesn't help,
# so the code is wrapped in a string literal and is never executed.
# NOTE: because the string is the only expression in this cell, the notebook
# echoes it back as the cell's output.
'''
# Remove 'spiky' columns
columns_to_drop=['f0','f2','f4','f9','f12','f16','f19','f20','f23','f24','f27','f28',
                'f30','f31','f32','f33','f35','f36','f39','f42','f44','f46','f48','f49',
                'f51','f52','f53','f56','f58','f59','f60','f61','f62','f63','f64','f68','f69',
                'f72','f73','f75','f76','f78','f79','f81','f83','f84','f86','f88','f89',
                'f90','f92','f93','f94','f95','f98']

# Loop over bad columns
for col in columns_to_drop:
    X.drop(col, axis=1, inplace=True)
    test_data.drop(col, axis=1, inplace=True)

# Shape and preview
print(X.shape)
X.head()
'''

**Scale data**

In [None]:
# Learn the per-feature scaling statistics from the training features only,
# then apply that same fitted transform to both the training and test features.
scaler = StandardScaler()
scaler.fit(X)

X = scaler.transform(X)
test_data = scaler.transform(test_data)

**Break off validation set**

In [None]:
# Hold out 10% of the rows for validation; the fixed random_state makes the
# split repeatable across runs.
split_parts = train_test_split(
    X, y,
    train_size=0.9,
    test_size=0.1,
    random_state=0,
)
X_train, X_valid, y_train, y_valid = split_parts

# Model

In [None]:
# Build the network: three ReLU hidden layers, each followed by dropout,
# and a single sigmoid unit that outputs the positive-class probability.
n_features = X.shape[1]

model = keras.Sequential()

# hidden layer 1 (also declares the input width)
model.add(layers.Dense(units=256, activation='relu', input_shape=[n_features]))
model.add(layers.Dropout(rate=0.3))

# hidden layer 2
model.add(layers.Dense(units=256, activation='relu'))
model.add(layers.Dropout(rate=0.3))

# hidden layer 3
model.add(layers.Dense(units=128, activation='relu'))
model.add(layers.Dropout(rate=0.2))

# output layer
model.add(layers.Dense(units=1, activation='sigmoid'))

# Optimizer, loss and the metric tracked during training
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)

# Stop once the validation loss has gone 10 epochs without improving by at
# least 0.0001, and roll the model back to its best weights.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',  # the callback's default, spelled out for readers
    min_delta=0.0001,
    patience=10,
    restore_best_weights=True,
)

**Train model**

In [None]:
# Training settings: up to 150 epochs in batches of 500 rows, validated on the
# held-out split after every epoch, with early stopping allowed to cut it short.
fit_options = dict(
    validation_data=(X_valid, y_valid),
    batch_size=500,
    epochs=150,
    callbacks=[early_stopping],
    verbose=True,
)

history = model.fit(X_train, y_train, **fit_options)

**Learning curves**

In [None]:
# Per-epoch training log as a frame: one row per epoch, one column per metric.
history_df = pd.DataFrame(history.history)
history_df.loc[:, ['loss', 'val_loss']].plot(title="Cross-entropy")
history_df.loc[:, ['binary_accuracy', 'val_binary_accuracy']].plot(title="Accuracy")

# Score the model as it stands now instead of reading the last history row:
# early stopping was configured with restore_best_weights=True, so the weights
# kept in `model` may come from an earlier epoch than the final logged one.
# evaluate() returns [loss, binary_accuracy] for the compiled metrics.
_, final_val_accuracy = model.evaluate(X_valid, y_valid, batch_size=500, verbose=0)

print('Final accuracy on validation set:', 
      final_val_accuracy)

# Make predictions

In [None]:
# The sigmoid output is the predicted probability of the positive class.
preds = model.predict(test_data)

# reshape(-1) always gives a 1-D vector; np.squeeze would collapse a
# single-row prediction into a 0-d scalar.
pred_probs = preds.reshape(-1)

# Hard 0/1 labels, kept for inspection (same rounding as before).
pred_classes = np.round(pred_probs, 0).astype(int)

# Save predictions to file
# NOTE(review): this competition is scored on ROC AUC according to its Kaggle
# evaluation page (please confirm). AUC ranks predictions, so the submission
# carries the probabilities; rounding to 0/1 first discards that ranking.
output = pd.DataFrame({'id': test_index,
                       'target': pred_probs})

# Check format
output.head()

In [None]:
# Write the submission file; the frame's own index is left out of the CSV.
output.to_csv(path_or_buf='submission.csv', index=False)
