## Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings 
# Silence every Python warning for the rest of the session (this also hides
# deprecation notices coming from the libraries imported in this cell).
warnings.filterwarnings('ignore')
# Lift pandas' display limits so that all columns and all rows of a frame are
# rendered; only display small slices (e.g. `.head()`) while this is active.
pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows',None)

import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential

from keras.layers import *
from datetime import datetime
import kerastuner as kt
from kerastuner import HyperModel
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import sklearn.metrics
from sklearn.model_selection import train_test_split

from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score


#### + In an earlier notebook we did the [EDA](http://www.kaggle.com/saileshnair/tps202109-normal-and-quick-eda)
#### + In the next notebook we looked at machine learning algorithms and compared them after feature transformations. [Algorithm Comparison + Transformation](https://www.kaggle.com/saileshnair/tps2021-feature-transformation)

#### In this notebook I attempt to build an ANN model and tune it with the HyperModel API from Keras Tuner.

In [None]:
# All three competition files live in the same input directory; keeping the
# location in one constant means it only has to be changed in one place.
DATA_DIR = "../input/tabular-playground-series-sep-2021"

train = pd.read_csv(f"{DATA_DIR}/train.csv")            # rows that include the `claim` target
testdf = pd.read_csv(f"{DATA_DIR}/test.csv")            # rows to be scored
sub = pd.read_csv(f"{DATA_DIR}/sample_solution.csv")    # frame that receives the predictions

In [None]:
# Show the first two rows of the training frame.
train.head(n=2)

In [None]:
# Show the first two rows of the test frame.
testdf.head(n=2)

In [None]:
# Show the first two rows of the sample submission.
sub.head(n=2)

In [None]:
# Select the model inputs by name instead of by position: every column except
# the row identifier and the target. Unlike `columns[1:-1]` this does not
# depend on `id` being the first column and `claim` being the last one.
features = train.columns.drop(["id", "claim"])

# Work on a copy so the raw training frame stays untouched.
df = train[features].copy()

### Imputing missing values

In [None]:
# Replace missing feature values with the per-column median.
# `copy=False` only *permits* in-place imputation ("whenever possible"); it
# does not guarantee that `df` itself is modified, so the array returned by
# `fit_transform` is what gets wrapped into the imputed frame.
# NOTE(review): the imputer is fitted on all training rows, including the ones
# that later form the hold-out split, so the hold-out score is mildly optimistic.
si = SimpleImputer(strategy='median', copy=False)
idf = pd.DataFrame(si.fit_transform(df), columns=features, index=df.index)

### Scaling Values

In [None]:
# Standardise every feature column.
# As with the imputer, `copy=False` is not a guarantee that `idf` is modified
# in place, so the scaled values are taken from the return value.
st = StandardScaler(copy=False)
stidf = pd.DataFrame(st.fit_transform(idf), columns=features, index=idf.index)

In [None]:
# Feature matrix and target as NumPy arrays; the double brackets keep the
# target two-dimensional, i.e. shape (n_rows, 1).
X = stidf.values
Y = train[['claim']].values

# Remove the identifier from the test features. Rebinding (instead of
# `inplace=True`) together with `errors="ignore"` keeps the cell re-runnable:
# a second execution no longer raises KeyError because `id` is already gone.
testdf = testdf.drop(columns="id", errors="ignore")

### Splitting the dataset

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    random_state=21,
)

## ANN Algorithm Tuning using Hypermodel

In [None]:
class ANNHyperModel(HyperModel):
    """Search space for a binary classifier with three hidden Dense layers.

    Tuned hyperparameters:
      * ``units1``, ``units2``, ``units3`` -- width of each hidden layer,
        from 32 to 512 in steps of 32.
      * ``learning_rate`` -- Adam learning rate, one of 1e-2, 1e-3 or 1e-4.
    """

    def build(self, hp):
        """Build and compile one candidate model from the hyperparameters ``hp``.

        Returns the compiled ``tf.keras.Sequential`` model.
        """
        model = tf.keras.Sequential()

        # Each hidden layer gets its own independently tuned width.
        hp_units1 = hp.Int('units1', min_value=32, max_value=512, step=32)
        hp_units2 = hp.Int('units2', min_value=32, max_value=512, step=32)
        hp_units3 = hp.Int('units3', min_value=32, max_value=512, step=32)

        # All layers come from tf.keras so the model does not mix layer classes
        # from the standalone `keras` package with a `tf.keras` Sequential.
        model.add(tf.keras.layers.Dense(units=hp_units1, activation='relu'))
        model.add(tf.keras.layers.Dense(units=hp_units2, activation='relu'))
        model.add(tf.keras.layers.Dense(units=hp_units3, activation='relu'))
        # Single sigmoid unit: the output is the predicted probability.
        model.add(tf.keras.layers.Dense(1, kernel_initializer='normal', activation='sigmoid'))

        # Tune the learning rate for the optimizer
        # Choose an optimal value from 0.01, 0.001, or 0.0001
        hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate),
            loss='binary_crossentropy',
            # The explicit name pins the log keys to "auc" / "val_auc", which is
            # what the tuner objective below looks up.
            metrics=[tf.keras.metrics.AUC(name='auc')]
        )

        return model

hypermodel = ANNHyperModel()

tuner = kt.Hyperband(
    hypermodel,
    # Rank trials by AUC on the validation split, not on the data the model was
    # fitted to; training AUC favours the configurations that overfit the most.
    objective=kt.Objective("val_auc", direction="max"),
    max_epochs=10,
    factor=3,
    directory='keras_tuner_dir',
    project_name="Hyperband_TPS"
)

# `validation_split` is what produces the `val_auc` values used by the objective.
tuner.search(x_train, y_train, epochs=10, validation_split=0.2)

###  Finalize Model
###  Predictions on Validation dataset

In [None]:
# Take the top-ranked model from the search.
best_model = tuner.get_best_models()[0]
# Build with an unspecified batch dimension: only the number of features is
# fixed. Passing `x_train.shape` would declare the row count of the training
# set as the batch size.
best_model.build(input_shape=(None, x_train.shape[1]))
best_model.summary()

In [None]:
# Train the model selected by the tuner on the training split.
best_model.fit(x=x_train, y=y_train, batch_size=1024, epochs=50)

In [None]:
# Score the hold-out rows and confirm that predictions and labels line up.
y_pred = best_model.predict(x_test)
holdout_shapes = (y_pred.shape, y_test.shape)
print(*holdout_shapes)

In [None]:
# ROC AUC of the predicted probabilities on the hold-out split.
roc_auc_score(y_true=y_test, y_score=y_pred)

###  Predictions on Test dataset


In [None]:
# Apply the imputer and scaler that were fitted on the training features.
# The transformed values must be taken from the return values: with
# `copy=False` in-place modification is only attempted "whenever possible",
# so `testdf` is not guaranteed to change when the results are discarded.
# NOTE: this rebinds `testdf`; run the cell once per kernel session, otherwise
# the already-scaled values are scaled a second time.
test_imputed = pd.DataFrame(si.transform(testdf), columns=testdf.columns, index=testdf.index)
testdf = pd.DataFrame(st.transform(test_imputed), columns=testdf.columns, index=testdf.index)

# Guard against scoring rows that still contain missing values.
assert not testdf.isna().any().any(), "test features still contain missing values"

In [None]:
# NumPy view of the test features for the model.
test = testdf.to_numpy()

In [None]:
# Predicted probabilities for the test rows.
pred = best_model.predict(test)
print(np.shape(pred))
pred

In [None]:
# The model ends in a single output unit, so `pred` is two-dimensional
# (n_rows, 1). Flatten it to one value per row before storing it as a column
# rather than relying on pandas to accept a 2-D array for a single column.
sub['claim'] = pred.ravel()

In [None]:
# Check the first five rows of the filled-in submission.
sub.head(n=5)

###  Submission

In [None]:
# Write the submission file without the pandas index column.
sub.to_csv(
    "submission_ann2.csv",
    index=False,
)