In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
input_paths = [
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
]
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Exploring the dataset

In [None]:
# Read the first 200,000 training rows and the complete test file.
train_csv = '../input/tabular-playground-series-sep-2021/train.csv'
test_csv = '../input/tabular-playground-series-sep-2021/test.csv'
df_tr = pd.read_csv(train_csv, nrows=200000)
df_te = pd.read_csv(test_csv)

In [None]:
# Preview the first five training rows.
df_tr.iloc[:5]

In [None]:
# Percentage of missing values per column, largest share first.
null_counts = df_tr.isnull().sum()
null_pct = null_counts * 100 / len(df_tr)
null_pct.sort_values(ascending=False)

In [None]:
# Replace any pd.NA markers with np.nan in both the training and test frames.
df_tr, df_te = (frame.replace({pd.NA: np.nan}) for frame in (df_tr, df_te))

# Creating training datasets and imputing null values

In [None]:
# Every training column except the row identifier and the target is a feature.
non_feature_cols = ['id', 'claim']
features = df_tr.columns.drop(non_feature_cols)

In [None]:
from sklearn.impute import SimpleImputer

# Median-impute all feature columns in a single vectorised pass.
# SimpleImputer learns one median per column of a 2-D input, so this gives the
# same fill values as fitting each column separately, without a Python loop of
# per-column refits and (n, 1) reshapes.
# The medians are learned from the training frame only and then reused for the
# test frame.
imputer = SimpleImputer(strategy="median")
df_tr[features] = imputer.fit_transform(df_tr[features])
df_te[features] = imputer.transform(df_te[features])

# Scaling the datasets

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise all feature columns in a single vectorised pass.
# StandardScaler computes a separate mean and standard deviation for each
# column of a 2-D input, so this matches scaling column by column while
# avoiding the per-column refit loop.
# The statistics come from the training frame only and are reused for the
# test frame.
scaler = StandardScaler()
df_tr[features] = scaler.fit_transform(df_tr[features])
df_te[features] = scaler.transform(df_te[features])

In [None]:
# Separate the target column from the model inputs; 'id' is dropped from both
# the training and test inputs.
Y = df_tr["claim"]
X = df_tr.drop(columns=['id', 'claim'])
Xte = df_te.drop(columns='id')

# Feature-selection!

In [None]:
from sklearn.feature_selection import mutual_info_classif

In [None]:
# Score every feature by its mutual information with the target and rank the
# scores from highest to lowest.
# mutual_info_classif is randomised, so random_state is pinned to keep the
# ranking reproducible between runs.
mi_scores = pd.Series(
    mutual_info_classif(X, Y, random_state=108),
    index=X.columns,
    name="MI Scores",
).sort_values(ascending=False)

In [None]:
# Names of the 50 highest-ranked features.
mi_scores.index[:50]

In [None]:
# Fixed list of the 50 feature names selected as model inputs.
# NOTE(review): presumably copied from the mutual-information ranking output;
# confirm it still matches if the ranking is recomputed.
mi_fea = [
    'f5', 'f51', 'f57', 'f31', 'f34',
    'f99', 'f42', 'f6', 'f44', 'f74',
    'f46', 'f81', 'f40', 'f13', 'f70',
    'f111', 'f50', 'f94', 'f32', 'f35',
    'f114', 'f102', 'f64', 'f87', 'f48',
    'f106', 'f8', 'f12', 'f118', 'f109',
    'f18', 'f14', 'f71', 'f30', 'f72',
    'f21', 'f117', 'f95', 'f25', 'f110',
    'f54', 'f68', 'f7', 'f86', 'f33',
    'f36', 'f93', 'f90', 'f10', 'f98',
]

# Building *ANN*

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import callbacks, layers

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for validation, keeping only the selected features.
selected_inputs = X[mi_fea]
xtr, xval, ytr, yval = train_test_split(
    selected_inputs,
    Y,
    test_size=0.3,
    random_state=108,
)

In [None]:
# Seed TensorFlow's global RNG so repeated runs start from the same state.
tf.random.set_seed(108)

# (units, dropout rate) for each hidden Dense -> Dropout -> BatchNormalization
# stage, in order.
hidden_stages = [(512, 0.3), (512, 0.32), (824, 0.3), (1224, 0.32), (1024, 0.3)]

# Take the input width from the training matrix rather than hard-coding 50, so
# the network stays in sync with whatever feature list was selected.
model_layers = [keras.Input(shape=(xtr.shape[1],))]
for stage_units, stage_dropout in hidden_stages:
    model_layers += [
        layers.Dense(units=stage_units, activation='relu'),
        layers.Dropout(stage_dropout),
        layers.BatchNormalization(),
    ]
# A single sigmoid unit produces the claim probability.
model_layers.append(layers.Dense(units=1, activation='sigmoid'))
model = keras.Sequential(model_layers)

adam = keras.optimizers.Adam(
    learning_rate=0.001, beta_1=0.7, beta_2=0.977, epsilon=1e-07, amsgrad=True,
    name='Adam'
)
auc = keras.metrics.AUC(num_thresholds=25, summation_method='interpolation', curve='ROC')
model.compile(optimizer=adam, loss='binary_crossentropy', metrics=[auc, 'binary_crossentropy'])

# Stop when the monitored quantity has not improved by at least 1e-4 for 15
# epochs, and roll back to the best weights seen.
call = callbacks.EarlyStopping(patience=15, min_delta=0.0001, restore_best_weights=True)
history = model.fit(
    xtr, ytr,
    validation_data=(xval, yval),
    batch_size=50,
    callbacks=[call],
    epochs=50,
)

# Predicting the claims

In [None]:
# Score the test rows using the same selected feature columns as in training.
test_inputs = Xte[mi_fea]
ypred = model.predict(test_inputs)

In [None]:
# Load the competition's sample submission file for reference.
sample_csv = '../input/tabular-playground-series-sep-2021/sample_solution.csv'
sub = pd.read_csv(sample_csv)

In [None]:
# Show only the first rows of the sample submission; the column layout is all
# that is needed here, so the full frame is not displayed.
sub.head()

In [None]:
# Wrap the model output in a DataFrame so it can be addressed by column label.
ypred = pd.DataFrame(data=ypred)

In [None]:
# Re-assign the test ids from a plain NumPy copy shaped as a single column.
id_values = np.array(df_te['id'])
df_te['id'] = id_values.reshape(-1, 1)

In [None]:
# Renumber the prediction rows from 0. drop=True discards the old index instead
# of inserting it as a spurious 'index' column, so this cell can be re-run
# without accumulating columns.
ypred = ypred.reset_index(drop=True)

In [None]:
# Sanity check: number of test ids, shown as a one-element shape tuple.
(len(df_te['id']),)

In [None]:
# Pair each test id with its predicted claim probability (column 0 of ypred).
id_column = df_te['id']
claim_column = ypred[0]
submission = pd.DataFrame({'id': id_column, 'claim': claim_column})

# Submitting!

In [None]:
# Write the submission file without the DataFrame index.
# The earlier bare `submission.reset_index()` call was removed: it returned a
# new frame that was never used, so it had no effect on the saved file.
submission.to_csv('submission.csv', index=False)