In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import OrdinalEncoder,StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics
import xgboost as xgb
import tensorflow as tf
from tensorflow import keras
from keras import layers

In [None]:
# Load the train, test, and sample-submission CSV files from the Kaggle
# input directory, in that order.
train, test, sample = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/tabular-playground-series-may-2022/train.csv',
        '/kaggle/input/tabular-playground-series-may-2022/test.csv',
        "/kaggle/input/tabular-playground-series-may-2022/sample_submission.csv",
    )
)


In [None]:
# Show the pandas structural summary (DataFrame.info) of the training frame.
train.info()

In [None]:
# Show the pandas structural summary (DataFrame.info) of the test frame.
test.info()

In [None]:
# Descriptive statistics for the training frame's columns.
train.describe()

In [None]:
# Notebook-level OrdinalEncoder instance.
enc = OrdinalEncoder()
def transform_f27(df):
    """Expand the string column ``f_27`` into numeric features.

    Adds ``char`` (the number of distinct characters in each string) and one
    ``f_27_char<k>`` column per character position (``k`` is 1-based), then
    drops ``f_27``. The input frame itself is not modified.

    Each character is encoded as its code-point offset from ``'A'``
    (``'A'`` -> 0.0, ``'B'`` -> 1.0, ...). A fixed mapping is used rather than
    fitting an encoder inside the function: a per-call fit derives the codes
    from whichever characters are present in that particular frame, so the
    same letter could be given different codes in the train and test frames.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a string column named ``f_27``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` without ``f_27`` and with the new columns appended.
        Position columns are float64; a position past the end of a shorter
        string is NaN.
    """
    df = df.copy()
    f27 = df['f_27']
    df['char'] = f27.apply(lambda x: len(set(x)))

    longest = f27.str.len().max()
    # max() over an empty or all-missing column is NaN; treat it as zero positions.
    n_positions = 0 if pd.isna(longest) else int(longest)

    base = ord('A')
    for i in range(n_positions):
        # na_action='ignore' leaves missing characters (short strings) as NaN.
        codes = f27.str.get(i).map(ord, na_action='ignore')
        df['f_27_char{}'.format(i + 1)] = (codes - base).astype('float64')
    return df.drop(['f_27'], axis=1)


In [None]:
# Run the f_27 feature expansion on the training frame and on the test frame.
train_df, test_df = (transform_f27(df=frame) for frame in (train, test))

In [None]:
# Number of training rows for each distinct `target` value.
train['target'].value_counts()

In [None]:
# Pairwise column correlations, rounded to one decimal so the cell
# annotations stay readable on the large heatmap.
corr = train_df.corr().round(1)
fig, ax = plt.subplots(figsize=(30, 20))
sns.heatmap(
    corr,
    vmin=-1,
    vmax=1,
    center=0,
    square=False,
    annot=True,
    cmap='viridis',
    ax=ax,
)
plt.show()

In [None]:
# Label column, and the feature matrix with `id` and the label removed.
y = train_df['target']
X = train_df.drop(columns=['id', 'target'])

In [None]:
# Hold out 4% of the rows as a validation set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.04, random_state=10
)
# Fit the standardizer on the training split only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Fully connected binary classifier: three 256-unit hidden blocks (each
# followed by batch normalization) and a single sigmoid output unit.

# Take the input width from the data instead of hard-coding it, so the model
# keeps working if the feature engineering adds or removes columns.
n_features = X_train.shape[1]

# All layers come from the same `keras` namespace as `keras.Sequential`.
model = keras.Sequential([
    keras.layers.Input(shape=(n_features,)),

    keras.layers.Dense(256, activation='sigmoid'),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.01),

    keras.layers.Dense(256, activation='relu'),
    keras.layers.BatchNormalization(),

    keras.layers.Dense(256, activation='relu'),
    keras.layers.BatchNormalization(),

    keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    # Name the metric explicitly so the training history is always keyed by
    # 'auc' / 'val_auc' and does not depend on Keras' automatic metric naming
    # (which can change when this cell is re-run in the same session).
    metrics=[keras.metrics.AUC(name='auc')],
)

model.summary()

In [None]:
# Train the network on the scaled training split and track metrics on the
# held-out split after every epoch.
# `max_queue_size` was dropped: it only applies to generator / Sequence input,
# not to in-memory arrays, and newer Keras versions no longer accept it in fit().
# NOTE(review): shuffle=False keeps the batch order identical in every epoch —
# confirm this is intentional.
history = model.fit(
    X_train,
    y_train,
    verbose=1,
    batch_size=500,
    epochs=10,
    shuffle=False,
    validation_data=(X_test, y_test),
)

In [None]:
# Plot training vs. validation AUC per epoch.
train_auc = history.history['auc']
val_auc = history.history['val_auc']
epochs = range(len(train_auc))

# A single explicit figure/axes pair; the previous stray plt.figure() call
# opened an extra, empty figure right before plt.show().
fig, ax = plt.subplots()
ax.plot(epochs, train_auc, 'r', label='Training AUC')
ax.plot(epochs, val_auc, 'b', label='Validation AUC')
ax.set_title('Training and validation auc')
ax.set_xlabel('Epoch')
ax.set_ylabel('AUC')
ax.legend(loc=0)
plt.show()

In [None]:
# Separate 75/25 split used for the XGBoost experiment.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)
# Use a dedicated scaler name here. Rebinding `scaler` would replace the
# scaler that was fitted for the Keras model, and the prediction cell applies
# `scaler` to the competition test set before calling `model.predict` — the
# network would then see inputs scaled differently from its training data.
xgb_scaler = StandardScaler()
X_train = xgb_scaler.fit_transform(X_train)
X_test = xgb_scaler.transform(X_test)


# Hyperparameters that are unpacked as keyword arguments into
# xgb.XGBClassifier when the classifier is constructed.
params = {'n_estimators': 150,
          'max_depth': 20,
          'learning_rate': 0.1,
          'subsample': 0.95,
          'colsample_bytree': 0.60,
          'reg_lambda': 1.50,
          'reg_alpha': 0.1,
          'gamma': 2.40,
          'random_state': 64,
          'objective': 'binary:logistic',
          'tree_method': 'hist',
         }

In [None]:
xgb_class = xgb.XGBClassifier(**params)
# Fit on the training split and monitor AUC on the held-out split. Fitting on
# (X_test, y_test) while also evaluating on it would train on a quarter of the
# data and report AUC on rows the model has already seen.
# NOTE(review): newer xgboost releases expect `eval_metric` and
# `early_stopping_rounds` on the constructor rather than in fit() — confirm
# against the installed version.
xgb_class.fit(X_train, y_train, eval_set = [(X_test,y_test)], eval_metric = ['auc'], early_stopping_rounds = 128, verbose = 10)

In [None]:
# Build the feature matrix for the competition test set and score it with the
# Keras model.
val_test = test_df.drop(['id'], axis=1)
# Keep the scaled array under its own name: rebinding `test` would replace the
# raw test DataFrame with an ndarray and break any earlier cell that is re-run.
# NOTE(review): `scaler` must be the scaler fitted on the data the Keras model
# was trained with — verify it has not been rebound since then.
test_scaled = scaler.transform(val_test)
# model.predict returns one column per output unit; flatten to one value per row.
predict = model.predict(test_scaled).ravel()

In [None]:
# Store the predictions in the sample-submission frame's `target` column and
# write the frame to submission.csv without the index column.
sample['target'] = predict
sample.to_csv('submission.csv', index=False)
