# Import Libraries 

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import seaborn as sns

from IPython.display import display
import datetime
import scipy.stats
import math
import random

import lightgbm as lgb

from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.calibration import CalibrationDisplay
from sklearn.preprocessing import StandardScaler, QuantileTransformer,LabelEncoder
from sklearn.linear_model import LinearRegression

from sklearn.calibration import calibration_curve, CalibratedClassifierCV

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ReduceLROnPlateau, LearningRateScheduler, EarlyStopping
from tensorflow.keras.layers import Dense, Input, InputLayer, Add
from tensorflow.keras.utils import plot_model

In [None]:
# Switches for the meta-model stage.
SCALING = True        # scale meta features before fitting the meta models
CALIBRATION = True    # wrap the LightGBM meta model in an isotonic calibrator

# LightGBM meta-model training budget.
EARLY_STOP = 30       # rounds passed to lgb.early_stopping
EPOCHS = 2000         # default num_iterations for build_model()

# Get competition data

In [None]:
# Competition tables; the sample submission uses its first column as index.
sub = pd.read_csv(
    "../input/tabular-playground-series-may-2022/sample_submission.csv",
    index_col=0,
)
test = pd.read_csv('../input/tabular-playground-series-may-2022/test.csv')
train = pd.read_csv('../input/tabular-playground-series-may-2022/train.csv')

# Feature Engineering 
The model below is a modified version of the public notebooks by @ambrosm. Consider upvoting the original work. <br>
Two additional features are introduced (feature interactions of f_00 and f_01 with f_26). <br>

In [None]:
def _ternary(total, upper, lower):
    """Return +1 where `total > upper`, -1 where `total < lower`, else 0."""
    return (total > upper).astype(int) - (total < lower).astype(int)


for df in [train, test]:
    # Extract the 10 letters of f_27 into individual features
    for i in range(10):
        df[f'ch{i}'] = df.f_27.str.get(i).apply(ord) - ord('A')

    # unique_characters feature is from https://www.kaggle.com/code/cabaxiom/tps-may-22-eda-lgbm-model
    df["unique_characters"] = df.f_27.apply(lambda s: len(set(s)))

    # Feature interactions: create five ternary features.
    # Every ternary feature can have the values -1, 0 and +1.
    # Column creation order is kept because it defines the model input order.
    df['i_02_21'] = _ternary(df.f_21 + df.f_02, 5.2, -5.3)
    df['i_05_22'] = _ternary(df.f_22 + df.f_05, 5.1, -5.4)
    df['i_00_26'] = _ternary(df.f_00 + df.f_26, 5.1, -5.4)  # additional feature
    df['i_01_26'] = _ternary(df.f_01 + df.f_26, 5.1, -5.4)  # additional feature
    df['i_00_01_26'] = _ternary(df.f_00 + df.f_01 + df.f_26, 5.0, -5.0)

# Feature lists are built once, after the new columns exist.
# The dtype helpers avoid `dtype == int`, whose meaning depends on the
# platform's default integer width in older numpy versions.
features = [f for f in test.columns if f != 'id' and f != 'f_27']
float_features = [f for f in features if pd.api.types.is_float_dtype(test[f])]
int_features = [f for f in features
                if pd.api.types.is_integer_dtype(test[f]) and f.startswith('f')]
ch_features = [f for f in features if f.startswith('ch')]

In [None]:
# In addition to the features extracted from f_27, label-encoding f_27 itself
# was observed to improve the results slightly.
# The test set contains f_27 values that do not occur in train, so the encoder
# is fitted on the concatenation of both columns.

le = LabelEncoder()
all_f27 = pd.concat([train.f_27, test.f_27], ignore_index=True)
temp_df = all_f27.to_frame()
temp_df['f_27'] = le.fit_transform(temp_df['f_27'])
for frame in (test, train):
    frame['f_27'] = le.transform(frame['f_27'])

# Get prediction data 

## Neural network data
The model below is a modified version of the public notebooks by @ambrosm. Consider upvoting the original work. The main differences are the number of epochs and the features used.

In [None]:
# Feature frame (without the target and the raw f_27 code) and target vector.
y = train["target"]
X = train.drop(columns=["target", "f_27"])

In [None]:
def my_model():
    """Simple sequential neural network with four hidden layers.

    The input width is `len(features)`, read from the module-level
    `features` list at call time. The hidden layers have 64, 64, 64 and 16
    units with swish activation and L2 kernel regularization; the output is
    a single sigmoid unit.

    Returns a (not yet compiled) instance of tensorflow.keras.models.Model.
    """
    activation = 'swish'
    l2_strength = 40e-6
    # `shape` must be a tuple: `(n)` is just the int n, which some Keras
    # versions refuse to interpret as a shape.
    inputs = Input(shape=(len(features),))
    x = inputs
    for units in (64, 64, 64, 16):
        x = Dense(units,
                  kernel_regularizer=tf.keras.regularizers.l2(l2_strength),
                  activation=activation,
                  )(x)
    # The output layer is deliberately left unregularized.
    outputs = Dense(1, activation='sigmoid')(x)
    return Model(inputs, outputs)

# Draw the architecture of a freshly built (untrained) network.
plot_model(my_model(), show_shapes=True, show_layer_names=False)

In [None]:
# Training configuration for the Keras network.
NN_EPOCHS = 500            # epoch budget when USE_PLATEAU is enabled
EPOCHS_COSINEDECAY = 150   # epoch budget for the cosine-decay schedule
CYCLES = 1                 # number of cosine cycles within that budget
BATCH_SIZE = 2048
VERBOSE = 0  # set to 0 for less output, or to 2 for more output
USE_PLATEAU = False
ONLY_FIRST_FOLD = False
DIAGRAMS = True

# see https://keras.io/getting_started/faq/#how-can-i-obtain-reproducible-results-using-keras-during-development
for set_seed in (np.random.seed, random.seed, tf.random.set_seed):
    set_seed(1)

# From here on the (label-encoded) f_27 column is part of the feature list.
features = [column for column in test.columns if column != 'id']
print(features)

In [None]:
def fit_model(X_tr, y_tr, X_va=None, y_va=None, run=0):
    """Scale the data, train one network built by `my_model()` and score it.

    A `StandardScaler` is fitted on `X_tr` and applied to `X_va` and to
    `test[features]`. If `USE_PLATEAU` is set and validation data is given,
    training uses `ReduceLROnPlateau` plus `EarlyStopping` for up to
    `NN_EPOCHS` epochs; otherwise a cosine learning-rate schedule runs for
    `EPOCHS_COSINEDECAY` epochs.

    Module-level names used: `history_list` and `score_list` (appended to),
    `test`, `features` and `fold` (read; `fold` only for the log line).

    Parameters:
        X_tr, y_tr: training features and labels.
        X_va, y_va: optional validation features and labels.
        run: run number, used only in the log line.

    Returns:
        (model, scaler, y_va_pred, test_preds). Without validation data no
        inference is run, so `y_va_pred` and `test_preds` are both None.
    """
    start_time = datetime.datetime.now()

    scaler = StandardScaler()
    X_tr = scaler.fit_transform(X_tr)

    has_validation = X_va is not None
    if has_validation:
        X_va = scaler.transform(X_va)
        validation_data = (X_va, y_va)
    else:
        validation_data = None

    # Define the learning rate schedule and EarlyStopping
    lr_start = 0.01
    if USE_PLATEAU and has_validation:  # use early stopping
        epochs = NN_EPOCHS
        lr = ReduceLROnPlateau(monitor="val_loss", factor=0.7,
                               patience=4, verbose=VERBOSE)
        es = EarlyStopping(monitor="val_loss",
                           patience=12,
                           verbose=1,
                           mode="min",
                           restore_best_weights=True)
        callbacks = [lr, es, tf.keras.callbacks.TerminateOnNaN()]

    else:  # use cosine learning rate decay rather than early stopping
        epochs = EPOCHS_COSINEDECAY
        lr_end = 0.0002

        def cosine_decay(epoch):
            # w decays from 1 to 0 in every cycle
            # epoch == 0                  -> w = 1 (first epoch of cycle)
            # epoch == epochs_per_cycle-1 -> w = 0 (last epoch of cycle)
            epochs_per_cycle = epochs // CYCLES
            epoch_in_cycle = epoch % epochs_per_cycle
            if epochs_per_cycle > 1:
                w = (1 + math.cos(epoch_in_cycle / (epochs_per_cycle-1) * math.pi)) / 2
            else:
                w = 1
            return w * lr_start + (1 - w) * lr_end

        lr = LearningRateScheduler(cosine_decay, verbose=0)
        callbacks = [lr, tf.keras.callbacks.TerminateOnNaN()]

    # Construct and compile the model
    model = my_model()
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_start),
                  metrics=['AUC'],  # a list, as the Keras API documents
                  loss=tf.keras.losses.BinaryCrossentropy())

    # Train the model
    history = model.fit(X_tr, y_tr,
                        validation_data=validation_data,
                        epochs=epochs,
                        verbose=VERBOSE,
                        batch_size=BATCH_SIZE,
                        shuffle=True,
                        callbacks=callbacks)

    history_list.append(history.history)
    final_history = history_list[-1]
    # Drop references to the training helpers before running inference.
    callbacks, lr, history = None, None, None

    if not has_validation:
        # Nothing to validate against: report the training loss and return
        # explicit Nones instead of stale or unbound prediction variables.
        print(f"Training loss: {final_history['loss'][-1]:.4f}")
        return model, scaler, None, None

    lastloss = f"Training loss: {final_history['loss'][-1]:.4f} | Val loss: {final_history['val_loss'][-1]:.4f}"

    # Inference for validation and for the test set
    y_va_pred = model.predict(X_va, batch_size=len(X_va), verbose=0)
    test_preds = model.predict(scaler.transform(test[features]), batch_size=len(X_va), verbose=0)

    # Evaluation: Execution time, loss and AUC
    score = roc_auc_score(y_va, y_va_pred)

    print(f"Fold {run}.{fold} | {str(datetime.datetime.now() - start_time)[-12:-7]}"
          f" | {lastloss} | AUC: {score:.5f}")
    score_list.append(score)
    return model, scaler, y_va_pred, test_preds

In [None]:
# 5-fold cross-validation of the network (folds are contiguous, unshuffled).
print(f"{len(features)} features")
history_list, score_list = [], []
val_preds, test_preds = [], []
kf = KFold(n_splits=5)
# The loop variable must stay named `fold`: fit_model() reads it for logging.
for fold, (idx_tr, idx_va) in enumerate(kf.split(X, y)):
    fold_train = train.iloc[idx_tr]
    fold_valid = train.iloc[idx_va]
    X_tr, y_tr = fold_train[features], fold_train.target
    X_va, y_va = fold_valid[features], fold_valid.target

    model, scaler, y_va_pred, test_pred = fit_model(X_tr, y_tr, X_va, y_va)
    val_preds.extend(y_va_pred)
    test_preds.append(test_pred)

    if ONLY_FIRST_FOLD:
        break

# Mean of the per-fold validation AUCs collected by fit_model().
print(f"OOF AUC:                       {np.mean(score_list):.5f}")

In [None]:
# Average the per-fold test predictions and store them.
fold_test_stack = np.array(test_preds)
neural_test = pd.DataFrame(fold_test_stack.mean(axis=0), columns=["target"])
neural_test.to_csv("neural_network_test.csv")
neural_test

In [None]:
# Validation predictions collected over the folds, stored for stacking.
neural_vals = pd.DataFrame(data=val_preds, columns=["target"])
neural_vals.to_csv("neural_network_train.csv")
neural_vals

In [None]:
# AUC of the network's validation predictions against the training target.
roc_auc_score(y_true=train["target"], y_score=neural_vals)

## LightGBM data
Taken from my other notebook: https://www.kaggle.com/code/slythe/relative-features-w-lightgbm  <br>

In [None]:
# Predictions exported by the LightGBM notebook (first column is the index).
lgb_test = pd.read_csv("../input/relative-features-w-lightgbm/submission_csv.csv", index_col=0)
lgb_vals = pd.read_csv("../input/relative-features-w-lightgbm/lgb_vals.csv", index_col=0)
lgb_vals

In [None]:
# AUC of the loaded LightGBM predictions against the training target.
roc_auc_score(y_true=train["target"], y_score=lgb_vals)

## Pytorch Data

In [None]:
# Predictions exported by the PyTorch notebook (first column is the index).
py_test = pd.read_csv("../input/tps-may-pytorch-with-gpu/submission.csv", index_col=0)
py_vals = pd.read_csv("../input/tps-may-pytorch-with-gpu/pytorch_vals.csv", index_col=0)
py_vals

In [None]:
# AUC of the loaded PyTorch predictions against the training target.
roc_auc_score(y_true=train["target"], y_score=py_vals)

# Meta modelling 

In [None]:
#merging all the Level-0 prediction of the traing data
Meta_train =pd.DataFrame()
Meta_train.index=train.index
Meta_train['target']=train['target'].values
Meta_train['LGBM_train'] = lgb_vals["target"].values
Meta_train['neural_train'] = neural_vals["target"].values
Meta_train['pytorch_train'] = py_vals["target"].values
Meta_train.head(50)

In [None]:
# Sanity check: AUC of the LGBM column after merging into Meta_train.
roc_auc_score(y_true=train["target"], y_score=Meta_train['LGBM_train'])

In [None]:
# Rows where LGBM and PyTorch disagree after rounding to the nearest integer.
rounded = Meta_train.round()
rounded[rounded['LGBM_train'] != rounded['pytorch_train']]

In [None]:
# Level-0 predictions for the test data, in the same column order as the
# prediction columns of Meta_train.
Meta_test = pd.DataFrame(
    {
        'LGBM_test': lgb_test["target"].values,
        'neural_test': neural_test["target"].values,
        'pytorch_test': py_test["target"].values,
    },
    index=test.index,
)
Meta_test.head(20)

In [None]:
# Rows where LGBM and the network disagree after rounding to the nearest integer.
rounded = Meta_test.round()
rounded[rounded['LGBM_test'] != rounded['neural_test']]

## Split and show

In [None]:
# Meta-level design matrix and target.
meta_features = Meta_test.columns

y = Meta_train['target']
X = Meta_train.drop(columns="target")

# Hold-out split; the CV loops below rebind these four names per fold.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [None]:
# Overlay the LGB and NN prediction histograms for the training data.
plt.figure(figsize=(20, 8))
for column, color, alpha, label in (
    ('LGBM_train', "blue", 0.8, "LGB"),
    ('neural_train', "red", 0.5, "NN"),
):
    sns.histplot(Meta_train[column], color=color, alpha=alpha, label=label)
plt.title("Train meta data")
plt.legend()
plt.show()

In [None]:
# Overlay the LGB and NN prediction histograms for the test data.
plt.figure(figsize=(20, 8))
for column, color, alpha, label in (
    ('LGBM_test', "blue", 0.8, "LGB"),
    ('neural_test', "red", 0.5, "NN"),
):
    sns.histplot(Meta_test[column], color=color, alpha=alpha, label=label)
plt.title("Test meta data")
plt.legend()
plt.show()

## Linear Regression Meta Model

In [None]:
# 5-fold CV of a LinearRegression meta model on the level-0 predictions.
cv = KFold(n_splits=5)

val_preds = []
lin_preds = []
auc_cv = []

# Work on plain arrays: Meta_train and Meta_test use different column names
# (*_train vs *_test) for the same features, and scikit-learn compares the
# names seen in fit() against those passed to transform()/predict().
# The columns are matched by position (LGBM, neural, pytorch in both frames).
test_meta = Meta_test.to_numpy()

for fold, (idx_train, idx_val) in enumerate(cv.split(X, y)):
    print("\n")
    print("#"*10, f"Fold: {fold}", "#"*10)
    # KFold yields positional indices, so index positionally with .iloc.
    X_train, X_test = X.iloc[idx_train].to_numpy(), X.iloc[idx_val].to_numpy()
    y_train, y_test = y.iloc[idx_train], y.iloc[idx_val]

    # scaling
    if SCALING:
        print("Scaling")
        meta_scaler = StandardScaler()
        X_train = meta_scaler.fit_transform(X_train)
        X_test = meta_scaler.transform(X_test)
        test_s = meta_scaler.transform(test_meta)
    else:
        # The meta model predicts on the meta-level test features, not on
        # the raw competition `test` frame.
        test_s = test_meta

    model = LinearRegression()
    model.fit(X_train, y_train)

    val_pred = model.predict(X_test)
    val_preds.extend(val_pred)

    auc = roc_auc_score(y_test, val_pred)
    print("\n Validation AUC:", auc)
    lin_preds.append(model.predict(test_s))

    auc_cv.append(auc)

print("FINAL AUC: ", np.mean(auc_cv))

In [None]:
# Average the per-fold linear-model test predictions into a submission.
mean_lin_pred = np.array(lin_preds).mean(axis=0)
sub_lin = sub.copy(deep=True)
sub_lin["target"] = mean_lin_pred
sub_lin.to_csv("submission_lin.csv")
sub_lin

In [None]:
# Distribution of the linear meta model's test predictions.
lin_targets = sub_lin["target"]
plt.figure(figsize=(20, 8))
sns.histplot(lin_targets)
plt.show()

## LightGBM Meta Model

In [None]:
def build_model(epochs=EPOCHS):
    """Create an unfitted LightGBM binary classifier with fixed hyperparameters.

    `epochs` is passed on as `num_iterations`; every other setting is
    hard-coded below.
    """
    params = dict(
        objective='binary',
        metric="auc, binary_logloss, binary_error",
        num_iterations=epochs,
        num_threads=-1,
        learning_rate=0.18319492258552644,
        boosting='gbdt',
        lambda_l1=0.00028648667113792726,
        lambda_l2=0.00026863027834978876,
        num_leaves=229,
        max_depth=0,
        min_child_samples=80,
        device='cpu',
        max_bins=511,
        random_state=42,
    )
    return lgb.LGBMClassifier(**params)

In [None]:

# Shuffled 5-fold CV of the LightGBM meta model on the level-0 predictions.
cv = KFold(n_splits=5, shuffle=True, random_state=42)

val_preds = []
lgb_preds = []
auc_cv = []

# Work on plain arrays: Meta_train and Meta_test use different column names
# (*_train vs *_test) for the same features, and scikit-learn compares the
# names seen in fit() against those passed to transform()/predict().
# The columns are matched by position (LGBM, neural, pytorch in both frames).
test_meta = Meta_test.to_numpy()

for fold, (idx_train, idx_val) in enumerate(cv.split(X, y)):
    print("\n")
    print("#"*10, f"Fold: {fold}", "#"*10)
    # KFold yields positional indices, so index positionally with .iloc.
    X_train, X_test = X.iloc[idx_train].to_numpy(), X.iloc[idx_val].to_numpy()
    y_train, y_test = y.iloc[idx_train], y.iloc[idx_val]

    model = build_model()

    # scaling
    if SCALING:
        qt = QuantileTransformer(n_quantiles=1000,
                                 output_distribution='normal',
                                 random_state=42).fit(X_train)
        X_train = qt.transform(X_train)
        X_test = qt.transform(X_test)
        test_s = qt.transform(test_meta)
    else:
        # The meta model predicts on the meta-level test features, not on
        # the raw competition `test` frame.
        test_s = test_meta

    model.fit(X_train, y_train, eval_set=[(X_test, y_test)],
              callbacks=[lgb.early_stopping(EARLY_STOP)], eval_metric="auc")

    if CALIBRATION:
        # NOTE(review): the calibrator is fitted and scored on the same
        # validation fold, so the AUC printed below is likely optimistic.
        calibrator = CalibratedClassifierCV(model, method="isotonic", cv='prefit')
        calibrator.fit(X_test, y_test)
        predictor, auc_label = calibrator, "\n Calibration AUC:"
    else:
        predictor, auc_label = model, "\n Validation AUC:"

    val_pred = predictor.predict_proba(X_test)[:, 1]
    val_preds.extend(val_pred)

    auc = roc_auc_score(y_test, val_pred)
    print(auc_label, auc)
    lgb_preds.append(predictor.predict_proba(test_s)[:, 1])

    auc_cv.append(auc)

print("FINAL AUC: ", np.mean(auc_cv))

In [None]:
# Feature importances of `model` as left by the last fold of the loop above.
feat_importance = pd.DataFrame(
    {"importance": model.feature_importances_},
    index=meta_features,
)
feat_importance.plot(kind='bar', figsize=(20, 7))

In [None]:
# Average the per-fold LightGBM meta-model test predictions into a submission.
mean_lgb_pred = np.array(lgb_preds).mean(axis=0)
sub_lgb = sub.copy(deep=True)
sub_lgb["target"] = mean_lgb_pred
sub_lgb.to_csv("submission_lgb.csv")
sub_lgb

In [None]:
# Distribution of the LightGBM meta model's test predictions.
lgb_targets = sub_lgb["target"]
plt.figure(figsize=(20, 8))
sns.histplot(lgb_targets)
plt.show()

# NN prediction 

In [None]:
# Train the network again with 5-fold CV on the original (level-0) features.
print(f"{len(features)} features")

history_list, score_list = [], []
val_preds, test_preds = [], []
kf = KFold(n_splits=5)

# X and y are only used to generate the fold indices; the rows themselves
# come from `train`. The loop variable must stay named `fold` because
# fit_model() reads it for logging.
for fold, (idx_tr, idx_va) in enumerate(kf.split(X, y)):
    fold_train = train.iloc[idx_tr]
    fold_valid = train.iloc[idx_va]
    X_tr, y_tr = fold_train[features], fold_train.target
    X_va, y_va = fold_valid[features], fold_valid.target

    model, scaler, y_va_pred, test_pred = fit_model(X_tr, y_tr, X_va, y_va)
    val_preds.extend(y_va_pred)
    test_preds.append(test_pred)

# Mean of the per-fold validation AUCs collected by fit_model().
print(f"OOF AUC:                       {np.mean(score_list):.5f}")

In [None]:
# Average the per-fold network test predictions into a submission.
mean_nn_pred = np.array(test_preds).mean(axis=0)
sub_nn = sub.copy(deep=True)
sub_nn["target"] = mean_nn_pred
sub_nn.to_csv("submission_nn.csv")
sub_nn

In [None]:
# Distribution of the network's test predictions.
nn_targets = sub_nn["target"]
plt.figure(figsize=(20, 8))
sns.histplot(nn_targets)
plt.show()

# Ensemble Meta predictions 

In [None]:
# Collect the three submissions side by side (NN, LightGBM meta, linear meta).
ensemble_df = pd.DataFrame()
for column, submission in (
    ("target1", sub_nn),
    ("target2", sub_lgb),
    ("target3", sub_lin),
):
    ensemble_df[column] = submission["target"]
ensemble_df

In [None]:
# Final submission: unweighted row-wise mean of the three prediction columns.
row_mean = ensemble_df.mean(axis=1)
sub_final = sub.copy(deep=True)
sub_final["target"] = row_mean.values
sub_final.to_csv("submission_ensemble.csv")
sub_final

In [None]:
# Distribution of the ensembled test predictions.
final_targets = sub_final["target"]
plt.figure(figsize=(20, 8))
sns.histplot(final_targets)
plt.show()