In [None]:
import sys
sys.path.append('../input/deeptables')

import os, gc
import numpy as np
import pandas as pd
import string 
from sklearn.model_selection import StratifiedKFold 
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import roc_auc_score


from deeptables.models.deeptable import DeepTable, ModelConfig
from deeptables.models.deepnets import DeepFM,xDeepFM,DCN,PNN,WideDeep,AutoInt,AFM,FGCNN

import tensorflow as tf
from tensorflow import keras
from keras.utils import plot_model

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Run configuration shared by the cells that follow.
SEED = 2021        # passed to seed_everything() and to StratifiedKFold(random_state=...)
epochs = 20        # `epochs` argument of every DeepTable.fit call
batch_size = 128   # `batch_size` argument of every DeepTable.fit call
N_FOLDS = 5        # number of StratifiedKFold splits

In [None]:
def seed_everything(seed):
    """Seed the random number generators used by this notebook.

    Seeds Python's built-in ``random`` module, NumPy's global RNG and
    TensorFlow's global seed, and exports ``PYTHONHASHSEED``.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Local import: the notebook's import cell does not bring `random` into scope.
    import random

    random.seed(seed)
    # PYTHONHASHSEED is read when an interpreter starts, so this setting is only
    # picked up by Python processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

seed_everything(SEED)

In [None]:
# Load the competition files: training set, test set and the submission template.
df_train = pd.read_csv('../input/tabular-playground-series-mar-2021/train.csv')
df_test = pd.read_csv('../input/tabular-playground-series-mar-2021/test.csv')
sub = pd.read_csv('../input/tabular-playground-series-mar-2021/sample_submission.csv')

# Column groups, derived from the dtypes pandas inferred for the training file.
fet_cat_list = list(df_train.select_dtypes(include='object').columns)
fet_num_list = list(df_train.select_dtypes(include='float64').columns)
# Categorical columns encoded with the wider (int16) mapping; all other
# categorical columns use the single-letter (int8) mapping.
fet_cat_long = ['cat5', 'cat7', 'cat8', 'cat10']
fet_cat_short = [cat for cat in fet_cat_list if cat not in fet_cat_long]

# Keep the label aside, then strip the identifier (and the label) from the features.
target = df_train['target']

df_train = df_train.drop(columns=['id', 'target'])
df_test = df_test.drop(columns=['id'])

In [None]:
# Clip each numerical test feature to the [min, max] range observed in training.
for feat in fet_num_list:
    train_low, train_high = df_train[feat].min(), df_train[feat].max()
    df_test[feat] = df_test[feat].clip(lower=train_low, upper=train_high)

In [None]:
# Stack train and test (train rows first) under a fresh 0..n-1 index so both
# go through the same preprocessing.
data = pd.concat([df_train, df_test], ignore_index=True)

# Recode the 'ZZ' level of cat5 as 'A' (the notebook treats 'ZZ' as an outlier level).
is_zz = data['cat5'].eq('ZZ')
data.loc[is_zz, 'cat5'] = 'A'

# The separate frames are no longer needed; release their memory.
del df_train, df_test
gc.collect()

In [None]:
# Ordinal encoding: convert the letter codes of every categorical column to integers.
# Single letters 'A'..'Z' map to 0..25; the long alphabet appends every
# two-letter combination 'AA'..'ZZ' as 26..701.
scii_letters_list_s = list(string.ascii_uppercase)
scii_letters_list_l = scii_letters_list_s + [i + j for i in scii_letters_list_s for j in scii_letters_list_s]

map_ord_short = dict(zip(scii_letters_list_s, range(0, len(scii_letters_list_s))))
map_ord_long = dict(zip(scii_letters_list_l, range(0, len(scii_letters_list_l))))


def _ordinal_encode(column, mapping, dtype):
    """Map every value of ``column`` through ``mapping`` and cast to ``dtype``.

    ``Series.map`` does a direct dictionary lookup and yields NaN for values
    missing from ``mapping``; those are reported explicitly here rather than
    surfacing as an opaque cast failure in ``astype``.

    Raises:
        ValueError: if ``column`` holds a value that ``mapping`` does not cover.
    """
    encoded = column.map(mapping)
    unmapped = column[encoded.isna()].unique()
    if len(unmapped):
        raise ValueError(
            f'Column {column.name!r} contains values without an ordinal code: {unmapped.tolist()}'
        )
    return encoded.astype(dtype)


# Codes of the long alphabet go up to 701, hence int16.
for cat in fet_cat_long:
    data[cat] = _ordinal_encode(data[cat], map_ord_long, 'int16')

# Codes of the single-letter alphabet stay below 26, so int8 is enough.
for cat in fet_cat_short:
    data[cat] = _ordinal_encode(data[cat], map_ord_short, 'int8')

data[fet_cat_list].head(3)

In [None]:
# Extra features built from the ordinal codes of the categorical columns.
#
# fet_ext0 = (cat16 + cat17 + cat13) - (cat15 + cat18 + cat14), then shifted by
# the absolute value of its minimum.
# The operands are widened to int16 first: these columns are expected to be int8
# after the single-letter encoding (TODO confirm against the encoding cell), and
# fixed-width integer arithmetic wraps silently, so the shifted difference could
# otherwise exceed the int8 maximum of 127. As a result fet_ext0 is stored as int16.
low_card = data[['cat13', 'cat14', 'cat15', 'cat16', 'cat17', 'cat18']].astype('int16')
fet_plus = low_card['cat16'] + low_card['cat17'] + low_card['cat13']
fet_minus = low_card['cat15'] + low_card['cat18'] + low_card['cat14']
fet_diff = fet_plus - fet_minus
data['fet_ext0'] = fet_diff + abs(fet_diff.min())

# Pairwise sums of ordinal codes.
data['fet_ext1'] = data['cat2'] + data['cat11']
data['fet_ext2'] = data['cat9'] + data['cat12']
data['fet_ext3'] = data['cat8'] + data['cat10']

In [None]:
# Rescale every numerical feature to the range [0, 60] and round to the nearest
# integer, so each one becomes a small set of integer levels stored as int16.
scaler = MinMaxScaler(feature_range=(0, 60))
scaled_values = scaler.fit_transform(data[fet_num_list])
scaled_frame = pd.DataFrame(scaled_values, columns=fet_num_list)
data[fet_num_list] = scaled_frame.round().astype('int16')
data[fet_num_list].head()

In [None]:
# Split the combined frame back apart: the first len(target) rows are the
# training rows, the remainder is the test set.
n_train = len(target)
train = data.iloc[:n_train]
test = data.iloc[n_train:]

# One column per model is added to these frames by the model cells:
# oof_df collects out-of-fold predictions on train, oof_preds collects
# fold-averaged predictions on test.
oof_df = pd.DataFrame()
oof_preds = pd.DataFrame()

In [None]:
## Models trained in this notebook: DeepFM, xDeepFM, DCN, PNN, WideDeep, AutoInt, AFM, FGCNN

## DeepFM
## Stratified K-fold training. Fills the out-of-fold (OOF) predictions on train
## and accumulates the fold-averaged predictions on test.

oof_DeepFM = np.zeros((len(train)))
preds_DeepFM = 0

# Multiply the learning rate by 0.1 when the monitored validation AUC has not
# improved for one epoch. The same callback object is also passed to fit() by
# the other model cells.
# NOTE(review): the monitor name assumes Keras reports the metric as 'val_auc' — confirm.
rlr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_auc', factor=0.1,
                                           patience=1, min_lr=1e-8, mode='max', verbose=1)

skf = StratifiedKFold(n_splits=N_FOLDS, random_state=SEED, shuffle=True)

for fold, (train_idx, valid_idx) in enumerate(skf.split(train, target)):
    X_train, X_valid = train.iloc[train_idx], train.iloc[valid_idx]
    y_train, y_valid = target.iloc[train_idx], target.iloc[valid_idx]

    # Every column is declared categorical; automatic categorisation and
    # discretisation are switched off.
    # NOTE(review): earlystopping_patience is 2 here but 3 in the other model
    # cells — confirm this is intentional.
    conf = ModelConfig(
        nets=DeepFM,
        categorical_columns=data.columns.tolist(),
        metrics=['AUC'],
        auto_categorize=False,
        dnn_params={
            # presumably (units, dropout rate, batch-norm flag) per layer — confirm against DeepTables docs
            'hidden_units': ((300, 0.3, True), (300, 0.3, True),),
            'dnn_activation': 'relu'},
        auto_discrete=False,
        earlystopping_patience=2,
        embeddings_output_dim=20,
        embedding_dropout=0.3)

    dt_model = DeepTable(config=conf)

    dt_model.fit(X_train, y_train,
                 validation_data=(X_valid, y_valid),
                 epochs=epochs,
                 batch_size=batch_size,
                 callbacks=[rlr],
                 verbose=0)

    oof_DeepFM[valid_idx] = dt_model.predict_proba(X_valid).ravel()
    # Per-fold score, printed in the same format as the PNN, WideDeep, AutoInt,
    # AFM and FGCNN cells.
    print(f'Fold_{fold} ROC Score :{roc_auc_score(y_valid,oof_DeepFM[valid_idx])}')
    # Each fold contributes 1/n_splits of the final test prediction.
    preds_DeepFM += dt_model.predict_proba(test).ravel() / skf.n_splits

print(f'oof_DeepFM ROC score :{roc_auc_score(target,oof_DeepFM)}')
oof_df['DeepFM'] = oof_DeepFM
oof_preds['DeepFM'] = preds_DeepFM

In [None]:
## Models trained in this notebook: DeepFM, xDeepFM, DCN, PNN, WideDeep, AutoInt, AFM, FGCNN

## xDeepFM
## Stratified K-fold training. Fills the out-of-fold (OOF) predictions on train
## and accumulates the fold-averaged predictions on test.

oof_xDeepFM = np.zeros((len(train)))
preds_xDeepFM = 0

skf = StratifiedKFold(n_splits=N_FOLDS, random_state=SEED, shuffle=True)

for fold, (train_idx, valid_idx) in enumerate(skf.split(train, target)):
    X_train, X_valid = train.iloc[train_idx], train.iloc[valid_idx]
    y_train, y_valid = target.iloc[train_idx], target.iloc[valid_idx]

    # Every column is declared categorical; automatic categorisation and
    # discretisation are switched off.
    conf = ModelConfig(
        nets=xDeepFM,
        categorical_columns=data.columns.tolist(),
        metrics=['AUC'],
        auto_categorize=False,
        dnn_params={
            # presumably (units, dropout rate, batch-norm flag) per layer — confirm against DeepTables docs
            'hidden_units': ((300, 0.3, True), (300, 0.3, True),),
            'dnn_activation': 'relu'},
        auto_discrete=False,
        earlystopping_patience=3,
        embeddings_output_dim=20,
        embedding_dropout=0.3)

    dt_model = DeepTable(config=conf)

    # `rlr` is the ReduceLROnPlateau callback created in the DeepFM cell.
    dt_model.fit(X_train, y_train,
                 validation_data=(X_valid, y_valid),
                 epochs=epochs,
                 batch_size=batch_size,
                 callbacks=[rlr],
                 verbose=0)

    oof_xDeepFM[valid_idx] = dt_model.predict_proba(X_valid).ravel()
    # Per-fold score, printed in the same format as the PNN, WideDeep, AutoInt,
    # AFM and FGCNN cells.
    print(f'Fold_{fold} ROC Score :{roc_auc_score(y_valid,oof_xDeepFM[valid_idx])}')
    # Each fold contributes 1/n_splits of the final test prediction.
    preds_xDeepFM += dt_model.predict_proba(test).ravel() / skf.n_splits


print(f'oof_xDeepFM ROC score :{roc_auc_score(target,oof_xDeepFM)}')
oof_df['xDeepFM'] = oof_xDeepFM
oof_preds['xDeepFM'] = preds_xDeepFM

In [None]:
## Models trained in this notebook: DeepFM, xDeepFM, DCN, PNN, WideDeep, AutoInt, AFM, FGCNN

## DCN
## Stratified K-fold training. Fills the out-of-fold (OOF) predictions on train
## and accumulates the fold-averaged predictions on test.

oof_DCN = np.zeros((len(train)))
preds_DCN = 0

skf = StratifiedKFold(n_splits=N_FOLDS, random_state=SEED, shuffle=True)

for fold, (train_idx, valid_idx) in enumerate(skf.split(train, target)):
    X_train, X_valid = train.iloc[train_idx], train.iloc[valid_idx]
    y_train, y_valid = target.iloc[train_idx], target.iloc[valid_idx]

    # Every column is declared categorical; automatic categorisation and
    # discretisation are switched off.
    conf = ModelConfig(
        nets=DCN,
        categorical_columns=data.columns.tolist(),
        metrics=['AUC'],
        auto_categorize=False,
        dnn_params={
            # presumably (units, dropout rate, batch-norm flag) per layer — confirm against DeepTables docs
            'hidden_units': ((300, 0.3, True), (300, 0.3, True),),
            'dnn_activation': 'relu'},
        auto_discrete=False,
        earlystopping_patience=3,
        embeddings_output_dim=20,
        embedding_dropout=0.3)

    dt_model = DeepTable(config=conf)

    # `rlr` is the ReduceLROnPlateau callback created in the DeepFM cell.
    dt_model.fit(X_train, y_train,
                 validation_data=(X_valid, y_valid),
                 epochs=epochs,
                 batch_size=batch_size,
                 callbacks=[rlr],
                 verbose=0)

    oof_DCN[valid_idx] = dt_model.predict_proba(X_valid).ravel()
    # Per-fold score, printed in the same format as the PNN, WideDeep, AutoInt,
    # AFM and FGCNN cells.
    print(f'Fold_{fold} ROC Score :{roc_auc_score(y_valid,oof_DCN[valid_idx])}')
    # Each fold contributes 1/n_splits of the final test prediction.
    preds_DCN += dt_model.predict_proba(test).ravel() / skf.n_splits

print(f'oof_DCN ROC score :{roc_auc_score(target,oof_DCN)}')
oof_df['DCN'] = oof_DCN
oof_preds['DCN'] = preds_DCN

In [None]:
## Models trained in this notebook: DeepFM, xDeepFM, DCN, PNN, WideDeep, AutoInt, AFM, FGCNN

## PNN
## Stratified K-fold training: out-of-fold predictions on train, fold-averaged
## predictions on test.

oof_PNN = np.zeros(len(train))
preds_PNN = 0

skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)

for fold, (train_idx, valid_idx) in enumerate(skf.split(train, target)):
    X_train = train.iloc[train_idx]
    X_valid = train.iloc[valid_idx]
    y_train = target.iloc[train_idx]
    y_valid = target.iloc[valid_idx]

    # Settings of the dense part of the network.
    dnn_settings = {
        'hidden_units': ((300, 0.3, True), (300, 0.3, True),),  # hidden_units
        'dnn_activation': 'relu',
    }

    # All columns are declared categorical; automatic handling is disabled.
    conf = ModelConfig(
        nets=PNN,
        categorical_columns=data.columns.tolist(),
        auto_categorize=False,
        auto_discrete=False,
        metrics=['AUC'],
        dnn_params=dnn_settings,
        earlystopping_patience=3,
        embeddings_output_dim=20,
        embedding_dropout=0.3,
    )

    dt_model = DeepTable(config=conf)

    # `rlr` is the ReduceLROnPlateau callback created in the DeepFM cell.
    dt_model.fit(
        X_train,
        y_train,
        validation_data=(X_valid, y_valid),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[rlr],
        verbose=0,
    )

    valid_proba = dt_model.predict_proba(X_valid)
    oof_PNN[valid_idx] = valid_proba.ravel()
    print(f'Fold_{fold} ROC Score :{roc_auc_score(y_valid,oof_PNN[valid_idx])}')

    # Each fold contributes 1/n_splits of the final test prediction.
    test_proba = dt_model.predict_proba(test)
    preds_PNN += test_proba.ravel() / skf.n_splits


print(f'oof_PNN ROC score :{roc_auc_score(target,oof_PNN)}')
oof_df['PNN'] = oof_PNN
oof_preds['PNN'] = preds_PNN

In [None]:
## Models trained in this notebook: DeepFM, xDeepFM, DCN, PNN, WideDeep, AutoInt, AFM, FGCNN

## WideDeep
## Stratified K-fold training: out-of-fold predictions on train, fold-averaged
## predictions on test.

oof_WideDeep = np.zeros(len(train))
preds_WideDeep = 0

skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)

for fold, (train_idx, valid_idx) in enumerate(skf.split(train, target)):
    X_train = train.iloc[train_idx]
    X_valid = train.iloc[valid_idx]
    y_train = target.iloc[train_idx]
    y_valid = target.iloc[valid_idx]

    # Settings of the dense part of the network.
    dnn_settings = {
        'hidden_units': ((300, 0.3, True), (300, 0.3, True),),  # hidden_units
        'dnn_activation': 'relu',
    }

    # All columns are declared categorical; automatic handling is disabled.
    conf = ModelConfig(
        nets=WideDeep,
        categorical_columns=data.columns.tolist(),
        auto_categorize=False,
        auto_discrete=False,
        metrics=['AUC'],
        dnn_params=dnn_settings,
        earlystopping_patience=3,
        embeddings_output_dim=20,
        embedding_dropout=0.3,
    )

    dt_model = DeepTable(config=conf)

    # `rlr` is the ReduceLROnPlateau callback created in the DeepFM cell.
    dt_model.fit(
        X_train,
        y_train,
        validation_data=(X_valid, y_valid),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[rlr],
        verbose=0,
    )

    valid_proba = dt_model.predict_proba(X_valid)
    oof_WideDeep[valid_idx] = valid_proba.ravel()
    print(f'Fold_{fold} ROC Score :{roc_auc_score(y_valid,oof_WideDeep[valid_idx])}')

    # Each fold contributes 1/n_splits of the final test prediction.
    test_proba = dt_model.predict_proba(test)
    preds_WideDeep += test_proba.ravel() / skf.n_splits


print(f'oof_WideDeep ROC score :{roc_auc_score(target,oof_WideDeep)}')
oof_df['WideDeep'] = oof_WideDeep
oof_preds['WideDeep'] = preds_WideDeep

In [None]:
## Models trained in this notebook: DeepFM, xDeepFM, DCN, PNN, WideDeep, AutoInt, AFM, FGCNN

## AutoInt
## Stratified K-fold training: out-of-fold predictions on train, fold-averaged
## predictions on test.

oof_AutoInt = np.zeros(len(train))
preds_AutoInt = 0

skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)

for fold, (train_idx, valid_idx) in enumerate(skf.split(train, target)):
    X_train = train.iloc[train_idx]
    X_valid = train.iloc[valid_idx]
    y_train = target.iloc[train_idx]
    y_valid = target.iloc[valid_idx]

    # Settings of the dense part of the network.
    dnn_settings = {
        'hidden_units': ((300, 0.3, True), (300, 0.3, True),),  # hidden_units
        'dnn_activation': 'relu',
    }

    # All columns are declared categorical; automatic handling is disabled.
    conf = ModelConfig(
        nets=AutoInt,
        categorical_columns=data.columns.tolist(),
        auto_categorize=False,
        auto_discrete=False,
        metrics=['AUC'],
        dnn_params=dnn_settings,
        earlystopping_patience=3,
        embeddings_output_dim=20,
        embedding_dropout=0.3,
    )

    dt_model = DeepTable(config=conf)

    # `rlr` is the ReduceLROnPlateau callback created in the DeepFM cell.
    dt_model.fit(
        X_train,
        y_train,
        validation_data=(X_valid, y_valid),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[rlr],
        verbose=0,
    )

    valid_proba = dt_model.predict_proba(X_valid)
    oof_AutoInt[valid_idx] = valid_proba.ravel()
    print(f'Fold_{fold} ROC Score :{roc_auc_score(y_valid,oof_AutoInt[valid_idx])}')

    # Each fold contributes 1/n_splits of the final test prediction.
    test_proba = dt_model.predict_proba(test)
    preds_AutoInt += test_proba.ravel() / skf.n_splits


print(f'oof_AutoInt ROC score :{roc_auc_score(target,oof_AutoInt)}')
oof_df['AutoInt'] = oof_AutoInt
oof_preds['AutoInt'] = preds_AutoInt

In [None]:
## Models trained in this notebook: DeepFM, xDeepFM, DCN, PNN, WideDeep, AutoInt, AFM, FGCNN

## AFM
## Stratified K-fold training: out-of-fold predictions on train, fold-averaged
## predictions on test.

oof_AFM = np.zeros(len(train))
preds_AFM = 0

skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)

for fold, (train_idx, valid_idx) in enumerate(skf.split(train, target)):
    X_train = train.iloc[train_idx]
    X_valid = train.iloc[valid_idx]
    y_train = target.iloc[train_idx]
    y_valid = target.iloc[valid_idx]

    # Settings of the dense part of the network.
    dnn_settings = {
        'hidden_units': ((300, 0.3, True), (300, 0.3, True),),  # hidden_units
        'dnn_activation': 'relu',
    }

    # All columns are declared categorical; automatic handling is disabled.
    conf = ModelConfig(
        nets=AFM,
        categorical_columns=data.columns.tolist(),
        auto_categorize=False,
        auto_discrete=False,
        metrics=['AUC'],
        dnn_params=dnn_settings,
        earlystopping_patience=3,
        embeddings_output_dim=20,
        embedding_dropout=0.3,
    )

    dt_model = DeepTable(config=conf)

    # `rlr` is the ReduceLROnPlateau callback created in the DeepFM cell.
    dt_model.fit(
        X_train,
        y_train,
        validation_data=(X_valid, y_valid),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[rlr],
        verbose=0,
    )

    valid_proba = dt_model.predict_proba(X_valid)
    oof_AFM[valid_idx] = valid_proba.ravel()
    print(f'Fold_{fold} ROC Score :{roc_auc_score(y_valid,oof_AFM[valid_idx])}')

    # Each fold contributes 1/n_splits of the final test prediction.
    test_proba = dt_model.predict_proba(test)
    preds_AFM += test_proba.ravel() / skf.n_splits


print(f'oof_AFM ROC score :{roc_auc_score(target,oof_AFM)}')
oof_df['AFM'] = oof_AFM
oof_preds['AFM'] = preds_AFM

In [None]:
## Models trained in this notebook: DeepFM, xDeepFM, DCN, PNN, WideDeep, AutoInt, AFM, FGCNN

## FGCNN
## Stratified K-fold training: out-of-fold predictions on train, fold-averaged
## predictions on test.

oof_FGCNN = np.zeros(len(train))
preds_FGCNN = 0

skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)

for fold, (train_idx, valid_idx) in enumerate(skf.split(train, target)):
    X_train = train.iloc[train_idx]
    X_valid = train.iloc[valid_idx]
    y_train = target.iloc[train_idx]
    y_valid = target.iloc[valid_idx]

    # Settings of the dense part of the network.
    dnn_settings = {
        'hidden_units': ((300, 0.3, True), (300, 0.3, True),),  # hidden_units
        'dnn_activation': 'relu',
    }

    # All columns are declared categorical; automatic handling is disabled.
    conf = ModelConfig(
        nets=FGCNN,
        categorical_columns=data.columns.tolist(),
        auto_categorize=False,
        auto_discrete=False,
        metrics=['AUC'],
        dnn_params=dnn_settings,
        earlystopping_patience=3,
        embeddings_output_dim=20,
        embedding_dropout=0.3,
    )

    dt_model = DeepTable(config=conf)

    # `rlr` is the ReduceLROnPlateau callback created in the DeepFM cell.
    dt_model.fit(
        X_train,
        y_train,
        validation_data=(X_valid, y_valid),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[rlr],
        verbose=0,
    )

    valid_proba = dt_model.predict_proba(X_valid)
    oof_FGCNN[valid_idx] = valid_proba.ravel()
    print(f'Fold_{fold} ROC Score :{roc_auc_score(y_valid,oof_FGCNN[valid_idx])}')

    # Each fold contributes 1/n_splits of the final test prediction.
    test_proba = dt_model.predict_proba(test)
    preds_FGCNN += test_proba.ravel() / skf.n_splits


print(f'oof_FGCNN ROC score :{roc_auc_score(target,oof_FGCNN)}')
oof_df['FGCNN'] = oof_FGCNN
oof_preds['FGCNN'] = preds_FGCNN

In [None]:
# Blend: row-wise mean of the out-of-fold predictions stored in oof_df,
# scored against the training target.
oof_df['All_models'] = oof_df.mean(axis=1)
blend_auc = roc_auc_score(target, oof_df['All_models'])
print(f'ALL_models ROC score :{blend_auc}')

In [None]:
# Submission: row-wise mean of the per-model test predictions.
# (Despite its name, oof_preds holds the fold-averaged predictions on the test set.)
blended_test_pred = oof_preds.mean(axis=1)
sub['target'] = blended_test_pred
sub.to_csv('submission-Deeptables-test.csv', index=False)