In [None]:
import numpy as np
import pandas as pd
import string 
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

# Deep-learning stack (TensorFlow / Keras).
import tensorflow as tf
from tensorflow import keras
from keras.utils import plot_model

import matplotlib.pyplot as plt


import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read the competition CSVs: training set, test set and the sample submission.
df_train, df_test, sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tabular-playground-series-mar-2021/train.csv',
        '../input/tabular-playground-series-mar-2021/test.csv',
        '../input/tabular-playground-series-mar-2021/sample_submission.csv',
    )
)

# Labels used for stratification and scoring further down.
target = df_train['target']

In [None]:
# Training configuration shared by the cells below.
N_FOLDS       = 10      # number of stratified CV splits
SEED          = 2021    # random_state for the CV splitter
batch_size    = 1024    # mini-batch size passed to model.fit
learning_rate = 2e-3    # optimizer learning rate read by make_model

In [None]:
# Column groups, derived from the dtypes of the training frame.
fet_cat_list = list(df_train.select_dtypes(include='object').columns)
fet_num_list = list(df_train.select_dtypes(include='float64').columns)

# Categorical columns encoded with the long (one- and two-letter) mapping;
# every other categorical column uses the short single-letter mapping.
fet_cat_long = ['cat5', 'cat7', 'cat8', 'cat10']
fet_cat_short = []
for cat in fet_cat_list:
    if cat not in fet_cat_long:
        fet_cat_short.append(cat)

In [None]:
# Remove outlier and concatenate train and test.
# The 'ZZ' level of cat5 is folded into 'A' in both frames.
for frame in (df_train, df_test):
    frame.loc[frame['cat5'] == 'ZZ', 'cat5'] = 'A'

# Clamp each numerical test column to the [min, max] range seen in train.
for feat in fet_num_list:
    train_col = df_train[feat]
    df_test[feat] = df_test[feat].clip(lower=train_col.min(), upper=train_col.max())

# Train rows first, then test rows, on a fresh 0..n-1 index.
data = pd.concat([df_train, df_test], ignore_index=True)

In [None]:
# Ordinal encoding.
# Single letters 'A'..'Z' map to 0..25; the long mapping continues with the
# two-letter combinations 'AA', 'AB', ..., 'ZZ' (26..701).
scii_letters_list_s = list(string.ascii_uppercase)
scii_letters_list_l = scii_letters_list_s + [i + j for i in scii_letters_list_s for j in scii_letters_list_s]

map_ord_short = {label: code for code, label in enumerate(scii_letters_list_s)}
map_ord_long = {label: code for code, label in enumerate(scii_letters_list_l)}

# Series.map does a plain dictionary lookup per value, whereas Series.replace
# with a large dict goes through the much slower generic replacement path.
# Labels missing from the mapping are reported explicitly instead of surfacing
# later as an opaque astype('int16') failure.
for cat_cols, mapping in ((fet_cat_long, map_ord_long), (fet_cat_short, map_ord_short)):
    for cat in cat_cols:
        encoded = data[cat].map(mapping)
        unmapped = encoded.isna()
        if unmapped.any():
            unknown_labels = sorted(set(data.loc[unmapped, cat].astype(str)))
            raise ValueError(f'Column {cat!r} contains labels without an ordinal code: {unknown_labels}')
        data[cat] = encoded.astype('int16')

data[fet_cat_list].head(3)

In [None]:
# Extra features.
# NOTE(review): none of the columns created here are part of feat_all_list,
# and fet_ext_list is not referenced by any later cell visible in this
# notebook -- confirm whether they were meant to feed the model.

# Sums of three ordinal-encoded categorical columns each (the column names
# only mention two of the three inputs).
data['cat16_cat17'] = data['cat16'] + data['cat17'] + data['cat13']
data['cat15_cat18'] = data['cat15'] + data['cat18'] + data['cat14']

# Difference of the two sums, shifted by the absolute value of its minimum.
sum_difference = data['cat16_cat17'] - data['cat15_cat18']
data['fet_ext0'] = sum_difference + abs(sum_difference.min())

data['fet_ext1'] = data['cat2'] + data['cat11']
fet_ext_list = ['fet_ext0', 'fet_ext1']

# Columns that are converted to bits below: categorical first, then numerical.
feat_all_list = fet_cat_list + fet_num_list

In [None]:
# Scale numerical features.
# Each numerical column is min-max scaled to [0, 64] (fitted on train and test
# together), rounded to the nearest integer and stored as int16.
scaler = MinMaxScaler(feature_range=(0, 64))
scaled_values = scaler.fit_transform(data[fet_num_list])
scaled_frame = pd.DataFrame(scaled_values, columns=fet_num_list)
data[fet_num_list] = scaled_frame.round().astype('int16')
data[fet_num_list].head(3)

In [None]:
# Convert integers to binary.
# Every feature value becomes 9 bit columns named '<col>_B_0' .. '<col>_B_8',
# most significant bit first (the same layout as format(value, '09b')).
# The bits are extracted with vectorised shifts instead of formatting each
# value as a string, and the per-feature frames are concatenated once at the
# end rather than growing binary_df inside the loop.
n_bits = 9
bit_shifts = np.arange(n_bits - 1, -1, -1, dtype='int16')

bit_frames = []
for col in feat_all_list:
    values = data[col].to_numpy(dtype='int16')
    # Values outside [0, 2**n_bits) cannot be represented in n_bits bits;
    # fail loudly instead of silently dropping the high bits.
    if values.size and (values.min() < 0 or values.max() >= 2 ** n_bits):
        raise ValueError(f'Column {col!r} has values outside [0, {2 ** n_bits - 1}]; cannot encode in {n_bits} bits')
    bits = ((values[:, None] >> bit_shifts) & 1).astype('int8')
    colnames = [f'{col}_B_{i}' for i in range(n_bits)]
    bit_frames.append(pd.DataFrame(bits, columns=colnames))

binary_df = pd.concat(bit_frames, axis=1)

binary_df.head()

In [None]:
# Image geometry used for plotting and for the network input:
# 9 x 30 cells per row of binary_df, one channel.
img_rows, img_cols = 9, 30
channnels=1

# Show the first five rows of the bit table as images, titled with their target.
for row_idx in range(5):
    fig, ax = plt.subplots()
    bit_image = binary_df.iloc[row_idx].values.reshape(img_rows, img_cols)
    ax.imshow(bit_image)
    ax.set_title('row_{} target_{} '.format(row_idx, target[row_idx]))
    plt.show()

In [None]:
# Reshape image in 3 dimensions.
def data_prep(raw,channnels):
    """Turn a flat table into a batch of images.

    Args:
        raw: DataFrame with one row per sample; each row must hold
            img_rows * img_cols * channnels values.
        channnels: size of the trailing channel axis.

    Returns:
        Array of shape (len(raw), img_rows, img_cols, channnels), where
        img_rows and img_cols are read from module scope.
    """
    target_shape = (len(raw), img_rows, img_cols, channnels)
    return raw.to_numpy().reshape(target_shape)

In [None]:
def make_model(img_rows,img_cols,channnels):
    """Build and compile the CNN binary classifier.

    Architecture: two 3x3 Conv2D layers with 32 filters (the second with
    stride 2), batch normalisation, dropout 0.6, flatten, a 300-unit dense
    layer with dropout 0.2 and batch normalisation, and a single sigmoid
    output unit.

    Args:
        img_rows: height of the input images.
        img_cols: width of the input images.
        channnels: number of input channels (spelling kept so existing
            callers remain valid).

    Returns:
        A compiled tf.keras.Model using RMSprop (learning rate taken from the
        module-level `learning_rate`), binary cross-entropy loss and a
        BinaryAccuracy metric named 'ba' (reported as 'val_ba' on validation
        data, which the training callbacks monitor).
    """
    input_model = tf.keras.layers.Input(shape=(img_rows,img_cols,channnels))

    ########################################################
    # Convolutional feature extractor.
    conv1 = tf.keras.layers.Conv2D(32, kernel_size=3,padding='same', activation='relu')(input_model)
    conv1 = tf.keras.layers.Conv2D(32, kernel_size=3,strides=2, padding='same', activation='relu')(conv1)
    conv1 = tf.keras.layers.BatchNormalization()(conv1)
    conv1 = tf.keras.layers.Dropout(0.6)(conv1)
    flat_c = tf.keras.layers.Flatten()(conv1)

    # Dense head.
    hidden = tf.keras.layers.Dense(300, activation='relu')(flat_c)
    hidden = tf.keras.layers.Dropout(0.2)(hidden)
    hidden = tf.keras.layers.BatchNormalization()(hidden)

    output = tf.keras.layers.Dense(1, activation="sigmoid")(hidden)

    model = tf.keras.Model(inputs=input_model, outputs=output)

    # `metrics` is documented as a list; a bare metric object is only
    # tolerated by some Keras versions, so it is wrapped explicitly.
    model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate),
                  loss = tf.keras.losses.BinaryCrossentropy(),
                  metrics = [tf.keras.metrics.BinaryAccuracy(name='ba')])

    return model

In [None]:
from IPython.display import Image

# Build one model instance, write its architecture diagram to model.png
# and display that file inline.
model = make_model(img_rows, img_cols, channnels)
plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)
Image(filename='model.png', retina=True)

In [None]:
# Split the bit table back into train / test rows and reshape into image batches.
train=data_prep(binary_df[:len(df_train)],channnels)
test=data_prep(binary_df[len(df_train):],channnels)

# Out-of-fold predictions for the train rows and the fold-averaged test predictions.
oof = np.zeros(len(train))
preds = np.zeros(len(test))

skf = StratifiedKFold(n_splits=N_FOLDS, random_state=SEED, shuffle=True)


for fold, (train_idx, valid_idx) in enumerate(skf.split(train,target)):
    # A new model is built for every fold; reset Keras' global state first so
    # the previous folds' models do not accumulate in memory.
    tf.keras.backend.clear_session()
    # Seed TensorFlow per fold so weight initialisation and dropout masks are
    # repeatable between runs (GPU kernels may still be non-deterministic).
    tf.random.set_seed(SEED + fold)

    # Fresh callbacks per fold so no state is shared between fits.
    # Both monitor validation binary accuracy ('val_ba').
    es = tf.keras.callbacks.EarlyStopping(monitor='val_ba', patience=6,min_delta=1e-4,
                                     verbose=0, mode='max', baseline=None, restore_best_weights=True)

    rlr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_ba', factor=0.5,min_delta=1e-4,
                                          patience=2, min_lr=1e-8, mode='max', verbose=0)

    X_train, X_valid = train[train_idx], train[valid_idx]
    y_train, y_valid = target.iloc[train_idx], target.iloc[valid_idx]

    model=make_model(img_rows,img_cols,channnels)

    model.fit(X_train,y_train,
              validation_data =(X_valid,y_valid),
              epochs = 50,
              batch_size = batch_size,
              callbacks = [rlr,es],
              verbose = 0)

    # Predict with the training batch size instead of the much smaller default.
    oof[valid_idx] = model.predict(X_valid, batch_size=batch_size, verbose=0).ravel()
    oof_score=roc_auc_score(y_valid,oof[valid_idx])
    print(f'Fold_{fold} roc_auc_score: {oof_score}')
    # Each fold contributes an equal share to the averaged test prediction.
    preds+=model.predict(test, batch_size=batch_size, verbose=0).ravel()/skf.n_splits

      
#0.891843680760386

In [None]:
# ROC AUC of the out-of-fold predictions over the whole training set.
oof_auc = roc_auc_score(target, oof)
print(f'oof roc_auc_score: {oof_auc}')
#oof roc_auc_score: 0.8919673830957107

In [None]:
# Put the fold-averaged test predictions into the submission frame and save it.
sub = sub.assign(target=preds)
sub.to_csv('submission.csv', index=False)