# Classification with Gated Residual and Variable Selection Networks
Description: Using Gated Residual and Variable Selection Networks for income level prediction.<br>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found,
# so the available datasets are visible in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import warnings
# Suppress every warning for the rest of the session. Note that this also
# hides deprecation notices raised by the libraries used below.
warnings.filterwarnings('ignore')

In [None]:
import os
import gc
gc.enable()
import time
import random
import warnings

import feather
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from tqdm import tqdm

from sklearn import svm
from sklearn import tree
from sklearn import impute
from sklearn import metrics
from sklearn import ensemble
from sklearn import linear_model
from sklearn import decomposition
from sklearn import preprocessing
from sklearn import model_selection

# Suppress every warning raised from here on (including deprecation notices).
warnings.filterwarnings('ignore')

# Seed NumPy's legacy global random generator.
SEED = 42
np.random.seed(SEED)

# pandas display: no truncation of rows or columns, floats rendered with
# '{:f}' (fixed-point, six decimals).
# NOTE(review): 'float_format' is a shorthand option name; presumably it
# resolves to 'display.float_format' — confirm against the installed pandas.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('float_format', '{:f}'.format)

# Plot styling: seaborn dark grid, figures rendered at 600 dpi, inline backend.
sns.set_style("darkgrid")
mpl.rcParams['figure.dpi'] = 600
%matplotlib inline


In [None]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import callbacks

import tensorflow as tf
import random
import os

from sklearn.metrics import roc_auc_score, log_loss
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.preprocessing import StandardScaler, RobustScaler, PowerTransformer, MinMaxScaler

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
import time
from sklearn import model_selection

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
import math

In [None]:
# Locations of the competition files; all three live in the same input folder.
_INPUT_DIR = '/kaggle/input/tabular-playground-series-nov-2021'

TRN_PATH = f'{_INPUT_DIR}/train.csv'
TST_PATH = f'{_INPUT_DIR}/test.csv'
SUB_PATH = f'{_INPUT_DIR}/sample_submission.csv'

In [None]:
SEED = 42


def seed_everything(seed):
    """Seed every random number source this notebook relies on.

    Sets ``PYTHONHASHSEED`` in the environment and seeds Python's ``random``
    module, NumPy's global generator and TensorFlow's global generator.

    Args:
        seed: integer seed applied to all of the above.
    """
    # NOTE: PYTHONHASHSEED is read at interpreter start-up, so this assignment
    # only influences processes launched after this point.
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


seed_everything(SEED)

In [None]:
# Load the training set, the test set and the sample submission (in that
# order) from their respective CSV paths...
trn, tst, sub = (
    pd.read_csv(csv_path) for csv_path in (TRN_PATH, TST_PATH, SUB_PATH)
)

In [None]:
# Column dtypes, non-null counts and memory usage of the training frame.
trn.info()

In [None]:
# Preview the first rows of the training frame.
trn.head()

In [None]:
# Per-column summary statistics of the training frame.
trn.describe()

In [None]:
# Label column, and the model features: every training column except the
# row identifier and the label, in their original order.
TARGET = 'target'
_NON_FEATURE_COLUMNS = ('id', TARGET)
FEATURES = [name for name in trn.columns if name not in _NON_FEATURE_COLUMNS]

In [None]:
# Extract the target variable from the training data...
target = trn['target']

# Feature-only copies: the training frame without id and target, and the
# test frame without id...
train = trn.drop(columns=['id', 'target'])
test = tst.drop(columns=['id'])

In [None]:
def create_stat_features(df):
    '''Append row-wise summary statistics of the feature columns to ``df``.

    Every statistic is computed across the original columns of each row; the
    statistic columns themselves are never fed into one another, so calling
    the function again on its own output yields the same values.

    Added columns, in order: ``f_mean``, ``f_std``, ``f_skew``, ``f_max``,
    ``f_min``, ``f_var``, ``f_med`` and ``f_mad`` (mean absolute deviation
    around the row mean).

    Args:
        df: DataFrame of numeric feature columns. It is modified in place.

    Returns:
        The same ``df`` object, with the statistic columns added.
    '''
    stat_columns = ['f_mean', 'f_std', 'f_skew', 'f_max',
                    'f_min', 'f_var', 'f_med', 'f_mad']

    # Work from a copy that holds only the input features: assigning f_mean
    # to ``df`` must not change what f_std, f_skew, ... are computed over.
    base = df.drop(columns=stat_columns, errors='ignore')
    row_mean = base.mean(axis=1)

    df['f_mean'] = row_mean
    df['f_std']  = base.std(axis=1)
    df['f_skew'] = base.skew(axis=1)
    df['f_max']  = base.max(axis=1)
    df['f_min']  = base.min(axis=1)
    df['f_var']  = base.var(axis=1)
    df['f_med']  = base.median(axis=1)
    # DataFrame.mad() no longer exists in pandas >= 2.0; compute the mean
    # absolute deviation explicitly instead.
    df['f_mad']  = base.sub(row_mean, axis=0).abs().mean(axis=1)

    return df

# Append the row-wise statistic columns to both feature frames.
# NOTE(review): the scaling cell further down builds X / X_test from
# `trn` / `tst` restricted to FEATURES, so these extra columns do not appear
# to reach the model — confirm whether that is intended.
train = create_stat_features(train)
test = create_stat_features(test)

In [None]:
# Rescale every feature to the [0, 1] range of the training data.
# MinMaxScaler scales each column independently, so a single fit on the whole
# feature matrix gives the same values as fitting column by column, while
# leaving `scaler` with the parameters of every feature rather than only the
# last one. The scaler is fitted on the training rows only; test values
# outside the training range will fall outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))

trn[FEATURES] = scaler.fit_transform(trn[FEATURES])
tst[FEATURES] = scaler.transform(tst[FEATURES])

# float32 arrays consumed by the cross-validation loop.
X = trn[FEATURES].to_numpy().astype(np.float32)
Y = trn[TARGET].to_numpy().astype(np.float32)
X_test = tst[FEATURES].to_numpy().astype(np.float32)

In [None]:
def create_model_inputs():
    """Build one scalar float32 Keras input per entry of ``FEATURES``.

    Returns:
        dict mapping each feature name to a ``layers.Input`` created with
        ``shape=()`` and that same name, in ``FEATURES`` order.
    """
    return {
        feature_name: layers.Input(name=feature_name, shape=(), dtype=tf.float32)
        for feature_name in FEATURES
    }

In [None]:
def encode_inputs(inputs, encoding_size):
    """Encode each column of ``inputs`` with its own ``Dense`` layer.

    Args:
        inputs: tensor indexed as ``inputs[:, i]``; the size of its second
            dimension (``inputs.shape[1]``) sets the number of encoders.
        encoding_size: number of units of every per-column ``Dense`` layer.

    Returns:
        List with one encoded tensor per column, in column order.
    """
    def encode_column(index):
        # Re-add a trailing axis of size 1 so the column can feed a Dense layer.
        column = tf.expand_dims(inputs[:, index], -1)
        return layers.Dense(units=encoding_size)(column)

    return [encode_column(index) for index in range(inputs.shape[1])]

In [None]:
class GatedLinearUnit(layers.Layer):
    """Gated linear unit: a linear projection scaled element-wise by a gate.

    Both the projection and the sigmoid gate are ``Dense`` layers of ``units``
    outputs applied to the same input.
    """

    def __init__(self, units):
        super().__init__()
        self.linear = layers.Dense(units)
        self.sigmoid = layers.Dense(units, activation="sigmoid")

    def call(self, inputs):
        projected = self.linear(inputs)
        gate = self.sigmoid(inputs)
        return projected * gate

In [None]:
class GatedResidualNetwork(layers.Layer):
    """Gated residual block: ELU dense -> linear dense -> dropout -> GLU,
    added to a skip connection and layer-normalised.

    Args:
        units: output width of the block.
        dropout_rate: rate passed to the internal ``Dropout`` layer.
    """

    def __init__(self, units, dropout_rate):
        super().__init__()
        self.units = units
        self.elu_dense = layers.Dense(units, activation="elu")
        self.linear_dense = layers.Dense(units)
        self.dropout = layers.Dropout(dropout_rate)
        self.gated_linear_unit = GatedLinearUnit(units)
        self.layer_norm = layers.LayerNormalization()
        self.project = layers.Dense(units)

    def call(self, inputs):
        hidden = self.dropout(self.linear_dense(self.elu_dense(inputs)))
        # The skip connection must have `units` channels to be added to the
        # gated output; project it only when its last dimension differs.
        if inputs.shape[-1] == self.units:
            residual = inputs
        else:
            residual = self.project(inputs)
        gated = self.gated_linear_unit(hidden)
        return self.layer_norm(residual + gated)

In [None]:
class VariableSelection(layers.Layer):
    """Softmax-weighted combination of per-feature gated residual networks.

    Every input feature passes through its own ``GatedResidualNetwork``. A
    further GRN over the concatenation of all features feeds a softmax
    ``Dense`` layer that produces one weight per feature; the output is the
    weighted sum of the transformed features.

    Args:
        num_features: number of feature tensors passed to ``call``.
        units: width of every GRN (and of the output).
        dropout_rate: dropout rate used inside every GRN.
    """

    def __init__(self, num_features, units, dropout_rate):
        super().__init__()
        # One independent GRN per feature, in feature order.
        self.grns = [
            GatedResidualNetwork(units, dropout_rate) for _ in range(num_features)
        ]
        # GRN over the concatenation of all features, followed by the layer
        # that turns it into per-feature selection weights.
        self.grn_concat = GatedResidualNetwork(units, dropout_rate)
        self.softmax = layers.Dense(units=num_features, activation="softmax")

    def call(self, inputs):
        # Selection weights: one per feature, with a trailing axis of size 1.
        weights = layers.concatenate(inputs)
        weights = self.grn_concat(weights)
        weights = tf.expand_dims(self.softmax(weights), axis=-1)

        # Per-feature transforms stacked along a new axis 1.
        transformed = tf.stack(
            [self.grns[idx](feature) for idx, feature in enumerate(inputs)],
            axis=1,
        )

        # weights^T @ transformed contracts the feature axis; the remaining
        # size-1 axis is squeezed away.
        combined = tf.matmul(weights, transformed, transpose_a=True)
        return tf.squeeze(combined, axis=1)

In [None]:
def create_model(encoding_size, dropout_rate=0.15):
    """Build the variable-selection classifier over ``FEATURES``.

    The model takes one input of ``len(FEATURES)`` values per sample, encodes
    each feature separately with ``encode_inputs``, combines the encodings
    with a ``VariableSelection`` layer and ends in a single sigmoid unit.

    Args:
        encoding_size: width of each per-feature encoding and of the
            variable-selection output.
        dropout_rate: dropout rate used inside the variable-selection layer.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    # Pass the input shape as the documented tuple rather than a bare int.
    inputs = layers.Input(shape=(len(FEATURES),))
    feature_list = encode_inputs(inputs, encoding_size)
    num_features = len(feature_list)

    features = VariableSelection(num_features, encoding_size, dropout_rate)(
        feature_list
    )

    outputs = layers.Dense(units=1, activation="sigmoid")(features)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

In [None]:
def format_time(seconds):
    """
    Formats a duration in a human readable form.

    Args:
        seconds: seconds passed in a process
    Return:
        string such as '01 hr 02 min 05 sec'; the hour and minute parts are
        left out when they are zero, the seconds part is always present and
        every number is zero-padded to two digits
    """
    h = int(seconds // 3600)
    m = int((seconds % 3600) // 60)
    s = int(seconds % 60)

    parts = []
    if h > 0:
        parts.append(f'{h:02d} hr')
    if m > 0:
        parts.append(f'{m:02d} min')
    parts.append(f'{s:02d} sec')
    return ' '.join(parts)

In [None]:
from collections import defaultdict
import tensorflow as tf

# Accumulators filled fold by fold:
#   oof_df  - validation labels (under TARGET) and validation predictions
#             (under 'nn'), appended in fold order rather than in the original
#             row order of the training data;
#   test_df - running average of the test predictions over the folds.
oof_df = defaultdict(lambda : [])
test_df = defaultdict(lambda : np.zeros((X_test.shape[0])))

# Cross-validation and training settings.
N_FOLDS = 5
ENCODING_SIZE = 32
EPOCHS = 250
VERBOSE = 1
BATCH_SIZE = 2048
start = time.time()

# Stratified, shuffled folds; seeded so the split is repeatable.
skfolds = model_selection.StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)

for fold, (t, v) in enumerate(skfolds.split(X, Y)):
    # t / v are the row indices of this fold's training and validation parts.
    x_train, x_val = X[t], X[v]
    y_train, y_val = Y[t], Y[v]
    
    oof_df[TARGET].extend(y_val)
    print(f"\n{'-'*15} FOLD-{fold} {'-'*15}")
    
    tic = time.time()
    
    # Commented-out plain dense network (not executed).
#     clf = tf.keras.Sequential([
#         tf.keras.layers.Input(len(FEATURES)),
#         tf.keras.layers.Dense(256, activation='swish'),
#         tf.keras.layers.Dropout(0.3),
#         tf.keras.layers.Dense(128, activation='swish'),
#         tf.keras.layers.Dropout(0.2),
#         tf.keras.layers.Dense(64, activation='swish'),
#         tf.keras.layers.Dropout(0.2),
#         tf.keras.layers.Dense(32, activation='swish'),
#         tf.keras.layers.Dropout(0.2),
#         tf.keras.layers.Dense(16, activation='swish'),
#         tf.keras.layers.Dropout(0.2),
#         tf.keras.layers.Dense(1, activation='sigmoid'),
#     ])
    # A fresh model is built for every fold.
    clf = create_model(ENCODING_SIZE)
    
    clf.compile(loss='binary_crossentropy', 
                optimizer='adam', 
                metrics=[tf.keras.metrics.AUC(name='auc'), 'acc'])
    
    # Multiply the learning rate by 0.25 after 4 epochs without val_loss improvement.
    lr = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.25, 
                               patience=4, verbose=VERBOSE)

    # Stop after 15 epochs without val_loss improvement and restore the best weights.
    es = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=15, 
                       verbose=VERBOSE, mode="min", 
                       restore_best_weights=True)
    
    # The validation fold is evaluated as a single batch (validation_batch_size).
    clf.fit(x_train, y_train, 
            epochs=EPOCHS, batch_size=BATCH_SIZE,
            validation_data=(x_val, y_val),
            validation_batch_size=len(y_val),
            callbacks=[es, lr],
            shuffle=True,
            verbose=VERBOSE)
    
    # Out-of-fold predictions, plus this fold's 1/N_FOLDS share of the
    # averaged test prediction.
    preds = np.squeeze(clf.predict(x_val, batch_size=len(y_val)))
    oof_df[f'nn'].extend(preds)
    test_df[f'nn'] += (np.squeeze(clf.predict(X_test, batch_size=BATCH_SIZE) / N_FOLDS))

    # ROC AUC of this fold only.
    score = metrics.roc_auc_score(y_val, preds)
    print(f"MODEL: nn\tSCORE: {score}\tTIME: {format_time(time.time()-tic)}")

    # Drop the model and the fold arrays before the next fold starts.
    del clf
    gc.collect()
        
    del x_train, x_val, y_train, y_val
    gc.collect()
        
# Convert the accumulators into DataFrames for scoring and submission.
oof_df = pd.DataFrame(oof_df)
test_df = pd.DataFrame(test_df)

print()
print(f'TOTAL TIME: {format_time(time.time() - start)}')

In [None]:
# ROC AUC over the pooled out-of-fold predictions of all folds.
oof_truth, oof_pred = oof_df[TARGET], oof_df['nn']
score = metrics.roc_auc_score(oof_truth, oof_pred)
print(f'Overall ROC AUC of: {score}')

In [None]:
# Overall ROC AUC of: 0.7488767791332664
# Overall ROC AUC of: 0.7504944878993653
# Overall ROC AUC of: 0.7552784685463579

In [None]:
# Start from the sample submission so the ids and column layout match what
# the competition expects. The file is read through SUB_PATH, the constant
# already defined for it, instead of a second hard-coded relative path.
submission = pd.read_csv(SUB_PATH)

# test_df['nn'] holds the fold-averaged predictions in X_test row order.
# NOTE(review): assumes the sample submission rows are in the same order as
# the test CSV — confirm.
submission[TARGET] = test_df['nn']
submission.to_csv('submission.csv', index=False)

submission.head()