'''
Credit to this kernel:
https://www.kaggle.com/remidi/neural-compression-auto-encoder-lb-0-55/code

I do some change and make it work for Santander . It is my first time to use denoising autoencoder.
Please provide feedback and upvote if you like it :)
'''

In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
from sklearn.base import BaseEstimator,TransformerMixin, ClassifierMixin
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import numpy as np
from sklearn.linear_model import ElasticNetCV, LassoLarsCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.pipeline import make_pipeline, make_union
from sklearn.utils import check_array
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.random_projection import GaussianRandomProjection
from sklearn.random_projection import SparseRandomProjection
from sklearn.decomposition import PCA, FastICA
from sklearn.decomposition import TruncatedSVD
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import NMF
from sklearn.cluster import FeatureAgglomeration
import scipy
from sklearn.ensemble import RandomForestRegressor
import random

In [2]:
import torch
import random

seed = 71

def seed_numpy_and_pytorch(s):
    random.seed(s)
    os.environ['PYTHONHASHSEED'] = str(s)
    np.random.seed(s)
    torch.manual_seed(s)
    torch.cuda.manual_seed(s)
    torch.backends.cudnn.deterministic = True
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

seed_numpy_and_pytorch(seed)

In [3]:
import warnings
warnings.filterwarnings('ignore')


class StackingEstimator(BaseEstimator, TransformerMixin):
    
    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, y=None, **fit_params):
        self.estimator.fit(X, y, **fit_params)
        return self
    def transform(self, X):
        X = check_array(X)
        X_transformed = np.copy(X)
        # add class probabilities as a synthetic feature
        if issubclass(self.estimator.__class__, ClassifierMixin) and hasattr(self.estimator, 'predict_proba'):
            X_transformed = np.hstack((self.estimator.predict_proba(X), X))

        # add class prodiction as a synthetic feature
        X_transformed = np.hstack((np.reshape(self.estimator.predict(X), (-1, 1)), X_transformed))

        return X_transformed

train = pd.read_csv('../input/train.csv')
test = pd.read_csv('../input/test.csv')

mix = pd.concat([train, test])

mix.drop(['var_90', 'var_161', 'var_88', 'var_100'], 1)

# copy from https://www.kaggle.com/mathormad/knowledge-distillation-with-nn-rankgauss
class GaussRankScaler():

    def __init__( self ):
        self.epsilon = 1e-9
        self.lower = -1 + self.epsilon
        self.upper =  1 - self.epsilon
        self.range = self.upper - self.lower

    def fit_transform( self, X ):

        i = np.argsort( X, axis = 0 )
        j = np.argsort( i, axis = 0 )

        assert ( j.min() == 0 ).all()
        assert ( j.max() == len( j ) - 1 ).all()

        j_range = len( j ) - 1
        self.divider = j_range / self.range

        transformed = j / self.divider
        transformed = transformed - self.upper
        transformed = scipy.special.erfinv( transformed )
        ############
        # transformed = transformed - np.mean(transformed)

        return transformed

target_col='target'
id_col='ID_code'
submission = pd.read_csv('../input/sample_submission.csv')
id_test = submission[id_col].values
# function for auto encoder with a compressed components n_comp = 12
def neural_compression_v2(train, test):
    dataset = pd.concat([train.drop(target_col, axis=1), test], axis=0)
    ids = dataset[id_col]
    dataset.drop(id_col, axis=1, inplace=True)
    y_train = train[target_col]
    
    cat_vars = [c for c in dataset.columns if dataset[c].dtype == 'object']
    for c in cat_vars:
        t_data = pd.get_dummies(dataset[c], prefix=c)
        dataset = pd.concat([dataset, t_data], axis=1)

    dataset.drop(cat_vars, axis=1, inplace=True)
    # We scale both train and test data so that our NN works better.
    sc = StandardScaler()
#     sc = GaussRankScaler()# Gauss Rank does not work...
    sc.fit_transform(dataset)

    dataset = sc.fit_transform(dataset)

    train = dataset[:train.shape[0]]
    test = dataset[train.shape[0]:]

    print("one hot encoded train shape :: {}".format(train.shape))
    print("one hot encoded test shape :: {}".format(test.shape))
    
    ''' neural network compression code '''
    
    import keras
    from keras import regularizers
    from keras.layers import Input, Dense,BatchNormalization,Dropout
    from keras.models import Model
    from keras.regularizers import l2
    # adding some noise to data before feed them to nn
    train = train + 0.5 * np.random.normal(loc=0.0, scale=1.0, size=train.shape) 
    test = test + 0.5 * np.random.normal(loc=0.0, scale=1.0, size=test.shape)
    l2_reg_embedding = 1e-5
    print(keras.__version__)
    init_dim = train.shape[1]

    input_row = Input(shape=(init_dim, ))
    encoded = Dense(512, activation='elu',kernel_regularizer=l2(l2_reg_embedding))(input_row)
    encoded = Dense(256, activation='elu',kernel_regularizer=l2(l2_reg_embedding))(encoded)
    encoded = Dense(128, activation='elu',kernel_regularizer=l2(l2_reg_embedding))(encoded)
    encoded = Dense(64, activation='elu',kernel_regularizer=l2(l2_reg_embedding))(encoded)
    encoded = Dense(32, activation='elu',kernel_regularizer=l2(l2_reg_embedding))(encoded)

    encoded = Dense(16, activation='elu')(encoded)
    
    decoded = Dense(32, activation='elu',kernel_regularizer=l2(l2_reg_embedding))(encoded)
    decoded = Dense(64, activation='elu',kernel_regularizer=l2(l2_reg_embedding))(decoded)
    decoded = Dense(128, activation='elu',kernel_regularizer=l2(l2_reg_embedding))(decoded)
    decoded = Dense(256, activation='elu',kernel_regularizer=l2(l2_reg_embedding))(decoded)
    decoded = Dense(512, activation='elu',kernel_regularizer=l2(l2_reg_embedding))(decoded)
    decoded = Dense(init_dim, activation='sigmoid')(decoded)

    autoencoder = Model(inputs=input_row, outputs=decoded)
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
    #we use the train data to train
    autoencoder.fit(train, train,
                    batch_size=512,verbose=2,
                    shuffle=True, validation_data=(test, test), epochs=4)

    # compressing the data
    encoder = Model(inputs=input_row, outputs=encoded)
    train_compress = encoder.predict(train,batch_size=4096)
    test_compress = encoder.predict(test,batch_size=4096)

    # denoising the data
    denoised_train = autoencoder.predict(train,batch_size=4096)
    denoised_test = autoencoder.predict(test,batch_size=4096)
    
    return train_compress, test_compress, denoised_train, denoised_test

train_compress, test_compress, denoised_train, denoised_test = neural_compression_v2(train, test)



for c in train.columns:
    if train[c].dtype == 'object':
        lbl = LabelEncoder()
        lbl.fit(list(train[c].values) + list(test[c].values))
        train[c] = lbl.transform(list(train[c].values))
        test[c] = lbl.transform(list(test[c].values))
        

n_comp = 12
# ICA
ica = FastICA(n_components=n_comp, random_state=420)
ica2_results_train = ica.fit_transform(train.drop([target_col], axis=1))
ica2_results_test = ica.transform(test)

# GRP
grp = GaussianRandomProjection(n_components=n_comp, eps=0.1, random_state=420)
grp_results_train = grp.fit_transform(train.drop([target_col], axis=1))
grp_results_test = grp.transform(test)

# SRP
srp = SparseRandomProjection(n_components=n_comp, dense_output=True, random_state=420)
srp_results_train = srp.fit_transform(train.drop([target_col], axis=1))
srp_results_test = srp.transform(test)

# FAG
fag = FeatureAgglomeration(n_clusters=n_comp, linkage='ward')
fag_results_train = fag.fit_transform(train.drop([target_col], axis=1))
fag_results_test = fag.transform(test)

usable_columns = list(set(train.columns) - set([target_col]))

# Append decomposition components to datasets
for i in range(1, n_comp + 1):

    train['ica_' + str(i)] = ica2_results_train[:, i - 1]
    test['ica_' + str(i)] = ica2_results_test[:, i - 1]

    train['grp_' + str(i)] = grp_results_train[:, i - 1]
    test['grp_' + str(i)] = grp_results_test[:, i - 1]

    train['srp_' + str(i)] = srp_results_train[:, i - 1]
    test['srp_' + str(i)] = srp_results_test[:, i - 1]
    
    train['fag_' + str(i)] = fag_results_train[:, i - 1]
    test['fag_' + str(i)] = fag_results_test[:, i - 1]

for j in range(1, train_compress.shape[1]):
    train['aen_' + str(j)] = train_compress[:, j-1]
    test['aen_' + str(j)] = test_compress[:, j-1]
    train['aen_' + str(j)] = denoised_train[:, j-1]
    test['aen_' + str(j)] = denoised_test[:, j-1]
    
    
    


y = train[target_col].values


# finaltrainset and finaltestset are data to be used only the stacked model (does not contain PCA, SVD... arrays) 
finaltrainset = train[usable_columns].values
finaltestset = test[usable_columns].values

#--training & test stratified split
np.savetxt('finaltrainset_dae2.csv',finaltrainset,delimiter=',')
np.savetxt('finaltrestset_dae2.csv',finaltestset,delimiter=',')

one hot encoded train shape :: (200000, 200)
one hot encoded test shape :: (200000, 200)


Using TensorFlow backend.


2.2.4
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Train on 200000 samples, validate on 200000 samples
Epoch 1/4
 - 19s - loss: -8.6396e-01 - val_loss: -1.1600e+00
Epoch 2/4
 - 19s - loss: -1.3341e+00 - val_loss: -1.4469e+00
Epoch 3/4
 - 19s - loss: -1.5692e+00 - val_loss: -1.6171e+00
Epoch 4/4
 - 18s - loss: -1.7226e+00 - val_loss: -1.7739e+00


It seems that stacked_pipeline make performance worse. 