In [1]:
# https://www.kaggle.com/hiro5299834/tps06-1d-2dcnn-xgb-as-output-layer/

# Generates out-of-fold (OOF) and test-set predictions for use in 02.

In [2]:
!pip install tensorflow 



In [3]:
!pip install tensorflow_addons



In [5]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-1.4.2-py3-none-manylinux2010_x86_64.whl (166.7 MB)
[K     |████████████████████████████████| 166.7 MB 8.2 MB/s eta 0:00:012  |                                | 358 kB 5.9 MB/s eta 0:00:29     |▎                               | 1.3 MB 5.9 MB/s eta 0:00:28     |██████▊                         | 35.0 MB 10.8 MB/s eta 0:00:13     |██████▉                         | 35.6 MB 10.8 MB/s eta 0:00:13     |███████▋                        | 39.6 MB 9.3 MB/s eta 0:00:14     |█████████▊                      | 50.9 MB 10.2 MB/s eta 0:00:12     |█████████████████████████▋      | 133.7 MB 6.4 MB/s eta 0:00:06     |████████████████████████████▏   | 146.5 MB 10.0 MB/s eta 0:00:03     |████████████████████████████▋   | 148.8 MB 6.8 MB/s eta 0:00:03     |████████████████████████████▉   | 150.2 MB 6.8 MB/s eta 0:00:03     |███████████████████████████████▏| 162.3 MB 7.8 MB/s eta 0:00:01
Installing collected packages: xgboost
Successfully installed xgboost-1.4.2


In [25]:
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
import datetime
import random
import os
import gc

from sklearn.metrics import log_loss, confusion_matrix, classification_report
from sklearn.model_selection import StratifiedKFold

from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
from tensorflow.keras import layers
from tensorflow.keras import Model

import xgboost as xgb

from scipy.optimize import minimize

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

In [26]:
# Central experiment configuration.
# 'k', 'n_clusters', 'n_components', 'sigma' and 'n_conv2d' hold one value per
# model variant (indexed by the model number); 'seed_l' lists the CV seeds.
CFG = dict(
    debug=False,                 # True -> shortened run, see override below
    target='target',             # name of the label column
    n_class=9,
    seed=299792458,              # global seed
    seed_l=[299, 792, 458],      # one cross-validation run per seed
    k=[5, 2],
    n_clusters=[9, 2],
    n_components=[9, 2],
    emb_out_dim=16,
    sigma=[0.7, 0.9],
    n_conv2d=[1, 3],
    max_epochs=100,
    batch_size=256,
    learning_rate=1e-3,
    es_patience=10,              # early-stopping patience (epochs)
    lr_patience=2,               # ReduceLROnPlateau patience (epochs)
    lr_factor=0.7,               # ReduceLROnPlateau multiplicative factor
    n_splits=10,                 # CV folds
    nn_verbose=0,
    save_path='./outputs/',
    n_estimators=3000,           # max boosting rounds
    early_stopping_rounds=100,
    gbt_verbose=0,
)

# Debug mode: just enough epochs/folds to exercise the whole pipeline.
if CFG['debug']:
    CFG.update(max_epochs=2, n_splits=3)

In [27]:
def seed_everything(seed=2021):
    """Apply one seed to Python's `random`, NumPy and TensorFlow.

    Also exports the seed as the PYTHONHASHSEED environment variable.

    Args:
        seed: integer seed handed to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for set_seed in (random.seed, np.random.seed, tf.random.set_seed):
        set_seed(seed)


seed_everything(CFG['seed'])

In [28]:
# Create the output directory up front (no error if it already exists) so the
# np.save calls that write under CFG['save_path'] have somewhere to go.
os.makedirs(CFG['save_path'], exist_ok=True)

## Datasets

In [29]:
# Raw tables, read from the current working directory.
train, test, submission = (
    pd.read_csv(csv_name)
    for csv_name in ("train.csv", "test.csv", "sample_submission.csv")
)

# Integer labels: the number after the last "_" in each label string, shifted
# to start at 0.
target = train[CFG['target']].apply(lambda label: int(label.rsplit("_", 1)[-1]) - 1)
# One-hot encoded labels (one column per distinct label string).
target_ohe = pd.get_dummies(train[CFG['target']])
# Every column whose name starts with "feature_" is a model input.
features = [name for name in train.columns if name.startswith('feature_')]

In [31]:
# Number of training rows: train and test are stacked for every transform
# below and split apart again at this offset.
n_train = train.shape[0]

# Min-max scale the raw features over train+test together, then view each row
# as a 5x5x3 array for the 2-D convolution branch (requires 75 feature columns).
scaler = MinMaxScaler()
all_df = pd.concat([train, test]).reset_index(drop=True)
all_df = scaler.fit_transform(all_df[features])
train_2d = all_df[:n_train].reshape(-1, 5, 5, 3)
test_2d = all_df[n_train:].reshape(-1, 5, 5, 3)

# One entry per model variant in each list (index == n_model).
train_km, train_pca, train_knn = [], [], []
test_km, test_pca, test_knn = [], [], []

for n_model in range(len(CFG['k'])):
    # KMeans.fit_transform returns the distance of every row to each centre.
    km = KMeans(n_clusters=CFG['n_clusters'][n_model], random_state=CFG['seed'])
    all_km = km.fit_transform(all_df)
    train_km.append(all_km[:n_train])
    test_km.append(all_km[n_train:])

    # PCA projection of the scaled features.
    pca = PCA(n_components=CFG['n_components'][n_model], random_state=CFG['seed'])
    all_pca = pca.fit_transform(all_df)
    train_pca.append(all_pca[:n_train])
    test_pca.append(all_pca[n_train:])

    # Pre-computed KNN features loaded from disk (train rows first, then test).
    all_knn = np.concatenate([
        np.load(f"./knn_saved/add_feat_k{CFG['k'][n_model]}_train.npy"),
        np.load(f"./knn_saved/add_feat_k{CFG['k'][n_model]}_test.npy")
        ])
    # Use a dedicated scaler here: refitting `scaler` would silently overwrite
    # the raw-feature scaling it holds after the step above.
    all_knn = MinMaxScaler().fit_transform(all_knn)
    train_knn.append(all_knn[:n_train])
    test_knn.append(all_knn[n_train:])

## Train and Pred

In [32]:
# Loss object evaluated inside `custom_metric`.
cce = tf.keras.losses.CategoricalCrossentropy()


def custom_metric(y_true, y_pred):
    """Categorical cross-entropy computed on clipped predictions.

    Predictions are clipped to [1e-15, 1 - 1e-15] before the loss is evaluated.
    Keras reports this metric as `custom_metric` / `val_custom_metric`, the
    name both callbacks below monitor.
    """
    clipped = K.clip(y_pred, 1e-15, 1-1e-15)
    return K.mean(cce(y_true, clipped))


# Stop when the validation metric has not improved by at least min_delta for
# `es_patience` epochs, and roll back to the best weights.
es_cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_custom_metric',
    mode='min',
    min_delta=1e-05,
    patience=CFG['es_patience'],
    baseline=None,
    restore_best_weights=True,
    verbose=CFG['nn_verbose'])

# Multiply the learning rate by `lr_factor` after `lr_patience` epochs without
# improvement, never going below min_lr.
sch_cb = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_custom_metric',
    mode='min',
    factor=CFG['lr_factor'],
    patience=CFG['lr_patience'],
    min_lr=1e-5,
    verbose=CFG['nn_verbose'])

In [33]:
# Embedding table size: largest raw feature value found in train or test,
# plus one, so that the largest value is itself a valid embedding index.
emb_out_dim = CFG['emb_out_dim']
emb_in_dim = max(train[features].max().max(), test[features].max().max()) + 1
emb_dims = [emb_in_dim, emb_out_dim]

## Model

In [34]:
def _wn_dense(tensor):
    """Apply a weight-normalised 16-unit ReLU dense layer (he_normal init) to `tensor`."""
    return tfa.layers.WeightNormalization(
        layers.Dense(
            units=16,
            activation='relu',
            kernel_initializer='he_normal'
        ))(tensor)


def create_model(shape, emb_dims, sigma, n_conv2d, n_model):
    """Build the five-input classifier and a feature extractor sharing its layers.

    Args:
        shape: five input shapes, in this order: embedding/Conv1D branch,
            2-D convolution branch, KNN features, KMeans features, PCA features.
        emb_dims: `[input_dim, output_dim]` of the embedding layer.
        sigma: per-model list; `sigma[n_model]` is passed to the NoisyDense layers.
        n_conv2d: per-model list; `n_conv2d[n_model]` is the total number of
            SeparableConv2D + BatchNormalization stages.
        n_model: index selecting the entries of `sigma` and `n_conv2d`.

    Returns:
        `(model, hidden_model)`: `model` is the compiled softmax classifier;
        `hidden_model` is an uncompiled model over the same inputs and layers
        whose output is the last 16-unit hidden activation (`hidden_out`).
    """
    #----------- Inputs ----------------------
    conv_inputs = layers.Input(shape=shape[0])
    conv2_inputs = layers.Input(shape=shape[1])
    knn_inputs = layers.Input(shape=shape[2])
    kms_inputs = layers.Input(shape=shape[3])
    pca_inputs = layers.Input(shape=shape[4])

    #----------- Embedding layers ----------------------
    embed = layers.Embedding(
        input_dim=emb_dims[0],
        output_dim=emb_dims[1],
        embeddings_regularizer='l2'
        )(conv_inputs)

    #----------- Convolution1 layers ----------------------
    embed = layers.Conv1D(8, 1, activation='relu')(embed)
    embed = layers.Flatten()(embed)
    hidden_emb = layers.Dropout(0.4)(embed)

    #----------- Convolution2 layers ----------------------
    cnv2 = layers.SeparableConv2D(8, 3, padding='same', activation='relu')(conv2_inputs)
    cnv2 = layers.BatchNormalization()(cnv2)

    # The first stage is built above; add the remaining n_conv2d[n_model] - 1.
    for _ in range(n_conv2d[n_model]-1):
        cnv2 = layers.SeparableConv2D(8, 3, padding='same', activation='relu')(cnv2)
        cnv2 = layers.BatchNormalization()(cnv2)

    cnv2 = layers.Flatten()(cnv2)
    hidden_cnv2 = layers.Dropout(0.4)(cnv2)

    #----------- Per-branch dense heads ----------------------
    hidden_emb = tfa.layers.NoisyDense(units=16, sigma=sigma[n_model], activation='relu')(hidden_emb)
    hidden_emb = _wn_dense(hidden_emb)

    hidden_cnv2 = tfa.layers.NoisyDense(units=16, sigma=sigma[n_model], activation='relu')(hidden_cnv2)
    hidden_cnv2 = _wn_dense(hidden_cnv2)

    #----------- Densely connected trunk ----------------------
    # Every trunk stage sees the same base tensors again (note: the flattened
    # `embed` here is the pre-dropout tensor) plus all earlier trunk outputs.
    skip = [embed, hidden_emb, hidden_cnv2, knn_inputs, kms_inputs, pca_inputs]

    hidden = layers.Dropout(0.4)(layers.Concatenate()(skip))
    hidden = _wn_dense(hidden)
    hidden = layers.Dropout(0.4)(layers.Concatenate()(skip + [hidden]))

    hidden2 = _wn_dense(hidden)
    hidden2 = layers.Dropout(0.4)(layers.Concatenate()(skip + [hidden, hidden2]))

    hidden_out = _wn_dense(hidden2)

    #----------- Final layer -----------------------
    conv_outputs = layers.Dense(
        units=CFG['n_class'],
        activation='softmax',
        kernel_initializer='lecun_normal')(hidden_out)

    #----------- Classifier ---------------
    inputs = [conv_inputs, conv2_inputs, knn_inputs, kms_inputs, pca_inputs]
    model = Model(inputs, conv_outputs)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=tfa.optimizers.LazyAdam(learning_rate=CFG['learning_rate'], amsgrad=False),
        # Keras documents `metrics` as a list; the metric keeps the name
        # `custom_metric`, which the callbacks monitor as `val_custom_metric`.
        metrics=[custom_metric]
    )

    #----------- Feature extractor (shares layers with `model`) ---------------
    hidden_model = Model(inputs, hidden_out)

    return model, hidden_model

# Build every model variant once and print its layer table as a sanity check.
for i in range(len(CFG['k'])):
    shape = [
        train[features].shape[1],
        train_2d.shape[1:],
        CFG['k'][i]*CFG['n_class'],
        CFG['n_clusters'][i],
        CFG['n_components'][i]
    ]
    model, hidden_model = create_model(shape, emb_dims, CFG['sigma'], CFG['n_conv2d'], i)
    # Model.summary() prints the table itself and returns None, so wrapping it
    # in print() would add a stray "None" line after every summary.
    model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            [(None, 75)]         0                                            
__________________________________________________________________________________________________
input_7 (InputLayer)            [(None, 5, 5, 3)]    0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 75, 16)       5648        input_6[0][0]                    
__________________________________________________________________________________________________
separable_conv2d_1 (SeparableCo (None, 5, 5, 8)      59          input_7[0][0]                    
____________________________________________________________________________________________

In [35]:
# Booster parameters handed to xgb.train. 'seed' is only a default here: the
# training loop overwrites it with the current CV seed before each fit.
xgb_params = dict(
    objective='multi:softprob',   # per-class probabilities
    eval_metric='mlogloss',
    max_depth=6,
    learning_rate=5e-3,
    colsample_bytree=0.4,
    subsample=0.6,
    reg_alpha=6,
    min_child_weight=100,
    n_jobs=-1,
    num_class=CFG['n_class'],
    seed=CFG['seed'],
    tree_method='gpu_hist',       # GPU histogram tree construction
)

In [None]:
# Result buffers, indexed as [model variant, seed index, row, class].
# *_oof holds out-of-fold predictions for the training rows; *_pred holds the
# test predictions averaged over the folds of one (model, seed) run.
nn_oof = np.zeros((len(CFG['k']), len(CFG['seed_l']), train.shape[0], CFG['n_class']))
gbt_oof = np.zeros((len(CFG['k']), len(CFG['seed_l']), train.shape[0], CFG['n_class']))
nn_pred = np.zeros((len(CFG['k']), len(CFG['seed_l']), test.shape[0], CFG['n_class']))
gbt_pred = np.zeros((len(CFG['k']), len(CFG['seed_l']), test.shape[0], CFG['n_class']))
# Passed to xgb.train as `evals_result`; the same dict is reused by every fold.
eval_fold_result = {}

for n_model in range(len(CFG['k'])):
    print(f"===== MODEL {n_model} cross validation =====")

    # One full stratified K-fold run per seed; the seed drives the fold split
    # and the XGBoost seed below.
    for n_seed, seed in enumerate(CFG['seed_l']):
    
        skf = StratifiedKFold(n_splits=CFG['n_splits'], shuffle=True, random_state=seed)

        for fold, (trn_idx, val_idx) in enumerate(skf.split(train, train[CFG['target']])):
            # Raw (unscaled) feature columns feed the embedding branch; labels
            # are kept both one-hot (Keras) and as integers (XGBoost).
            X_train, y_train_ohe, y_train = train[features].iloc[trn_idx], target_ohe.iloc[trn_idx], target.iloc[trn_idx]
            X_valid, y_valid_ohe, y_valid = train[features].iloc[val_idx], target_ohe.iloc[val_idx], target.iloc[val_idx]
            X_test = test[features]

            # Min-max scaled features reshaped to 5x5x3 for the 2-D conv branch.
            X_train_2d, X_valid_2d = train_2d[trn_idx], train_2d[val_idx]
            X_test_2d = test_2d
            
            # Auxiliary inputs belonging to this model variant.
            X_train_knn, X_valid_knn = train_knn[n_model][trn_idx], train_knn[n_model][val_idx]
            X_test_knn = test_knn[n_model]

            X_train_km, X_valid_km = train_km[n_model][trn_idx], train_km[n_model][val_idx]
            X_test_km = test_km[n_model]

            X_train_pca, X_valid_pca = train_pca[n_model][trn_idx], train_pca[n_model][val_idx]
            X_test_pca = test_pca[n_model]

            # TensorBoard run directory named after the current wall-clock second.
            log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
            tb_cb = tf.keras.callbacks.TensorBoard(log_dir=log_dir)

            # Reset Keras' global state before building this fold's model.
            K.clear_session()  
            shape = [
                X_train.shape[1],
                X_train_2d.shape[1:],
                CFG['k'][n_model]*CFG['n_class'],
                CFG['n_clusters'][n_model],
                CFG['n_components'][n_model]
            ]

            # ---- Stage 1: neural network ----
            model, hidden_model = create_model(shape, emb_dims, CFG['sigma'], CFG['n_conv2d'], n_model)
            model.fit(
                [X_train, X_train_2d, X_train_knn, X_train_km, X_train_pca], y_train_ohe,
                batch_size=CFG['batch_size'],
                epochs=CFG['max_epochs'],
                validation_data=([X_valid, X_valid_2d, X_valid_knn, X_valid_km, X_valid_pca], y_valid_ohe),
                callbacks=[es_cb, sch_cb, tb_cb],
                verbose=CFG['nn_verbose']
            )

            # OOF rows are written once per run; test predictions accumulate
            # 1/n_splits of each fold's output, i.e. the mean over folds.
            nn_oof[n_model, n_seed, val_idx] = model.predict([X_valid, X_valid_2d, X_valid_knn, X_valid_km, X_valid_pca])
            nn_pred[n_model, n_seed] += model.predict([X_test, X_test_2d, X_test_knn, X_test_km, X_test_pca]) / CFG['n_splits']
            m_logloss = log_loss(y_valid_ohe, nn_oof[n_model, n_seed, val_idx])
            print(f"nn  model{n_model} seed{seed} fold{fold}: m_logloss {m_logloss}")
            
            # ---- Stage 2: XGBoost on the network's last hidden layer ----
            # hidden_model shares the trained layers of `model`, so these are
            # the learned hidden activations used as XGBoost features.
            hidden_train = hidden_model.predict([X_train, X_train_2d, X_train_knn, X_train_km, X_train_pca])
            hidden_valid = hidden_model.predict([X_valid, X_valid_2d, X_valid_knn, X_valid_km, X_valid_pca])
            hidden_test = hidden_model.predict([X_test, X_test_2d, X_test_knn, X_test_km, X_test_pca])
            trn_data = xgb.DMatrix(data=hidden_train, label=y_train)
            val_data = xgb.DMatrix(data=hidden_valid, label=y_valid)

            # Note: this mutates the module-level xgb_params dict in place.
            xgb_params['seed'] = seed
            # "valid" is the last entry in `evals`; presumably that is the set
            # early stopping watches -- TODO confirm against the xgboost docs.
            gbt_model = xgb.train(
                params=xgb_params,
                dtrain=trn_data,
                evals=[(trn_data, "train"), (val_data, "valid")],
                evals_result=eval_fold_result,
                num_boost_round = CFG['n_estimators'],
                verbose_eval=CFG['gbt_verbose'],
                early_stopping_rounds=CFG['early_stopping_rounds'],
                )
        
            # Predict with the trees up to the early-stopping optimum.
            # NOTE(review): `ntree_limit` / `best_ntree_limit` look tied to the
            # xgboost release this was run with; verify before upgrading xgboost.
            gbt_oof[n_model, n_seed, val_idx] = gbt_model.predict(xgb.DMatrix(hidden_valid), ntree_limit=gbt_model.best_ntree_limit)
            gbt_pred[n_model, n_seed] += gbt_model.predict(xgb.DMatrix(hidden_test), ntree_limit=gbt_model.best_ntree_limit) / CFG['n_splits']
            m_logloss = log_loss(y_valid, gbt_oof[n_model, n_seed, val_idx])
            print(f"xgb model{n_model} seed{seed} fold{fold}: m_logloss {m_logloss}")
        
        # Whole-train OOF score for this (model, seed) run, for both stages.
        print("-"*60)
        m_logloss = log_loss(target, nn_oof[n_model, n_seed])
        print(f"nn  model{n_model} seed{seed}: m_logloss {m_logloss}")
        m_logloss = log_loss(target, gbt_oof[n_model, n_seed])
        print(f"xgb model{n_model} seed{seed}: m_logloss {m_logloss}\n")

        # Persist OOF and test predictions of both stages for this run.
        np.save(CFG['save_path'] + f"nn_model{n_model}_seed{seed}_oof", nn_oof[n_model, n_seed])
        np.save(CFG['save_path'] + f"nn_model{n_model}_seed{seed}_pred", nn_pred[n_model, n_seed])
        np.save(CFG['save_path'] + f"xgb_model{n_model}_seed{seed}_oof", gbt_oof[n_model, n_seed])
        np.save(CFG['save_path'] + f"xgb_model{n_model}_seed{seed}_pred", gbt_pred[n_model, n_seed])

===== MODEL 0 cross validation =====


2021-07-24 15:50:16.670018: I tensorflow/core/profiler/lib/profiler_session.cc:126] Profiler session initializing.
2021-07-24 15:50:16.670095: I tensorflow/core/profiler/lib/profiler_session.cc:141] Profiler session started.
2021-07-24 15:50:16.670146: I tensorflow/core/profiler/lib/profiler_session.cc:159] Profiler session tear down.
2021-07-24 15:50:20.125514: I tensorflow/core/profiler/lib/profiler_session.cc:126] Profiler session initializing.
2021-07-24 15:50:20.125592: I tensorflow/core/profiler/lib/profiler_session.cc:141] Profiler session started.
2021-07-24 15:50:20.153224: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data.
2021-07-24 15:50:20.160024: I tensorflow/core/profiler/lib/profiler_session.cc:159] Profiler session tear down.
2021-07-24 15:50:20.197261: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/fit/20210724-155016/train/plugins/profile/2021_07_24_15_50_20
2021-07-24 15:50:20.205648: I tenso

## Post-process

In [None]:
def class_optimizer(X, a0, a1, a2, a3, a4, a5, a6, a7, a8):
    """Log-loss of `target` after rescaling each class column by a weight.

    Shaped for `scipy.optimize.minimize`: `X` is the vector being optimised
    (one multiplier per class) and `a0`..`a8` are the nine per-class
    probability columns supplied through `args`.

    Returns:
        `log_loss(target, p)` where `p[:, i] = X[i] * a_i`, with every row
        renormalised to sum to 1. `target` is read from module scope.
    """
    columns = (a0, a1, a2, a3, a4, a5, a6, a7, a8)
    scaled = np.array([X[i] * column for i, column in enumerate(columns)]).transpose()
    row_totals = np.sum(scaled, axis=1).reshape(-1, 1)

    return log_loss(target, scaled / row_totals)

def get_optimized(X, vals):
    """Blend the entries of `vals` with the weights in `X`.

    Entry `i` of `vals` is weighted by `X[i]`. The entry at position `len(X)`
    (if present) receives the remainder `1 - sum(X)`, so `len(X) + 1` entries
    are combined with weights that sum to 1. Supplying more than `len(X) + 1`
    entries raises IndexError.

    Returns:
        The weighted sum (0 when `vals` is empty).
    """
    n_free = len(X)
    blended = 0
    for pos, component in enumerate(vals):
        if pos == n_free:
            # Last component: whatever weight the free parameters left over.
            weight = 1
            for used in X:
                weight -= used
        else:
            weight = X[pos]
        blended += weight * component

    return blended

def model_optimizer(X, oofs):
    """Objective for the blend-weight search.

    Returns the log-loss of the module-level `target` against the blend of
    `oofs` produced by `get_optimized(X, oofs)`.
    """
    return log_loss(target, get_optimized(X, oofs))

In [None]:
# Stack the NN and XGBoost results along the first axis, so the leading index
# runs over all NN variants followed by all XGBoost variants.
oof = np.concatenate([nn_oof, gbt_oof])
pred = np.concatenate([nn_pred, gbt_pred])

# For every (model, seed) run, search for one multiplier per class that
# minimises the OOF log-loss, then apply the same multipliers to the OOF and
# test predictions and renormalise each row to sum to 1.
res_l = []
for idx0 in tqdm(range(oof.shape[0])):
    for idx1 in tqdm(range(oof.shape[1]), leave=False):
        res = minimize(
            fun=class_optimizer,
            x0=[1.0] * CFG['n_class'],
            args=tuple(oof[idx0, idx1].T),   # one column per class
            method='Nelder-Mead',
            options={'maxiter': 300})

        # Broadcasting res.x over the last axis scales column i by res.x[i].
        oof[idx0, idx1] *= res.x
        oof[idx0, idx1] /= np.sum(oof[idx0, idx1], axis=1, keepdims=True)

        pred[idx0, idx1] *= res.x
        pred[idx0, idx1] /= np.sum(pred[idx0, idx1], axis=1, keepdims=True)

        res_l.append(res)

In [None]:
# Average over the seed axis: one prediction array per model remains.
avg_oof = oof.mean(axis=1)
avg_pred = pred.mean(axis=1)

# Search blend weights for all models but the last; get_optimized assigns the
# last model the remainder 1 - sum(weights). Start from equal weights.
res = minimize(
    fun=model_optimizer,
    x0=[1/oof.shape[0]] * (avg_oof.shape[0]-1),
    args=(avg_oof,),
    method='Nelder-Mead',
    options={'maxiter': 1000})

# Apply the weights found on the OOF predictions to OOF and test alike.
opt_oof, opt_pred = (get_optimized(res.x, stacked) for stacked in (avg_oof, avg_pred))

print(f"logloss score: {log_loss(target, opt_oof)}")
print(res)

## Check results

### Target distribution

In [None]:
# One bin per class, centred on the integer class index. The default of 10
# equal-width bins over 9 integer classes leaves an empty bin in the middle of
# the histogram, and lets each panel choose its own range.
class_bins = np.arange(CFG['n_class'] + 1) - 0.5

panels = [
    ("Target (train)", target),
    ("OOF prediction (argmax)", pd.Series(opt_oof.argmax(axis=1))),
    ("Test prediction (argmax)", pd.Series(opt_pred.argmax(axis=1))),
]

fig, axes = plt.subplots(1, 3, figsize=(16, 4), tight_layout=True)
for ax, (title, classes) in zip(axes, panels):
    classes.hist(ax=ax, bins=class_bins)
    # Titles and axis labels let each panel be read on its own.
    ax.set(title=title, xlabel="class index", ylabel="count")

### Confusion matrix

In [None]:
# Confusion matrix of the blended OOF predictions (argmax class) against the
# true labels, drawn as an annotated heatmap and saved as a PNG.
cm = confusion_matrix(target, opt_oof.argmax(axis=1))

fig, ax = plt.subplots(figsize=(16, 4))
sns.heatmap(cm, annot=True, fmt='5d', cmap='Blues', ax=ax)
fig.savefig("confusion_matrix.png")

### Classification report

In [None]:
# Text report for reading in the notebook ...
print(classification_report(target, opt_oof.argmax(axis=1), digits=4))

# ... and the same report as a table (one row per class / average), saved to CSV.
report = pd.DataFrame(
    classification_report(target, opt_oof.argmax(axis=1), digits=4, output_dict=True)
).T
report.to_csv("report.csv")

## Submission

In [None]:
# Overwrite every column after the first with the blended test predictions
# (the first column of the sample submission is left as loaded), then write
# the file without the DataFrame index.
submission.iloc[:, 1:] = opt_pred  
submission.to_csv("submission.csv", index=False)

In [None]:
# 3x3 grid: histogram of the submitted probability for each of the 9 classes.
plt.figure(figsize=(16, 8), tight_layout=True)
for class_no in range(1, 10):
    plt.subplot(3, 3, class_no)
    plt.title(f"Class_{class_no}")
    submission[f'Class_{class_no}'].hist()