In [1]:
input_path = '../input/tabular-playground-series-apr-2022/'
output_path = './'

In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score

def load_raw_data(train_or_test='train'):
    file_name = f'{input_path}/{train_or_test}.csv'
    df = pd.read_csv(file_name)
    return df

def load_label(train_or_test='train'):
    file_name = input_path + ('train_labels.csv' if train_or_test=='train' else 'sample_submission.csv')
    df = pd.read_csv(file_name)
    return df['state'].values

def competition_metric(y_true, y_score):
    return roc_auc_score(y_true, y_score)

def evaluate(model, X, y):
    return competition_metric(y, model.predict_proba(X)[:, 1])

def submit(arr):
    df = pd.read_csv(f'{input_path}/sample_submission.csv')
    df['state'] = arr
    df.to_csv(f'{output_path}/submission.csv', index=False)

In [3]:
import tensorflow as tf
from tensorflow import keras
from sklearn.base import BaseEstimator, TransformerMixin

class ResNetModel(keras.Model):
    def __init__(self):
        super(ResNetModel, self).__init__()
        self.fns = [
            keras.layers.Conv1D(filters=20, kernel_size=8, padding='same', activation='elu', kernel_regularizer=keras.regularizers.L2(1e-3)), 
            keras.layers.Conv1D(filters=20, kernel_size=8, padding='same', activation='elu', kernel_regularizer=keras.regularizers.L2(1e-3)),
            keras.layers.Conv1D(filters=20, kernel_size=8, padding='same', activation='elu', kernel_regularizer=keras.regularizers.L2(1e-3)),
            keras.layers.AveragePooling1D(2),
            
            keras.layers.Conv1D(filters=20, kernel_size=6, padding='same', activation='elu', kernel_regularizer=keras.regularizers.L2(1e-3)),
            keras.layers.Conv1D(filters=20, kernel_size=6, padding='same', activation='elu', kernel_regularizer=keras.regularizers.L2(1e-3)),
            keras.layers.Conv1D(filters=20, kernel_size=6, padding='same', activation='elu', kernel_regularizer=keras.regularizers.L2(1e-3)),
            keras.layers.AveragePooling1D(2),
            
            keras.layers.Conv1D(filters=20, kernel_size=4, padding='same', activation='elu', kernel_regularizer=keras.regularizers.L2(1e-3)),
            keras.layers.Conv1D(filters=20, kernel_size=4, padding='same', activation='elu', kernel_regularizer=keras.regularizers.L2(1e-3)),
            keras.layers.Conv1D(filters=20, kernel_size=4, padding='same', activation='elu', kernel_regularizer=keras.regularizers.L2(1e-3)),
            keras.layers.AveragePooling1D(3),
            
            keras.layers.GlobalAveragePooling1D(),
            keras.layers.Dense(1, activation='sigmoid')
        ]
        self.bns = [
            keras.layers.BatchNormalization(), 
            keras.layers.BatchNormalization(), 
            
            keras.layers.BatchNormalization(), 
            keras.layers.BatchNormalization(), 
            
            keras.layers.BatchNormalization(), 
            keras.layers.BatchNormalization(), 
            
            keras.layers.BatchNormalization(), 
            keras.layers.BatchNormalization(), 
        ]
        
    def call(self, inputs):
        outputs = inputs
        
        outputs = self.fns[0](outputs)
        res = outputs
        res = self.fns[1](res)
        res = self.fns[2](res)
        outputs += res
        outputs = self.fns[3](outputs)
        
        outputs = self.fns[4](outputs)
        res = outputs
        res = self.fns[5](res)
        res = self.fns[6](res)
        outputs += res
        outputs = self.fns[7](outputs)
        
        outputs = self.fns[8](outputs)
        res = outputs
        res = self.fns[9](res)
        res = self.fns[10](res)
        outputs += res
        outputs = self.fns[11](outputs)
        
        outputs = self.fns[-2](outputs)
        outputs = self.fns[-1](outputs)
        
        return outputs
    
    def predict_proba(self, X):
        return np.concatenate([1-self.predict(X), self.predict(X)], axis=1)

def random_sensor_swap(x, y, random_state=None):
    rng = np.random.default_rng(random_state)
    p_swap = 0.5
    indices = rng.choice(np.arange(x.shape[0]), int(p_swap*x.shape[0]), replace=False)
    x_aug, y_aug = x[indices], y[indices]
    swap_codes = rng.integers(0, 13, (x_aug.shape[0], 2))
    for i in range(x_aug.shape[0]):
        a, b = swap_codes[i]
        x_aug[i, :, [a, b]] = x_aug[i, :, [b, a]]
    x = np.concatenate([x, x_aug], axis=0)
    y = np.concatenate([y, y_aug], axis=0)
    return x, y

def group_splitter(df, nfold=5, random_state=None):
    subject_nums = df['subject'].unique()
    rng = np.random.default_rng(random_state)
    subject_to_setnum = rng.integers(0, nfold, subject_nums.shape[0])
    for i in range(nfold):
        val_subjects = subject_nums[subject_to_setnum == i]
        mask_df_val = df['subject'].isin(val_subjects)
        mask_y_val = mask_df_val.iloc[::60]
        yield mask_df_val, mask_y_val


class DF2arr(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self
    
    def transform(self, X, y=None):
        return X.loc[:, 'sensor_00':'sensor_12'].values.reshape(-1, 60, 13)
    
    
class MyPreprocessor(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self
    
    def transform(self, X, y=None):
        X = normalize(X)
        return X
    
def normalize(x):
    x = x / (np.linalg.norm(x, axis=1, keepdims=True) + 1e-10)
    return x

In [4]:
import tensorflow as tf
from tensorflow import keras
from sklearn.base import BaseEstimator, TransformerMixin

class RNNThickModel(keras.Model):
    def __init__(self):
        super(RNNThickModel, self).__init__()
        self.fns = [
            keras.layers.LSTM(
                units=256, 
                kernel_regularizer=keras.regularizers.L2(2e-3),
#                 recurrent_regularizer=keras.regularizers.L2(1e-5),
#                 dropout=0.05,
#                 recurrent_dropout=0.01,
                return_sequences=True
            ),
            keras.layers.LSTM(
                units=128,
                kernel_regularizer=keras.regularizers.L2(2e-3),
#                 recurrent_regularizer=keras.regularizers.L2(1e-5),
#                 dropout=0.05,
#                 recurrent_dropout=0.01,
            ),
            keras.layers.Dense(units=32, activation='elu'),
            keras.layers.Dense(units=1, activation='sigmoid')
        ]
    
    def call(self, inputs):
        outputs = inputs
        for layer in self.fns:
            outputs = layer(outputs)
        return outputs
    
    def predict_proba(self, X):
        return np.concatenate([1-self.predict(X), self.predict(X)], axis=1)

    
class MySoftVoter():
    def __init__(self, *models):
        self.models = models
    
    def predict(self, X):
        result = np.zeros((X.shape[0], ), dtype=X.dtype)
        for model in self.models:
            add = model.predict(X)
            if len(add.shape) > 1:
                add = add[:, 0]
            result += add
        result /= len(self.models)
        return result
    
    def predict_proba(self, X):
        return np.stack([1-self.predict(X), self.predict(X)], axis=1)

In [5]:
from sklearn.pipeline import make_pipeline
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import classification_report
cv_scores = []

df = load_raw_data('train')
X = DF2arr().transform(df)
y = load_label('train')
subj_nums = df['subject']

preprocessor = make_pipeline(DF2arr(), MyPreprocessor())

keras.backend.clear_session()
tf.random.set_seed(42)

callbacks = [
    keras.callbacks.EarlyStopping(patience=200, restore_best_weights=True)
]
for mask_df_val, mask_y_val in group_splitter(df, nfold=5, random_state=42):
    df_train, df_val = df[~mask_df_val], df[mask_df_val]
    y_train, y_val = y[~mask_y_val], y[mask_y_val]
    for mask_df_v, mask_y_v in group_splitter(df_train, nfold=5, random_state=42):
        df_t, df_v = df_train[~mask_df_v], df_train[mask_df_v]
        y_t, y_v = y_train[~mask_y_v], y_train[mask_y_v]

    X_t = preprocessor.fit_transform(df_t)
    X_v = preprocessor.transform(df_v)
    X_val = preprocessor.transform(df_val)

    models = [RNNThickModel() for _ in range(3)] + [ResNetModel() for _ in range(3)]
    for model in models:
        with tf.device('gpu:0'):
            model.compile(
                loss='binary_crossentropy', 
                metrics=['AUC'],
                optimizer=keras.optimizers.Adam(1e-3))
            model.fit(
                X_t, y_t, 
                batch_size=1024,
                epochs=500, 
                callbacks=callbacks,
                validation_data=(X_v, y_v),
                verbose=0
            )
            model.evaluate(X_val, y_val)

    model = MySoftVoter(*models)
    print(evaluate(model, X_val, y_val))
    print(classification_report(y_val, (model.predict(X_val) >= 0.5).astype(int), digits=4 ))
    
    cv_scores.append(evaluate(model, X_val, y_val))
print(f'5-fold CV score: {np.mean(cv_scores):.4f}')

2022-09-18 06:11:38.603587: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-18 06:11:38.718418: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-18 06:11:38.719242: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-18 06:11:38.721268: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

0.9686295403779447
              precision    recall  f1-score   support

           0     0.9174    0.8954    0.9063      2592
           1     0.8966    0.9183    0.9073      2559

    accuracy                         0.9068      5151
   macro avg     0.9070    0.9069    0.9068      5151
weighted avg     0.9071    0.9068    0.9068      5151

0.9577448074366772
              precision    recall  f1-score   support

           0     0.8922    0.9059    0.8990      2412
           1     0.8944    0.8793    0.8868      2187

    accuracy                         0.8932      4599
   macro avg     0.8933    0.8926    0.8929      4599
weighted avg     0.8933    0.8932    0.8932      4599

0.969454315916729
              precision    recall  f1-score   support

           0     0.8897    0.9139    0.9017      2789
           1     0.9235    0.9017    0.9125      3215

    accuracy                         0.9074      6004
   macro avg     0.9066    0.9078    0.9071      6004
weighted avg     0

In [6]:
df_test_final = load_raw_data('test')

X_train = preprocessor.fit_transform(df_train)
X_val = preprocessor.transform(df_val)
X_test_final = preprocessor.transform(df_test_final)

for model in models:
    with tf.device('gpu:0'):
        model.fit(X_train, y_train, 
                  epochs=500, 
                  batch_size=1024,
                  callbacks=callbacks,
                  validation_data=(X_val, y_val)
                 )
    
model = MySoftVoter(*models)
y_pred = model.predict(X_test_final)
submit(y_pred)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78