In [1]:
import numpy as np
import pandas as pd

from utils import reduce_mem_usage, save_submission

import gc

gc.enable()

In [2]:
# Load the pre-computed train/test feature tables produced by the feature-engineering step.
train, test = (
    pd.read_csv(f'../input/feature-engineering/{split}_features.csv')
    for split in ('train', 'test')
)

In [4]:
# Downcast both frames (train first, then test; the helper reports the savings)
# and run a garbage-collection pass afterwards.
train, test = map(reduce_mem_usage, (train, test))

gc.collect()

Mem. usage decreased to 435.35 Mb (67.6% reduction)
Mem. usage decreased to 156.40 Mb (66.9% reduction)


0

In [5]:
# Separate the target column from the model inputs.
y = train['open_channels']
X = train.drop(columns='open_channels')

In [6]:
import pickle


def _load_pickle(path):
    '''
    deserializes and returns the object stored in the pickle file at `path`
    '''
    # NOTE(review): pickle.load executes arbitrary code from the file; only use trusted inputs.
    with open(path, 'rb') as f:
        return pickle.load(f)


# Class probabilities of the pseudo-labelled LightGBM classifier seed the stacked features ...
lgbm = _load_pickle('../input/lgbm-5-fold-cv/lgbm-pseudo-labelled-classifier.pkl')
train_lgbm_features = lgbm.predict_proba(X)
test_lgbm_features = lgbm.predict_proba(test)

# ... and the probabilities of the five cross-validation models are summed on top
# (the sum is not divided by the number of models).
for fold in range(1, 6):
    lgbm = _load_pickle(f'../input/lgbm-5-fold-cv/lgbm-cv-round-{fold}')
    train_lgbm_features += lgbm.predict_proba(X)
    test_lgbm_features += lgbm.predict_proba(test)


# The extra-trees regressor contributes one prediction per row.
extra_trees = _load_pickle('../input/6-extratrees-regressor/extra_trees.pkl')
train_regression_features = extra_trees.predict(X)
test_regression_features = extra_trees.predict(test)

In [7]:
# Force a full garbage-collection pass; the displayed value is gc.collect()'s return
# (the number of unreachable objects it found).
gc.collect()

68

In [8]:
# Display the column names of the training frame.
train.columns

Index(['signal', 'open_channels', 'scaled_signal', 'signal_squared',
       'signal_squared_minus_mean', 'signal_shifted_by_1',
       'signal_shifted_by_-1', 'signal_shifted_by_2', 'signal_shifted_by_-2',
       'signal_shifted_by_3', 'signal_shifted_by_-3', 'signal_shifted_by_4',
       'signal_shifted_by_-4', 'signal_shifted_by_5', 'signal_shifted_by_-5',
       'percentage_change_1', 'percentage_change_-1', 'percentage_change_2',
       'percentage_change_-2', 'percentage_change_3', 'percentage_change_-3',
       'percentage_change_4', 'percentage_change_-4', 'percentage_change_5',
       'percentage_change_-5', 'target_encoded_signal', 'rolling_mean_10',
       'rolling_std_10', 'rolling_mean_100', 'rolling_std_100',
       'rolling_mean_1000', 'rolling_std_1000'],
      dtype='object')

In [9]:
# Raw columns carried over unchanged next to the model-derived features.
to_keep = ['signal', 'signal_squared']
train_select_features, test_select_features = X[to_keep].values, test[to_keep].values


def _stack_features(selected, lgbm_probs, regression):
    '''
    joins the feature groups column-wise; the regression output is reshaped into a single column
    '''
    return np.concatenate((selected, lgbm_probs, regression.reshape(-1, 1)), axis=1)


train_features = _stack_features(train_select_features, train_lgbm_features, train_regression_features)
test_features = _stack_features(test_select_features, test_lgbm_features, test_regression_features)

In [10]:
# Force a full garbage-collection pass; the displayed value is gc.collect()'s return
# (the number of unreachable objects it found).
gc.collect()

60

In [11]:
# Column layout of the stacked arrays: the two kept signal columns,
# eleven `lgbm_*` columns and one `regression` column.
columns = ['signal', 'signal_squared', *(f'lgbm_{i}' for i in range(11)), 'regression']

train, test = (
    pd.DataFrame(features, columns=columns)
    for features in (train_features, test_features)
)

# Re-attach the target to the rebuilt training frame.
train['open_channels'] = y

In [12]:
# Downcast the rebuilt frames (train first, then test); the helper reports the savings.
train, test = map(reduce_mem_usage, (train, test))

Mem. usage decreased to 152.11 Mb (74.3% reduction)
Mem. usage decreased to 53.41 Mb (75.0% reduction)


In [13]:
# Force a full garbage-collection pass; the displayed value is gc.collect()'s return
# (the number of unreachable objects it found).
gc.collect()

20

In [14]:
def augment(X, y):
    '''
    doubles the data: each array is returned with a copy of itself,
    reversed along axis 1, stacked underneath the original rows
    '''
    def with_reversed(arr):
        # original entries first, then the same entries flipped along axis 1
        return np.vstack((arr, np.flip(arr, axis=1)))

    return with_reversed(X), with_reversed(y)

In [15]:
from tensorflow.keras.layers import Conv1D, Multiply, Add, Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam


def wave_block(x, filters, kernel_size, n):
    '''
    builds a block of `n` gated dilated convolutions on top of `x`

    `x` is first projected to `filters` channels with a 1x1 convolution.
    Then, for dilation rates 1, 2, 4, ..., 2 ** (n - 1), a tanh-activated and a
    sigmoid-activated dilated convolution are applied to the current tensor,
    multiplied element-wise, passed through another 1x1 convolution and added
    to a running sum that starts as the projected input.

    returns the running sum
    '''
    def dilated_conv(tensor, activation, rate):
        return Conv1D(filters, kernel_size,
                      padding='same', activation=activation,
                      dilation_rate=rate)(tensor)

    hidden = Conv1D(filters, 1, padding='same')(x)
    skip_sum = hidden

    for power in range(n):
        rate = 2 ** power

        # gated activation: tanh branch modulated by the sigmoid branch
        filter_out = dilated_conv(hidden, 'tanh', rate)
        gate_out = dilated_conv(hidden, 'sigmoid', rate)
        gated = Multiply()([filter_out, gate_out])

        hidden = Conv1D(filters, 1, padding='same')(gated)
        skip_sum = Add()([skip_sum, hidden])

    return skip_sum

def wavenet(shape, learning_rate=1e-3):
    '''
    builds and compiles the model: four stacked wave blocks followed by
    an 11-unit softmax layer

    shape: input shape handed to `Input`
    learning_rate: learning rate of the Adam optimizer

    returns the model compiled with categorical cross-entropy loss
    '''
    # (filters, number of dilated layers) of each wave block, in order
    block_specs = [(16, 8), (32, 12), (64, 4), (128, 1)]

    inputs = Input(shape)

    x = inputs
    for filters, depth in block_specs:
        x = wave_block(x, filters, 3, depth)

    outputs = Dense(11, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(Adam(learning_rate), 'categorical_crossentropy')

    return model

In [16]:
from tensorflow.keras.callbacks import (ModelCheckpoint, LearningRateScheduler,
                                        Callback, ReduceLROnPlateau)

from sklearn.metrics import f1_score


class MacroF1(Callback):
    '''
    computes the macro-averaged F1 score on a validation set at the end of
    every epoch, stores it in `logs['f1_score']` and then forwards the
    epoch-end event to the wrapped callbacks

    Forwarding after the score has been written lets the wrapped callbacks
    (e.g. model checkpointing) monitor `f1_score`. The train-begin and
    train-end events are forwarded as well.

    model: the model used for predicting on the validation set
    data: `(X_val, y_val)`; `y_val` is one-hot encoded along axis 2
    callbacks: callbacks to drive from this one (may be empty or None)
    '''

    def __init__(self, model, data, callbacks):
        super(MacroF1, self).__init__()

        # set_model() is the supported way to attach a model to a callback;
        # plain assignment to `self.model` fails on Keras releases where
        # `model` is a read-only property
        self.set_model(model)
        self.X_val = data[0]
        # one-hot targets -> flat vector of class ids
        self.y_val = np.argmax(data[1], axis=2).reshape(-1)

        # using callbacks allows us to use early stopping and model checkpoint on f1 score
        self.callbacks = list(callbacks) if callbacks is not None else []

    def on_train_begin(self, logs=None):
        for callback in self.callbacks:
            callback.on_train_begin(logs)

    def on_train_end(self, logs=None):
        for callback in self.callbacks:
            callback.on_train_end(logs)

    def on_epoch_end(self, epoch, logs=None):
        # a fresh dict per call; a mutable default argument would be shared
        # (and written to) across calls
        if logs is None:
            logs = {}

        preds = np.argmax(self.model.predict(self.X_val), axis=2).reshape(-1)
        score = f1_score(self.y_val, preds, average='macro')
        logs['f1_score'] = score

        print(f'\nf1 score: {score}')

        # forward only after the score is in `logs` so the wrapped callbacks can monitor it
        for callback in self.callbacks:
            callback.on_epoch_end(epoch, logs)
            


def schedule(epoch, lr):
    '''
    step schedule for `LearningRateScheduler`: the learning rate is divided
    by 10 when epoch 40 starts and by another 10 when epoch 60 starts

    epoch: zero-based epoch index
    lr: the learning rate currently in effect (this is what Keras passes in,
        not the initial learning rate)

    returns the learning rate to use for this epoch
    '''
    # `lr` is the current rate, so dividing on every epoch past a boundary
    # would compound (lr / 10 / 10 / ...) and drive it to zero within a few
    # epochs; divide only on the boundary epochs themselves
    if epoch in (40, 60):
        return lr / 10
    return lr

In [17]:
from sklearn.model_selection import GroupKFold


def _to_sequences(values, group_size):
    '''
    reshapes a 2D array of shape (rows, cols) into
    (rows // group_size, group_size, cols); consecutive rows stay together

    raises ValueError if the number of rows is not a multiple of `group_size`
    '''
    values = np.asarray(values)
    n_rows, n_cols = values.shape
    if n_rows % group_size != 0:
        raise ValueError(
            f'number of rows ({n_rows}) must be a multiple of group_size ({group_size})'
        )
    return values.reshape(n_rows // group_size, group_size, n_cols)


def cross_val_predict(train, test, n_splits=5, 
                   group_size=5000, epochs=70, 
                   batch_size=16):
    '''
    performs cross validation and at each cross val step predicts the test outcomes

    train: frame with the feature columns and the `open_channels` target
    test: frame with the same feature columns
    n_splits: number of GroupKFold folds
    group_size: number of consecutive rows forming one sequence; the row
        counts of `train` and `test` must both be multiples of it
    epochs, batch_size: passed to `model.fit`

    The input frames are not modified. A `group` column, if present, is
    ignored. For every fold the best weights (by validation macro F1) are
    written to `fold_{i}.h5` in the working directory.

    returns a 1D array with the predicted class for every row of `test`
    '''
    n_classes = 11

    # features only; errors='ignore' because 'group' is usually absent
    train_features = train.drop(columns=['open_channels', 'group'], errors='ignore')
    test_features = test.drop(columns=['group'], errors='ignore')

    # wavenet expects data to be three dimensional. Both the train and the test set are 2D
    # we convert this 2D data into 3D by cutting it into sequences of consecutive rows
    # new shape = (num_rows // group_size, group_size, num_cols)
    X = _to_sequences(train_features.values, group_size)
    test_seq = _to_sequences(test_features.values, group_size)

    # one-hot encode targets (explicit dtype: the default differs between pandas versions)
    one_hot = pd.get_dummies(train['open_channels'], dtype=np.uint8)
    if one_hot.shape[1] != n_classes:
        raise ValueError(
            f'expected {n_classes} distinct open_channels values, found {one_hot.shape[1]}'
        )
    y = _to_sequences(one_hot.values, group_size)

    # since the entirety of the data cannot be fit into the GPU, train in small groups
    train_groups = np.arange(len(train)) // group_size

    # k fold cross validation
    # The same group will not appear in two different folds
    cv = GroupKFold(n_splits)

    # K fold splits are row indices for the 2D data
    # translate them into group (sequence) indices for the 3D data
    group_splits = [
        (np.unique(train_groups[train_rows]), np.unique(train_groups[val_rows]))
        for train_rows, val_rows in cv.split(train_features, train['open_channels'], train_groups)
    ]

    # flip test for test time augmentation
    # since wavenet uses information from neighbouring timesteps
    # predicting on the flipped test set gives different results
    # summing both predictions will yield a better result
    test_flip = np.flip(test_seq, axis=1)

    predictions = np.zeros((len(test_features), n_classes))

    gc.collect()
    for i, (train_idx, val_idx) in enumerate(group_splits, start=1):

        X_train, y_train = X[train_idx], y[train_idx]
        X_val, y_val = X[val_idx], y[val_idx]

        X_train, y_train = augment(X_train, y_train)

        model = wavenet((group_size, X_train.shape[2]))

        checkpoint = ModelCheckpoint(f'fold_{i}.h5', monitor='f1_score', mode='max', save_best_only=True)
        scheduler = LearningRateScheduler(schedule)
        # the keyword must be spelled `monitor`: a misspelled keyword is not
        # guaranteed to be rejected, and the callback then watches its default
        # metric instead of the f1 score
        reduce_lr = ReduceLROnPlateau(monitor='f1_score', mode='max', patience=10)

        # The callbacks that monitor f1_score are driven by MacroF1 only (it
        # forwards the train-begin / epoch-end / train-end events after writing
        # the score to the logs). Listing them in fit() as well would run them
        # twice per epoch, which makes ReduceLROnPlateau use up its patience
        # twice as fast. As fit() does not see them, attach the model here.
        f1_callbacks = [checkpoint, reduce_lr]
        for callback in f1_callbacks:
            callback.set_model(model)
        macro_f1 = MacroF1(model, (X_val, y_val), f1_callbacks)

        # the scheduler does not depend on the f1 score and needs the
        # epoch-begin event, so it is registered with fit() directly
        callbacks = [
            macro_f1,
            scheduler,
        ]

        model.fit(X_train, y_train,
                 epochs=epochs,
                 batch_size=batch_size,
                 validation_data=(X_val, y_val),
                 callbacks=callbacks,
                 verbose=0)

        # load the best model
        model.load_weights(f'fold_{i}.h5')

        val_preds = model.predict(X_val)
        score = f1_score(
            np.argmax(y_val, axis=2).reshape(-1),
            np.argmax(val_preds, axis=2).reshape(-1),
            average='macro',
        )

        print(f'Training fold {i} finished. F1 score: {score}')

        # at each cross val step predict on test using the best model
        test_preds = model.predict(test_seq)
        test_preds = test_preds.reshape(-1, test_preds.shape[-1])
        predictions += test_preds

        # test time augmentation: predict on the flipped sequences and flip the result back
        test_preds = np.flip(model.predict(test_flip), axis=1)
        test_preds = test_preds.reshape(-1, test_preds.shape[-1])
        predictions += test_preds

        gc.collect()

    # return class predictions
    return np.argmax(predictions, axis=1)

In [18]:
# Train one model per cross-validation fold (default settings) and collect the
# predicted class for every test row; the per-epoch f1 scores are printed below.
predictions = cross_val_predict(train, test)


f1 score: 0.9388928408216386

f1 score: 0.9407164387221482

f1 score: 0.9409101297096587

f1 score: 0.9411347805916503

f1 score: 0.9401027997383969

f1 score: 0.9379744548096389

f1 score: 0.9414756103700017

f1 score: 0.9414959463111718

f1 score: 0.9416208203689574

f1 score: 0.9414774854162055

f1 score: 0.9414303348423881

f1 score: 0.9414631751382821

f1 score: 0.9414754988661109

f1 score: 0.9415411441821586

f1 score: 0.9415189851156307

f1 score: 0.9415110409507377

f1 score: 0.9414744357177242

f1 score: 0.941474064218689

f1 score: 0.9414841084490999

f1 score: 0.9414942040429861

f1 score: 0.9414895497801847

f1 score: 0.9414905771842151

f1 score: 0.9414950166117201

f1 score: 0.9414927449286101

f1 score: 0.941490591999246

f1 score: 0.9414935055501862

f1 score: 0.9414935055501862

f1 score: 0.9414935055501862

f1 score: 0.9414935055501862

f1 score: 0.9414935055501862

f1 score: 0.9414935055501862

f1 score: 0.9414935055501862

f1 score: 0.9414935055501862

f1 score: 0

In [20]:
# Hand the test predictions to the project helper from `utils`.
# NOTE(review): 'wavenet' is presumably used to name the submission file - confirm in utils.save_submission.
save_submission(predictions, 'wavenet')