In [1]:
import copy
import gc
import glob
import os
import time

import cv2
import IPython
import IPython.display
import joblib
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
from joblib import Parallel, delayed
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
from tqdm import tqdm

%matplotlib inline

In [2]:
pd.options.display.max_columns = 128
pd.options.display.max_rows = 128
plt.rcParams['figure.figsize'] = (15, 8)

In [3]:
class EasyDict(dict):
    def __init__(self, d=None, **kwargs):
        if d is None:
            d = {}
        if kwargs:
            d.update(**kwargs)
        for k, v in d.items():
            setattr(self, k, v)
        # Class attributes
        for k in self.__class__.__dict__.keys():
            if not (k.startswith('__') and k.endswith('__')) and not k in ('update', 'pop'):
                setattr(self, k, getattr(self, k))

    def __setattr__(self, name, value):
        if isinstance(value, (list, tuple)):
            value = [self.__class__(x)
                     if isinstance(x, dict) else x for x in value]
        elif isinstance(value, dict) and not isinstance(value, self.__class__):
            value = self.__class__(value)
        super(EasyDict, self).__setattr__(name, value)
        super(EasyDict, self).__setitem__(name, value)

    __setitem__ = __setattr__

    def update(self, e=None, **f):
        d = e or dict()
        d.update(f)
        for k in d:
            setattr(self, k, d[k])

    def pop(self, k, d=None):
        delattr(self, k)
        return super(EasyDict, self).pop(k, d)

## Configuration and global parameters:

In [4]:
train_df = pd.read_csv('../input/train_curated.csv')
sample_submission = pd.read_csv('../input/sample_submission.csv')
print('train: {}'.format(train_df.shape))
print('test: {}'.format(sample_submission.shape))

ROOT = '../input/'
test_root = os.path.join(ROOT, 'test/')
train_root = os.path.join(ROOT, 'train_curated/')


CONFIG = EasyDict()
CONFIG.hop_length = 347 # to make time steps 128
CONFIG.fmin = 20
CONFIG.fmax = 44100 / 2
CONFIG.n_fft = 480

N_SAMPLES = 48
SAMPLE_DIM = 256

TRAINING_CONFIG = {
    'sample_dim': (N_SAMPLES, SAMPLE_DIM),
    'padding_mode': cv2.BORDER_REFLECT,
}

print(CONFIG)
print(TRAINING_CONFIG)

train_df.head()

train: (4970, 2)
test: (1120, 81)
{'hop_length': 347, 'fmin': 20, 'fmax': 22050.0, 'n_fft': 480}
{'sample_dim': (48, 256), 'padding_mode': 2}


Unnamed: 0,fname,labels
0,0006ae4e.wav,Bark
1,0019ef41.wav,Raindrop
2,001ec0ad.wav,Finger_snapping
3,0026c7cb.wav,Run
4,0026f116.wav,Finger_snapping


## Data Processing class:

In [5]:
# Preprocessing functions inspired by:
# https://github.com/xiaozhouwang/tensorflow_speech_recognition_solution/blob/master/data.py
class DataProcessor(object):
    
    def __init__(self, debug=False):
        self.debug = debug
        
        # Placeholders for global statistics
        self.mel_mean = None
        self.mel_std = None
        self.mel_max = None
        self.mfcc_max = None
        
    def createMel(self, filename, params, normalize=False):
        """
        Create Mel Spectrogram sample out of raw wavfile
        """
        y, sr = librosa.load(filename, sr=None)
        mel = librosa.feature.melspectrogram(y, sr, n_mels=N_SAMPLES, **params)
        mel = librosa.power_to_db(mel)
        if normalize:
            if self.mel_mean is not None and self.mel_std is not None:
                mel = (mel - self.mel_mean) / self.mel_std
            else:
                sample_mean = np.mean(mel)
                sample_std = np.std(mel)
                mel = (mel - sample_mean) / sample_std
            if self.mel_max is not None:
                mel = mel / self.mel_max
            else:
                mel = mel / np.max(np.abs(mel))
        return mel
    
    def createMfcc(self, filename, params, normalize=False):
        """
        Create MFCC sample out of raw wavfile
        """
        y, sr = librosa.load(filename, sr=None)
        nonzero_idx = [y > 0]
        y[nonzero_idx] = np.log(y[nonzero_idx])
        mfcc = librosa.feature.mfcc(y, sr, n_mfcc=N_SAMPLES, **params)
        if normalize:
            if self.mfcc_max is not None:
                mfcc = mfcc / self.mfcc_max
            else:
                mfcc = mfcc / np.max(np.abs(mfcc))
        return mfcc
    
    def createLogspec(self, filename, params,
                      normalize=False,
                      window_size=20,
                      step_size=10, eps=1e-10):
        """
        Create log spectrogram,
        based on 
        https://www.kaggle.com/voglinio/keras-2d-model-5-fold-log-specgram-curated-only
        """
        
        y, sr = librosa.load(filename, sr=None)
        nperseg = int(round(window_size * sr / 1e3))
        noverlap = int(round(step_size * sr / 1e3))
        freqs, times, spec = scipy.signal.spectrogram(
            y,
            fs=sr,
            window='hann',
            nperseg=nperseg,
            noverlap=noverlap,
            detrend=False)
        spec = np.log(spec.astype(np.float32) + eps)
        return spec
        
    
    def prepareSample(self, root, row, 
                      preprocFunc, 
                      preprocParams, trainingParams, 
                      test_mode=False, normalize=False, 
                      proc_mode='split'):
        """
        Prepare sample for model training.
        Function takes row of DataFrame, extracts filename and labels and processes them.
        
        If proc_mode is 'split':
        Outputs sets of arrays of constant shape padded to TRAINING_CONFIG shape
        with selected padding mode, also specified in TRAINING_CONFIG.
        This approach prevents loss of information caused by trimming the audio sample,
        instead it splits it into equally-sized parts and pads them.
        To account for creation of multiple samples, number of labels are multiplied to a number
        equal to number of created samples.
        
        If proc_mode is 'resize':
        Resizes the original processed sample to (SAMPLE_DIM, N_SAMPLES) shape.
        """
        
        assert proc_mode in ['split', 'resize'], 'proc_must be one of split or resize'
        
        filename = os.path.join(root, row['fname'])
        if not test_mode:
            labels = row['labels']
            
        sample = preprocFunc(filename, preprocParams, normalize=normalize)
        # print(sample.min(), sample.max())
        
        if proc_mode == 'split':
            sample_split = np.array_split(
                sample, np.ceil(sample.shape[1] / SAMPLE_DIM), axis=1)
            samples_pad = []
            for i in sample_split:
                padding_dim = SAMPLE_DIM - i.shape[1]
                sample_pad = cv2.copyMakeBorder(i, 0, 0, 0, padding_dim, trainingParams['padding_mode'])
                samples_pad.append(sample_pad)
            samples_pad = np.asarray(samples_pad)
            if not test_mode:
                labels = [labels] * len(samples_pad)
                labels = np.asarray(labels)
                return samples_pad, labels
            return samples_pad
        elif proc_mode == 'resize':
            sample_pad = cv2.resize(sample, (SAMPLE_DIM, N_SAMPLES), interpolation=cv2.INTER_NEAREST)
            sample_pad = np.expand_dims(sample_pad, axis=0)
            if not test_mode:
                labels = np.asarray(labels)
                return sample_pad, labels
            return sample_pad
        elif proc_mode == 'raw':
            if not test_model:
                return sample, labels
            return sample


processor = DataProcessor()

In [6]:
DATA_PREFIX = 'proc'
train_filename = 'train_curated_{}.joblib'.format(DATA_PREFIX)
test_filename = 'test_{}.joblib'.format(DATA_PREFIX)


# Train processing/loading:
if os.path.isfile(train_filename):
    print('load processed train:')
    train_dict = joblib.load(train_filename)
    X_train = train_dict['X']
    y_train = train_dict['y']
    print(X_train.shape, y_train.shape)
else:
    print('process train...')
    output = Parallel(n_jobs=-3, verbose=1)(
        delayed(processor.prepareSample)(
            train_root, 
            train_df.iloc[f, :],
            processor.createLogspec,
            CONFIG,
            TRAINING_CONFIG,
            test_mode=False,
            proc_mode='resize',
        ) for f in range(train_df.shape[0]))  # change to number of sample in train data for full processing
    X_train = np.array([x[0] for x in output])
    y_train = np.array([x[1] for x in output])
    y_train = pd.Series(y_train).str.get_dummies(sep=',')
    print(X_train.shape, y_train.shape)
    # Save output for quicker experiments
    train_dict = {
        'X': X_train,
        'y': y_train,
    }
    joblib.dump(train_dict, train_filename)
    

# Test processing/loading:
if os.path.isfile(test_filename):
    print('load processed test:')
    test_dict = joblib.load(test_filename)
    X_test = test_dict['X']
    print(X_test.shape)
else:
    print('process test...')
    X_test = Parallel(n_jobs=-3, verbose=1)(
        delayed(processor.prepareSample)(
            test_root, 
            sample_submission.iloc[f, :],
            processor.createLogspec,
            CONFIG,
            TRAINING_CONFIG,
            test_mode=True,
            proc_mode='resize',
        ) for f in range(sample_submission.shape[0]))  # change to number of sample in test data for full processing
    X_test = np.array(X_test)
    print(X_test.shape)
    test_dict = {
        'X': X_test,
    }
    joblib.dump(test_dict, test_filename)
    
    
# Switch channel axis from 2nd to last
X_train = np.moveaxis(X_train, 1, -1)
X_test = np.moveaxis(X_test, 1, -1)
print(X_train.shape, y_train.shape)

process train...


[Parallel(n_jobs=-3)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=-3)]: Done 4970 out of 4970 | elapsed:  3.0min finished


(4970, 1, 48, 256) (4970, 80)
process test...


[Parallel(n_jobs=-3)]: Using backend SequentialBackend with 1 concurrent workers.


(1120, 1, 48, 256)
(4970, 48, 256, 1) (4970, 80)


[Parallel(n_jobs=-3)]: Done 1120 out of 1120 | elapsed:   53.3s finished


In [7]:
# Distribution of multilabel labels
print('Multilabel class distribution:')
print(y_train.sum(axis=1).value_counts())

# Most of the samples belong to only one class.
# There are some (around 15%), which belong to two classes.
# Occurrence of samples belonging to more than two classes at once
# is quite rare, around 1.5%.

Multilabel class distribution:
1    4269
2     627
3      69
4       4
6       1
dtype: int64


In [8]:
from collections import Counter
from pprint import pprint

df_lab = (y_train.loc[y_train.sum(axis=1) > 1] > 0)
multilabel_combs = []
for i in range(df_lab.shape[0]):
    row_label = df_lab.iloc[i, :][df_lab.iloc[i, :] > 0].index.tolist()
    multilabel_combs.append(row_label)

multilabel_comb_counter = Counter(list(map(lambda x: ' + '.join(x), multilabel_combs)))
pprint(multilabel_comb_counter.most_common(20))
# 20 most common combinations of labels

[('Acoustic_guitar + Strum', 69),
 ('Drip + Raindrop', 52),
 ('Sink_(filling_or_washing) + Water_tap_and_faucet', 46),
 ('Applause + Cheering + Crowd', 36),
 ('Cutlery_and_silverware + Dishes_and_pots_and_pans', 29),
 ('Chink_and_clink + Dishes_and_pots_and_pans', 27),
 ('Accelerating_and_revving_and_vroom + Race_car_and_auto_racing', 26),
 ('Female_speech_and_woman_speaking + Yell', 22),
 ('Slam + Squeak', 21),
 ('Screaming + Yell', 20),
 ('Applause + Cheering', 15),
 ('Meow + Purr', 13),
 ('Accelerating_and_revving_and_vroom + Motorcycle', 12),
 ('Squeak + Walk_and_footsteps', 12),
 ('Car_passing_by + Traffic_noise_and_roadway_noise', 10),
 ('Applause + Crowd', 10),
 ('Bass_guitar + Electric_guitar', 10),
 ('Gurgling + Toilet_flush', 10),
 ('Female_singing + Male_singing', 9),
 ('Cheering + Crowd', 8)]


In [9]:
import numpy as np
import sklearn.metrics


# Based on https://www.kaggle.com/voglinio/keras-2d-model-5-fold-log-specgram-curated-only
# Core calculation of label precisions for one test sample.
def _one_sample_positive_class_precisions(scores, truth):
    """Calculate precisions for each true class for a single sample.

    Args:
      scores: np.array of (num_classes,) giving the individual classifier scores.
      truth: np.array of (num_classes,) bools indicating which classes are true.

    Returns:
      pos_class_indices: np.array of indices of the true classes for this sample.
      pos_class_precisions: np.array of precisions corresponding to each of those
        classes.
    """
    num_classes = scores.shape[0]
    pos_class_indices = np.flatnonzero(truth > 0)
    # Only calculate precisions if there are some true classes.
    if not len(pos_class_indices):
        return pos_class_indices, np.zeros(0)
    # Retrieval list of classes for this sample.
    retrieved_classes = np.argsort(scores)[::-1]
    # class_rankings[top_scoring_class_index] == 0 etc.
    class_rankings = np.zeros(num_classes, dtype=np.int)
    class_rankings[retrieved_classes] = range(num_classes)
    # Which of these is a true label?
    retrieved_class_true = np.zeros(num_classes, dtype=np.bool)
    retrieved_class_true[class_rankings[pos_class_indices]] = True
    # Num hits for every truncated retrieval list.
    retrieved_cumulative_hits = np.cumsum(retrieved_class_true)
    # Precision of retrieval list truncated at each hit, in order of pos_labels.
    precision_at_hits = (
        retrieved_cumulative_hits[class_rankings[pos_class_indices]] /
        (1 + class_rankings[pos_class_indices].astype(np.float)))
    return pos_class_indices, precision_at_hits

# All-in-one calculation of per-class lwlrap.


def calculate_per_class_lwlrap(truth, scores):
    """Calculate label-weighted label-ranking average precision.

    Arguments:
      truth: np.array of (num_samples, num_classes) giving boolean ground-truth
        of presence of that class in that sample.
      scores: np.array of (num_samples, num_classes) giving the classifier-under-
        test's real-valued score for each class for each sample.

    Returns:
      per_class_lwlrap: np.array of (num_classes,) giving the lwlrap for each
        class.
      weight_per_class: np.array of (num_classes,) giving the prior of each
        class within the truth labels.  Then the overall unbalanced lwlrap is
        simply np.sum(per_class_lwlrap * weight_per_class)
    """
    assert truth.shape == scores.shape
    num_samples, num_classes = scores.shape
    # Space to store a distinct precision value for each class on each sample.
    # Only the classes that are true for each sample will be filled in.
    precisions_for_samples_by_classes = np.zeros((num_samples, num_classes))
    for sample_num in range(num_samples):
        pos_class_indices, precision_at_hits = (
            _one_sample_positive_class_precisions(scores[sample_num, :],
                                                  truth[sample_num, :]))
        precisions_for_samples_by_classes[sample_num, pos_class_indices] = (
            precision_at_hits)
    labels_per_class = np.sum(truth > 0, axis=0)
    weight_per_class = labels_per_class / float(np.sum(labels_per_class))
    # Form average of each column, i.e. all the precisions assigned to labels in
    # a particular class.
    per_class_lwlrap = (np.sum(precisions_for_samples_by_classes, axis=0) /
                        np.maximum(1, labels_per_class))
    # overall_lwlrap = simple average of all the actual per-class, per-sample precisions
    #                = np.sum(precisions_for_samples_by_classes) / np.sum(precisions_for_samples_by_classes > 0)
    #           also = weighted mean of per-class lwlraps, weighted by class label prior across samples
    #                = np.sum(per_class_lwlrap * weight_per_class)
    return per_class_lwlrap, weight_per_class

# Calculate the overall lwlrap using sklearn.metrics function.


def calculate_overall_lwlrap_sklearn(truth, scores):
    """Calculate the overall lwlrap using sklearn.metrics.lrap."""
    # sklearn doesn't correctly apply weighting to samples with no labels, so just skip them.
    sample_weight = np.sum(truth > 0, axis=1)
    nonzero_weight_sample_indices = np.flatnonzero(sample_weight > 0)
    overall_lwlrap = sklearn.metrics.label_ranking_average_precision_score(
        truth[nonzero_weight_sample_indices, :] > 0,
        scores[nonzero_weight_sample_indices, :],
        sample_weight=sample_weight[nonzero_weight_sample_indices])
    return overall_lwlrap


# Accumulator object version.

class lwlrap_accumulator(object):
    """Accumulate batches of test samples into per-class and overall lwlrap."""

    def __init__(self):
        self.num_classes = 0
        self.total_num_samples = 0

    def accumulate_samples(self, batch_truth, batch_scores):
        """Cumulate a new batch of samples into the metric.

        Args:
          truth: np.array of (num_samples, num_classes) giving boolean
            ground-truth of presence of that class in that sample for this batch.
          scores: np.array of (num_samples, num_classes) giving the
            classifier-under-test's real-valued score for each class for each
            sample.
        """
        assert batch_scores.shape == batch_truth.shape
        num_samples, num_classes = batch_truth.shape
        if not self.num_classes:
            self.num_classes = num_classes
            self._per_class_cumulative_precision = np.zeros(self.num_classes)
            self._per_class_cumulative_count = np.zeros(self.num_classes,
                                                        dtype=np.int)
        assert num_classes == self.num_classes
        for truth, scores in zip(batch_truth, batch_scores):
            pos_class_indices, precision_at_hits = (
                _one_sample_positive_class_precisions(scores, truth))
            self._per_class_cumulative_precision[pos_class_indices] += (
                precision_at_hits)
            self._per_class_cumulative_count[pos_class_indices] += 1
        self.total_num_samples += num_samples

    def per_class_lwlrap(self):
        """Return a vector of the per-class lwlraps for the accumulated samples."""
        return (self._per_class_cumulative_precision /
                np.maximum(1, self._per_class_cumulative_count))

    def per_class_weight(self):
        """Return a normalized weight vector for the contributions of each class."""
        return (self._per_class_cumulative_count /
                float(np.sum(self._per_class_cumulative_count)))

    def overall_lwlrap(self):
        """Return the scalar overall lwlrap for cumulated samples."""
        return np.sum(self.per_class_lwlrap() * self.per_class_weight())

In [10]:
# import tensorflow.keras as keras
# from tensorflow.keras import layers
# from tensorflow.keras.callbacks import *
# from tensorflow.keras.layers import *
# from tensorflow.keras.optimizers import *
# from tensorflow.keras.losses import *
# from tensorflow.keras.models import Model

from keras import layers
from keras.callbacks import *
from keras.layers import *
from keras.optimizers import *
from keras.losses import *
from keras.models import Model


# Based on https://github.com/keras-team/keras-applications/blob/master/keras_applications/resnet_common.py
def block1(x, filters, kernel_size=3, stride=1,
           conv_shortcut=True, name=None):
    """A residual block.
    # Arguments
        x: input tensor.
        filters: integer, filters of the bottleneck layer.
        kernel_size: default 3, kernel size of the bottleneck layer.
        stride: default 1, stride of the first layer.
        conv_shortcut: default True, use convolution shortcut if True,
            otherwise identity shortcut.
        name: string, block label.
    # Returns
        Output tensor for the residual block.
    """
    bn_axis = 3

    if conv_shortcut is True:
        shortcut = layers.Conv2D(4 * filters, 1, strides=stride,
                                 name=name + '_0_conv')(x)
        shortcut = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
                                             name=name + '_0_bn')(shortcut)
    else:
        shortcut = x

    x = layers.Conv2D(filters, 1, strides=stride, name=name + '_1_conv')(x)
    x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
                                  name=name + '_1_bn')(x)
    x = layers.PReLU(name=name + '_1_prelu')(x)

    x = layers.Conv2D(filters, kernel_size, padding='SAME',
                      name=name + '_2_conv')(x)
    x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
                                  name=name + '_2_bn')(x)
    x = layers.PReLU(name=name + '_2_prelu')(x)

    x = layers.Conv2D(4 * filters, 1, name=name + '_3_conv')(x)
    x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
                                  name=name + '_3_bn')(x)

    x = layers.Add(name=name + '_add')([shortcut, x])
    x = layers.PReLU(name=name + '_out')(x)
    
    return x


def stack1(x, filters, blocks, stride1=2, name=None):
    """A set of stacked residual blocks.
    # Arguments
        x: input tensor.
        filters: integer, filters of the bottleneck layer in a block.
        blocks: integer, blocks in the stacked blocks.
        stride1: default 2, stride of the first layer in the first block.
        name: string, stack label.
    # Returns
        Output tensor for the stacked blocks.
    """
    x = block1(x, filters, stride=stride1, name=name + '_block1')
    for i in range(2, blocks + 1):
        x = block1(x, filters, conv_shortcut=False, name=name + '_block' + str(i))
    return x


def ResNetlike(input_shape, num_classes):
    
    use_bias = False
    num_blocks = 1
    strides = (2, 4)
    
    input_layer = layers.Input(input_shape)
    # x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)), name='conv1_pad')(input_layer)
    x = layers.Conv2D(64, (3, 7), strides=2, use_bias=use_bias, name='conv1_conv')(input_layer)
    x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
                                      name='conv1_bn')(x)
    x = layers.PReLU(name='conv1_prelu')(x)
    
    x = stack1(x, 64, num_blocks, stride1=1, name='conv2')
    x = stack1(x, 64, num_blocks, stride1=strides, name='conv3')
    x = stack1(x, 64, num_blocks, stride1=strides, name='conv4')
    
    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
    x = layers.Dense(num_classes, activation='softmax', name='probs')(x)
    
    model = Model(input_layer, x, name='resnetlike')
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model

Using TensorFlow backend.


In [11]:
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder


# Inspired by https://www.kaggle.com/yekenot/pooled-gru-fasttext
class LwlRapEvaluation(Callback):
    def __init__(self, validation_data=(), interval=1):
        super(Callback, self).__init__()
        self.interval = interval
        self.X_val, self.y_val = validation_data

    def on_epoch_end(self, epoch, logs={}):
        if epoch % self.interval == 0:
            y_pred = self.model.predict(self.X_val, verbose=0)
            score = calculate_overall_lwlrap_sklearn(self.y_val, y_pred)
            print("\n LWLRAP - epoch: {} - score: {:.4f} \n".format(epoch +1, score))
        return


# Based on https://www.kaggle.com/voglinio/keras-2d-model-5-fold-log-specgram-curated-only
def create_unique_labels(all_labels):
    label_dict = {}
    all_labels_set = []
    first_labels_set = []
    for labs in all_labels:
        lab = labs.split(',')
        for l in lab:
            if l in label_dict:
                label_dict[l] = label_dict[l]  + 1
            else:
                label_dict[l]= 0

        all_labels_set.append(set(lab))
        first_labels_set.append(lab[0])
    classes = list(label_dict.keys())
    
    return label_dict, classes, all_labels_set, first_labels_set


label_dict, classes, all_labels_set, first_labels_set = create_unique_labels(train_df.labels)
binarize = MultiLabelBinarizer(classes=classes)
y_cat = binarize.fit_transform(all_labels_set)

In [13]:
RUN_NAME = 'ResNetLike_mel'
BATCH_SIZE = 32
NUM_EPOCHS = 35
NFOLDS = 5
SEED = 1337
LOAD_MODEL = False
TRAIN_MODEL = True
DEBUG = False
SAVE_SUBMISSION = True


# y_cat for categorical_crossentropy
# y_train.values for binary_crossentropy
labels_set = y_cat   
kfold = KFold(n_splits=NFOLDS, random_state=SEED)
num_classes = 80
bn_axis = 3

oof_train = np.zeros((X_train.shape[0], num_classes))
oof_test = np.zeros((X_test.shape[0], num_classes, NFOLDS))
valid_scores = []


# KFold training
for fold_idx, (train_idx, valid_idx) in enumerate(kfold.split(X_train)):
    X_tr, y_tr = X_train[train_idx], labels_set[train_idx]
    X_val, y_val = X_train[valid_idx], labels_set[valid_idx]
    print(X_tr.shape, y_tr.shape)
    print(X_val.shape, y_val.shape)
    
    checkpoint_name = '{}_{}.h5'.format(RUN_NAME, fold_idx)
    lwl_callback = LwlRapEvaluation(validation_data=(X_val, y_val))
    
    callbacks_list = [
        ModelCheckpoint(checkpoint_name, save_best_only=True, save_weights_only=True),
        ReduceLROnPlateau(patience=5, factor=0.2),
        EarlyStopping(patience=8, monitor='val_loss', restore_best_weights=True),
        lwl_callback,
    ]
    
    model = ResNetlike(X_train.shape[1:], y_train.shape[-1])
    # model = BaselineModel(X_train.shape[1:])
    # model.summary()
    if LOAD_MODEL:
        print('loading: {}'.format(checkpoint_name))
        model.load_weights(checkpoint_name)
    if TRAIN_MODEL:
        print('training model...')
        model.fit(
            X_tr, y_tr, 
            validation_data=(X_val, y_val),
            batch_size=BATCH_SIZE,
            epochs=NUM_EPOCHS,
            verbose=1,
            callbacks=callbacks_list)
    print('loading best weights from current fold')
    model.load_weights(checkpoint_name)
    
    val_pred = model.predict(X_val, batch_size=BATCH_SIZE)
    oof_train[valid_idx, :] = val_pred
    oof_test[:, :, fold_idx] = model.predict(X_test, batch_size=BATCH_SIZE)
    
    val_lwlrap = calculate_overall_lwlrap_sklearn(y_val, val_pred)
    valid_scores.append(val_lwlrap)
    print("lwlrap fold: {:.4f}".format(val_lwlrap))
    # break
    
    
oof_lwl = calculate_overall_lwlrap_sklearn(labels_set, oof_train)
print('OOF LWLRAP: {:.4f}'.format(oof_lwl))

(3976, 48, 256, 1) (3976, 80)
(994, 48, 256, 1) (994, 80)
training model...
Train on 3976 samples, validate on 994 samples
Epoch 1/35

 LWLRAP - epoch: 1 - score: 0.0782 

Epoch 2/35

 LWLRAP - epoch: 2 - score: 0.0975 

Epoch 3/35

 LWLRAP - epoch: 3 - score: 0.0830 

Epoch 4/35

 LWLRAP - epoch: 4 - score: 0.0762 

Epoch 5/35

 LWLRAP - epoch: 5 - score: 0.0893 

Epoch 6/35

 LWLRAP - epoch: 6 - score: 0.0921 

Epoch 7/35

 LWLRAP - epoch: 7 - score: 0.1022 

Epoch 8/35

 LWLRAP - epoch: 8 - score: 0.0910 

Epoch 9/35

 LWLRAP - epoch: 9 - score: 0.0865 

Epoch 10/35

 LWLRAP - epoch: 10 - score: 0.1629 

Epoch 11/35

 LWLRAP - epoch: 11 - score: 0.1000 

Epoch 12/35

 LWLRAP - epoch: 12 - score: 0.1097 

Epoch 13/35

 LWLRAP - epoch: 13 - score: 0.1027 

Epoch 14/35

 LWLRAP - epoch: 14 - score: 0.1055 

Epoch 15/35

 LWLRAP - epoch: 15 - score: 0.1513 

Epoch 16/35

 LWLRAP - epoch: 16 - score: 0.2522 

Epoch 17/35

 LWLRAP - epoch: 17 - score: 0.3422 

Epoch 18/35

 LWLRAP - epoch


 LWLRAP - epoch: 11 - score: 0.0901 

Epoch 12/35

 LWLRAP - epoch: 12 - score: 0.0959 

Epoch 13/35

 LWLRAP - epoch: 13 - score: 0.1158 

Epoch 14/35

 LWLRAP - epoch: 14 - score: 0.0974 

Epoch 15/35

 LWLRAP - epoch: 15 - score: 0.1346 

Epoch 16/35

 LWLRAP - epoch: 16 - score: 0.2621 

Epoch 17/35

 LWLRAP - epoch: 17 - score: 0.2658 

Epoch 18/35

 LWLRAP - epoch: 18 - score: 0.1959 

Epoch 19/35

 LWLRAP - epoch: 19 - score: 0.2678 

Epoch 20/35

 LWLRAP - epoch: 20 - score: 0.1563 

Epoch 21/35

 LWLRAP - epoch: 21 - score: 0.4595 

Epoch 22/35

 LWLRAP - epoch: 22 - score: 0.2611 

Epoch 23/35

 LWLRAP - epoch: 23 - score: 0.2446 

Epoch 24/35

 LWLRAP - epoch: 24 - score: 0.2228 

Epoch 25/35

 LWLRAP - epoch: 25 - score: 0.2670 

Epoch 26/35

 LWLRAP - epoch: 26 - score: 0.1991 

Epoch 27/35

 LWLRAP - epoch: 27 - score: 0.2784 

Epoch 28/35

 LWLRAP - epoch: 28 - score: 0.4690 

Epoch 29/35

 LWLRAP - epoch: 29 - score: 0.4595 

loading best weights from current fold
lwlr


 LWLRAP - epoch: 28 - score: 0.5672 

Epoch 29/35

 LWLRAP - epoch: 29 - score: 0.5707 

Epoch 30/35

 LWLRAP - epoch: 30 - score: 0.5547 

Epoch 31/35

 LWLRAP - epoch: 31 - score: 0.5286 

Epoch 32/35

 LWLRAP - epoch: 32 - score: 0.5627 

Epoch 33/35

 LWLRAP - epoch: 33 - score: 0.5816 

Epoch 34/35

 LWLRAP - epoch: 34 - score: 0.5810 

Epoch 35/35

 LWLRAP - epoch: 35 - score: 0.5786 

loading best weights from current fold
lwlrap fold: 0.5810
(3976, 48, 256, 1) (3976, 80)
(994, 48, 256, 1) (994, 80)
training model...
Train on 3976 samples, validate on 994 samples
Epoch 1/35

 LWLRAP - epoch: 1 - score: 0.0710 

Epoch 2/35

 LWLRAP - epoch: 2 - score: 0.0812 

Epoch 3/35

 LWLRAP - epoch: 3 - score: 0.0799 

Epoch 4/35

 LWLRAP - epoch: 4 - score: 0.1056 

Epoch 5/35

 LWLRAP - epoch: 5 - score: 0.0677 

Epoch 6/35

 LWLRAP - epoch: 6 - score: 0.0895 

Epoch 7/35

 LWLRAP - epoch: 7 - score: 0.0934 

Epoch 8/35

 LWLRAP - epoch: 8 - score: 0.0742 

Epoch 9/35

 LWLRAP - epoch: 9


 LWLRAP - epoch: 3 - score: 0.0829 

Epoch 4/35

 LWLRAP - epoch: 4 - score: 0.0959 

Epoch 5/35

 LWLRAP - epoch: 5 - score: 0.1048 

Epoch 6/35

 LWLRAP - epoch: 6 - score: 0.0970 

Epoch 7/35

 LWLRAP - epoch: 7 - score: 0.0913 

Epoch 8/35

 LWLRAP - epoch: 8 - score: 0.1060 

Epoch 9/35

 LWLRAP - epoch: 9 - score: 0.1010 

Epoch 10/35

 LWLRAP - epoch: 10 - score: 0.1160 

Epoch 11/35

 LWLRAP - epoch: 11 - score: 0.1092 

Epoch 12/35

 LWLRAP - epoch: 12 - score: 0.1297 

Epoch 13/35

 LWLRAP - epoch: 13 - score: 0.1182 

Epoch 14/35

 LWLRAP - epoch: 14 - score: 0.1044 

Epoch 15/35

 LWLRAP - epoch: 15 - score: 0.1078 

Epoch 16/35

 LWLRAP - epoch: 16 - score: 0.1795 

Epoch 17/35

 LWLRAP - epoch: 17 - score: 0.2729 

Epoch 18/35

 LWLRAP - epoch: 18 - score: 0.2547 

Epoch 19/35

 LWLRAP - epoch: 19 - score: 0.2270 

Epoch 20/35

 LWLRAP - epoch: 20 - score: 0.4321 

Epoch 21/35

 LWLRAP - epoch: 21 - score: 0.3088 

Epoch 22/35

 LWLRAP - epoch: 22 - score: 0.2494 

Epoch

In [14]:
oof_test_mean = oof_test.mean(axis=-1)
print(oof_test_mean.shape)

sort_idx = np.argsort(classes).astype(int)
oof_test_mean_sorted = oof_test_mean[:, sort_idx]
sample_submission.iloc[:, 1:] =  oof_test_mean_sorted
if SAVE_SUBMISSION:
    # sample_submission.to_csv('{}_lwl_{:.4f}.csv'.format(RUN_NAME, oof_lwl, index=False))
    sample_submission.to_csv('submission.csv', index=False)
    print(sample_submission.shape)
    
sample_submission.head()

(1120, 80)
(1120, 81)


Unnamed: 0,fname,Accelerating_and_revving_and_vroom,Accordion,Acoustic_guitar,Applause,Bark,Bass_drum,Bass_guitar,Bathtub_(filling_or_washing),Bicycle_bell,Burping_and_eructation,Bus,Buzz,Car_passing_by,Cheering,Chewing_and_mastication,Child_speech_and_kid_speaking,Chink_and_clink,Chirp_and_tweet,Church_bell,Clapping,Computer_keyboard,Crackle,Cricket,Crowd,Cupboard_open_or_close,Cutlery_and_silverware,Dishes_and_pots_and_pans,Drawer_open_or_close,Drip,Electric_guitar,Fart,Female_singing,Female_speech_and_woman_speaking,Fill_(with_liquid),Finger_snapping,Frying_(food),Gasp,Glockenspiel,Gong,Gurgling,Harmonica,Hi-hat,Hiss,Keys_jangling,Knock,Male_singing,Male_speech_and_man_speaking,Marimba_and_xylophone,Mechanical_fan,Meow,Microwave_oven,Motorcycle,Printer,Purr,Race_car_and_auto_racing,Raindrop,Run,Scissors,Screaming,Shatter,Sigh,Sink_(filling_or_washing),Skateboard,Slam,Sneeze,Squeak,Stream,Strum,Tap,Tick-tock,Toilet_flush,Traffic_noise_and_roadway_noise,Trickle_and_dribble,Walk_and_footsteps,Water_tap_and_faucet,Waves_and_surf,Whispering,Writing,Yell,Zipper_(clothing)
0,000ccb97.wav,7.630934e-06,1.126024e-06,9.557936e-06,7.191704e-06,5.745985e-06,0.002253374,6.174082e-06,0.02355557,0.003019212,0.001116,0.0001127185,0.0004156595,1.125294e-07,6.142814e-06,0.007523261,7.807558e-06,0.002217286,0.019559,2e-06,0.000231,0.000821,0.002206258,0.116611,7.487527e-06,5.106073e-05,0.054577,0.003496,0.0001380159,0.007563,8.464166e-06,0.0003790464,1.796575e-05,0.0007161554,0.001204408,0.02411856,0.010561,0.001141314,3.99507e-06,0.0006569998,0.0008718882,8.443955e-05,0.008851263,0.03842605,0.292134,1.648564e-05,3.101176e-06,0.0007125412,5.951346e-06,3.640461e-05,0.0004123669,0.00021,9.366042e-07,0.000145,0.0001422188,5.197266e-08,0.011268,9.356048e-06,0.128158,0.0001711372,0.140231,0.0003475408,0.007002,1.862208e-06,0.0004675094,0.001831639,0.002119,0.000305114,3.1472e-06,8.9e-05,0.001445,0.0002000678,1.137943e-06,0.001875239,3.074506e-05,0.004461727,1.493053e-05,0.003263234,0.06970076,0.0001609087,0.0004525526
1,0012633b.wav,0.007945251,6.793572e-07,2.767486e-07,4.266541e-05,0.0003476241,5.21742e-06,1.051208e-06,0.01277254,2.783864e-07,0.000156,9.149866e-05,0.008942229,0.000118512,4.317892e-05,0.02516754,0.0001578456,3.880692e-07,8.5e-05,1.6e-05,1.5e-05,0.000534,0.06352894,0.000111,0.0002674633,0.0003477763,1.4e-05,4e-06,0.08569744,4.8e-05,5.71199e-07,0.0006306472,1.985967e-05,0.0007372833,0.001520966,3.733742e-08,0.000255,8.873779e-05,1.981254e-12,1.304141e-07,0.003926818,2.210444e-06,1.873633e-06,0.003355232,2.9e-05,5.773754e-05,0.0002419432,0.000889962,2.745302e-09,0.0002020297,0.00167287,0.000725,0.02838649,0.002354,0.1803961,0.0001049741,3e-06,0.0393447,0.001237,3.008439e-05,1.9e-05,0.0002220316,0.000832,0.001536778,0.001044683,0.0001713921,0.005715,0.0001108031,9.399754e-08,7e-06,0.000114,0.01511131,0.0001009377,1.844445e-05,0.1155871,0.001066032,0.02837723,0.0001594179,0.009546124,0.0002999285,0.3472851
2,001ed5f1.wav,0.0004914838,5.95993e-07,4.253546e-06,2.10295e-05,0.0001089857,0.05557509,3.716875e-05,0.0002423663,1.361906e-05,0.000168,0.001586857,4.781486e-06,2.390796e-05,2.770816e-05,0.00673123,1.194379e-05,0.0001097298,4.7e-05,6.4e-05,0.00174,0.00776,0.0001945635,2.1e-05,8.320537e-05,0.07620898,0.001282,0.000364,0.03849829,5e-05,0.0004099506,0.01384116,1.452739e-07,0.0005066368,3.991843e-05,0.0001983964,3.2e-05,8.880346e-06,1.21702e-06,5.853775e-06,7.599831e-05,8.467414e-07,0.001173972,0.0004086373,0.000173,0.09300863,2.091942e-06,0.00253754,0.001043665,8.105561e-05,4.742294e-05,0.026263,0.001132772,0.006985,5.558643e-05,9.856125e-05,3.3e-05,0.01259541,0.004615,5.979117e-05,0.000834,3.193068e-06,8e-05,0.0007580077,0.3379928,0.00292585,0.133867,3.261873e-06,4.63573e-06,0.014612,0.049321,3.780541e-05,6.191605e-05,2.500981e-06,0.09177884,6.7713e-06,0.0002638801,0.0004096762,0.0009777345,0.000314997,0.008839027
3,00294be0.wav,3.017973e-05,4.028038e-07,6.008819e-06,1.118851e-05,0.002601729,3.143482e-06,7.871578e-06,0.02782175,2.041693e-06,0.000217,0.0002748477,0.003740818,1.197445e-06,6.105997e-06,0.02260438,0.03871577,2.511732e-05,0.000671,0.000168,2e-06,0.00272,8.103669e-05,0.001542,2.616761e-05,3.992716e-05,6.7e-05,2.6e-05,0.004051256,6.6e-05,2.201624e-06,0.0003012429,0.0005623194,0.01727634,0.0005731898,1.193709e-07,6.5e-05,2.994187e-05,3.593231e-09,2.430043e-06,0.02005642,0.0002965128,3.607208e-06,0.00033752,0.000116,0.0004762887,5.201666e-05,0.0005360056,2.27047e-06,3.666047e-06,0.005781172,3.3e-05,0.0002963027,0.002549,0.4489451,3.897196e-06,4.8e-05,0.00629369,0.000108,1.26218e-05,9e-06,2.91115e-05,0.001492,0.0002008946,7.094769e-05,9.866642e-05,0.000492,3.537733e-05,3.252812e-06,0.000263,0.002591,0.0007771356,3.500645e-06,0.0005882365,0.01856796,0.0008137919,3.540992e-05,0.0009361802,0.320823,0.002883691,0.03899243
4,003fde7a.wav,4.506943e-11,8.921707e-08,1.036718e-06,7.856484e-08,1.572457e-07,1.505096e-08,9.94878e-09,2.414148e-09,0.8056401,3e-06,3.039046e-07,7.447269e-10,1.652458e-12,5.385623e-08,1.13778e-07,1.309705e-09,0.1604554,5.2e-05,9e-06,1.6e-05,4e-06,5.33481e-10,6e-06,4.348518e-08,2.881525e-08,0.002008,0.027753,8.389362e-11,2.9e-05,6.607043e-07,5.820608e-09,4.084433e-08,4.364073e-08,2.58049e-07,2.657285e-06,1e-06,4.677611e-08,0.0008467334,1.36417e-06,7.464735e-08,0.0005795021,8.360082e-07,4.462873e-07,0.000136,1.325708e-08,1.742664e-11,7.846682e-09,0.00129628,5.261956e-07,7.912064e-09,0.000184,2.701101e-11,1.4e-05,1.005599e-11,1.347549e-11,3.4e-05,2.243251e-11,3e-06,1.544679e-07,0.000191,3.297388e-09,4e-06,3.136905e-10,2.702228e-07,3.415866e-07,7e-06,2.012157e-10,3.186759e-07,3.5e-05,0.000681,4.628647e-10,1.20694e-08,7.399385e-07,1.406988e-10,4.495905e-07,8.433536e-14,8.252657e-08,1.030768e-09,8.2194e-09,6.913077e-12
