This is a fork of the inference kernel that placed 80th on the public LB with 70.2 LWLRAP and will likely be disqualified due to an OOM during stage 2 evaluation - the batch size that was chosen is too large.
Brief summary:
* PCEN is used as the input features
* Pure CNN with global average pooling, so that the input size can differ between the training and inference stages
* Short files are tiled (repeated) instead of padded. maxshape = 500 was used at the training stage and 4893 at inference
* Using SpatialDropout in the CNN helped a lot. Using PReLU helped a bit.
* The model was first trained on curated data, then on curated + noisy data with sample_weight 0.8 for the noisy data.
* 5-fold CV was done using skmultilearn.model_selection.IterativeStratification

based on : 
* https://www.kaggle.com/CVxTz/keras-cnn-starter
* https://www.kaggle.com/jmourad100/keras-eda-and-cnn-starter

# || Loading Packages

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os, time, random, cv2, glob, pickle, librosa
from pathlib import Path
from PIL import Image
import imgaug as ia
from imgaug import augmenters as iaa
from tqdm import tqdm

from keras.models import Model, Sequential
from keras.layers import (Convolution1D, Input, Dense, Flatten, Dropout, GlobalAveragePooling1D, concatenate,
                          Activation, MaxPool1D, GlobalMaxPool1D, BatchNormalization, Concatenate, ReLU, LeakyReLU, GRU, Masking, 
                          Conv2D,MaxPooling2D, GlobalAveragePooling2D,GlobalMaxPooling2D, Conv1D,MaxPooling1D, Reshape, SpatialDropout2D, PReLU)
from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler, Callback
from keras.optimizers import Adam, SGD, RMSprop
from keras.losses import sparse_categorical_crossentropy
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split
from keras.constraints import max_norm, MinMaxNorm
print(os.listdir("../input"))

# || Configuration

In [None]:
# Wall-clock timestamp taken when this cell runs (not read again in the visible code).
t_start = time.time()

# Seed the NumPy and TensorFlow random generators with one fixed value so that
# Keras runs are more reproducible.
# NOTE(review): Python's built-in `random` module (used by pad2d) is not seeded here.
seed_nb=14
import numpy as np 
np.random.seed(seed_nb)
import tensorflow as tf
# NOTE(review): tf.set_random_seed is the TensorFlow 1.x spelling; TensorFlow 2.x
# exposes tf.random.set_seed instead - confirm the runtime version before upgrading.
tf.set_random_seed(seed_nb)

# || Data Preparation

In [None]:
maxshape = 4893  # inference mode; 500 was used for training
import random

from scipy import ndimage


def pad2d(inp, use_mfcc=True, use_conv2d=True, augment=False, maxshape=maxshape, channels=1):
    """Crop or tile a 2-D feature matrix to exactly ``maxshape`` columns.

    Args:
        inp: 2-D array; axis 1 is the axis that gets cropped / tiled.
        use_mfcc: Unused. Kept only so existing callers keep working.
        use_conv2d: If true, return a 3-D array with a trailing channel axis;
            otherwise return the transposed 2-D matrix.
        augment: If true, randomly rescale axis 1 by a factor in [0.7, 1.3]
            and, when cropping, pick a random crop offset.
        maxshape: Target length along axis 1. The default is the value of the
            module-level ``maxshape`` at function definition time.
        channels: Number of copies of the single channel to stack on the last
            axis when ``use_conv2d`` is true (values <= 1 keep one channel).

    Returns:
        float64 array of shape ``(maxshape, inp.shape[0], max(channels, 1))``
        when ``use_conv2d`` is true, else ``(maxshape, inp.shape[0])``.

    Raises:
        ValueError: if the input has zero columns but ``maxshape`` is positive,
            so there is nothing to tile.
    """
    inp = inp.astype(np.float64)

    if augment:
        scale = random.uniform(0.7, 1.3)
        inp = ndimage.zoom(inp, (1, scale))

    width = inp.shape[1]
    if width >= maxshape:
        # Long input: crop. Only augmentation randomizes where the crop starts.
        offset = int(random.uniform(0, 1) * (width - maxshape)) if augment else 0
        out = inp[:, offset:offset + maxshape]
    else:
        if width == 0:
            raise ValueError("pad2d: cannot tile an input with zero columns")
        # Short input: repeat it end-to-end and cut to length. The original
        # code also drew a random start offset here, but that slice was
        # overwritten before use, so the result always started at column 0;
        # the dead draw is removed and the effective behavior is unchanged.
        ntiles = maxshape // width + 1
        out = np.tile(inp, (1, ntiles))[:, :maxshape]

    if not use_conv2d:
        return out.T

    res = out.T.reshape((out.T.shape[0], out.T.shape[1], 1))
    if channels > 1:
        # Honor the requested channel count (this was hard-coded to 3 before).
        res = np.repeat(res, channels, axis=-1)
    return res

In [None]:
import sklearn.metrics
# Accumulator object version.
# Core calculation of label precisions for one test sample.

def _one_sample_positive_class_precisions(scores, truth):
  """Calculate precisions for each true class for a single sample.
  
  Args:
    scores: np.array of (num_classes,) giving the individual classifier scores.
    truth: np.array of (num_classes,) bools indicating which classes are true.

  Returns:
    pos_class_indices: np.array of indices of the true classes for this sample.
    pos_class_precisions: np.array of precisions corresponding to each of those
      classes.
  """
  num_classes = scores.shape[0]
  pos_class_indices = np.flatnonzero(truth > 0)
  # Only calculate precisions if there are some true classes.
  if not len(pos_class_indices):
    return pos_class_indices, np.zeros(0)
  # Retrieval list of classes for this sample. 
  retrieved_classes = np.argsort(scores)[::-1]
  # class_rankings[top_scoring_class_index] == 0 etc.
  class_rankings = np.zeros(num_classes, dtype=np.int)
  class_rankings[retrieved_classes] = range(num_classes)
  # Which of these is a true label?
  retrieved_class_true = np.zeros(num_classes, dtype=np.bool)
  retrieved_class_true[class_rankings[pos_class_indices]] = True
  # Num hits for every truncated retrieval list.
  retrieved_cumulative_hits = np.cumsum(retrieved_class_true)
  # Precision of retrieval list truncated at each hit, in order of pos_labels.
  precision_at_hits = (
      retrieved_cumulative_hits[class_rankings[pos_class_indices]] / 
      (1 + class_rankings[pos_class_indices].astype(np.float)))
  return pos_class_indices, precision_at_hits

class lwlrap_accumulator(object):
  """Accumulate batches of test samples into per-class and overall lwlrap.

  The per-class accumulators are created lazily by the first call to
  `accumulate_samples`, which also fixes the number of classes; the
  reporting methods therefore require at least one accumulated batch.
  """

  def __init__(self):
    self.num_classes = 0
    self.total_num_samples = 0

  def accumulate_samples(self, batch_truth, batch_scores):
    """Cumulate a new batch of samples into the metric.

    Args:
      batch_truth: np.array of (num_samples, num_classes) giving boolean
        ground-truth of presence of that class in that sample for this batch.
      batch_scores: np.array of (num_samples, num_classes) giving the
        classifier-under-test's real-valued score for each class for each
        sample.
    """
    assert batch_scores.shape == batch_truth.shape
    num_samples, num_classes = batch_truth.shape
    if not self.num_classes:
      self.num_classes = num_classes
      self._per_class_cumulative_precision = np.zeros(self.num_classes)
      # Builtin int as dtype: the np.int alias no longer exists in current
      # NumPy releases.
      self._per_class_cumulative_count = np.zeros(self.num_classes, dtype=int)
    assert num_classes == self.num_classes
    for truth, scores in zip(batch_truth, batch_scores):
      pos_class_indices, precision_at_hits = (
        _one_sample_positive_class_precisions(scores, truth))
      self._per_class_cumulative_precision[pos_class_indices] += (
        precision_at_hits)
      self._per_class_cumulative_count[pos_class_indices] += 1
    self.total_num_samples += num_samples

  def per_class_lwlrap(self):
    """Return a vector of the per-class lwlraps for the accumulated samples."""
    # max(1, count) keeps classes that never occurred at 0 instead of 0/0.
    return (self._per_class_cumulative_precision /
            np.maximum(1, self._per_class_cumulative_count))

  def per_class_weight(self):
    """Return a normalized weight vector for the contributions of each class."""
    return (self._per_class_cumulative_count /
            float(np.sum(self._per_class_cumulative_count)))

  def overall_lwlrap(self):
    """Return the scalar overall lwlrap for cumulated samples."""
    return np.sum(self.per_class_lwlrap() * self.per_class_weight())


In [None]:
class LwrapEvaluation(Callback):
    """Keras callback that prints the validation lwlrap after every epoch.

    Args:
        valgen: Indexable validation batch source supporting ``len()``;
            ``valgen[i]`` must return a sequence whose first two items are the
            model inputs and the ground-truth label matrix for batch ``i``.
    """

    def __init__(self, valgen):
        # Initialize the Callback base class itself. The previous
        # super(Callback, self) call started the lookup *after* Callback and
        # therefore skipped Callback.__init__.
        super(LwrapEvaluation, self).__init__()
        self.valgen = valgen

    def on_epoch_end(self, epoch, logs=None):
        """Predict every validation batch and print the overall lwlrap.

        ``epoch`` and ``logs`` are accepted for the callback interface and are
        not used; ``logs`` defaults to None rather than a shared mutable dict.
        """
        lwrap = lwlrap_accumulator()
        for i in range(len(self.valgen)):
            inp, truth = self.valgen[i][:2]
            pred = self.model.predict(inp)
            lwrap.accumulate_samples(truth, pred)
        print("validation lwrap: ", lwrap.overall_lwlrap())
        


In [None]:
def get_model(lr=0.001, trainable = True, print_model=True):
    """Build and compile the fully-convolutional Keras classifier.

    The network is a Sequential stack of Conv2D -> BatchNormalization -> PReLU
    stages with SpatialDropout2D(0.25) and max pooling, closed by a 1x1
    sigmoid convolution with ``nclass`` filters and GlobalAveragePooling2D.
    The first layer declares ``input_shape=(None, 128, 1)``, i.e. the first
    spatial axis has no fixed length.

    Args:
        lr: Learning rate passed to the Adam optimizer.
        trainable: Value of the ``trainable`` flag passed to the layers that
            accept it below (see the review note on the last three
            BatchNormalization layers).
        print_model: If true, print ``model.summary()`` before returning.

    Returns:
        The model, compiled with binary cross-entropy loss and the
        ``categorical_accuracy`` metric.
    """
    nclass = 80
    model = Sequential()
    # Input normalization over axis 2, which is the size-128 axis of the declared input shape.
    model.add(BatchNormalization(input_shape=(None, 128, 1), trainable = trainable, axis=2))
    # NOTE(review): this input_shape is given on a non-first layer and disagrees with the
    # (None, 128, 1) declared above - it looks redundant; confirm before removing.
    model.add(Conv2D(filters=192, kernel_size=3, input_shape=(maxshape, 128, 1), padding="same", trainable = trainable,  kernel_initializer='he_uniform', 
                     kernel_constraint=max_norm(2.)))
    model.add(BatchNormalization(trainable = trainable))
    # shared_axes=[1,2] shares the PReLU slope across both spatial axes.
    model.add(PReLU(shared_axes=[1,2]))
    model.add(SpatialDropout2D(0.25))
    model.add(Conv2D(filters=192, kernel_size=3, padding="same", trainable = trainable,  kernel_initializer='he_uniform', 
              kernel_constraint=max_norm(2.)))
    model.add(BatchNormalization(trainable = trainable))
    model.add(PReLU(shared_axes=[1,2]))
    model.add(SpatialDropout2D(0.25))
    model.add(MaxPooling2D(pool_size=2))
    

    # Second double-convolution stage (128 filters).
    model.add(Conv2D(filters=128, kernel_size=3, padding="same", trainable = trainable,  kernel_initializer='he_uniform', kernel_constraint=max_norm(2.)))
    model.add(BatchNormalization(trainable = trainable))
    model.add(PReLU(shared_axes=[1,2]))
    model.add(SpatialDropout2D(0.25))
    model.add(Conv2D(filters=128, kernel_size=3, padding="same", trainable = trainable,  kernel_initializer='he_uniform', kernel_constraint=max_norm(2.)))
    model.add(BatchNormalization(trainable = trainable))
    model.add(PReLU(shared_axes=[1,2]))
    model.add(SpatialDropout2D(0.25))
    model.add(MaxPooling2D(pool_size=2))
    

    # Single-convolution stages from here on. Note that dropout precedes pooling in this
    # stage, while the later stages pool first and then apply dropout.
    model.add(Conv2D(filters=256, kernel_size=3, padding="same", trainable = trainable,  kernel_initializer='he_uniform', kernel_constraint=max_norm(2.)))
    model.add(BatchNormalization(trainable = trainable))
    model.add(PReLU(shared_axes=[1,2]))
    model.add(SpatialDropout2D(0.25))
    model.add(MaxPooling2D(pool_size=(2,2)))
    

    model.add(Conv2D(filters=512, kernel_size=3, padding="same", trainable = trainable,  kernel_initializer='he_uniform', kernel_constraint=max_norm(2.)))
    model.add(BatchNormalization(trainable = trainable))
    model.add(PReLU(shared_axes=[1,2]))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(SpatialDropout2D(0.25))
    
    # NOTE(review): from this stage on BatchNormalization() is created without the
    # `trainable` argument, unlike the earlier stages - confirm whether that is intended.
    model.add(Conv2D(filters=1024, kernel_size=3, padding="same", trainable = trainable,  kernel_initializer='he_uniform', kernel_constraint=max_norm(2.)))
    model.add(BatchNormalization())
    model.add(PReLU(shared_axes=[1,2]))
    model.add(MaxPooling2D(pool_size=2))
    model.add(SpatialDropout2D(0.25))
    
    # pool_size=(2,1): the last two stages pool only along the first spatial axis.
    model.add(Conv2D(filters=512, kernel_size=3, padding="same", trainable = trainable,  kernel_initializer='he_uniform', kernel_constraint=max_norm(2.)))
    model.add(BatchNormalization())
    model.add(PReLU(shared_axes=[1,2]))
    model.add(MaxPooling2D(pool_size=(2,1)))
    model.add(SpatialDropout2D(0.25))
    
    model.add(Conv2D(filters=256, kernel_size=3, padding="same", trainable = trainable,  kernel_initializer='he_uniform', kernel_constraint=max_norm(2.)))
    model.add(BatchNormalization())
    model.add(PReLU(shared_axes=[1,2]))
    model.add(MaxPooling2D(pool_size=(2,1)))
    model.add(SpatialDropout2D(0.25))
    
    # Classification head: a 1x1 sigmoid convolution gives one score map per class, and
    # global average pooling reduces each map to a single value per class.
    model.add(Conv2D(filters=nclass, kernel_size=(1,1), padding="valid", trainable = trainable,  kernel_initializer='he_uniform', activation="sigmoid"))
    model.add(GlobalAveragePooling2D())
    
    model.compile(optimizer=Adam(lr), loss='binary_crossentropy', metrics=['categorical_accuracy'])
    if print_model:
        model.summary()
    return model

In [None]:
# Weight file for each of the five cross-validation folds, keyed by fold index.
# Every path follows the same pattern with the fold index appearing twice.
softened_weights = {
    fold: "../input/softened-noisy-kfold-fold-{0}/validation_lwrap_curated_noisy_{0}_best_weight.h5".format(fold)
    for fold in range(5)
}


In [None]:
# Build one network per fold (indices 0-4) and load that fold's weight file into it.
# get_model() is called with its defaults, so the model summary is printed for every fold.
models = []
for i in range(5):
    model = get_model()
    model.load_weights(softened_weights[i])
    models.append(model)

In [None]:
# Load the sample submission; its `fname` column is used below to build the test file
# paths, and the frame itself is later filled with predictions and written out.
df = pd.read_csv('../input/freesound-audio-tagging-2019/sample_submission.csv')
# Notebook display only: preview of the first rows and the frame's shape.
df.head(), df.shape

In [None]:
# List the .wav files present in the test directory. In the visible code this list is
# only displayed; the files actually processed come from df.fname.
test_file_names = glob.glob("../input/freesound-audio-tagging-2019/test/*.wav")
# Notebook display only: file count and the first ten names in sorted order.
len(test_file_names), sorted(test_file_names)[:10]

In [None]:
# Test file paths in the row order of the sample submission, so that predictions made
# in this order line up with the rows of `df`.
testfiles = ["../input/freesound-audio-tagging-2019/test/" + f for f in df.fname.values]
# Notebook display only: path count and the first ten paths.
len(testfiles), testfiles[:10]

In [None]:
import librosa
def load_audio_file(file_path):
    """Load one audio file and return its PCEN-scaled mel spectrogram.

    The file is loaded at a 44100 Hz sampling rate, passed through
    ``librosa.effects.trim``, converted to a mel spectrogram with ``power=1``
    and then PCEN-normalized with fixed parameters.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        2-D float16 array holding the PCEN features.
        NOTE(review): the number of mel bands is librosa's default; the model
        in this file declares an input axis of size 128 - confirm they agree.
    """
    data, sr = librosa.core.load(file_path, sr=44100)
    data_t, _ = librosa.effects.trim(data)
    # Pass the waveform by keyword: recent librosa releases make the
    # arguments of melspectrogram keyword-only, and `y=` is accepted by
    # older releases as well.
    S = librosa.feature.melspectrogram(y=data_t, sr=sr, power=1)
    pcen = librosa.core.pcen(S, sr=sr,  gain=0.7, bias=0.1, power=0.4, time_constant=0.4, eps=1e-9)
    # Stored as float16 to reduce the memory held by the cached features.
    return pcen.astype(np.float16)

In [None]:
from tqdm import tqdm_notebook
# Extract features for every test file up front, in `testfiles` order, with a progress
# bar. All features are kept in memory as a list of per-file arrays.
features_test = [load_audio_file(f) for f in tqdm_notebook(testfiles)]

In [None]:
import gc
# Force a garbage-collection pass after feature extraction, before inference starts.
gc.collect()

In [None]:
# Fixed number of clips per forward pass. Previously the number of batches was
# fixed (400), so the batch size grew with the size of the test set, and sets
# with fewer files than batches produced empty batches.
# NOTE(review): 2 is a conservative choice given that every clip is expanded to
# `maxshape` frames; tune it to the memory that is actually available.
batch_size = 2
# Number of batches test_generator() yields; used as the step count for prediction.
nbatches = (len(features_test) + batch_size - 1) // batch_size


def test_generator():
    """Yield the test features as padded batches, in ``features_test`` order.

    Each batch stacks at most ``batch_size`` clips after ``pad2d`` brings them
    to a common shape. Exactly ``nbatches`` batches are produced, none of
    them empty.
    """
    for start in range(0, len(features_test), batch_size):
        chunk = features_test[start:start + batch_size]
        yield np.array([pad2d(feature) for feature in chunk])

In [None]:
# Ensemble by simple averaging: run every fold model over the whole test set,
# add up the prediction matrices and divide by the number of models.
preds_test = None
for m in tqdm_notebook(models):
    preds = m.predict_generator(test_generator(), nbatches, verbose=1)
    # The first model's output seeds the running sum; later ones are added to it.
    preds_test = preds if preds_test is None else preds_test + preds
preds_test = preds_test / len(models)

In [None]:
# Copy the averaged predictions into the submission frame column by column. The i + 1
# offset leaves column 0 untouched. Rows line up with the predictions because the test
# files were listed from df.fname in the same order.
for i in range(preds_test.shape[1]):
    df.iloc[:, i + 1] = preds_test[:, i]
    

In [None]:
# Notebook display only: check the first rows of the filled-in submission frame.
df.head()

In [None]:
# Write the submission file to the working directory, without the index column.
df.to_csv("submission.csv", index=False)