In [1]:
import os
import numpy as np
import pandas as pd
import scipy
import librosa
import shutil
import keras


import keras.backend as K
import tensorflow as tf
from scipy import signal

from keras.optimizers import *

from keras.regularizers import *

from keras import regularizers, optimizers
from keras.layers import *
from keras.models import *
from keras.applications import *
from keras.utils import *
from keras.callbacks import *
from sklearn.model_selection import *
from keras.preprocessing.image import *
os.environ['CUDA_VISIBLE_DEVICES']='2, 3'
import tensorflow as tf
import keras.backend.tensorflow_backend as KTF

config = tf.ConfigProto()  
config.gpu_options.allow_growth=True   #不全部占满显存, 按需分配
session = tf.Session(config=config)

# 设置session
KTF.set_session(session)
from keras import backend as K
K.get_session().list_devices()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


[_DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 268435456),
 _DeviceAttributes(/job:localhost/replica:0/task:0/device:GPU:0, GPU, 10968078746),
 _DeviceAttributes(/job:localhost/replica:0/task:0/device:GPU:1, GPU, 10968078746)]

In [2]:
os.listdir('../audio-data/')
train_path = '../audio-data/audio_train/'
test_path = '../audio-data/audio_test/'
train = pd.read_csv('../audio-data/train.csv')
test = pd.read_csv('../audio-data/sample_submission.csv')
# print('training samples: ', len(os.listdir(train_path)))
# print('test samples: ', len(os.listdir(test_path)))
# print('training labels: ', len(train.label.unique()))
# print(train.head())
LABELS = list(train.label.unique())
label_idx = {label: i for i, label in enumerate(LABELS)}
train.set_index('fname', inplace=True)
test.set_index('fname', inplace=True)
train['label_idx'] = train.label.apply(lambda x: label_idx[x])




In [3]:
class Config(object):
    def __init__(self,
                sampling_rate=44100, audio_duration=2, n_classes=41,
                use_mfcc=False, n_folds=10, learning_rate=0.0001,
                max_epochs=50, n_mfcc=20, use_log_sp=False, use_mixup=False, alpha=0.2, use_log_mel_sp=False):
        self.sampling_rate = sampling_rate
        self.audio_duration = audio_duration
        self.n_classes = n_classes
        self.use_log_sp = use_log_sp
        self.use_mfcc = use_mfcc
        self.use_mixup = use_mixup
        self.use_log_mel_sp = use_log_mel_sp
        self.alpha = alpha
        self.n_mfcc = n_mfcc
        self.n_folds = n_folds
        self.learning_rate = learning_rate
        self.max_epochs = max_epochs
        self.audio_length = self.sampling_rate * self.audio_duration
        if self.use_mfcc:
            # np.floor 计算比每一个元素小或相等的最大的整数
            self.dim = (self.n_mfcc, 1 + int(np.floor(self.audio_length/512)), 1)
        elif self.use_log_sp:
            self.dim = (self.audio_duration*100-1, self.sampling_rate//100+1, 3)
        elif self.use_log_mel_sp:
            self.dim = (self.n_mfcc, 1 + int(np.floor(self.audio_length/512)), 1)
        else:
            self.dim = (self.audio_length, 1)
            






In [4]:
# spectrogram.shape[0] = time*100 - 1
# spectrogram.shape[1] = rate/100 + 1
def log_specgram(audio, sample_rate, window_size=20,
                 step_size=10, eps=1e-10):
    nperseg = int(round(window_size * sample_rate / 1e3))
    noverlap = int(round(step_size * sample_rate / 1e3))
    freqs, times, spec = signal.spectrogram(audio,
                                    fs=sample_rate,
                                    window='hann',
                                    nperseg=nperseg,
                                    noverlap=noverlap,
                                    detrend=False)
    return freqs, times, np.log(spec.T.astype(np.float32) + eps)

def audio_norm(data):
    max_data = np.max(data)
    min_data = np.min(data)
    data = (data - min_data) / (max_data - min_data + 1e-6)
    return data - 0.5

In [5]:
import numpy as np


class MixupGenerator():
    def __init__(self, X_train, y_train, batch_size=32, alpha=0.2, shuffle=True, datagen=None):
        self.X_train = X_train
        self.y_train = y_train
        self.batch_size = batch_size
        self.alpha = alpha
        self.shuffle = shuffle
        self.sample_num = len(X_train)
        self.datagen = datagen

    def __call__(self):
        while True:
            indexes = self.__get_exploration_order()
            itr_num = int(len(indexes) // (self.batch_size * 2))

            for i in range(itr_num):
                batch_ids = indexes[i * self.batch_size * 2:(i + 1) * self.batch_size * 2]
                X, y = self.__data_generation(batch_ids)

                yield X, y

    def __get_exploration_order(self):
        indexes = np.arange(self.sample_num)

        if self.shuffle:
            np.random.shuffle(indexes)

        return indexes

    def __data_generation(self, batch_ids):
        _, h, w, c = self.X_train.shape
        l = np.random.beta(self.alpha, self.alpha, self.batch_size)
        X_l = l.reshape(self.batch_size, 1, 1, 1)
        y_l = l.reshape(self.batch_size, 1)

        X1 = self.X_train[batch_ids[:self.batch_size]]
        X2 = self.X_train[batch_ids[self.batch_size:]]
        X = X1 * X_l + X2 * (1 - X_l)

        if self.datagen:
            for i in range(self.batch_size):
                X[i] = self.datagen.random_transform(X[i])
                X[i] = self.datagen.standardize(X[i])

        if isinstance(self.y_train, list):
            y = []

            for y_train_ in self.y_train:
                y1 = y_train_[batch_ids[:self.batch_size]]
                y2 = y_train_[batch_ids[self.batch_size:]]
                y.append(y1 * y_l + y2 * (1 - y_l))
        else:
            y1 = self.y_train[batch_ids[:self.batch_size]]
            y2 = self.y_train[batch_ids[self.batch_size:]]
            y = y1 * y_l + y2 * (1 - y_l)

        return X, y

In [6]:
def get_random_eraser(p=0.5, s_l=0.02, s_h=0.4, r_1=0.3, r_2=1/0.3, v_l=0, v_h=255):
    def eraser(input_img):
        img_h, img_w, _ = input_img.shape
        p_1 = np.random.rand()

        if p_1 > p:
            return input_img

        while True:
            s = np.random.uniform(s_l, s_h) * img_h * img_w
            r = np.random.uniform(r_1, r_2)
            w = int(np.sqrt(s / r))
            h = int(np.sqrt(s * r))
            left = np.random.randint(0, img_w)
            top = np.random.randint(0, img_h)

            if left + w <= img_w and top + h <= img_h:
                break

        c = np.random.uniform(v_l, v_h)
        input_img[top:top + h, left:left + w, :] = c

        return input_img

    return eraser

In [7]:
%%time
X_train = np.load('X_44100x5_log_mel_sp_train.npy')
X_test = np.load('X_44100x5_log_mel_sp_test.npy')
# X_train = np.load('X_1d_train.npy')
# X_test = np.load('X_1d_test.npy')
y_train = np.load('y_train.npy')

print(X_train.shape, X_test.shape, y_train.shape)

(9473, 256, 431, 1) (9400, 256, 431, 1) (9473, 41)
CPU times: user 271 ms, sys: 10.8 s, total: 11.1 s
Wall time: 20.4 s


In [8]:
%%time
def normalization(x):
    mean = np.mean(x, axis=0)
    std = np.std(x, axis=0)
    x = (x - mean) / std
    return x
X_train = normalization(X_train)
X_test = normalization(X_test)
print(X_train.shape, X_test.shape)

(9473, 256, 431, 1) (9400, 256, 431, 1)
CPU times: user 20.6 s, sys: 27.2 s, total: 47.8 s
Wall time: 46.1 s


In [19]:
%%time
def mixup(data, targets, alpha):
        size = data.shape[0]
        weight = np.random.beta(alpha, alpha, size)
      
        x_weight = weight.reshape(size, 1, 1, 1)
#         x_weight = weight.reshape(size, 1, 1)
        y_weight = weight.reshape(size, 1)
        index = np.random.permutation(size)
        x1, x2 = data, data[index]
        x = x1 * x_weight + x2 * (1 - x_weight)
        y1, y2 = targets, targets[index]
        y = y1 * y_weight + y2 * (1 - y_weight)
        return x, y
X_train_mixup, y_train_mixup = mixup(X_train, y_train, 2)
print(X_train_mixup.shape, y_train_mixup.shape)

(9473, 256, 431, 1) (9473, 41)
CPU times: user 6.57 s, sys: 13.8 s, total: 20.4 s
Wall time: 20 s


In [9]:
class ParallelModelCheckpoint(ModelCheckpoint):
    def __init__(self,model,filepath, monitor='val_loss', verbose=1,
                 save_best_only=True, save_weights_only=True,
                 mode='auto', period=1):
        self.single_model = model
        super(ParallelModelCheckpoint,self).__init__(filepath, monitor, verbose,save_best_only, save_weights_only,mode, period)

    def set_model(self, model):
        super(ParallelModelCheckpoint,self).set_model(self.single_model)


class CustomModelCheckpoint(keras.callbacks.Callback):

    def __init__(self, model, path):
        self.model = model
        self.path = path
        self.best_loss = np.inf

    def on_epoch_end(self, epoch, logs=None):
        val_loss = logs['val_loss']
        if val_loss < self.best_loss:
            print("\nValidation loss decreased from {} to {}, saving model".format(self.best_loss, val_loss))
            self.model.save_weights(self.path, overwrite=True)
            self.best_loss = val_loss



In [10]:
def get_model(config):
    nclass = config.n_classes
    
    inp = Input(shape=(config.dim[0], config.dim[1], 1))
    x = Convolution2D(32, (3,3), padding="same")(inp)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPool2D(strides=(2, 2))(x)
    
    x = Convolution2D(32, (3,3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPool2D(strides=(2, 2))(x)
    
    x = Convolution2D(64, (3,3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPool2D(strides=(2, 2))(x)
    
    x = Convolution2D(64, (3,3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPool2D(strides=(2, 2))(x)

    x = Convolution2D(128, (3,3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPool2D(strides=(2, 2))(x)
    
    x = Convolution2D(128, (3,3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPool2D(strides=(2, 2))(x)
    
#     x = Flatten()(x)
    x = GlobalAveragePooling2D()(x)
#     x = Dropout(0.5)(x)
#     x = Dense(256)(x)
# #     x = BatchNormalization()(x)
#     x = Dropout(0.5)(x)
#     x = Activation("relu")(x)
    out = Dense(nclass, activation='softmax')(x)

    model = Model(inputs=inp, outputs=out)
    model.summary()
    print(len(model.layers))
    print(config.dim)
    return model

In [10]:
%%time
# log mel sp 44100x5
            
config = Config(sampling_rate=44100, audio_duration=5, n_classes=41, use_log_mel_sp=True, n_folds=7, max_epochs=300, n_mfcc=128*2)
PREDICTION_FOLDER = "predictions_log_mel_sp_44000x5"
CHECKPOINT_FOLDER = 'checkpionts_log_mel_sp_44000x5'



if not os.path.exists(PREDICTION_FOLDER):
    os.mkdir(PREDICTION_FOLDER)
if not os.path.exists(CHECKPOINT_FOLDER):
    os.mkdir(CHECKPOINT_FOLDER)
if os.path.exists('logs/' + PREDICTION_FOLDER):
    shutil.rmtree('logs/' + PREDICTION_FOLDER)

batch_size = 64

from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=config.n_folds, shuffle=True, random_state=2)


# for i, (train_split, val_split) in enumerate(skf.split(X_train, np.argmax(y_train, axis=1))):
#     X = np.r_[X_train[train_split], X_train_mixup[train_split]]
#     y = np.r_[y_train[train_split], y_train_mixup[train_split]]
#     X_val = X_train[val_split]
#     y_val = y_train[val_split]
    
for i, (train_split, val_split) in enumerate(skf.split(X_train, np.argmax(y_train, axis=1))):
    X = X_train[train_split]
    y = y_train[train_split]
    X_val = X_train[val_split]
    y_val = y_train[val_split]
    
    print('train shape: ', X.shape, y.shape)
    print('val shape: ', X_val.shape, y_val.shape)

    print("Fold: ", i)
    
    model = get_model(config)
#     parallel_model = multi_gpu_model(model, gpus=2)
    
    model.load_weights(CHECKPOINT_FOLDER + '/best_%d.h5'%i)
    
#     adam = optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-6, amsgrad=True)
#     model.compile(loss='categorical_crossentropy',
#                        optimizer=adam,
#                          metrics=['acc'])
    sgd = SGD(lr=0.00001, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    #     checkpoint = ModelCheckpoint(CHECKPOINT_FOLDER+'/best_%d.h5'%i, monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=True)
    checkpoint = CustomModelCheckpoint(model, CHECKPOINT_FOLDER + '/best_%d.h5'%i)
    early = EarlyStopping(monitor="val_loss", mode="min", patience=10)
    tb = TensorBoard(log_dir='./logs/' + PREDICTION_FOLDER + '/fold_%d'%i, write_graph=True)
    rlrop = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5)
    log = CSVLogger(PREDICTION_FOLDER + '/log_%d.csv'%i)
    callbacks_list = [checkpoint, early, tb, rlrop, log]
    
    history = model.fit(X, y, validation_data=(X_val, y_val), callbacks=[checkpoint], 
                        batch_size=batch_size, epochs=config.max_epochs, shuffle=True)
    

    model.load_weights(CHECKPOINT_FOLDER + '/best_%d.h5'%i)
#     parallel_model = multi_gpu_model(model, gpus=2)


    
    # Save train predictions
    predictions = model.predict(X_train, batch_size=batch_size, verbose=1)
    np.save(PREDICTION_FOLDER + "/train_predictions_%d.npy"%i, predictions)

    # Save test predictions
    predictions = model.predict(X_test, batch_size=batch_size, verbose=1)
    np.save(PREDICTION_FOLDER + "/test_predictions_%d.npy"%i, predictions)

    # Make a submission file
    top_3 = np.array(LABELS)[np.argsort(-predictions, axis=1)[:, :3]]
    predicted_labels = [' '.join(list(x)) for x in top_3]
    test['label'] = predicted_labels
    test[['label']].to_csv(PREDICTION_FOLDER + "/predictions_%d.csv"%i)





train shape:  (8108, 256, 431, 1) (8108, 41)
val shape:  (1365, 256, 431, 1) (1365, 41)
Fold:  0
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 256, 431, 1)       0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 256, 431, 32)      320       
_________________________________________________________________
batch_normalization_7 (Batch (None, 256, 431, 32)      128       
_________________________________________________________________
activation_7 (Activation)    (None, 256, 431, 32)      0         
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 128, 215, 32)      0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 128, 215, 32)      9248      
_____________________________________________

Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300

Validation loss decreased from 1.1532698383261433 to 0.8937533621386294, saving model
Epoch 30/300

Validation loss decreased from 0.8937533621386294 to 0.8878387273012937, saving model
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
train shape:  (8111, 256, 431, 1) (8111, 41)
val shape:  (1362, 256, 431, 1) (1362, 41)
Fold:  1
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 256, 431, 1)       0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 256, 431, 32)      320       
_________________________________________________________________
batch_normalization_13 (Batc (None, 256, 431, 32)      128       
_________________________________________________________________
activa


Validation loss decreased from 2.1430226736958633 to 2.075886449946979, saving model
Epoch 7/300

Validation loss decreased from 2.075886449946979 to 1.8394100101969857, saving model
Epoch 8/300

Validation loss decreased from 1.8394100101969857 to 1.8320850184697774, saving model
Epoch 9/300

Validation loss decreased from 1.8320850184697774 to 1.592461321741058, saving model
Epoch 10/300

Validation loss decreased from 1.592461321741058 to 1.5311667544275938, saving model
Epoch 11/300

Validation loss decreased from 1.5311667544275938 to 1.2710677104167951, saving model
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300

Validation loss decreased from 1.2710677104167951 to 1.2397375239071646, saving model
Epoch 18/300
Epoch 19/300
Epoch 20/300

Validation loss decreased from 1.2397375239071646 to 1.074134856309197, saving model
Epoch 21/300

Validation loss decreased from 1.074134856309197 to 1.0196874822263415, saving model
Epoch 22/300
Epoch 23/300
Epoch

Train on 8115 samples, validate on 1358 samples
Epoch 1/300

Validation loss decreased from inf to 4.784766863652989, saving model
Epoch 2/300

Validation loss decreased from 4.784766863652989 to 3.2227633540163336, saving model
Epoch 3/300

Validation loss decreased from 3.2227633540163336 to 2.7766234607865075, saving model
Epoch 4/300
Epoch 5/300

Validation loss decreased from 2.7766234607865075 to 2.4392727386969004, saving model
Epoch 6/300

Validation loss decreased from 2.4392727386969004 to 1.7061841682762742, saving model
Epoch 7/300
Epoch 8/300

Validation loss decreased from 1.7061841682762742 to 1.638481173845384, saving model
Epoch 9/300
Epoch 10/300
Epoch 11/300

Validation loss decreased from 1.638481173845384 to 1.2191354650812052, saving model
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300

Validation loss decreased from 1.2191354650812052 to 0.8310241866006416, saving model
Epoch 19/300
Epoch 20/300
Epoch 21/300

Validation

Train on 8121 samples, validate on 1352 samples
Epoch 1/300

Validation loss decreased from inf to 5.730176327496591, saving model
Epoch 2/300

Validation loss decreased from 5.730176327496591 to 2.4785092088597764, saving model
Epoch 3/300
Epoch 4/300

Validation loss decreased from 2.4785092088597764 to 1.985259339654234, saving model
Epoch 5/300

Validation loss decreased from 1.985259339654234 to 1.5234238381921892, saving model
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300

Validation loss decreased from 1.5234238381921892 to 1.360217403378007, saving model
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300

Validation loss decreased from 1.360217403378007 to 1.2784422586655475, saving model
Epoch 14/300
Epoch 15/300
Epoch 16/300

Validation loss decreased from 1.2784422586655475 to 1.1511600497206287, saving model
Epoch 17/300
Epoch 18/300
Epoch 19/300

Validation loss decreased from 1.1511600497206287 to 1.1002345071036435, saving model
Epoch 20/300
Epoch 21/300
Epoch 22/300


Train on 8125 samples, validate on 1348 samples
Epoch 1/300

Validation loss decreased from inf to 3.920248627308922, saving model
Epoch 2/300
Epoch 3/300

Validation loss decreased from 3.920248627308922 to 3.2712998814681873, saving model
Epoch 4/300

Validation loss decreased from 3.2712998814681873 to 2.3671821769691714, saving model
Epoch 5/300

Validation loss decreased from 2.3671821769691714 to 2.3171965524067866, saving model
Epoch 6/300
Epoch 7/300

Validation loss decreased from 2.3171965524067866 to 1.5602062643458652, saving model
Epoch 8/300
Epoch 9/300

Validation loss decreased from 1.5602062643458652 to 1.4814677259688562, saving model
Epoch 10/300

Validation loss decreased from 1.4814677259688562 to 1.4387312455417849, saving model
Epoch 11/300

Validation loss decreased from 1.4387312455417849 to 1.2171408077729562, saving model
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300

Validation loss decreased from 1.21714080777295

Train on 8146 samples, validate on 1327 samples
Epoch 1/300

Validation loss decreased from inf to 5.883746755509423, saving model
Epoch 2/300

Validation loss decreased from 5.883746755509423 to 2.8949762623204087, saving model
Epoch 3/300

Validation loss decreased from 2.8949762623204087 to 2.505142078780553, saving model
Epoch 4/300
Epoch 5/300

Validation loss decreased from 2.505142078780553 to 1.8076874947494013, saving model
Epoch 6/300

Validation loss decreased from 1.8076874947494013 to 1.517361645601527, saving model
Epoch 7/300
Epoch 8/300

Validation loss decreased from 1.517361645601527 to 1.5063562401273507, saving model
Epoch 9/300

Validation loss decreased from 1.5063562401273507 to 1.3748614292173522, saving model
Epoch 10/300
Epoch 13/300

Validation loss decreased from 1.2604577995909725 to 1.2501407837454501, saving model
Epoch 14/300
Epoch 15/300
Epoch 16/300

Validation loss decreased from 1.2501407837454501 to 1.2213207538988506, saving model
Epoch 17/300
Epoc

In [11]:
%%time
# log_mel_sp_44100x5 fine tune 
config = Config(sampling_rate=44100, audio_duration=5, n_classes=41, use_log_mel_sp=True, n_folds=7, max_epochs=300, n_mfcc=128*2)
PREDICTION_FOLDER = "predictions_log_mel_sp_44000x5"
CHECKPOINT_FOLDER = 'checkpionts_log_mel_sp_44000x5'



if not os.path.exists(PREDICTION_FOLDER):
    os.mkdir(PREDICTION_FOLDER)
if not os.path.exists(CHECKPOINT_FOLDER):
    os.mkdir(CHECKPOINT_FOLDER)
if os.path.exists('logs/' + PREDICTION_FOLDER):
    shutil.rmtree('logs/' + PREDICTION_FOLDER)

batch_size = 64

from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=config.n_folds, shuffle=True, random_state=2)


# for i, (train_split, val_split) in enumerate(skf.split(X_train, np.argmax(y_train, axis=1))):
#     X = np.r_[X_train[train_split], X_train_mixup[train_split]]
#     y = np.r_[y_train[train_split], y_train_mixup[train_split]]
#     X_val = X_train[val_split]
#     y_val = y_train[val_split]
    
for i, (train_split, val_split) in enumerate(skf.split(X_train, np.argmax(y_train, axis=1))):
    X = X_train[train_split]
    y = y_train[train_split]
    X_val = X_train[val_split]
    y_val = y_train[val_split]
    
    print('train shape: ', X.shape, y.shape)
    print('val shape: ', X_val.shape, y_val.shape)

    print("Fold: ", i)
    
    model = get_model(config)
#     parallel_model = multi_gpu_model(model, gpus=2)
    
    model.load_weights(CHECKPOINT_FOLDER + '/best_%d.h5'%i)
    
#     adam = optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-6, amsgrad=True)
#     model.compile(loss='categorical_crossentropy',
#                        optimizer=adam,
#                          metrics=['acc'])
    sgd = SGD(lr=0.00001, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    #     checkpoint = ModelCheckpoint(CHECKPOINT_FOLDER+'/best_%d.h5'%i, monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=True)
    checkpoint = CustomModelCheckpoint(model, CHECKPOINT_FOLDER + '/best_%d.h5'%i)
    early = EarlyStopping(monitor="val_loss", mode="min", patience=10)
    tb = TensorBoard(log_dir='./logs/' + PREDICTION_FOLDER + '/fold_%d'%i, write_graph=True)
    rlrop = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5)
    log = CSVLogger(PREDICTION_FOLDER + '/log_%d.csv'%i)
    callbacks_list = [checkpoint, early, tb, rlrop, log]
    
    history = model.fit(X, y, validation_data=(X_val, y_val), callbacks=[checkpoint], 
                        batch_size=batch_size, epochs=10, shuffle=True)
    

    model.load_weights(CHECKPOINT_FOLDER + '/best_%d.h5'%i)
#     parallel_model = multi_gpu_model(model, gpus=2)


    
    # Save train predictions
    predictions = model.predict(X_train, batch_size=batch_size, verbose=1)
    np.save(PREDICTION_FOLDER + "/train_predictions_%d.npy"%i, predictions)

    # Save test predictions
    predictions = model.predict(X_test, batch_size=batch_size, verbose=1)
    np.save(PREDICTION_FOLDER + "/test_predictions_%d.npy"%i, predictions)

    # Make a submission file
    top_3 = np.array(LABELS)[np.argsort(-predictions, axis=1)[:, :3]]
    predicted_labels = [' '.join(list(x)) for x in top_3]
    test['label'] = predicted_labels
    test[['label']].to_csv(PREDICTION_FOLDER + "/predictions_%d.csv"%i)






train shape:  (8108, 256, 431, 1) (8108, 41)
val shape:  (1365, 256, 431, 1) (1365, 41)
Fold:  0
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         (None, 256, 431, 1)       0         
_________________________________________________________________
conv2d_49 (Conv2D)           (None, 256, 431, 32)      320       
_________________________________________________________________
batch_normalization_49 (Batc (None, 256, 431, 32)      128       
_________________________________________________________________
activation_49 (Activation)   (None, 256, 431, 32)      0         
_________________________________________________________________
max_pooling2d_41 (MaxPooling (None, 128, 215, 32)      0         
_________________________________________________________________
conv2d_50 (Conv2D)           (None, 128, 215, 32)      9248      
_____________________________________________

Train on 8111 samples, validate on 1362 samples
Epoch 1/10

Validation loss decreased from inf to 0.860225069715413, saving model
Epoch 2/10
Epoch 3/10

Validation loss decreased from 0.860225069715413 to 0.8601464948640171, saving model
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10

Validation loss decreased from 0.8601464948640171 to 0.8599245737302671, saving model
Epoch 8/10
Epoch 9/10
Epoch 10/10
train shape:  (8112, 256, 431, 1) (8112, 41)
val shape:  (1361, 256, 431, 1) (1361, 41)
Fold:  2
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_11 (InputLayer)        (None, 256, 431, 1)       0         
_________________________________________________________________
conv2d_61 (Conv2D)           (None, 256, 431, 32)      320       
_________________________________________________________________
batch_normalization_61 (Batc (None, 256, 431, 32)      128       
______________________________________

Train on 8115 samples, validate on 1358 samples
Epoch 1/10

Validation loss decreased from inf to 0.8114897305905732, saving model
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train shape:  (8121, 256, 431, 1) (8121, 41)
val shape:  (1352, 256, 431, 1) (1352, 41)
Fold:  4
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_13 (InputLayer)        (None, 256, 431, 1)       0         
_________________________________________________________________
conv2d_73 (Conv2D)           (None, 256, 431, 32)      320       
_________________________________________________________________
batch_normalization_73 (Batc (None, 256, 431, 32)      128       
_________________________________________________________________
activation_73 (Activation)   (None, 256, 431, 32)      0         
_________________________________________________________________
max_pooling2

Train on 8125 samples, validate on 1348 samples
Epoch 1/10

Validation loss decreased from inf to 0.8677879699702079, saving model
Epoch 2/10

Validation loss decreased from 0.8677879699702079 to 0.8671770688334275, saving model
Epoch 3/10
Epoch 4/10

Validation loss decreased from 0.8671770688334275 to 0.8670146671353178, saving model
Epoch 5/10

Validation loss decreased from 0.8670146671353178 to 0.8667577261033327, saving model
Epoch 6/10
Epoch 7/10

Validation loss decreased from 0.8667577261033327 to 0.8666219865621374, saving model
Epoch 8/10
Epoch 9/10
Epoch 10/10
train shape:  (8146, 256, 431, 1) (8146, 41)
val shape:  (1327, 256, 431, 1) (1327, 41)
Fold:  6
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_15 (InputLayer)        (None, 256, 431, 1)       0         
_________________________________________________________________
conv2d_85 (Conv2D)           (None, 256, 431, 32)      320  

In [11]:
%%time

# log mel sp 44100x5 mixup
config = Config(sampling_rate=44100, audio_duration=5, n_classes=41, use_log_mel_sp=True, n_folds=7, max_epochs=300, n_mfcc=128*2)
PREDICTION_FOLDER = "predictions_log_mel_sp_44000x5_mixup"
CHECKPOINT_FOLDER = 'checkpoints_log_mel_sp_44000x5_mixup'

if not os.path.exists(PREDICTION_FOLDER):
    os.mkdir(PREDICTION_FOLDER)
if not os.path.exists(CHECKPOINT_FOLDER):
    os.mkdir(CHECKPOINT_FOLDER)
if os.path.exists('logs/' + PREDICTION_FOLDER):
    shutil.rmtree('logs/' + PREDICTION_FOLDER)

batch_size = 64

from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=config.n_folds, shuffle=True, random_state=2)


# for i, (train_split, val_split) in enumerate(skf.split(X_train, np.argmax(y_train, axis=1))):
#     X = np.r_[X_train[train_split], X_train_mixup[train_split]]
#     y = np.r_[y_train[train_split], y_train_mixup[train_split]]
#     X_val = X_train[val_split]
#     y_val = y_train[val_split]
    
for i, (train_split, val_split) in enumerate(skf.split(X_train, np.argmax(y_train, axis=1))):
    if i < 1:
        continue
    X = X_train[train_split]
    y = y_train[train_split]
    X_val = X_train[val_split]
    y_val = y_train[val_split]
    
    print('train shape: ', X.shape, y.shape)
    print('val shape: ', X_val.shape, y_val.shape)

    print("Fold: ", i)
    
    model = get_model(config)
#     parallel_model = multi_gpu_model(model, gpus=2)
    
#     model.load_weights('checkpoint_1d_24000x5' + '/best_99.h5')
    
    adam = optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-7, amsgrad=True)
    model.compile(loss='categorical_crossentropy',
                       optimizer=adam,
                         metrics=['acc'])

#     sgd = SGD(lr=0.01, momentum=0.9, nesterov=True)
#     parallel_model.compile(loss='categorical_crossentropy',
#                   optimizer=sgd,
#                   metrics=['acc'])

    checkpoint = ParallelModelCheckpoint(model, CHECKPOINT_FOLDER + '/best_%d.h5'%i)
    early = EarlyStopping(monitor="val_loss", mode="min", patience=15)
    tb = TensorBoard(log_dir='./logs/' + PREDICTION_FOLDER + '/fold_%d'%i, write_graph=True)
    rlrop = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5)
    log = CSVLogger(PREDICTION_FOLDER + '/log_%d.csv'%i)
    callbacks = [checkpoint, early, tb, rlrop, log]
    
    training_generator = MixupGenerator(X, y, batch_size=batch_size, alpha=0.5, datagen=None)()
    model.fit_generator(generator=training_generator,
                        steps_per_epoch=X.shape[0] // batch_size,
                        validation_data=(X_val, y_val),
                        epochs=config.max_epochs, verbose=1,
                        callbacks=callbacks)
    
    
# Fine tune
#     model.load_weights(CHECKPOINT_FOLDER + '/best_%d.h5' % i)
#     K.set_value(model.optimizer.lr, 0.00001)
#     parallel_model.fit_generator(mixupgen(),
#                         steps_per_epoch=X_train.shape[0] // batch_size,
#                         epochs=10,
#                         validation_data=test_datagen.flow(X_val, y_val), callbacks=None)
#     parallel_model.save_weights(CHECKPOINT_FOLDER + '/besh_%d.h5' % i)
    

    model.load_weights(CHECKPOINT_FOLDER + '/best_%d.h5'%i)
#     parallel_model = multi_gpu_model(model, gpus=2)

    
    # Save train predictions
    predictions = model.predict(X_train, batch_size=batch_size, verbose=1)
    np.save(PREDICTION_FOLDER + "/train_predictions_%d.npy"%i, predictions)

    # Save test predictions
    predictions = model.predict(X_test, batch_size=batch_size, verbose=1)
    np.save(PREDICTION_FOLDER + "/test_predictions_%d.npy"%i, predictions)

    # Make a submission file
    top_3 = np.array(LABELS)[np.argsort(-predictions, axis=1)[:, :3]]
    predicted_labels = [' '.join(list(x)) for x in top_3]
    test['label'] = predicted_labels
    test[['label']].to_csv(PREDICTION_FOLDER + "/predictions_%d.csv"%i)







train shape:  (8111, 256, 431, 1) (8111, 41)
val shape:  (1362, 256, 431, 1) (1362, 41)
Fold:  1
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 256, 431, 1)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 256, 431, 32)      320       
_________________________________________________________________
batch_normalization_1 (Batch (None, 256, 431, 32)      128       
_________________________________________________________________
activation_1 (Activation)    (None, 256, 431, 32)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 128, 215, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 128, 215, 32)      9248      
_____________________________________________


Epoch 00021: val_loss did not improve
Epoch 22/300

Epoch 00022: val_loss improved from 1.11288 to 1.06991, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_1.h5
Epoch 23/300

Epoch 00023: val_loss did not improve
Epoch 24/300

Epoch 00024: val_loss did not improve
Epoch 25/300

Epoch 00025: val_loss did not improve
Epoch 26/300

Epoch 00026: val_loss did not improve
Epoch 27/300

Epoch 00027: val_loss did not improve
Epoch 28/300

Epoch 00028: val_loss did not improve
Epoch 29/300

Epoch 00029: val_loss improved from 1.06991 to 0.81870, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_1.h5
Epoch 30/300

Epoch 00030: val_loss improved from 0.81870 to 0.81331, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_1.h5
Epoch 31/300

Epoch 00031: val_loss improved from 0.81331 to 0.80123, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_1.h5
Epoch 32/300

Epoch 00032: val_loss did not improve
Epoch 33/300

Epoch 00033: val_loss improved from 0.80123 to 0.80

Epoch 1/300

Epoch 00001: val_loss improved from inf to 4.10816, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_2.h5
Epoch 2/300

Epoch 00002: val_loss improved from 4.10816 to 3.42952, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_2.h5
Epoch 3/300

Epoch 00003: val_loss improved from 3.42952 to 2.98713, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_2.h5
Epoch 4/300

Epoch 00004: val_loss did not improve
Epoch 5/300

Epoch 00005: val_loss improved from 2.98713 to 1.98158, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_2.h5
Epoch 6/300

Epoch 00006: val_loss did not improve
Epoch 7/300

Epoch 00007: val_loss improved from 1.98158 to 1.81028, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_2.h5
Epoch 8/300

Epoch 00008: val_loss improved from 1.81028 to 1.68022, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_2.h5
Epoch 9/300

Epoch 00009: val_loss did not improve
Epoch 10/300

Epoch 00010: val_loss improved from 1.68022 t

Epoch 1/300

Epoch 00001: val_loss improved from inf to 4.19594, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_3.h5
Epoch 2/300

Epoch 00002: val_loss did not improve
Epoch 3/300

Epoch 00003: val_loss improved from 4.19594 to 3.38787, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_3.h5
Epoch 4/300

Epoch 00004: val_loss improved from 3.38787 to 2.45754, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_3.h5
Epoch 5/300

Epoch 00005: val_loss improved from 2.45754 to 2.29675, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_3.h5
Epoch 6/300

Epoch 00006: val_loss did not improve
Epoch 7/300

Epoch 00007: val_loss improved from 2.29675 to 1.68438, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_3.h5
Epoch 8/300

Epoch 00008: val_loss improved from 1.68438 to 1.48220, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_3.h5
Epoch 9/300

Epoch 00009: val_loss did not improve
Epoch 10/300

Epoch 00010: val_loss did not improve
Epoch 1

Epoch 1/300

Epoch 00001: val_loss improved from inf to 3.24089, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_4.h5
Epoch 2/300

Epoch 00002: val_loss did not improve
Epoch 3/300

Epoch 00003: val_loss improved from 3.24089 to 2.46435, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_4.h5
Epoch 4/300

Epoch 00004: val_loss improved from 2.46435 to 2.33790, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_4.h5
Epoch 5/300

Epoch 00005: val_loss improved from 2.33790 to 2.23069, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_4.h5
Epoch 6/300

Epoch 00006: val_loss improved from 2.23069 to 1.63679, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_4.h5
Epoch 7/300

Epoch 00007: val_loss improved from 1.63679 to 1.46855, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_4.h5
Epoch 8/300

Epoch 00008: val_loss improved from 1.46855 to 1.39606, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_4.h5
Epoch 9/300

Epoch 00009: val


Epoch 00039: val_loss did not improve
Epoch 40/300

Epoch 00040: val_loss did not improve
Epoch 41/300

Epoch 00041: val_loss did not improve
Epoch 42/300

Epoch 00042: val_loss did not improve
Epoch 43/300

Epoch 00043: val_loss did not improve
Epoch 44/300

Epoch 00044: val_loss did not improve
Epoch 45/300

Epoch 00045: val_loss did not improve
Epoch 46/300

Epoch 00046: val_loss did not improve
Epoch 47/300

Epoch 00047: val_loss did not improve
Epoch 48/300

Epoch 00048: val_loss did not improve
Epoch 49/300

Epoch 00049: val_loss did not improve
Epoch 50/300

Epoch 00050: val_loss did not improve
Epoch 51/300

Epoch 00051: val_loss did not improve
Epoch 52/300

Epoch 00052: val_loss did not improve
Epoch 53/300

Epoch 00053: val_loss did not improve
train shape:  (8125, 256, 431, 1) (8125, 41)
val shape:  (1348, 256, 431, 1) (1348, 41)
Fold:  5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inpu


Epoch 00047: val_loss did not improve
Epoch 48/300

Epoch 00048: val_loss did not improve
Epoch 49/300

Epoch 00049: val_loss did not improve
Epoch 50/300

Epoch 00050: val_loss did not improve
Epoch 51/300

Epoch 00051: val_loss did not improve
Epoch 52/300

Epoch 00052: val_loss did not improve
Epoch 53/300

Epoch 00053: val_loss did not improve
Epoch 54/300

Epoch 00054: val_loss did not improve
Epoch 55/300

Epoch 00055: val_loss did not improve
Epoch 56/300

Epoch 00056: val_loss did not improve
Epoch 57/300

Epoch 00057: val_loss improved from 0.81188 to 0.81153, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_5.h5
Epoch 58/300

Epoch 00058: val_loss did not improve
Epoch 59/300

Epoch 00059: val_loss did not improve
Epoch 60/300

Epoch 00060: val_loss did not improve
Epoch 61/300

Epoch 00061: val_loss did not improve
Epoch 62/300

Epoch 00062: val_loss did not improve
Epoch 63/300

Epoch 00063: val_loss did not improve
Epoch 64/300

Epoch 00064: val_loss did not impr

Epoch 1/300

Epoch 00001: val_loss improved from inf to 3.08575, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_6.h5
Epoch 2/300

Epoch 00002: val_loss did not improve
Epoch 3/300

Epoch 00003: val_loss improved from 3.08575 to 2.31058, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_6.h5
Epoch 4/300

Epoch 00004: val_loss did not improve
Epoch 5/300

Epoch 00005: val_loss did not improve
Epoch 6/300

Epoch 00006: val_loss improved from 2.31058 to 1.93247, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_6.h5
Epoch 7/300

Epoch 00007: val_loss improved from 1.93247 to 1.40911, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_6.h5
Epoch 8/300

Epoch 00008: val_loss did not improve
Epoch 9/300

Epoch 00009: val_loss did not improve
Epoch 10/300

Epoch 00010: val_loss improved from 1.40911 to 1.30379, saving model to checkpoints_log_mel_sp_44000x5_mixup/best_6.h5
Epoch 11/300

Epoch 00011: val_loss did not improve
Epoch 12/300

Epoch 00012: val_loss 

In [13]:
%%time

# log mel sp 44100x5 image_aug
config = Config(sampling_rate=44100, audio_duration=5, n_classes=41, use_log_mel_sp=True, n_folds=10, max_epochs=300, n_mfcc=128*2)
PREDICTION_FOLDER = "predictions_log_mel_sp_44000x5_image_aug_10f"
CHECKPOINT_FOLDER = 'checkpoints_log_mel_sp_44000x5_image_aug_10f'

if not os.path.exists(PREDICTION_FOLDER):
    os.mkdir(PREDICTION_FOLDER)
if not os.path.exists(CHECKPOINT_FOLDER):
    os.mkdir(CHECKPOINT_FOLDER)
if os.path.exists('logs/' + PREDICTION_FOLDER):
    shutil.rmtree('logs/' + PREDICTION_FOLDER)

batch_size = 142

from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=config.n_folds, shuffle=True, random_state=2)


# for i, (train_split, val_split) in enumerate(skf.split(X_train, np.argmax(y_train, axis=1))):
#     X = np.r_[X_train[train_split], X_train_mixup[train_split]]
#     y = np.r_[y_train[train_split], y_train_mixup[train_split]]
#     X_val = X_train[val_split]
#     y_val = y_train[val_split]
    
for i, (train_split, val_split) in enumerate(skf.split(X_train, np.argmax(y_train, axis=1))):

    X = X_train[train_split]
    y = y_train[train_split]
    X_val = X_train[val_split]
    y_val = y_train[val_split]
    
    print('train shape: ', X.shape, y.shape)
    print('val shape: ', X_val.shape, y_val.shape)

    print("Fold: ", i)
    
    model = get_model(config)
    parallel_model = multi_gpu_model(model, gpus=2)
    
#     model.load_weights('checkpoint_1d_24000x5' + '/best_99.h5')
    
    adam = optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-7, amsgrad=True)
    parallel_model.compile(loss='categorical_crossentropy',
                       optimizer=adam,
                         metrics=['acc'])

#     sgd = SGD(lr=0.01, momentum=0.9, nesterov=True)
#     parallel_model.compile(loss='categorical_crossentropy',
#                   optimizer=sgd,
#                   metrics=['acc'])

    checkpoint = ParallelModelCheckpoint(model, CHECKPOINT_FOLDER + '/best_%d.h5'%i)
    early = EarlyStopping(monitor="val_loss", mode="min", patience=15)
    tb = TensorBoard(log_dir='./logs/' + PREDICTION_FOLDER + '/fold_%d'%i, write_graph=True)
    rlrop = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5)
    log = CSVLogger(PREDICTION_FOLDER + '/log_%d.csv'%i)
    callbacks = [checkpoint, early, tb, rlrop, log]
    
    
    datagen = ImageDataGenerator(
    featurewise_center=True,  # set input mean to 0 over the dataset
    featurewise_std_normalization=True,  # divide inputs by std of the dataset
    rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
    width_shift_range=0,  # randomly shift images horizontally (fraction of total width)
    height_shift_range=0,  # randomly shift images vertically (fraction of total height)
    horizontal_flip=False,  # randomly flip images
    preprocessing_function=get_random_eraser(v_l=np.min(X), v_h=np.max(X)) # RANDOM ERASER
    )
    
    datagen.fit(X)
    training_generator = MixupGenerator(X, y, batch_size=batch_size, alpha=0.5, datagen=datagen)()
    
    parallel_model.fit_generator(generator=training_generator,
                        steps_per_epoch=X.shape[0] // batch_size,
                        validation_data=(X_val, y_val),
                        epochs=config.max_epochs, verbose=1,
                        callbacks=callbacks)
    
    
# Fine tune
#     model.load_weights(CHECKPOINT_FOLDER + '/best_%d.h5' % i)
#     K.set_value(model.optimizer.lr, 0.00001)
#     parallel_model.fit_generator(mixupgen(),
#                         steps_per_epoch=X_train.shape[0] // batch_size,
#                         epochs=10,
#                         validation_data=test_datagen.flow(X_val, y_val), callbacks=None)
#     parallel_model.save_weights(CHECKPOINT_FOLDER + '/besh_%d.h5' % i)
    

    model.load_weights(CHECKPOINT_FOLDER + '/best_%d.h5'%i)
    parallel_model = multi_gpu_model(model, gpus=2)

    
    # Save train predictions
    predictions = parallel_model.predict(X_train, batch_size=batch_size, verbose=1)
    np.save(PREDICTION_FOLDER + "/train_predictions_%d.npy"%i, predictions)

    # Save test predictions
    predictions = parallel_model.predict(X_test, batch_size=batch_size, verbose=1)
    np.save(PREDICTION_FOLDER + "/test_predictions_%d.npy"%i, predictions)

    # Make a submission file
    top_3 = np.array(LABELS)[np.argsort(-predictions, axis=1)[:, :3]]
    predicted_labels = [' '.join(list(x)) for x in top_3]
    test['label'] = predicted_labels
    test[['label']].to_csv(PREDICTION_FOLDER + "/predictions_%d.csv"%i)








train shape:  (8518, 256, 431, 1) (8518, 41)
val shape:  (955, 256, 431, 1) (955, 41)
Fold:  0
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 256, 431, 1)       0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 256, 431, 32)      320       
_________________________________________________________________
batch_normalization_19 (Batc (None, 256, 431, 32)      128       
_________________________________________________________________
activation_19 (Activation)   (None, 256, 431, 32)      0         
_________________________________________________________________
max_pooling2d_19 (MaxPooling (None, 128, 215, 32)      0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 128, 215, 32)      9248      
_______________________________________________

train shape:  (8520, 256, 431, 1) (8520, 41)
val shape:  (953, 256, 431, 1) (953, 41)
Fold:  1
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 256, 431, 1)       0         
_________________________________________________________________
conv2d_25 (Conv2D)           (None, 256, 431, 32)      320       
_________________________________________________________________
batch_normalization_25 (Batc (None, 256, 431, 32)      128       
_________________________________________________________________
activation_25 (Activation)   (None, 256, 431, 32)      0         
_________________________________________________________________
max_pooling2d_25 (MaxPooling (None, 128, 215, 32)      0         
_________________________________________________________________
conv2d_26 (Conv2D)           (None, 128, 215, 32)      9248      
_______________________________________________


Epoch 00020: val_loss improved from 1.16912 to 1.16226, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_1.h5
Epoch 21/300

Epoch 00021: val_loss improved from 1.16226 to 0.99920, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_1.h5
Epoch 22/300

Epoch 00022: val_loss did not improve
Epoch 23/300

Epoch 00023: val_loss did not improve
Epoch 24/300

Epoch 00024: val_loss did not improve
Epoch 25/300

Epoch 00025: val_loss did not improve
Epoch 26/300

Epoch 00026: val_loss did not improve
Epoch 27/300

Epoch 00027: val_loss did not improve
Epoch 28/300

Epoch 00028: val_loss improved from 0.99920 to 0.94388, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_1.h5
Epoch 29/300

Epoch 00029: val_loss improved from 0.94388 to 0.85678, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_1.h5
Epoch 30/300

Epoch 00030: val_loss improved from 0.85678 to 0.83786, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_1

Epoch 1/300

Epoch 00001: val_loss improved from inf to 3.76224, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_2.h5
Epoch 2/300

Epoch 00002: val_loss improved from 3.76224 to 3.56314, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_2.h5
Epoch 3/300

Epoch 00003: val_loss did not improve
Epoch 4/300

Epoch 00004: val_loss improved from 3.56314 to 3.40319, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_2.h5
Epoch 5/300

Epoch 00005: val_loss improved from 3.40319 to 2.26351, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_2.h5
Epoch 6/300

Epoch 00006: val_loss improved from 2.26351 to 1.60646, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_2.h5
Epoch 7/300

Epoch 00007: val_loss did not improve
Epoch 8/300

Epoch 00008: val_loss improved from 1.60646 to 1.49183, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_2.h5
Epoch 9/300

Epoch 00009: val_loss did not improve
Epoch 10/30


Epoch 00039: val_loss did not improve
Epoch 40/300

Epoch 00040: val_loss did not improve
Epoch 41/300

Epoch 00041: val_loss did not improve
Epoch 42/300

Epoch 00042: val_loss did not improve
Epoch 43/300

Epoch 00043: val_loss did not improve
Epoch 44/300

Epoch 00044: val_loss did not improve
Epoch 45/300

Epoch 00045: val_loss improved from 0.73471 to 0.73406, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_2.h5
Epoch 46/300

Epoch 00046: val_loss improved from 0.73406 to 0.73084, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_2.h5
Epoch 47/300

Epoch 00047: val_loss improved from 0.73084 to 0.72868, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_2.h5
Epoch 48/300

Epoch 00048: val_loss did not improve
Epoch 49/300

Epoch 00049: val_loss did not improve
Epoch 50/300

Epoch 00050: val_loss improved from 0.72868 to 0.72809, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_2.h5
Epoch 51/300

Epoch 00051: val_lo

Epoch 1/300

Epoch 00001: val_loss improved from inf to 7.04285, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_3.h5
Epoch 2/300

Epoch 00002: val_loss improved from 7.04285 to 3.77731, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_3.h5
Epoch 3/300

Epoch 00003: val_loss improved from 3.77731 to 2.61033, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_3.h5
Epoch 4/300

Epoch 00004: val_loss improved from 2.61033 to 2.56431, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_3.h5
Epoch 5/300

Epoch 00005: val_loss did not improve
Epoch 6/300

Epoch 00006: val_loss improved from 2.56431 to 1.97523, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_3.h5
Epoch 7/300

Epoch 00007: val_loss improved from 1.97523 to 1.95057, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_3.h5
Epoch 8/300

Epoch 00008: val_loss improved from 1.95057 to 1.59201, saving model to checkpoints_log_mel_sp_44000


Epoch 00039: val_loss improved from 0.72883 to 0.71870, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_3.h5
Epoch 40/300

Epoch 00040: val_loss did not improve
Epoch 41/300

Epoch 00041: val_loss improved from 0.71870 to 0.71585, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_3.h5
Epoch 42/300

Epoch 00042: val_loss did not improve
Epoch 43/300

Epoch 00043: val_loss did not improve
Epoch 44/300

Epoch 00044: val_loss did not improve
Epoch 45/300

Epoch 00045: val_loss did not improve
Epoch 46/300

Epoch 00046: val_loss did not improve
Epoch 47/300

Epoch 00047: val_loss did not improve
Epoch 48/300

Epoch 00048: val_loss improved from 0.71585 to 0.71399, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_3.h5
Epoch 49/300

Epoch 00049: val_loss improved from 0.71399 to 0.70938, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_3.h5
Epoch 50/300

Epoch 00050: val_loss improved from 0.70938 to 0.70514, saving model to

Epoch 1/300

Epoch 00001: val_loss improved from inf to 7.23132, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_4.h5
Epoch 2/300

Epoch 00002: val_loss improved from 7.23132 to 4.58552, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_4.h5
Epoch 3/300

Epoch 00003: val_loss did not improve
Epoch 4/300

Epoch 00004: val_loss improved from 4.58552 to 2.82824, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_4.h5
Epoch 5/300

Epoch 00005: val_loss improved from 2.82824 to 2.71271, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_4.h5
Epoch 6/300

Epoch 00006: val_loss improved from 2.71271 to 2.08233, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_4.h5
Epoch 7/300

Epoch 00007: val_loss did not improve
Epoch 8/300

Epoch 00008: val_loss did not improve
Epoch 9/300

Epoch 00009: val_loss did not improve
Epoch 10/300

Epoch 00010: val_loss did not improve
Epoch 11/300

Epoch 00011: val_loss improved fro

Epoch 1/300

Epoch 00001: val_loss improved from inf to 5.84551, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_5.h5
Epoch 2/300

Epoch 00002: val_loss improved from 5.84551 to 5.16449, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_5.h5
Epoch 3/300

Epoch 00003: val_loss improved from 5.16449 to 4.49520, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_5.h5
Epoch 4/300

Epoch 00004: val_loss improved from 4.49520 to 3.12423, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_5.h5
Epoch 5/300

Epoch 00005: val_loss improved from 3.12423 to 2.22730, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_5.h5
Epoch 6/300

Epoch 00006: val_loss did not improve
Epoch 7/300

Epoch 00007: val_loss did not improve
Epoch 8/300

Epoch 00008: val_loss improved from 2.22730 to 2.20445, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_5.h5
Epoch 9/300

Epoch 00009: val_loss improved from 2.20445 to 1.


Epoch 00039: val_loss did not improve
Epoch 40/300

Epoch 00040: val_loss did not improve
Epoch 41/300

Epoch 00041: val_loss did not improve
Epoch 42/300

Epoch 00042: val_loss did not improve
Epoch 43/300

Epoch 00043: val_loss did not improve
train shape:  (8528, 256, 431, 1) (8528, 41)
val shape:  (945, 256, 431, 1) (945, 41)
Fold:  6
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        (None, 256, 431, 1)       0         
_________________________________________________________________
conv2d_55 (Conv2D)           (None, 256, 431, 32)      320       
_________________________________________________________________
batch_normalization_55 (Batc (None, 256, 431, 32)      128       
_________________________________________________________________
activation_55 (Activation)   (None, 256, 431, 32)      0         
________________________________________________________________


Epoch 00060: val_loss did not improve
Epoch 61/300

Epoch 00061: val_loss did not improve
Epoch 62/300

Epoch 00062: val_loss did not improve
Epoch 63/300

Epoch 00063: val_loss did not improve
Epoch 64/300

Epoch 00064: val_loss did not improve
Epoch 65/300

Epoch 00065: val_loss did not improve
Epoch 66/300

Epoch 00066: val_loss did not improve
Epoch 67/300

Epoch 00067: val_loss did not improve
Epoch 68/300

Epoch 00068: val_loss did not improve
Epoch 69/300

Epoch 00069: val_loss did not improve
train shape:  (8530, 256, 431, 1) (8530, 41)
val shape:  (943, 256, 431, 1) (943, 41)
Fold:  7
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_11 (InputLayer)        (None, 256, 431, 1)       0         
_________________________________________________________________
conv2d_61 (Conv2D)           (None, 256, 431, 32)      320       
_________________________________________________________________
ba


Epoch 00012: val_loss did not improve
Epoch 13/300

Epoch 00013: val_loss improved from 1.42418 to 1.19956, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_7.h5
Epoch 14/300

Epoch 00014: val_loss improved from 1.19956 to 1.11226, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_7.h5
Epoch 15/300

Epoch 00015: val_loss did not improve
Epoch 16/300

Epoch 00016: val_loss did not improve
Epoch 17/300

Epoch 00017: val_loss did not improve
Epoch 18/300

Epoch 00018: val_loss did not improve
Epoch 19/300

Epoch 00019: val_loss did not improve
Epoch 20/300

Epoch 00020: val_loss improved from 1.11226 to 1.08255, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_7.h5
Epoch 21/300

Epoch 00021: val_loss did not improve
Epoch 22/300

Epoch 00022: val_loss did not improve
Epoch 23/300

Epoch 00023: val_loss improved from 1.08255 to 1.07312, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_7.h5
Epoch 24/300

Epoch 00024: val_lo

Epoch 1/300

Epoch 00001: val_loss improved from inf to 4.58406, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_8.h5
Epoch 2/300

Epoch 00002: val_loss improved from 4.58406 to 4.14242, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_8.h5
Epoch 3/300

Epoch 00003: val_loss improved from 4.14242 to 3.13627, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_8.h5
Epoch 4/300

Epoch 00004: val_loss improved from 3.13627 to 2.10118, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_8.h5
Epoch 5/300

Epoch 00005: val_loss improved from 2.10118 to 2.05771, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_8.h5
Epoch 6/300

Epoch 00006: val_loss did not improve
Epoch 7/300

Epoch 00007: val_loss improved from 2.05771 to 1.70933, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_8.h5
Epoch 8/300

Epoch 00008: val_loss improved from 1.70933 to 1.47688, saving model to checkpoints_log_mel_sp_44000


Epoch 00040: val_loss improved from 0.80124 to 0.80057, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_8.h5
Epoch 41/300

Epoch 00041: val_loss improved from 0.80057 to 0.79503, saving model to checkpoints_log_mel_sp_44000x5_image_aug_10f/best_8.h5
Epoch 42/300

Epoch 00042: val_loss did not improve
Epoch 43/300

Epoch 00043: val_loss did not improve
Epoch 44/300

Epoch 00044: val_loss did not improve
Epoch 45/300

Epoch 00045: val_loss did not improve
Epoch 46/300

Epoch 00046: val_loss did not improve
Epoch 47/300

Epoch 00047: val_loss did not improve
Epoch 48/300

Epoch 00048: val_loss did not improve
Epoch 49/300

Epoch 00049: val_loss did not improve
Epoch 50/300

Epoch 00050: val_loss did not improve
Epoch 51/300

Epoch 00051: val_loss did not improve
train shape:  (8537, 256, 431, 1) (8537, 41)
val shape:  (936, 256, 431, 1) (936, 41)
Fold:  9
_________________________________________________________________
Layer (type)                 Output Shape         

In [None]:
%%time

# log mel sp 44100x5 image_aug + class_weights
config = Config(sampling_rate=44100, audio_duration=5, n_classes=41, use_log_mel_sp=True, n_folds=7, max_epochs=300, n_mfcc=128*2)
PREDICTION_FOLDER = "predictions_log_mel_sp_44000x5_image_aug_10f"
CHECKPOINT_FOLDER = 'checkpoints_log_mel_sp_44000x5_image_aug_10f'

if not os.path.exists(PREDICTION_FOLDER):
    os.mkdir(PREDICTION_FOLDER)
if not os.path.exists(CHECKPOINT_FOLDER):
    os.mkdir(CHECKPOINT_FOLDER)
if os.path.exists('logs/' + PREDICTION_FOLDER):
    shutil.rmtree('logs/' + PREDICTION_FOLDER)

batch_size = 142

from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=config.n_folds, shuffle=True, random_state=2)


# for i, (train_split, val_split) in enumerate(skf.split(X_train, np.argmax(y_train, axis=1))):
#     X = np.r_[X_train[train_split], X_train_mixup[train_split]]
#     y = np.r_[y_train[train_split], y_train_mixup[train_split]]
#     X_val = X_train[val_split]
#     y_val = y_train[val_split]
    
for i, (train_split, val_split) in enumerate(skf.split(X_train, np.argmax(y_train, axis=1))):

    X = X_train[train_split]
    y = y_train[train_split]
    X_val = X_train[val_split]
    y_val = y_train[val_split]
    
    print('train shape: ', X.shape, y.shape)
    print('val shape: ', X_val.shape, y_val.shape)

    print("Fold: ", i)
    
    model = get_model(config)
    parallel_model = multi_gpu_model(model, gpus=2)
    
#     model.load_weights('checkpoint_1d_24000x5' + '/best_99.h5')
    
    adam = optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-7, amsgrad=True)
    parallel_model.compile(loss='categorical_crossentropy',
                       optimizer=adam,
                         metrics=['acc'])

#     sgd = SGD(lr=0.01, momentum=0.9, nesterov=True)
#     parallel_model.compile(loss='categorical_crossentropy',
#                   optimizer=sgd,
#                   metrics=['acc'])

    checkpoint = ParallelModelCheckpoint(model, CHECKPOINT_FOLDER + '/best_%d.h5'%i)
    early = EarlyStopping(monitor="val_loss", mode="min", patience=15)
    tb = TensorBoard(log_dir='./logs/' + PREDICTION_FOLDER + '/fold_%d'%i, write_graph=True)
    rlrop = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5)
    log = CSVLogger(PREDICTION_FOLDER + '/log_%d.csv'%i)
    callbacks = [checkpoint, early, tb, rlrop, log]
    
    
    datagen = ImageDataGenerator(
    featurewise_center=True,  # set input mean to 0 over the dataset
    featurewise_std_normalization=True,  # divide inputs by std of the dataset
    rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
    width_shift_range=0,  # randomly shift images horizontally (fraction of total width)
    height_shift_range=0,  # randomly shift images vertically (fraction of total height)
    horizontal_flip=False,  # randomly flip images
    preprocessing_function=get_random_eraser(v_l=np.min(X), v_h=np.max(X)) # RANDOM ERASER
    )
    
    datagen.fit(X)
    training_generator = MixupGenerator(X, y, batch_size=batch_size, alpha=0.5, datagen=datagen)()
    
    parallel_model.fit_generator(generator=training_generator,
                        steps_per_epoch=X.shape[0] // batch_size,
                        validation_data=(X_val, y_val),
                        epochs=config.max_epochs, verbose=1,
                        callbacks=callbacks)
    
    
# Fine tune
#     model.load_weights(CHECKPOINT_FOLDER + '/best_%d.h5' % i)
#     K.set_value(model.optimizer.lr, 0.00001)
#     parallel_model.fit_generator(mixupgen(),
#                         steps_per_epoch=X_train.shape[0] // batch_size,
#                         epochs=10,
#                         validation_data=test_datagen.flow(X_val, y_val), callbacks=None)
#     parallel_model.save_weights(CHECKPOINT_FOLDER + '/besh_%d.h5' % i)
    

    model.load_weights(CHECKPOINT_FOLDER + '/best_%d.h5'%i)
    parallel_model = multi_gpu_model(model, gpus=2)

    
    # Save train predictions
    predictions = parallel_model.predict(X_train, batch_size=batch_size, verbose=1)
    np.save(PREDICTION_FOLDER + "/train_predictions_%d.npy"%i, predictions)

    # Save test predictions
    predictions = parallel_model.predict(X_test, batch_size=batch_size, verbose=1)
    np.save(PREDICTION_FOLDER + "/test_predictions_%d.npy"%i, predictions)

    # Make a submission file
    top_3 = np.array(LABELS)[np.argsort(-predictions, axis=1)[:, :3]]
    predicted_labels = [' '.join(list(x)) for x in top_3]
    test['label'] = predicted_labels
    test[['label']].to_csv(PREDICTION_FOLDER + "/predictions_%d.csv"%i)









In [22]:
def get_model(config):
    nclass = config.n_classes
    
    inp = Input(shape=(config.dim[0], config.dim[1], 1))
    x = Convolution2D(32, (3,3), padding="same")(inp)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Convolution2D(32, (3,3), padding="same")(inp)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPool2D(strides=(2, 2))(x)
    
    x = Convolution2D(64, (3,3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Convolution2D(64, (3,3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPool2D(strides=(2, 2))(x)
    
    x = Convolution2D(128, (3,3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Convolution2D(128, (3,3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Convolution2D(128, (1,1), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPool2D(strides=(2, 2))(x)
    
    x = Convolution2D(256, (3,3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Convolution2D(256, (3,3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Convolution2D(256, (1,1), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPool2D(strides=(2, 2))(x)

    x = Convolution2D(512, (3,3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Convolution2D(512, (3,3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Convolution2D(512, (1,1), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
#     x = MaxPool2D(strides=(2, 2))(x)
    

#     x = GlobalAveragePooling2D()(x)
    x = GlobalMaxPooling2D()(x)
    x = Dropout(0.5)(x)
    x = Dense(128)(x)
    x = Dropout(0.5)(x)
    x = Dense(128)(x)
    x = BatchNormalization()(x)
#     x = Dropout(0.5)(x)
#     x = Activation("relu")(x)
    out = Dense(nclass, activation='softmax')(x)

    model = Model(inputs=inp, outputs=out)
    model.summary()
    print(len(model.layers))
    print(config.dim)
    return model

In [16]:
%%time

# log mel sp 44100x5 image_aug_m2
config = Config(sampling_rate=44100, audio_duration=5, n_classes=41, use_log_mel_sp=True, n_folds=7, max_epochs=300, n_mfcc=128*2)
PREDICTION_FOLDER = "predictions_log_mel_sp_44000x5_image_aug_m2"
CHECKPOINT_FOLDER = 'checkpoints_log_mel_sp_44000x5_image_aug_m2'

if not os.path.exists(PREDICTION_FOLDER):
    os.mkdir(PREDICTION_FOLDER)
if not os.path.exists(CHECKPOINT_FOLDER):
    os.mkdir(CHECKPOINT_FOLDER)
if os.path.exists('logs/' + PREDICTION_FOLDER):
    shutil.rmtree('logs/' + PREDICTION_FOLDER)

batch_size = 40

from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=config.n_folds, shuffle=True, random_state=2)


# for i, (train_split, val_split) in enumerate(skf.split(X_train, np.argmax(y_train, axis=1))):
#     X = np.r_[X_train[train_split], X_train_mixup[train_split]]
#     y = np.r_[y_train[train_split], y_train_mixup[train_split]]
#     X_val = X_train[val_split]
#     y_val = y_train[val_split]
    
for i, (train_split, val_split) in enumerate(skf.split(X_train, np.argmax(y_train, axis=1))):

    X = X_train[train_split]
    y = y_train[train_split]
    X_val = X_train[val_split]
    y_val = y_train[val_split]
    
    print('train shape: ', X.shape, y.shape)
    print('val shape: ', X_val.shape, y_val.shape)

    print("Fold: ", i)
    
    model = get_model(config)
#     parallel_model = multi_gpu_model(model, gpus=2)
    
#     model.load_weights('checkpoint_1d_24000x5' + '/best_99.h5')
    
    adam = optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-7, amsgrad=True)
    model.compile(loss='categorical_crossentropy',
                       optimizer=adam,
                         metrics=['acc'])

#     sgd = SGD(lr=0.01, momentum=0.9, nesterov=True)
#     parallel_model.compile(loss='categorical_crossentropy',
#                   optimizer=sgd,
#                   metrics=['acc'])

    checkpoint = ParallelModelCheckpoint(model, CHECKPOINT_FOLDER + '/best_%d.h5'%i)
    early = EarlyStopping(monitor="val_loss", mode="min", patience=15)
    tb = TensorBoard(log_dir='./logs/' + PREDICTION_FOLDER + '/fold_%d'%i, write_graph=True)
    rlrop = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5)
    log = CSVLogger(PREDICTION_FOLDER + '/log_%d.csv'%i)
    callbacks = [checkpoint, early, tb, rlrop, log]
    
    
    datagen = ImageDataGenerator(
    featurewise_center=True,  # set input mean to 0 over the dataset
    featurewise_std_normalization=True,  # divide inputs by std of the dataset
    rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
    width_shift_range=0,  # randomly shift images horizontally (fraction of total width)
    height_shift_range=0,  # randomly shift images vertically (fraction of total height)
    horizontal_flip=False,  # randomly flip images
    preprocessing_function=get_random_eraser(v_l=np.min(X), v_h=np.max(X)) # RANDOM ERASER
    )
    
    datagen.fit(X)
    training_generator = MixupGenerator(X, y, batch_size=batch_size, alpha=0.5, datagen=datagen)()
    
    model.fit_generator(generator=training_generator,
                        steps_per_epoch=X.shape[0] // batch_size,
                        validation_data=(X_val, y_val),
                        epochs=config.max_epochs, verbose=1,
                        callbacks=callbacks)
    
    
# Fine tune
#     model.load_weights(CHECKPOINT_FOLDER + '/best_%d.h5' % i)
#     K.set_value(model.optimizer.lr, 0.00001)
#     parallel_model.fit_generator(mixupgen(),
#                         steps_per_epoch=X_train.shape[0] // batch_size,
#                         epochs=10,
#                         validation_data=test_datagen.flow(X_val, y_val), callbacks=None)
#     parallel_model.save_weights(CHECKPOINT_FOLDER + '/besh_%d.h5' % i)
    

    model.load_weights(CHECKPOINT_FOLDER + '/best_%d.h5'%i)
#     parallel_model = multi_gpu_model(model, gpus=2)

    
    # Save train predictions
    predictions = model.predict(X_train, batch_size=batch_size, verbose=1)
    np.save(PREDICTION_FOLDER + "/train_predictions_%d.npy"%i, predictions)

    # Save test predictions
    predictions = model.predict(X_test, batch_size=batch_size, verbose=1)
    np.save(PREDICTION_FOLDER + "/test_predictions_%d.npy"%i, predictions)

    # Make a submission file
    top_3 = np.array(LABELS)[np.argsort(-predictions, axis=1)[:, :3]]
    predicted_labels = [' '.join(list(x)) for x in top_3]
    test['label'] = predicted_labels
    test[['label']].to_csv(PREDICTION_FOLDER + "/predictions_%d.csv"%i)









train shape:  (8108, 256, 431, 1) (8108, 41)
val shape:  (1365, 256, 431, 1) (1365, 41)
Fold:  0
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_15 (InputLayer)        (None, 256, 431, 1)       0         
_________________________________________________________________
conv2d_86 (Conv2D)           (None, 256, 431, 32)      320       
_________________________________________________________________
batch_normalization_86 (Batc (None, 256, 431, 32)      128       
_________________________________________________________________
activation_86 (Activation)   (None, 256, 431, 32)      0         
_________________________________________________________________
max_pooling2d_71 (MaxPooling (None, 128, 215, 32)      0         
_________________________________________________________________
conv2d_87 (Conv2D)           (None, 128, 215, 64)      18496     
_____________________________________________

Epoch 1/300

Epoch 00001: val_loss improved from inf to 7.33205, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_1.h5
Epoch 2/300

Epoch 00002: val_loss improved from 7.33205 to 2.43428, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_1.h5
Epoch 3/300

Epoch 00003: val_loss did not improve
Epoch 4/300

Epoch 00004: val_loss improved from 2.43428 to 2.03855, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_1.h5
Epoch 5/300

Epoch 00005: val_loss did not improve
Epoch 6/300

Epoch 00006: val_loss improved from 2.03855 to 1.92870, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_1.h5
Epoch 7/300

Epoch 00007: val_loss did not improve
Epoch 8/300

Epoch 00008: val_loss did not improve
Epoch 9/300

Epoch 00009: val_loss did not improve
Epoch 10/300

Epoch 00010: val_loss did not improve
Epoch 11/300

Epoch 00011: val_loss improved from 1.92870 to 1.32345, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_1.h5
E


Epoch 00004: val_loss improved from 3.08419 to 2.42817, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_2.h5
Epoch 5/300

Epoch 00005: val_loss improved from 2.42817 to 1.83870, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_2.h5
Epoch 6/300

Epoch 00006: val_loss improved from 1.83870 to 1.79552, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_2.h5
Epoch 7/300

Epoch 00007: val_loss improved from 1.79552 to 1.76521, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_2.h5
Epoch 8/300

Epoch 00008: val_loss did not improve
Epoch 9/300

Epoch 00009: val_loss did not improve
Epoch 10/300

Epoch 00010: val_loss did not improve
Epoch 11/300

Epoch 00011: val_loss did not improve
Epoch 12/300

Epoch 00012: val_loss improved from 1.76521 to 1.47708, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_2.h5
Epoch 13/300

Epoch 00013: val_loss improved from 1.47708 to 1.32525, saving model to checkpoints_log_mel_sp_4


Epoch 00042: val_loss did not improve
Epoch 43/300

Epoch 00043: val_loss did not improve
Epoch 44/300

Epoch 00044: val_loss did not improve
Epoch 45/300

Epoch 00045: val_loss did not improve
Epoch 46/300

Epoch 00046: val_loss did not improve
Epoch 47/300

Epoch 00047: val_loss did not improve
Epoch 48/300

Epoch 00048: val_loss did not improve
Epoch 49/300

Epoch 00049: val_loss did not improve
Epoch 50/300

Epoch 00050: val_loss did not improve
Epoch 51/300

Epoch 00051: val_loss did not improve
Epoch 52/300

Epoch 00052: val_loss did not improve
Epoch 53/300

Epoch 00053: val_loss did not improve
Epoch 54/300

Epoch 00054: val_loss did not improve
Epoch 55/300

Epoch 00055: val_loss did not improve
train shape:  (8115, 256, 431, 1) (8115, 41)
val shape:  (1358, 256, 431, 1) (1358, 41)
Fold:  3
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_18 (InputLayer)        (None, 256, 431, 1)       0


Epoch 00004: val_loss improved from 2.99879 to 2.25631, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_3.h5
Epoch 5/300

Epoch 00005: val_loss did not improve
Epoch 6/300

Epoch 00006: val_loss improved from 2.25631 to 1.86334, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_3.h5
Epoch 7/300

Epoch 00007: val_loss did not improve
Epoch 8/300

Epoch 00008: val_loss did not improve
Epoch 9/300

Epoch 00009: val_loss improved from 1.86334 to 1.66361, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_3.h5
Epoch 10/300

Epoch 00010: val_loss improved from 1.66361 to 1.59046, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_3.h5
Epoch 11/300

Epoch 00011: val_loss did not improve
Epoch 12/300

Epoch 00012: val_loss did not improve
Epoch 13/300

Epoch 00013: val_loss did not improve
Epoch 14/300

Epoch 00014: val_loss improved from 1.59046 to 1.53969, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_3.h5
Epoch 


Epoch 00005: val_loss did not improve
Epoch 6/300

Epoch 00006: val_loss did not improve
Epoch 7/300

Epoch 00007: val_loss improved from 1.95076 to 1.41707, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_4.h5
Epoch 8/300

Epoch 00008: val_loss did not improve
Epoch 9/300

Epoch 00009: val_loss did not improve
Epoch 10/300

Epoch 00010: val_loss improved from 1.41707 to 1.32927, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_4.h5
Epoch 11/300

Epoch 00011: val_loss did not improve
Epoch 12/300

Epoch 00012: val_loss did not improve
Epoch 13/300

Epoch 00013: val_loss did not improve
Epoch 14/300

Epoch 00014: val_loss did not improve
Epoch 15/300

Epoch 00015: val_loss did not improve
Epoch 16/300

Epoch 00016: val_loss improved from 1.32927 to 1.13210, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_4.h5
Epoch 17/300

Epoch 00017: val_loss did not improve
Epoch 18/300

Epoch 00018: val_loss did not improve
Epoch 19/300

Epoch 00019

Epoch 1/300

Epoch 00001: val_loss improved from inf to 3.18958, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_5.h5
Epoch 2/300

Epoch 00002: val_loss improved from 3.18958 to 2.85997, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_5.h5
Epoch 3/300

Epoch 00003: val_loss did not improve
Epoch 4/300

Epoch 00004: val_loss improved from 2.85997 to 2.16405, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_5.h5
Epoch 5/300

Epoch 00005: val_loss did not improve
Epoch 6/300

Epoch 00006: val_loss improved from 2.16405 to 1.96202, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_5.h5
Epoch 7/300

Epoch 00007: val_loss improved from 1.96202 to 1.81781, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_5.h5
Epoch 8/300

Epoch 00008: val_loss improved from 1.81781 to 1.46862, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_5.h5
Epoch 9/300

Epoch 00009: val_loss did not improve
Epoch 10/300

Epo


Epoch 00039: val_loss did not improve
Epoch 40/300

Epoch 00040: val_loss did not improve
Epoch 41/300

Epoch 00041: val_loss did not improve
Epoch 42/300

Epoch 00042: val_loss did not improve
Epoch 43/300

Epoch 00043: val_loss did not improve
Epoch 44/300

Epoch 00044: val_loss did not improve
Epoch 45/300

Epoch 00045: val_loss did not improve
Epoch 46/300

Epoch 00046: val_loss did not improve
Epoch 47/300

Epoch 00047: val_loss did not improve
Epoch 48/300

Epoch 00048: val_loss did not improve
Epoch 49/300

Epoch 00049: val_loss did not improve
Epoch 50/300

Epoch 00050: val_loss did not improve
Epoch 51/300

Epoch 00051: val_loss did not improve
Epoch 52/300

Epoch 00052: val_loss did not improve
Epoch 53/300

Epoch 00053: val_loss did not improve
train shape:  (8146, 256, 431, 1) (8146, 41)
val shape:  (1327, 256, 431, 1) (1327, 41)
Fold:  6
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inpu


Epoch 00002: val_loss improved from 4.08391 to 3.08936, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_6.h5
Epoch 3/300

Epoch 00003: val_loss improved from 3.08936 to 2.19133, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_6.h5
Epoch 4/300

Epoch 00004: val_loss did not improve
Epoch 5/300

Epoch 00005: val_loss did not improve
Epoch 6/300

Epoch 00006: val_loss did not improve
Epoch 7/300

Epoch 00007: val_loss improved from 2.19133 to 1.58421, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_6.h5
Epoch 8/300

Epoch 00008: val_loss did not improve
Epoch 9/300

Epoch 00009: val_loss did not improve
Epoch 10/300

Epoch 00010: val_loss improved from 1.58421 to 1.57772, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_6.h5
Epoch 11/300

Epoch 00011: val_loss improved from 1.57772 to 1.44151, saving model to checkpoints_log_mel_sp_44000x5_image_aug_m2/best_6.h5
Epoch 12/300

Epoch 00012: val_loss did not improve
Epoch 13

In [25]:
%%time
# log_mel_sp_44100x5 
config = Config(sampling_rate=44100, audio_duration=5, n_classes=41, use_log_mel_sp=True, n_folds=5, max_epochs=300, n_mfcc=128*2)
PREDICTION_FOLDER = "predictions_log_mel_sp_44000x5_mixup_2x"
CHECKPOINT_FOLDER = 'checkpionts_log_mel_sp_44000x5_mixup_2x'



if not os.path.exists(PREDICTION_FOLDER):
    os.mkdir(PREDICTION_FOLDER)
if not os.path.exists(CHECKPOINT_FOLDER):
    os.mkdir(CHECKPOINT_FOLDER)
if os.path.exists('logs/' + PREDICTION_FOLDER):
    shutil.rmtree('logs/' + PREDICTION_FOLDER)

batch_size = 64

from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=config.n_folds, shuffle=True, random_state=2)


for i, (train_split, val_split) in enumerate(skf.split(X_train, np.argmax(y_train, axis=1))):
    X = np.r_[X_train[train_split], X_train_mixup[train_split]]
    y = np.r_[y_train[train_split], y_train_mixup[train_split]]
    X_val = X_train[val_split]
    y_val = y_train[val_split]
    
# for i, (train_split, val_split) in enumerate(skf.split(X_train, np.argmax(y_train, axis=1))):
#     X = X_train[train_split]
#     y = y_train[train_split]
#     X_val = X_train[val_split]
#     y_val = y_train[val_split]
    
    print('train shape: ', X.shape, y.shape)
    print('val shape: ', X_val.shape, y_val.shape)

    print("Fold: ", i)
    
    model = get_model(config)
#     parallel_model = multi_gpu_model(model, gpus=2)
    
#     model.load_weights(CHECKPOINT_FOLDER + '/best_%d.h5'%i)
    
    adam = optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-6, amsgrad=True)
    model.compile(loss='categorical_crossentropy',
                       optimizer=adam,
                         metrics=['acc'])
#     sgd = SGD(lr=0.00001, momentum=0.9, nesterov=True)
#     model.compile(loss='categorical_crossentropy',
#                   optimizer=sgd,
#                   metrics=['accuracy'])

    #     checkpoint = ModelCheckpoint(CHECKPOINT_FOLDER+'/best_%d.h5'%i, monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=True)
    checkpoint = CustomModelCheckpoint(model, CHECKPOINT_FOLDER + '/best_%d.h5'%i)
    early = EarlyStopping(monitor="val_loss", mode="min", patience=15)
    tb = TensorBoard(log_dir='./logs/' + PREDICTION_FOLDER + '/fold_%d'%i, write_graph=True)
    rlrop = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5)
    log = CSVLogger(PREDICTION_FOLDER + '/log_%d.csv'%i)
    callbacks_list = [checkpoint, early, tb, rlrop, log]
    
    history = model.fit(X, y, validation_data=(X_val, y_val), callbacks=callbacks_list, 
                        batch_size=batch_size, epochs=config.max_epochs, shuffle=True)
    

    model.load_weights(CHECKPOINT_FOLDER + '/best_%d.h5'%i)
#     parallel_model = multi_gpu_model(model, gpus=2)


    
    # Save train predictions
    predictions = model.predict(X_train, batch_size=batch_size, verbose=1)
    np.save(PREDICTION_FOLDER + "/train_predictions_%d.npy"%i, predictions)

    # Save test predictions
    predictions = model.predict(X_test, batch_size=batch_size, verbose=1)
    np.save(PREDICTION_FOLDER + "/test_predictions_%d.npy"%i, predictions)

    # Make a submission file
    top_3 = np.array(LABELS)[np.argsort(-predictions, axis=1)[:, :3]]
    predicted_labels = [' '.join(list(x)) for x in top_3]
    test['label'] = predicted_labels
    test[['label']].to_csv(PREDICTION_FOLDER + "/predictions_%d.csv"%i)







train shape:  (15144, 256, 431, 1) (15144, 41)
val shape:  (1901, 256, 431, 1) (1901, 41)
Fold:  0
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_25 (InputLayer)        (None, 256, 431, 1)       0         
_________________________________________________________________
conv2d_180 (Conv2D)          (None, 256, 431, 32)      320       
_________________________________________________________________
batch_normalization_188 (Bat (None, 256, 431, 32)      128       
_________________________________________________________________
activation_180 (Activation)  (None, 256, 431, 32)      0         
_________________________________________________________________
max_pooling2d_106 (MaxPoolin (None, 128, 215, 32)      0         
_________________________________________________________________
conv2d_181 (Conv2D)          (None, 128, 215, 32)      9248      
___________________________________________

Epoch 7/300

Validation loss decreased from 1.5492449226319098 to 1.3216732721559867, saving model
Epoch 8/300

Validation loss decreased from 1.3216732721559867 to 1.2418281610572302, saving model
Epoch 9/300

Validation loss decreased from 1.2418281610572302 to 1.165733650311027, saving model
Epoch 10/300

Validation loss decreased from 1.165733650311027 to 1.0781811972563837, saving model
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300

Validation loss decreased from 1.0781811972563837 to 0.8926779144179582, saving model
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300

Validation loss decreased from 0.8926779144179582 to 0.6165361692078875, saving model
Epoch 24/300

Validation loss decreased from 0.6165361692078875 to 0.6141870581437966, saving model
Epoch 25/300

Validation loss decreased from 0.6141870581437966 to 0.6123100093971188, saving model
Epoch 26/300
Epoch 27/300

Validation loss decreased from 0.61

Train on 15156 samples, validate on 1895 samples
Epoch 1/300

Validation loss decreased from inf to 2.9440285369399986, saving model
Epoch 2/300

Validation loss decreased from 2.9440285369399986 to 2.2410008869573748, saving model
Epoch 3/300

Validation loss decreased from 2.2410008869573748 to 2.1552185105145134, saving model
Epoch 4/300

Validation loss decreased from 2.1552185105145134 to 1.8199748279551402, saving model
Epoch 5/300

Validation loss decreased from 1.8199748279551402 to 1.3131829325945208, saving model
Epoch 6/300
Epoch 7/300

Validation loss decreased from 1.3131829325945208 to 1.2693160547115558, saving model
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300

Validation loss decreased from 1.2693160547115558 to 0.9642103831812071, saving model
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300

Validation loss decreased from 0.9642103831812071 to 0.5868891879719605, saving model
Epoch 19/300

Validation loss decreased from 


Validation loss decreased from 0.8790385199802235 to 0.5486323537488998, saving model
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300

Validation loss decreased from 0.5486323537488998 to 0.5479361142075635, saving model
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
train shape:  (15174, 256, 431, 1) (15174, 41)
val shape:  (1886, 256, 431, 1) (1886, 41)
Fold:  4
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_29 (InputLayer)        (None, 256, 431, 1)       0         
_________________________________________________________________
conv2d_204 (Conv2D)          (None, 256, 431, 32)      320       
_________________________________________________________________
batch_normalization