In [None]:
import os
import numpy as np
np.random.seed(1969)
import tensorflow as tf
tf.set_random_seed(1969)

from scipy import signal
import glob
import re
import pandas as pd
import gc
from scipy.io import wavfile

from keras import optimizers, losses, activations, models
from keras.callbacks import TensorBoard
from keras.models import Sequential, Model
from keras.layers import GRU, Convolution2D, Dense, Input, Flatten, Dropout, MaxPooling2D, BatchNormalization, Conv3D, ConvLSTM2D, concatenate, merge, ZeroPadding2D
from keras.layers.convolutional import Conv2D
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.pooling import AveragePooling2D, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.utils import multi_gpu_model
import keras.backend as K

from tqdm import tqdm
from sklearn.model_selection import GroupKFold
from python_speech_features import mfcc
from python_speech_features import delta
from python_speech_features import logfbank

import fnmatch
import random

import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
%matplotlib inline

## Load local modules and custom methods

In [None]:
from utilities.utilities import list_wavs_fname, pad_audio, chop_audio, label_transform_audio, label_transform_speech, test_data_generator
from utilities.wav_utilities import read
from DenseNet.custom_layers import *
from DenseNet.DenseNet import DenseNet

In [None]:
def train_on_model(model, epochs_, lr_, decay_, train_x, train_y, val_x, val_y, BATCH_SIZE=256):
    """Compile ``model`` with Nesterov-momentum SGD and fit it on the given data.

    The model is (re)compiled on every call, so calling this function again
    with a smaller ``lr_`` continues training from the current weights with a
    fresh optimizer.

    Parameters
    ----------
    model : object exposing Keras-style ``compile`` and ``fit`` methods.
    epochs_ : number of epochs passed to ``fit``.
    lr_ : SGD learning rate.
    decay_ : SGD learning-rate decay.
    train_x, train_y : training inputs and targets.
    val_x, val_y : validation inputs and targets, passed as ``validation_data``.
    BATCH_SIZE : mini-batch size (default 256).

    Returns
    -------
    Whatever ``model.fit`` returns (a ``History`` object for Keras models), so
    the caller can inspect or plot the training curves.
    """
    sgd = optimizers.SGD(lr=lr_, decay=decay_, momentum=0.9, nesterov=True)
    # NOTE(review): 'binary_crossentropy' is paired with 'categorical_accuracy'
    # here; kept as-is to preserve training behaviour - confirm this is intended
    # for the single-label targets used in this notebook.
    model.compile(optimizer=sgd, loss='binary_crossentropy', metrics=['categorical_accuracy'])
    return model.fit(
        train_x,
        train_y,
        batch_size=BATCH_SIZE,
        validation_data=(val_x, val_y),
        epochs=epochs_,
        shuffle=True,
        verbose=1,
    )

### Load UrbanSound8K dataset

In [None]:
# Read the UrbanSound8K metadata table and keep its "class" column handy.
urban = pd.read_csv("UrbanSound8K/metadata/UrbanSound8K.csv")
classes = urban.loc[:, "class"]

In [None]:
# Directory layout for the UrbanSound8K pre-training stage.
root_path = 'UrbanSound8K'   # dataset root
out_path = 'UrbanSound8K'    # output directory (same folder as the dataset root)
model_path = '.'
train_data_path = os.path.join(root_path, 'audio')

In [None]:
# Paths of every .wav file found (recursively) under the UrbanSound8K audio directory.
# os.walk yields entries in filesystem-dependent order, so the list is sorted to keep
# the sample order - and therefore the seeded train/validation split - reproducible.
matches = sorted(
    os.path.join(root, filename)
    for root, _dirnames, filenames in os.walk(train_data_path)
    for filename in fnmatch.filter(filenames, '*.wav')
)

In [None]:
# Show the first five collected .wav paths as a quick sanity check.
matches[:5]

In [None]:
"""                                                                                                                                                       
Contributions from:                                                                                                                                       
Luis Andre Dutra e Silva                                                                                                                                  
https://www.kaggle.com/mindcool/lb-0-77-keras-gru-with-filter-banks-features                                                                              
"""

# Build the UrbanSound8K training tensors: one mean-normalised (199, 26) log
# filter-bank matrix per usable clip, plus the clip's class label taken from
# the `urban` metadata table.
new_sample_rate = 16000  # not used in this cell; kept for compatibility
G = []                   # not used in this cell; kept for compatibility

# File name -> class label, built once instead of scanning the whole frame for
# every file. If a file name were duplicated, the first row wins.
_urban_unique = urban.drop_duplicates('slice_file_name')
class_by_file = dict(zip(_urban_unique['slice_file_name'], _urban_unique['class']))

y_train = []
# Size the buffer from the files actually found rather than a hard-coded count;
# unused trailing rows are trimmed after labelling (x_train[:len(y_train)]).
x_train = np.zeros((len(matches), 199, 26), np.float32)
ix = 0
for match in tqdm(matches):
    s_ = read(match)
    if s_ is None:  # reader returned nothing for this file: skip it
        continue
    sample_rate = s_[0]
    samples = s_[1]
    samples = pad_audio(samples)
    n_samples = chop_audio(samples) if len(samples) > 16000 else [samples]

    # Keep the first chunk whose features have the expected shape.
    features = None
    for chunk in n_samples:
        filter_banks = logfbank(chunk)
        filter_banks -= (np.mean(filter_banks, axis=0) + 1e-8)
        if filter_banks.shape == (199, 26):
            features = filter_banks
            break
    if features is None:
        continue

    # os.path.basename handles whatever separator os.path.join produced; a
    # '/'-only regex fails on other platforms and leaves the name undefined.
    # A file missing from the metadata raises KeyError here.
    label = class_by_file[os.path.basename(match)]
    x_train[ix, :, :] = features
    y_train.append(label)
    ix += 1

In [None]:
# Add a trailing channel axis to the features.
x_train = x_train[..., np.newaxis]

# Encode the string labels with the project helper and convert to an array.
y_train = np.array(label_transform_audio(np.array(y_train)))

# Drop the unused rows at the end of the pre-allocated feature buffer.
x_train = x_train[:len(y_train)]

In [None]:
# Random train/validation split over sample indices.
# np.random.permutation replaces random.shuffle(range(...)): on Python 3 a
# `range` is immutable, so shuffling it raises TypeError, and the `random`
# module is not seeded in the visible cells whereas NumPy is, which makes
# this split reproducible.
index = np.random.permutation(len(x_train))
num_train = 7000  # first 7000 shuffled samples train; the remainder validate
train = index[:num_train]
val = index[num_train:]

### Train model on UrbanSound8K

In [None]:
# 10-class DenseNet for the (199, 26, 1) UrbanSound8K features, wrapped for
# data-parallel training on 2 GPUs.
model1 = DenseNet(classes=10, input_shape=(199, 26, 1))
parallel_model = multi_gpu_model(model1, gpus=2)

In [None]:
# Stage 1 on UrbanSound8K: 70 epochs at the higher learning rate.
train_on_model(
    parallel_model,
    epochs_=70,
    lr_=0.001,
    decay_=1e-7,
    train_x=x_train[train],
    train_y=y_train[train],
    val_x=x_train[val],
    val_y=y_train[val],
)

In [None]:
# Stage 2 on UrbanSound8K: 30 more epochs with a 10x smaller learning rate.
train_on_model(
    parallel_model,
    epochs_=30,
    lr_=0.0001,
    decay_=1e-8,
    train_x=x_train[train],
    train_y=y_train[train],
    val_x=x_train[val],
    val_y=y_train[val],
)

### Load Speech Commands dataset

In [None]:
# Directory layout for the speech stage: everything lives under the current
# directory, with audio in input/train/audio and input/test/audio.
root_path = '.'
out_path = '.'
model_path = '.'
train_data_path = os.path.join(root_path, 'input', 'train', 'audio')
test_data_path = os.path.join(root_path, 'input', 'test', 'audio')

In [None]:
"""                                                                                                                                                       
Contributions from:                                                                                                                                       
Luis Andre Dutra e Silva                                                                                                                                  
https://www.kaggle.com/mindcool/lb-0-77-keras-gru-with-filter-banks-features                                                                              
"""

# Build the speech training tensors: one mean-normalised (99, 26) log
# filter-bank matrix per file, its label, and a group id per file
# (the part of the file name before the first underscore).
fpaths = glob.glob(os.path.join(train_data_path, r'*/*' + "wav"))
labels, fnames = list_wavs_fname(fpaths)
# Materialise so the lengths are known for buffer sizing and the progress bar.
labels, fnames = list(labels), list(fnames)
new_sample_rate = 16000  # not used in this cell; kept for compatibility
y_train = []
G = []
# Size the buffer from the files actually listed instead of a hard-coded count:
# a mismatch would either overflow the buffer or leave all-zero rows that have
# no corresponding label.
x_train = np.zeros((len(fnames), 99, 26), np.float32)
ix = 0
for label, fname in tqdm(zip(labels, fnames), total=min(len(labels), len(fnames))):
    sample_rate, samples = wavfile.read(os.path.join(train_data_path, label, fname))
    samples = pad_audio(samples)
    n_samples = chop_audio(samples) if len(samples) > 16000 else [samples]
    # NOTE(review): every chunk is written to the same row, so only the last
    # chunk of a long file is kept - behaviour preserved; confirm it is intended.
    for chunk in n_samples:
        filter_banks = logfbank(chunk)
        filter_banks -= (np.mean(filter_banks, axis=0) + 1e-8)
        x_train[ix, :, :] = filter_banks
    y_train.append(label)
    group = fname.split('_')[0]
    G.append(group)
    ix += 1

# Keep exactly one feature row per collected label.
x_train = x_train[:ix]

In [None]:
# Add a trailing channel axis to the features.
x_train = x_train[..., np.newaxis]

# Encode the labels with the project helper; its result exposes `.columns`,
# which is saved as the index -> label lookup used when decoding predictions.
y_train = label_transform_speech(np.array(y_train))
label_index = y_train.columns.values
y_train = np.array(y_train)

In [None]:
# Random train/validation split over sample indices.
# np.random.permutation replaces random.shuffle(range(...)): on Python 3 a
# `range` is immutable, so shuffling it raises TypeError, and the `random`
# module is not seeded in the visible cells whereas NumPy is, which makes
# this split reproducible.
index = np.random.permutation(len(x_train))
num_train = 58000  # first 58000 shuffled samples train; the remainder validate
train = index[:num_train]
val = index[num_train:]

### Load pretrained model parameters

In [None]:
# 12-class DenseNet for the (99, 26, 1) speech features.
new_model = DenseNet(classes=12, input_shape=(99, 26, 1))

In [None]:
# Number of layers in the new model (displayed as the cell output).
len(new_model.layers)

In [None]:
# Copy the pretrained weights from `model1` into `new_model`, layer by layer,
# skipping layer 0. Layers are paired by position. A layer is copied only when
# every weight array has the same shape in both models: the two networks are
# built with a different number of classes, so at least the classification
# head cannot match, and set_weights raises on mismatched shapes. Skipped
# layers keep their fresh initialisation.
for src_layer, dst_layer in tqdm(list(zip(model1.layers, new_model.layers))[1:]):
    src_weights = src_layer.get_weights()  # fetched once per layer
    if not src_weights:
        continue  # layer has no parameters
    dst_weights = dst_layer.get_weights()
    same_shapes = len(src_weights) == len(dst_weights) and all(
        s.shape == d.shape for s, d in zip(src_weights, dst_weights)
    )
    if same_shapes:
        dst_layer.set_weights(src_weights)

In [None]:
# Wrap the new model for data-parallel training on 4 GPUs and compile it.
parallel_model = multi_gpu_model(new_model, gpus=4)
# The optimizer has to be created here: the `sgd` built inside train_on_model is
# local to that function, so no such name exists at notebook scope. The settings
# mirror train_on_model's SGD configuration; train_on_model recompiles the model
# with its own optimizer whenever it is called.
sgd = optimizers.SGD(lr=0.002, decay=1e-7, momentum=0.9, nesterov=True)
parallel_model.compile(optimizer=sgd,loss='binary_crossentropy',metrics=['categorical_accuracy'])

### Train new model

In [None]:
# Stage 1 on the speech data: 70 epochs at the higher learning rate.
train_on_model(
    parallel_model,
    epochs_=70,
    lr_=0.002,
    decay_=1e-7,
    train_x=x_train[train],
    train_y=y_train[train],
    val_x=x_train[val],
    val_y=y_train[val],
)

In [None]:
# Stage 2 on the speech data: 30 more epochs with a 10x smaller learning rate.
train_on_model(
    parallel_model,
    epochs_=30,
    lr_=0.0002,
    decay_=1e-8,
    train_x=x_train[train],
    train_y=y_train[train],
    val_x=x_train[val],
    val_y=y_train[val],
)

### Kaggle Test

In [None]:
# Run the fine-tuned speech model over the test set in batches and write the
# predicted label for each file to submit.csv.
gc.collect()

index = []    # test file names, in prediction order
results = []  # predicted label per file
probs = []    # raw per-class scores per file
for fnames, imgs in tqdm(test_data_generator(batch=32)):
    # Add the trailing channel axis the network input expects.
    imgs = np.expand_dims(imgs, axis=-1)
    # Predict with the 12-class speech model. `model1` is the 10-class
    # UrbanSound8K network with a different input size, and `label_index`
    # holds the speech labels, so it cannot be used here.
    predicts = new_model.predict(imgs)
    probs.extend(predicts)
    predicts = [label_index[p] for p in np.argmax(predicts, axis=1)]
    index.extend(fnames)
    results.extend(predicts)

df = pd.DataFrame({'fname': index, 'label': results}, columns=['fname', 'label'])
df.to_csv(os.path.join(out_path, 'submit.csv'), index=False)