This builds features for a given mode, either conv or time for CNN or RNN respectively, and stores the resulting pickle file. Then it builds the Keras model and stores that as well.

In [1]:
import os
from scipy.io import wavfile
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from keras.layers import Conv2D, MaxPool2D, Flatten, LSTM
from keras.layers import Dropout, Dense, TimeDistributed
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.utils.class_weight import compute_class_weight
from tqdm import tqdm
from python_speech_features import mfcc
import pickle
from keras.callbacks import ModelCheckpoint
from cfg import Config

Using TensorFlow backend.


In [2]:
df = pd.read_csv('instruments.csv')
df.set_index('fname', inplace=True)

for f in df.index:
    rate, signal = wavfile.read('clean/'+f)
    df.at[f, 'length'] = signal.shape[0]/rate

classes = list(np.unique(df.label))
class_dist = df.groupby(['label'])['length'].mean()

n_samples = 2 * int(df['length'].sum()/0.1)
prob_dist = class_dist/class_dist.sum()
choices = np.random.choice(class_dist.index, p=prob_dist)

#fig, ax = plt.subplots()
#ax.set_title('Class Distribution', y=1.08)
#ax.pie(class_dist, labels=class_dist.index, autopct='%1.1f%%',
#       shadow=False, startangle=90)
#ax.axis('equal')
#plt.show()

In [3]:
config = Config(mode='time')

In [4]:
def check_data():
    if os.path.isfile(config.p_path):
        print('loading existing data for {} model'.format(config.mode))
        with open(config.p_path, 'rb') as handle:
            tmp = pickle.load(handle)
            return tmp
    else:
        return None

In [5]:
def build_rand_feat():
    tmp = check_data()
    if tmp:
        return tmp.data[0], tmp.data[1]
    
    X = []
    y = []
    _min, _max = float('inf'), -float('inf')
    for _ in tqdm(range(n_samples)):
        rand_class = np.random.choice(class_dist.index, p=prob_dist)
        file = np.random.choice(df[df.label==rand_class].index)
        rate, wav = wavfile.read('clean/'+file)
        label = df.at[file, 'label']
        if wav.shape[0] < config.step:
            continue
        rand_index = np.random.randint(0, wav.shape[0]-config.step)
        sample = wav[rand_index:rand_index+config.step]
        X_sample = mfcc(sample, rate, numcep=config.nfeat,
                        nfilt=config.nfilt, nfft=config.nfft)
        _min = min(np.amin(X_sample), _min)
        _max = max(np.amax(X_sample), _max)
        X.append(X_sample)
        y.append(classes.index(label))
    config.min = _min
    config.max = _max
    X, y = np.array(X), np.array(y)
    X = (X - _min) / (_max - _min)
    if config.mode == 'conv':
        X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1)
    elif config.mode == 'time':
        X = X.reshape(X.shape[0], X.shape[1], X.shape[2])
    y = to_categorical(y, num_classes=10)
    config.data = (X, y)
    
    with open(config.p_path, 'wb') as handle:
        pickle.dump(config, handle)
    return X, y

In [6]:
def get_conv_model():
    model = Sequential()
    model.add(Conv2D(16, (3, 3), activation='relu',
                     strides=(1,1), padding='same',
                     input_shape=input_shape))
    model.add(Conv2D(32, (3, 3), activation='relu',
                     strides=(1,1), padding='same'))
    model.add(Conv2D(64, (3, 3), activation='relu',
                     strides=(1,1), padding='same'))
    model.add(Conv2D(128, (3, 3), activation='relu',
                     strides=(1,1), padding='same'))
    model.add(MaxPool2D((2, 2)))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.summary()
    model.compile(loss='categorical_crossentropy', 
                  optimizer='adam', metrics=['acc'])
    return model


In [7]:
def get_recurrent_model():
    # shape of RNN is (n, time, feat)
    model = Sequential()
    model.add(LSTM(128, return_sequences=True, input_shape=input_shape))
    model.add(LSTM(128, return_sequences=True))
    model.add(Dropout(0.5))
    model.add(TimeDistributed(Dense(64, activation='relu')))
    model.add(TimeDistributed(Dense(32, activation='relu')))
    model.add(TimeDistributed(Dense(16, activation='relu')))
    model.add(TimeDistributed(Dense(8, activation='relu')))
    model.add(Flatten())
    model.add(Dense(10, activation='softmax'))
    model.summary()
    model.compile(loss='categorical_crossentropy', 
                  optimizer='adam', metrics=['acc'])
    return model

In [8]:
X, y = build_rand_feat()

100%|██████████| 21854/21854 [03:16<00:00, 111.47it/s]


In [9]:
if config.mode == 'conv':
    y_flat = np.argmax(y, axis=1)
    input_shape = (X.shape[1], X.shape[2], 1)
    model = get_conv_model()
elif config.mode == 'time':
    y_flat = np.argmax(y, axis=1)
    input_shape = (X.shape[1], X.shape[2])
    model = get_recurrent_model()

W0707 05:36:16.883210 139995566778176 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0707 05:36:16.915966 139995566778176 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0707 05:36:16.923703 139995566778176 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0707 05:36:17.551080 139995566778176 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/dist-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0707 05:36:17.560863 

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 9, 128)            72704     
_________________________________________________________________
lstm_2 (LSTM)                (None, 9, 128)            131584    
_________________________________________________________________
dropout_1 (Dropout)          (None, 9, 128)            0         
_________________________________________________________________
time_distributed_1 (TimeDist (None, 9, 64)             8256      
_________________________________________________________________
time_distributed_2 (TimeDist (None, 9, 32)             2080      
_________________________________________________________________
time_distributed_3 (TimeDist (None, 9, 16)             528       
_________________________________________________________________
time_distributed_4 (TimeDist (None, 9, 8)              136       
__________

In [10]:
class_weight = compute_class_weight('balanced', 
                                    np.unique(y_flat),
                                    y_flat)

In [11]:
checkpoint = ModelCheckpoint(config.model_path, monitor='val_acc',
                             verbose=1, mode='max', save_best_only=True,
                             save_weights_only=False, period=1)

In [12]:
model.fit(X, y, epochs=10, batch_size=32, 
          shuffle=True, validation_split=0.1,
         callbacks=[checkpoint])

W0707 05:36:18.076245 139995566778176 deprecation.py:323] From /usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 19634 samples, validate on 2182 samples
Epoch 1/10

Epoch 00001: val_acc improved from -inf to 0.53850, saving model to models/time.model
Epoch 2/10

Epoch 00002: val_acc improved from 0.53850 to 0.71357, saving model to models/time.model
Epoch 3/10

Epoch 00003: val_acc improved from 0.71357 to 0.75435, saving model to models/time.model
Epoch 4/10

Epoch 00004: val_acc improved from 0.75435 to 0.80843, saving model to models/time.model
Epoch 5/10

Epoch 00005: val_acc improved from 0.80843 to 0.83547, saving model to models/time.model
Epoch 6/10

Epoch 00006: val_acc improved from 0.83547 to 0.85564, saving model to models/time.model
Epoch 7/10

Epoch 00007: val_acc improved from 0.85564 to 0.87214, saving model to models/time.model
Epoch 8/10

Epoch 00008: val_acc improved from 0.87214 to 0.89368, saving model to models/time.model
Epoch 9/10

Epoch 00009: val_acc did not improve from 0.89368
Epoch 10/10

Epoch 00010: val_acc improved from 0.89368 to 0.90147, saving model to 

<keras.callbacks.History at 0x7f52f70f8a90>

In [13]:
model.save(config.model_path)