First, set up the code needed to work with a sound waveform.

In [169]:
%pylab inline
import IPython.display as ipd
import librosa
from librosa import load, display
import glob
from tqdm._tqdm_notebook import tqdm_notebook as tqdm
tqdm.pandas()
import pickle
from common import save_as_pickle
# import matplotlib.pyplot as plt

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [170]:
import pandas as pd
import numpy as np

In [171]:
# Configuration: change these paths to match where the files live on your system.
# All of these are joined to file names by plain string concatenation in later
# cells, so the directory values must keep their trailing "/".
# CSV files listing the sample IDs (and, for train, the class labels).
PATH_TO_TRAIN_LABELS = "data/train/train.csv"
PATH_TO_TEST_LABELS = "data/test/test.csv"
# Directories holding "<ID>.wav" for each row of the CSV files above.
PATH_TO_TRAIN_AUDIO_FILES = "data/train/wav/"
PATH_TO_TEST_AUDIO_FILES = "data/test/wav/"
# Output directories for the submission CSV and the pickled feature tables.
PATH_TO_SUBMISSION = "submission/"
PATH_TO_PICKLE = "pickles/"
# Used as the file-name stem for both the pickles and the submission CSV.
SUBMISSION_TITLE = "nn chroma"

In [172]:
# It is easier to deal with a csv file once it is loaded into a structure you can work with.
# pandas is the most convenient way to do that and ships with
# built-in functionality for reading csv files.

# pandas assumes that the first row of the file is the header and not actual values.
# This behavior can be overridden by passing header=None as a parameter.
train = pd.read_csv(PATH_TO_TRAIN_LABELS)
test = pd.read_csv(PATH_TO_TEST_LABELS)

In [173]:
# You can reactivate this cell to make sure your model is working correctly in terms of dimensions.
#train = train[:2]
#test = test[:2]

In [174]:
# Bookkeeping for audio files that fail to parse. The parser functions update
# these module-level names through `global`: a running count of failures and
# the IDs of the affected samples, kept separately for train and test.
train_error_count, test_error_count = 0, 0
train_error_labels, test_error_labels = [], []

In [175]:
# To start with classification, we first need to convert the wav sound files into a
# numeric format we can work with. Each clip is summarised by its chroma features:
# the chromagram of the STFT magnitude, averaged over all time frames.
def train_parser(row):
    """Extract the time-averaged chroma vector and the label for one training row.

    Parameters
    ----------
    row
        One row of the training label table. ``row.ID`` is used to build the
        wav file name and ``row.Class`` is returned as the label.

    Returns
    -------
    list
        ``[features, label]``. ``features`` is the chromagram averaged over
        time frames. If the file cannot be loaded or processed, ``features``
        is an all-zero vector of length 12 instead, so that every row still
        yields a vector of the size the fallback and the network input assume.

    Side effects
    ------------
    On failure the exception is printed, ``train_error_count`` is incremented
    and ``row.ID`` is appended to ``train_error_labels``.
    """
    global train_error_count
    global train_error_labels
    file_path = PATH_TO_TRAIN_AUDIO_FILES + str(row.ID) + ".wav"
    try:
        data, sampling_rate = librosa.load(file_path)
        # Magnitude spectrogram; chroma_stft is computed from it via S=.
        stft = np.abs(librosa.stft(data))
        # Transpose to (frames, chroma bins) and average over frames.
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sampling_rate).T, axis=0)
    except Exception as ex:
        # Best effort: record the failure and keep going with a placeholder so
        # one unreadable file does not abort the whole feature extraction.
        print(ex)
        train_error_count += 1
        train_error_labels.append(row.ID)
        # Same [features, label] list shape as the success path (the original
        # returned a tuple here), with a float vector like the real features.
        return [np.zeros(12), row.Class]
    return [chroma, row.Class]

In [176]:
# To create the training feature table, apply the parser to each training row
# (progress_apply is the tqdm-enabled variant of DataFrame.apply registered by tqdm.pandas()).
train_features = train.progress_apply(train_parser,axis=1)
# Report how many files fell back to the placeholder features, and which ones.
print("%d samples had errors while parsing" % train_error_count)
print("Errorneous samples", train_error_labels)
# Cache the extracted features on disk.
# NOTE(review): save_as_pickle comes from the project-local `common` module; presumably it pickles `data` to `pickle_file` — confirm.
save_as_pickle(data=train_features,pickle_file=PATH_TO_PICKLE + SUBMISSION_TITLE + " train.pickle" )

HBox(children=(IntProgress(value=0, max=5435), HTML(value='')))




0 samples had errors while parsing
Errorneous samples []


In [177]:
# Renaming the columns to signify what they mean helps with documentation,
# and also helps you keep track of them later on.
# The parser returns [features, label], so the two columns are named in that order.
train_features.columns = ['feature','label']
# train_features.head()

In [178]:
# this library helps us convert string labels into easy to handle encoded labels.
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical

In [179]:
# Collect the per-sample feature vectors and the raw labels as numpy arrays.
X = np.array(train_features["feature"].tolist())
Y = np.array(train_features["label"].tolist())
# Labels are categories, so they have no inherent order among themselves
# (apples > oranges is meaningless). To model such categorical variables we
# first encode each label as an integer and then expand it to a one-hot vector.
# The fitted encoder is kept in `lb` so predictions can be mapped back later.
lb = LabelEncoder()
Y = to_categorical(lb.fit_transform(Y))

In [180]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout

In [181]:
# Width of the output layer: one unit per column of the one-hot label matrix Y.
number_of_labels = Y.shape[1]
# NOTE(review): filter_size is not referenced by any other cell visible here — confirm whether it is still needed.
filter_size = 2

In [182]:
# Start from an empty sequential model; the following cells append its layers.
# Re-running those cells without re-running this one appends duplicate layers.
model = Sequential()

In [183]:
# First hidden layer: 256 units fed by the 12-value feature vector,
# followed by ReLU and 50% dropout.
model.add(Dense(256, input_shape=(12,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))

In [184]:
# Second hidden layer: another 256 units with ReLU and 50% dropout.
model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dropout(0.5))

In [185]:
# Output layer: one unit per label, with softmax so the outputs form a
# probability distribution over the classes.
model.add(Dense(number_of_labels))
model.add(Activation('softmax'))

In [186]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 256)               3328      
_________________________________________________________________
activation_13 (Activation)   (None, 256)               0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_14 (Dense)             (None, 256)               65792     
_________________________________________________________________
activation_14 (Activation)   (None, 256)               0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 256)               0         
_________________________________________________________________
dense_15 (Dense)             (None, 10)                2570      
__________

In [187]:
# Categorical cross-entropy matches the one-hot labels and softmax output;
# accuracy is tracked as an additional metric during training.
model.compile(loss='categorical_crossentropy', metrics = ['accuracy'], optimizer='adam')


In [188]:
# Train on the full training set for 50 epochs in mini-batches of 32.
# No validation data is passed here, so the reported accuracy is training accuracy only.
model.fit(X,Y, batch_size=32, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50


Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50


Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50


Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50


Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50


Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50


Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x18b1cd08f28>

In [189]:
def test_parser(row):
    """Extract the time-averaged chroma vector for one test row.

    Parameters
    ----------
    row
        One row of the test table. ``row.ID`` is used to build the wav file name.

    Returns
    -------
    pandas.Series
        The chromagram averaged over time frames, one value per chroma bin.
        If the file cannot be loaded or processed, an all-zero Series of
        length 12 is returned instead so the row still has the expected width.

    Side effects
    ------------
    On failure the exception is printed, ``test_error_count`` is incremented
    and ``row.ID`` is appended to ``test_error_labels``.
    """
    global test_error_count
    global test_error_labels
    file_path = PATH_TO_TEST_AUDIO_FILES + str(row.ID) + ".wav"
    try:
        data, sampling_rate = librosa.load(file_path)
        # Magnitude spectrogram; chroma_stft is computed from it via S=.
        stft = np.abs(librosa.stft(data))
        # Transpose to (frames, chroma bins) and average over frames.
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sampling_rate).T, axis=0)
    except Exception as ex:
        # Report the failure (as train_parser does) instead of swallowing it
        # silently, then fall back to placeholder features.
        print(ex)
        test_error_count += 1
        test_error_labels.append(row.ID)
        return pd.Series(np.zeros(12))
    return pd.Series(chroma)

In [190]:
# Apply the parser to every test row; each returned Series becomes one row of
# the resulting feature table. The former `reduce=True` argument is omitted:
# it only influenced the result for an empty frame and was later removed from
# DataFrame.apply, where it would be forwarded to test_parser as an
# unexpected keyword.
test_features = test.progress_apply(test_parser, axis=1)
# Report how many files fell back to the placeholder features, and which ones.
print("%d samples had errors while parsing" % test_error_count)
print("Errorneous samples", test_error_labels)
# Cache the *test* features (previously train_features was written to the
# test pickle by mistake).
save_as_pickle(data=test_features, pickle_file=PATH_TO_PICKLE + SUBMISSION_TITLE + " test.pickle")

HBox(children=(IntProgress(value=0, max=3297), HTML(value='')))




0 samples had errors while parsing
Errorneous samples []


In [191]:
# The test feature table is used directly as the model input (one row per test sample).
X_test = test_features
# X_test

In [192]:
# Per-class scores from the softmax output for every test sample.
test_labels = model.predict(X_test, batch_size=32)

In [193]:
# Pick the highest-scoring class per sample and map its integer code back to
# the original label string with the encoder fitted on the training labels.
test_labels_strings = lb.inverse_transform(test_labels.argmax(axis=1))
# test_labels_strings

  if diff:


In [194]:
# Attach the predicted labels to the test table (adds/overwrites the 'Class' column in place).
test['Class'] = test_labels_strings

In [195]:
# Write the submission file without the row index; `index` is a boolean
# option, so pass False explicitly rather than None.
test.to_csv(PATH_TO_SUBMISSION + SUBMISSION_TITLE + ".csv", index=False)

This approach gives 56% accuracy with the above setup.