First, set up the code needed to visualize a sound waveform.

In [121]:
%pylab inline
import glob
import os
import warnings

import IPython.display as ipd
import librosa
from librosa import load, display
# import matplotlib.pyplot as plt

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [122]:
import pandas as pd
import numpy as np

In [123]:
# Locations of the inputs and outputs. Adjust them to wherever the files live on your system.

# Training split: label table and the directory holding the audio clips.
PATH_TO_TRAIN_LABELS = "data/train/train.csv"
PATH_TO_TRAIN_AUDIO_FILES = "data/train/wav/"

# Test split: table of samples to predict and the directory holding the audio clips.
PATH_TO_TEST_LABELS = "data/test/test.csv"
PATH_TO_TEST_AUDIO_FILES = "data/test/wav/"

# Directory the prediction file is written to.
PATH_TO_SUBMISSION = "submission/"

In [124]:
# Load both label tables into pandas DataFrames so they can be filtered and iterated over
# easily; pandas ships with built-in CSV parsing.
#
# pandas assumes that the first row of the file is the header and not actual values.
# This behavior can be overridden by passing header=None as a parameter.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (PATH_TO_TRAIN_LABELS, PATH_TO_TEST_LABELS)
)

In [125]:
# Uncomment the two lines below to run on a small subset and quickly check that the model's dimensions line up.
# train = train[:100]
# test = test[:100]

In [126]:
# Bookkeeping for clips that fail to parse: one counter and one list of sample IDs per split.
# The parser functions update these as module-level state.
train_error_count = test_error_count = 0
train_error_labels, test_error_labels = [], []

In [127]:
# To start with classification, we first need to convert the wav sound files into a format we
# can work with. Each clip is summarised by its MFCCs (as computed by librosa.feature.mfcc)
# averaged into a single fixed-length numeric vector, which becomes the feature vector.
def train_parser(row, n_mfcc=40):
    """Convert one row of the training label table into ``[features, label]``.

    Parameters
    ----------
    row : object exposing ``ID`` and ``Class`` attributes
        ``ID`` names the clip (``<ID>.wav`` inside ``PATH_TO_TRAIN_AUDIO_FILES``) and
        ``Class`` is its label.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute, i.e. the length of the feature vector.
        Defaults to 40, the value this notebook has always used.

    Returns
    -------
    list
        ``[features, label]``. ``features`` is the mean MFCC vector, or a list of
        ``n_mfcc`` zeros when the clip could not be loaded or processed.

    Side effects
    ------------
    On failure, increments ``train_error_count`` and appends ``row.ID`` to
    ``train_error_labels``. The first failure also emits a warning carrying the underlying
    exception, so a systemic cause is not hidden behind a bare count.
    """
    global train_error_count  # rebound below; train_error_labels is only mutated in place
    file_path = os.path.join(PATH_TO_TRAIN_AUDIO_FILES, str(row.ID) + ".wav")
    try:
        data, sampling_rate = librosa.load(file_path, res_type='kaiser_fast')
        features = np.mean(
            librosa.feature.mfcc(y=data, sr=sampling_rate, n_mfcc=n_mfcc).T, axis=0
        )
    except Exception as ex:
        # Best effort: keep going so one unreadable clip does not abort the whole run.
        train_error_count += 1
        train_error_labels.append(row.ID)
        if train_error_count == 1:
            warnings.warn(
                "Could not extract features from %s (%r); this and any further failing "
                "clips are replaced by all-zero feature vectors." % (file_path, ex),
                RuntimeWarning,
            )
        # NOTE: the zero vector keeps the sample (and its label) in the training set.
        # Same container type as the success path so callers see a uniform shape.
        return [[0] * n_mfcc, row.Class]
    return [features, row.Class]

In [128]:
# To create the training feature matrix, run the parser on every training sample.
# The rows are collected into an explicit two-column DataFrame rather than relying on
# train.apply(train_parser, axis=1) to expand the returned lists into columns: on
# pandas 0.23 and later such an apply returns a Series of lists, which has no
# 'feature' / 'label' columns for the following cells to select.
train_features = pd.DataFrame(
    [list(train_parser(row)) for _, row in train.iterrows()],
    columns=['feature', 'label'],
    index=train.index,
)
print("%d samples had errors while parsing" % train_error_count)
print("Errorneous samples", train_error_labels)

1765 samples had errors while parsing
Errorneous samples [6, 17, 18, 19, 32, 33, 35, 42, 49, 61, 66, 72, 79, 80, 82, 87, 91, 99, 101, 105, 113, 114, 117, 134, 135, 142, 149, 152, 162, 165, 167, 168, 182, 184, 186, 196, 212, 217, 223, 229, 235, 236, 242, 251, 258, 268, 288, 295, 301, 305, 310, 311, 313, 324, 326, 332, 333, 334, 335, 337, 343, 356, 358, 361, 365, 366, 368, 378, 383, 395, 400, 407, 410, 411, 425, 426, 438, 445, 447, 452, 453, 454, 462, 473, 474, 475, 477, 480, 484, 486, 500, 501, 512, 513, 519, 524, 525, 528, 537, 538, 542, 543, 544, 555, 556, 557, 578, 585, 594, 604, 607, 611, 612, 614, 615, 620, 621, 622, 623, 630, 631, 633, 639, 642, 655, 656, 662, 663, 664, 688, 691, 692, 697, 699, 702, 704, 705, 719, 720, 724, 727, 728, 735, 738, 746, 749, 750, 751, 752, 753, 754, 766, 770, 776, 796, 798, 799, 801, 807, 809, 811, 820, 825, 826, 838, 853, 854, 858, 859, 865, 867, 869, 870, 876, 883, 894, 896, 912, 913, 916, 919, 922, 923, 925, 929, 933, 952, 955, 956, 972, 991, 1003, 

In [129]:
# Renaming the columns to signify what they mean helps with documentation,
# and also helps you keep track of them later on: the first column holds the
# feature vector and the second holds the class label.
train_features.columns = ['feature','label']
# train_features.head()

In [130]:
# this library helps us convert string labels into easy to handle encoded labels.
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical

In [131]:
# Stack the per-sample feature vectors into a single array.
X = np.array(train_features["feature"].tolist())

# Since labels are categories they don't inherently have an order amongst themselves.
# For example, apples > oranges does not make any sense. So to model such categorical
# variables, the labels are first encoded as integers by the LabelEncoder and then
# converted to one-hot vectors. `lb` is kept so predictions can be mapped back to labels.
lb = LabelEncoder()
Y = to_categorical(lb.fit_transform(np.array(train_features["label"].tolist())))

In [132]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout

In [133]:
# number_of_labels: width of the one-hot label matrix, used as the size of the output layer.
# filter_size: not referenced by any other cell visible in this notebook.
number_of_labels, filter_size = Y.shape[1], 2

In [134]:
# Start from an empty Sequential model; the cells below append layers to it with model.add.
# Re-run this cell before re-running those cells, otherwise the layers are appended again.
model = Sequential()

In [135]:
# First hidden block: 256 units, ReLU, then dropout with rate 0.5.
# The input width is read from the feature matrix instead of repeating the parser's
# coefficient count here, so the two cannot drift apart.
model.add(Dense(256, input_shape=(X.shape[1],)))
model.add(Activation('relu'))
model.add(Dropout(0.5))

In [136]:
# Second hidden block: another 256 units, ReLU, then dropout with rate 0.5.
for _layer in (Dense(256), Activation('relu'), Dropout(0.5)):
    model.add(_layer)

In [137]:
# Output block: one unit per class, with softmax turning the scores into a distribution.
for _layer in (Dense(number_of_labels), Activation('softmax')):
    model.add(_layer)

In [138]:
# Print the layer-by-layer summary (output shapes and parameter counts) as a sanity check.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 256)               10496     
_________________________________________________________________
activation_13 (Activation)   (None, 256)               0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_14 (Dense)             (None, 256)               65792     
_________________________________________________________________
activation_14 (Activation)   (None, 256)               0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 256)               0         
_________________________________________________________________
dense_15 (Dense)             (None, 10)                2570      
__________

In [139]:
# Categorical cross-entropy pairs with the one-hot labels in Y and the softmax output layer.
# Accuracy is tracked as the reported metric; Adam is used with its default settings.
model.compile(loss='categorical_crossentropy', metrics = ['accuracy'], optimizer='adam')


In [140]:
# Train for 5 epochs in mini-batches of 32 samples. No validation data is passed here,
# so any accuracy reported during fitting is measured on the training samples only.
model.fit(X,Y, batch_size=32, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x245bfe0bfd0>

In [141]:
def test_parser(row, n_mfcc=40):
    """Convert one row of the test table into a feature vector.

    Parameters
    ----------
    row : object exposing an ``ID`` attribute
        ``ID`` names the clip (``<ID>.wav`` inside ``PATH_TO_TEST_AUDIO_FILES``).
    n_mfcc : int, optional
        Number of MFCC coefficients to compute, i.e. the length of the feature vector.
        Defaults to 40, the value this notebook has always used; it must match the
        value used when extracting the training features.

    Returns
    -------
    numpy.ndarray or list
        The mean MFCC vector, or a list of ``n_mfcc`` zeros when the clip could not be
        loaded or processed.

    Side effects
    ------------
    On failure, increments ``test_error_count`` and appends ``row.ID`` to
    ``test_error_labels``. The first failure also emits a warning carrying the underlying
    exception, so a systemic cause is not hidden behind a bare count.
    """
    global test_error_count  # rebound below; test_error_labels is only mutated in place
    file_path = os.path.join(PATH_TO_TEST_AUDIO_FILES, str(row.ID) + ".wav")
    try:
        data, sampling_rate = librosa.load(file_path, res_type='kaiser_fast')
        features = np.mean(
            librosa.feature.mfcc(y=data, sr=sampling_rate, n_mfcc=n_mfcc).T, axis=0
        )
    except Exception as ex:
        # Best effort: keep going so one unreadable clip does not abort the whole run.
        test_error_count += 1
        test_error_labels.append(row.ID)
        if test_error_count == 1:
            warnings.warn(
                "Could not extract features from %s (%r); this and any further failing "
                "clips are replaced by all-zero feature vectors." % (file_path, ex),
                RuntimeWarning,
            )
        # NOTE: a prediction will still be produced for this sample, from the zero vector.
        return [0] * n_mfcc
    return features

In [142]:
# Extract one feature vector per test sample. Clips that fail to parse come back as
# all-zero vectors and are tallied in test_error_count / test_error_labels.
test_features = test.apply(test_parser,axis=1)
print("%d samples had errors while parsing" % test_error_count)
print("Errorneous samples", test_error_labels)

1166 samples had errors while parsing
Errorneous samples [5, 14, 21, 23, 28, 31, 34, 39, 58, 69, 73, 85, 94, 106, 107, 110, 111, 122, 124, 133, 144, 145, 147, 150, 151, 156, 158, 166, 194, 195, 198, 207, 210, 220, 225, 231, 241, 256, 262, 265, 273, 279, 281, 283, 294, 297, 320, 325, 340, 341, 362, 384, 389, 390, 393, 398, 405, 415, 424, 433, 439, 448, 460, 465, 466, 476, 478, 481, 487, 494, 499, 502, 505, 510, 514, 517, 526, 529, 531, 536, 546, 550, 553, 563, 577, 581, 582, 591, 601, 616, 617, 632, 637, 638, 640, 650, 672, 674, 675, 680, 707, 708, 715, 725, 756, 759, 763, 767, 769, 775, 777, 779, 788, 789, 795, 800, 806, 808, 818, 822, 836, 837, 873, 895, 924, 936, 939, 940, 944, 947, 949, 962, 969, 984, 994, 997, 999, 1002, 1005, 1012, 1016, 1033, 1037, 1041, 1046, 1057, 1060, 1075, 1094, 1097, 1099, 1101, 1111, 1114, 1122, 1125, 1127, 1128, 1130, 1132, 1135, 1153, 1159, 1161, 1164, 1165, 1166, 1167, 1169, 1170, 1198, 1199, 1201, 1202, 1204, 1207, 1208, 1215, 1223, 1225, 1229, 1234, 1

In [143]:
# Stack the per-sample feature vectors into a single array to feed to the model.
X_test = np.array(test_features.tolist())
# X_test

In [144]:
# Despite the name, this holds the output of the final softmax layer (one score per
# class for every sample), not labels; the next cell reduces it to a class per sample.
test_labels = model.predict(X_test, batch_size=32)

In [145]:
# Pick the highest-scoring class index for each sample and map it back to the original
# label value with the same LabelEncoder that was fitted on the training labels.
test_labels_strings = lb.inverse_transform(test_labels.argmax(axis=1))
# test_labels_strings

  if diff:


In [146]:
# Attach the predicted labels to the test table as a 'Class' column (modifies `test` in place).
test['Class'] = test_labels_strings

In [147]:
# Write the predictions out. The output directory is created first so a fresh checkout
# does not fail at the very last step, and the path is joined rather than concatenated so
# PATH_TO_SUBMISSION works with or without a trailing separator.
if PATH_TO_SUBMISSION:
    os.makedirs(PATH_TO_SUBMISSION, exist_ok=True)
# index=False is the documented way to leave the row index out of the file.
test.to_csv(os.path.join(PATH_TO_SUBMISSION, "sub_nn_1.csv"), index=False)

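This approach gives 35% accuracy with the above setup. Note that 1765 training clips and 1166 test clips failed to parse in this run and were replaced by all-zero feature vectors, which affects this figure; resolving those parsing errors should come before tuning the model.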