# In [None]:
import numpy
import librosa
import matplotlib.pyplot as plot
from sklearn.model_selection import train_test_split
import random
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.utils import to_categorical

# --- Experiment configuration ---------------------------------------------
# Decimation step: only every `downsampling_rate`-th audio sample is kept.
downsampling_rate = 10

# Share of the samples that goes to the training set; the rest is held out.
split_ratio = 0.75

# Seed for the train/test shuffle. It is drawn anew on every run, as an
# integer between 1 and 101 inclusive, so splits differ between runs.
random_state = random.randrange(1, 102)

# Number of audio files (and therefore labels) to use.
count_files = 14

# One integer class label per file. The literal is reversed before use, so
# its last entry becomes y[0].
y = numpy.array([1, 2, 1, 2, 2, 0, 0, 2, 1, 0, 0, 2, 2, 2])[::-1]

def wav2mfcc(file_path):
    """Load an audio file and compute MFCCs of its decimated waveform.

    The file is read as mono at its native sample rate, reduced by keeping
    every ``downsampling_rate``-th sample (module-level setting), and handed
    to ``librosa.feature.mfcc``. The sample rate and intermediate shapes are
    printed as a diagnostic aid.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        The array produced by ``librosa.feature.mfcc``. No padding or
        truncation is applied here, so the size of the time axis depends on
        the length of the input file.
    """
    # sr=None keeps the file's native sample rate instead of resampling.
    wave, sr = librosa.load(file_path, mono=True, sr=None)
    print('Sample rate: ' + str(sr))
    print('Sample shape: ' + str(wave.shape))

    # Plain stride decimation: no low-pass filter is applied first, so
    # content above the new Nyquist frequency aliases into the result.
    # NOTE(review): librosa.resample would avoid that, at the cost of
    # changing the feature values - confirm before switching.
    wave_downsampled = wave[::downsampling_rate]
    print('Downsample shape: ' + str(wave_downsampled.shape))

    # The decimated signal has a lower sample rate; passing the original
    # one would build the mel filterbank for the wrong frequency axis.
    effective_sr = sr / downsampling_rate

    # Keyword arguments are required by recent librosa releases and are
    # accepted by older ones as well.
    mfcc = librosa.feature.mfcc(y=wave_downsampled, sr=effective_sr)

    print('MFCC shape: ' + str(mfcc.shape))
    return mfcc

# Keep only the labels that correspond to the files being loaded.
y = y[:count_files]

# Input files are named vid1.wav ... vid<count_files>.wav and are looked up
# relative to the current working directory.
mfcc_vectors = [
    wav2mfcc('vid' + str(i) + '.wav') for i in range(1, count_files + 1)
]

# Recordings of different duration give MFCC matrices with different frame
# counts, which cannot be stacked into one array. Zero-pad the time axis
# (axis 1) of every matrix up to the longest one; this is a no-op when all
# files already have the same length.
max_frames = max((m.shape[1] for m in mfcc_vectors), default=0)
mfcc_vectors = [
    numpy.pad(m, pad_width=((0, 0), (0, max_frames - m.shape[1])), mode='constant')
    for m in mfcc_vectors
]

# Stack into a single (files, coefficients, frames) array.
X = numpy.array(mfcc_vectors)

# Explicit check instead of `assert`, which is stripped under `python -O`.
if X.shape[0] != y.shape[0]:
    raise ValueError(
        'Number of feature matrices (' + str(X.shape[0])
        + ') does not match number of labels (' + str(y.shape[0]) + ')'
    )

# Shuffle and hold out (1 - split_ratio) of the samples for validation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=(1 - split_ratio), random_state=random_state, shuffle=True
)

# Add a trailing axis of size 1; it is used as the channel dimension of the
# Conv2D input shape.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

# Fix the one-hot width explicitly. Without num_classes, to_categorical
# infers it from the largest label present, so a subset that happens to
# contain no class-2 sample would yield 2 columns and no longer match the
# 3-unit softmax output of the model.
num_classes = 3
y_train_hot = to_categorical(y_train, num_classes=num_classes)
y_test_hot = to_categorical(y_test, num_classes=num_classes)

# Small CNN classifier: one convolution + pooling stage followed by a dense
# head that ends in a 3-way softmax.
#
# NOTE(review): Keras' `rate` is the fraction of units that is DROPPED, so
# 1 - 0.25 drops 75% of the activations. If 0.25 was meant as the drop
# fraction this is inverted - confirm the intended value.
model = Sequential([
    Conv2D(
        32,
        kernel_size=(2, 2),
        activation='relu',
        # (rows, cols, channels) of a single training sample.
        input_shape=X_train.shape[1:],
    ),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=1 - 0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(rate=1 - 0.25),
    Dense(3, activation='softmax'),
])

model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adadelta(),
    metrics=['accuracy'],
)

# Train for 100 epochs; the held-out split is evaluated after every epoch.
model.fit(
    X_train,
    y_train_hot,
    batch_size=10,
    epochs=100,
    verbose=1,
    validation_data=(X_test, y_test_hot),
)