In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import IPython.display
import librosa.display
import numpy as np
import librosa
import tensorflow as tf
import glob

In [2]:
from sklearn.model_selection import KFold
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import itertools as it

In [3]:
# Shared configuration read by the graph-building cells below.
# The network input is declared as (n_mfcc, n_frame, n_channels) per example.
n_channels = 1   # last axis of the input tensor
n_frame = 16     # third axis of the input tensor
n_mfcc = 16      # second axis of the input tensor
n_classes = 3    # width of the label placeholder Y
learning_rate = 2e-4  # step size handed to the Adam optimizer; original note: 0.005

In [4]:
def run_train(session, train_x, train_y, epochs=100):
    """Re-initialise the model and fit it on ``(train_x, train_y)`` in mini-batches.

    Depends on notebook-level names that must be defined before the call:
    ``init``, ``optimizer``, ``cost``, the feed targets ``X`` / ``Y`` and
    ``batch_size``.

    Args:
        session: object whose ``run`` executes graph ops (a ``tf.Session`` in
            this notebook).
        train_x: input array, sliced along axis 0 into batches; must expose
            ``.shape``.
        train_y: labels, sliced with the same row ranges as ``train_x``.
        epochs: number of full passes over the data. Defaults to 100, the
            value that used to be hard-coded.

    Returns:
        None. The cost of every 100th step is printed.
    """
    print ("\nStart training")
    # Running `init` resets all variables, so every call trains from scratch.
    session.run(init)
    n_samples = train_x.shape[0]
    # Ceiling division: floor division silently skipped the trailing partial
    # batch and performed no training at all when n_samples < batch_size.
    total_batch = (n_samples + batch_size - 1) // batch_size
    for epoch in range(epochs):
        for i in range(total_batch):
            start = i * batch_size
            stop = start + batch_size
            batch_x = train_x[start:stop]
            batch_y = train_y[start:stop]
            _, c = session.run([optimizer, cost], feed_dict={X: batch_x, Y: batch_y})
            if i % 100 == 0:
                print ("Epoch #%d step=%d cost=%f" % (epoch, i, c))

In [5]:
def cross_validate(session, split_size=2):
    """Run K-fold cross-validation over the notebook-level training arrays.

    For every fold the model is trained via ``run_train`` on the fold's
    training rows of ``X_train2`` / ``y_train`` and the ``accuracy`` op is then
    evaluated on the held-out rows.

    Args:
        session: session used both for training and for evaluating accuracy.
        split_size: number of folds passed to ``KFold`` (default 2).

    Returns:
        List with one accuracy value per fold, in fold order.
    """
    splitter = KFold(n_splits=split_size)
    fold_scores = []
    for fit_rows, holdout_rows in splitter.split(X_train2, y_train):
        run_train(session, X_train2[fit_rows], y_train[fit_rows])
        holdout_feed = {X: X_train2[holdout_rows], Y: y_train[holdout_rows]}
        fold_scores.append(session.run(accuracy, feed_dict=holdout_feed))
    return fold_scores

In [6]:
# Load the four pre-computed arrays from the working directory, in the same
# order as before: train inputs, test inputs, train labels, test labels.
X_train, X_test, y_train, y_test = [
    np.load(npy_path)
    for npy_path in (
        './X_train_chunk_3d.npy',
        './X_test_chunk_3d.npy',
        './y_train_chunk_3d.npy',
        './y_test_chunk_3d.npy',
    )
]

# Convolutional Layer

In [7]:
# Start from an empty default graph so that re-running the model cells below
# does not stack a second copy of every op and variable onto the previous one.
tf.reset_default_graph()

In [8]:
# Network input, declared directly in the 4-D layout the conv layers consume.
# The previous version created a flat [None, n_mfcc*n_frame*n_channels]
# placeholder and immediately rebound X to its reshape, so the placeholder was
# unreachable and every feed_dict was really overriding the reshape tensor.
X = tf.placeholder(tf.float32, shape=[None, n_mfcc, n_frame, n_channels])
# Labels: one row of n_classes scores per example (presumably one-hot, since
# argmax over axis 1 is used as the true class further down).
Y = tf.placeholder(tf.float32, shape=[None, n_classes])

# Each conv layer emits this many feature maps.
# NOTE(review): a single filter per layer is unusually small - confirm intent.
conv_filters = 1

conv1 = tf.layers.conv2d(inputs=X, filters=conv_filters, kernel_size=[3, 3],
                         padding="SAME", activation=tf.nn.relu)
# strides=1 with SAME padding: pooling smooths but does not downsample.
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2],
                                padding="SAME", strides=1)

conv2 = tf.layers.conv2d(inputs=pool1, filters=conv_filters, kernel_size=[3, 3],
                         padding="SAME", activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2],
                                padding="SAME", strides=1)

# Spatial size is unchanged by the stack above (SAME padding, stride 1), so the
# flattened width follows from the config instead of a hard-coded 16*16*1.
flat = tf.reshape(pool2, [-1, n_mfcc * n_frame * conv_filters])

In [9]:
# Classifier head: one hidden ReLU layer followed by a linear layer that emits
# one unnormalised score (logit) per class.
dense2 = tf.layers.dense(inputs=flat, units=625, activation=tf.nn.relu)
# Sized from n_classes so it always matches the width of the label placeholder Y
# (previously a literal 3).
logits = tf.layers.dense(inputs=dense2, units=n_classes)

# Mean softmax cross-entropy over the batch, minimised with Adam.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [10]:
# Append a trailing channel axis of size 1 so the arrays match the 4-D tensor
# that is fed through X.
X_train2 = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test2 = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

# Predictions are the trained logits themselves. The previous version stacked
# an extra fully_connected layer on top of `logits`; the loss never depended on
# it, so its weights stayed at their random initial values and every accuracy
# figure derived from Y_pred was measured through an untrained projection.
Y_pred = logits

In [11]:
# Initialiser for every variable created so far; run_train executes it at the
# start of each training run.
init = tf.global_variables_initializer()

# Accuracy: fraction of rows whose highest-scoring column in Y_pred matches
# the highest-scoring column in Y.
predicted_class = tf.argmax(Y_pred, axis=1)
expected_class = tf.argmax(Y, axis=1)
correct_prediction = tf.equal(predicted_class, expected_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))

In [12]:
# Checkpoint setup only: nothing is written here. The run cell later calls
# saver.save(session, model_path) once training has finished.
saver = tf.train.Saver()
model_path = './cnnmodel'

# Run Model

In [13]:
# Train with 2-fold cross-validation, report accuracy, checkpoint the model
# and collect test-set predictions for the metrics cell.
batch_size = 128  # read as a global by run_train
cost_history = np.empty(shape=[1], dtype=float)  # not read by any visible cell; kept as-is

with tf.Session() as session:
    result = cross_validate(session)
    print ("Cross-validation result: %s" % result)
    # NOTE(review): run_train re-runs `init` for every fold, so the weights
    # evaluated and saved below are those of the LAST fold only (trained on
    # that fold's training split, not on the full training set).
    # Evaluate on the held-out test arrays; this line previously fed
    # X_train2 / y_train while labelling the number "Test accuracy".
    print ("Test accuracy: %f" % session.run(accuracy, feed_dict={X: X_test2, Y: y_test}))
    saver.save(session, model_path)
    y_pred = session.run(tf.argmax(logits,1),feed_dict={X: X_test2})

# y_test is already a NumPy array, so no graph op or session is needed.
y_true = np.argmax(y_test, axis=1)


Start training
Epoch #0 step=0 cost=227.917984
Epoch #0 step=100 cost=5.632903
Epoch #0 step=200 cost=2.819201
Epoch #1 step=0 cost=2.729789
Epoch #1 step=100 cost=1.948253
Epoch #1 step=200 cost=3.874354
Epoch #2 step=0 cost=2.461613
Epoch #2 step=100 cost=5.849786
Epoch #2 step=200 cost=8.458321
Epoch #3 step=0 cost=1.981824
Epoch #3 step=100 cost=2.860695
Epoch #3 step=200 cost=0.937846
Epoch #4 step=0 cost=1.639018
Epoch #4 step=100 cost=2.779765
Epoch #4 step=200 cost=2.384969
Epoch #5 step=0 cost=0.958013
Epoch #5 step=100 cost=2.797732
Epoch #5 step=200 cost=2.165730
Epoch #6 step=0 cost=0.740839
Epoch #6 step=100 cost=3.700287
Epoch #6 step=200 cost=1.602695
Epoch #7 step=0 cost=0.613833
Epoch #7 step=100 cost=1.685846
Epoch #7 step=200 cost=2.001583
Epoch #8 step=0 cost=0.740262
Epoch #8 step=100 cost=1.226041
Epoch #8 step=200 cost=2.164749
Epoch #9 step=0 cost=0.605273
Epoch #9 step=100 cost=0.848731
Epoch #9 step=200 cost=2.668489
Epoch #10 step=0 cost=0.910760
Epoch #10 s

Epoch #84 step=200 cost=0.303381
Epoch #85 step=0 cost=0.280517
Epoch #85 step=100 cost=0.232883
Epoch #85 step=200 cost=0.300915
Epoch #86 step=0 cost=0.275166
Epoch #86 step=100 cost=0.225105
Epoch #86 step=200 cost=0.292218
Epoch #87 step=0 cost=0.266875
Epoch #87 step=100 cost=0.225400
Epoch #87 step=200 cost=0.290846
Epoch #88 step=0 cost=0.258868
Epoch #88 step=100 cost=0.219496
Epoch #88 step=200 cost=0.289988
Epoch #89 step=0 cost=0.253226
Epoch #89 step=100 cost=0.221725
Epoch #89 step=200 cost=0.290564
Epoch #90 step=0 cost=0.245904
Epoch #90 step=100 cost=0.224700
Epoch #90 step=200 cost=0.294036
Epoch #91 step=0 cost=0.242615
Epoch #91 step=100 cost=0.210250
Epoch #91 step=200 cost=0.288530
Epoch #92 step=0 cost=0.233524
Epoch #92 step=100 cost=0.210189
Epoch #92 step=200 cost=0.282794
Epoch #93 step=0 cost=0.228426
Epoch #93 step=100 cost=0.206696
Epoch #93 step=200 cost=0.277467
Epoch #94 step=0 cost=0.222667
Epoch #94 step=100 cost=0.201844
Epoch #94 step=200 cost=0.2782

Epoch #69 step=100 cost=0.101910
Epoch #69 step=200 cost=0.075218
Epoch #70 step=0 cost=0.114218
Epoch #70 step=100 cost=0.106841
Epoch #70 step=200 cost=0.073361
Epoch #71 step=0 cost=0.113656
Epoch #71 step=100 cost=0.095248
Epoch #71 step=200 cost=0.074326
Epoch #72 step=0 cost=0.113254
Epoch #72 step=100 cost=0.097343
Epoch #72 step=200 cost=0.069598
Epoch #73 step=0 cost=0.117598
Epoch #73 step=100 cost=0.097471
Epoch #73 step=200 cost=0.070244
Epoch #74 step=0 cost=0.110975
Epoch #74 step=100 cost=0.095260
Epoch #74 step=200 cost=0.068243
Epoch #75 step=0 cost=0.115169
Epoch #75 step=100 cost=0.094759
Epoch #75 step=200 cost=0.067882
Epoch #76 step=0 cost=0.115198
Epoch #76 step=100 cost=0.091843
Epoch #76 step=200 cost=0.064176
Epoch #77 step=0 cost=0.118123
Epoch #77 step=100 cost=0.092611
Epoch #77 step=200 cost=0.061398
Epoch #78 step=0 cost=0.111917
Epoch #78 step=100 cost=0.083065
Epoch #78 step=200 cost=0.062951
Epoch #79 step=0 cost=0.107726
Epoch #79 step=100 cost=0.0826

# Show prediction results on the test set

In [15]:
# All four metric helpers are already imported in the sklearn import cell at
# the top of the notebook, so the per-line re-imports were dropped.

# With a single label per sample, micro-averaged precision, recall and F-score
# all equal plain accuracy, so the two numbers printed below coincide.
p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
print("Accuracy: ", accuracy_score(y_true, y_pred))

# Per-class precision/recall/F1, then the confusion matrix as sklearn returns
# it (rows indexed by y_true, columns by y_pred).
print(classification_report(y_true, y_pred))
print(confusion_matrix(y_true, y_pred))

F-Score: 0.792
Accuracy:  0.792115114646701
             precision    recall  f1-score   support

          0       0.86      0.87      0.87      3141
          1       0.79      0.75      0.77      2975
          2       0.71      0.74      0.72      2432

avg / total       0.79      0.79      0.79      8548

[[2733  162  246]
 [ 241 2228  506]
 [ 204  418 1810]]
