In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import IPython.display
import librosa.display
import numpy as np
import librosa
import tensorflow as tf
import glob

In [2]:
from sklearn.model_selection import KFold
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import itertools as it

In [3]:
# Glob patterns for the three groups of recordings.
none_path = './Record/background/*.wav'
uav_path = './Record/unload/*.wav'
loaded_path = './Record/load/*.wav'

# Expand each pattern into the list of matching files.
uav_files, loaded_files, none_files = (
    glob.glob(pattern) for pattern in (uav_path, loaded_path, none_path)
)

# Sampling rate; both names are bound to the same value.
SR = sample_rate = 44100

# Spectrogram settings: window_size is used as n_fft and hop_size as
# hop_length in the melspectrogram calls further down.
window_size, hop_size = 1024, 512
n_mels = 40
n_frame = 500  # rebound to 16 in the "Declare Variables" cell

sequence_length = 50  # rows of consecutive frames per window built by makeHot
n_unique_labels = 3   # width of the one-hot label matrix

In [4]:
# initial
def load(files, sr=SR):
    """Decode every file in ``files`` and join the signals into one array.

    Parameters
    ----------
    files : sequence of str
        Paths handed to ``librosa.load`` in the given order.
    sr : number or None, optional
        Target sampling rate forwarded to ``librosa.load`` (default ``SR``).
        The rate returned for one file is passed on as the target for the
        next file, exactly as the original loop did.

    Returns
    -------
    numpy.ndarray
        The decoded signals stacked with ``np.hstack`` in input order.

    Raises
    ------
    ValueError
        If ``files`` is empty (e.g. the glob pattern matched nothing).
    """
    if len(files) == 0:
        raise ValueError("load() received an empty file list; check the glob pattern")

    # Collect the pieces first and stack once: stacking inside the loop
    # re-copies the whole accumulated signal for every additional file.
    chunks = []
    for f in files:
        array, sr = librosa.load(f, sr=sr)
        chunks.append(array)

    raw = np.hstack(chunks)
    print(raw.shape)
    return raw

In [5]:
# Load the concatenated signal of each group; the calls run in the order
# background, unloaded, loaded, so the printed shapes appear in that order.
none_raw, uav_raw, loaded_raw = [
    load(paths) for paths in (none_files, uav_files, loaded_files)
]

(49786215,)
(45730224,)
(48921530,)


In [6]:
# Model and training hyper-parameters.
learning_rate = 0.0002  # 0.005 is left here as a noted alternative

# Label-placeholder width and the trailing channel axis of the network input.
n_classes, n_channels = 3, 1

# Both set to 16; note that this rebinds n_frame, which an earlier cell set to 500.
n_mfcc = n_frame = 16

In [7]:
def run_train(session, train_x, train_y, n_epochs=10, log_every=100):
    """Re-initialise the model and train it with mini-batches.

    Relies on notebook-level names: ``init``, ``batch_size``, ``optimizer``,
    ``cost`` and the placeholders ``X`` and ``Y``.

    Parameters
    ----------
    session : tf.Session
        Session in which the ops are run.
    train_x, train_y : array-like
        Features and labels, sliced along the first axis into batches.
    n_epochs : int, optional
        Number of passes over the data (default 10, the previous fixed value).
    log_every : int, optional
        Print the cost every ``log_every`` steps (default 100, must be > 0).

    Notes
    -----
    ``session.run(init)`` is executed on every call, so each call starts from
    freshly initialised variables. Rows beyond the last full batch are not
    used, as before.
    """
    print ("\nStart training")
    session.run(init)

    # The batch count does not change between epochs; compute it once.
    total_batch = train_x.shape[0] // batch_size

    for epoch in range(n_epochs):
        for i in range(total_batch):
            start = i * batch_size
            stop = start + batch_size
            feed = {X: train_x[start:stop], Y: train_y[start:stop]}
            _, c = session.run([optimizer, cost], feed_dict=feed)
            if i % log_every == 0:
                print ("Epoch #%d step=%d cost=%f" % (epoch, i, c))

In [8]:
def cross_validate(session, split_size=5):
    """Run K-fold cross-validation and return the accuracy of every fold.

    Relies on notebook-level names: ``X_train2``, ``y_train``, ``run_train``,
    ``logits`` and the placeholder ``X``.

    Parameters
    ----------
    session : tf.Session
        Session used for training and prediction.
    split_size : int, optional
        Number of folds passed to ``KFold`` (default 5).

    Returns
    -------
    list of float
        One ``accuracy_score`` per fold, in fold order.
    """
    results = []
    kf = KFold(n_splits=split_size)
    for train_idx, val_idx in kf.split(X_train2, y_train):
        # run_train re-initialises the variables, so every fold starts fresh.
        run_train(session, X_train2[train_idx], y_train[train_idx])

        # Fetch the raw logits and take the arg-max in NumPy. Building
        # tf.argmax ops here would add new nodes to the graph on every fold
        # and embed the label array in the graph as a constant.
        val_logits = session.run(logits, feed_dict={X: X_train2[val_idx]})
        y_pred = np.argmax(val_logits, axis=1)
        y_true = np.argmax(y_train[val_idx], axis=1)

        results.append(accuracy_score(y_true, y_pred))
    return results

In [9]:
# initial
def _mel_spectrogram(raw):
    """Return the power mel spectrogram of ``raw`` using the notebook settings.

    Uses ``SR``, ``window_size`` (as ``n_fft``), ``hop_size`` (as
    ``hop_length``) and ``n_mels``, so the number of mel bands stays tied to
    the constant that the network input shape is built from.
    """
    return librosa.feature.melspectrogram(y=raw, sr=SR, n_fft=window_size,
                                          hop_length=hop_size, power=2.0,
                                          n_mels=n_mels)


none_spec = _mel_spectrogram(none_raw)
uav_spec = _mel_spectrogram(uav_raw)
load_spec = _mel_spectrogram(loaded_raw)

print( load_spec, load_spec.min(), load_spec.max())
load_spec.shape

[[3.02222408e+02 2.24740576e+02 1.35802361e+02 ... 2.96847090e-02
  2.52862511e-03 2.12844749e-02]
 [2.96940132e-02 1.59778620e-02 6.54773627e-03 ... 5.11122128e-04
  3.21802178e-04 3.77853838e-04]
 [2.23049691e-03 3.03739502e-04 3.05152668e-04 ... 1.12306643e-04
  9.22759706e-05 4.38188929e-05]
 ...
 [1.33908831e-09 8.43796267e-09 8.57859966e-09 ... 5.82998506e-07
  5.18313135e-07 5.19150173e-07]
 [1.15008615e-09 5.67973156e-09 6.33179071e-09 ... 5.03313496e-07
  5.04695835e-07 3.76998037e-07]
 [1.01010746e-09 6.24981609e-09 4.83135602e-09 ... 4.57500739e-07
  3.64037747e-07 4.76369383e-07]] 7.873009525194064e-10 401.7224888014053


(40, 95550)

In [10]:
# initial
# One integer label per spectrogram column: 0 for the background group,
# 1 for the unloaded group, 2 for the loaded group.
y_none = np.zeros(none_spec.shape[1], dtype=int)
y_uav = np.full(uav_spec.shape[1], 1, dtype=int)
y_loaded = np.full(load_spec.shape[1], 2, dtype=int)

# len() of a 2-D array is its row count, so this prints the number of mel
# bands of each spectrogram, not the number of frames.
print(*(len(_spec) for _spec in (none_spec, uav_spec, load_spec)))

# Label vector shape and first label of each group.
for _labels in (y_none, y_uav, y_loaded):
    print(_labels.shape, _labels[0])

40 40 40
(97239,) 0
(89317,) 1
(95550,) 2


In [11]:
# initial
# Join the three spectrograms column-wise and transpose, so that every row
# of X_mfcc is one frame. Despite the name, the values are mel-spectrogram
# frames (see the melspectrogram calls), not MFCCs.
X_mfcc = np.hstack((none_spec, uav_spec, load_spec)).T

# Labels joined in the same group order as the spectrograms.
y = np.concatenate((y_none, y_uav, y_loaded))

In [12]:
# initial
n_labels = y.shape[0]

# One-hot encode: row i of the identity matrix is the encoding of label i,
# so indexing the identity with y yields an (n_labels, n_unique_labels) matrix.
y_encoded = np.eye(n_unique_labels)[y]

dataX, dataY = X_mfcc, y_encoded

In [13]:
def makeHot(dataX, dataY, sequence_length):
    """Cut ``dataX`` into overlapping windows and pair each with a label row.

    Window ``i`` holds rows ``i .. i + sequence_length - 1`` of ``dataX`` and
    is paired with ``dataY[i + sequence_length - 1]``, i.e. the label of the
    last row of the window. Consecutive windows are shifted by one row.

    Parameters
    ----------
    dataX : array-like, shape (n_rows, ...)
        Feature rows.
    dataY : array-like, shape (n_rows, ...)
        Label rows; returned with their trailing shape unchanged (the
        function no longer reads the notebook-level ``n_unique_labels``).
    sequence_length : int
        Number of rows per window; must be at least 1.

    Returns
    -------
    X_hot : numpy.ndarray, shape (n_windows, sequence_length, ...)
        Same dtype as ``dataX``. ``n_windows`` is
        ``max(n_rows - sequence_length + 1, 0)``.
    Y_hot : numpy.ndarray
        ``dataY[sequence_length - 1:]``.

    Raises
    ------
    ValueError
        If ``sequence_length < 1`` or the two inputs differ in row count.
    """
    dataX = np.asarray(dataX)
    dataY = np.asarray(dataY)

    if sequence_length < 1:
        raise ValueError("sequence_length must be >= 1, got %r" % (sequence_length,))
    if dataX.shape[0] != dataY.shape[0]:
        raise ValueError("dataX and dataY must have the same number of rows "
                         "(%d != %d)" % (dataX.shape[0], dataY.shape[0]))

    n_windows = max(dataX.shape[0] - sequence_length + 1, 0)

    # Fill the output one window position at a time: sequence_length slice
    # copies instead of one Python-level append per row of dataX.
    X_hot = np.empty((n_windows, sequence_length) + dataX.shape[1:],
                     dtype=dataX.dtype)
    for offset in range(sequence_length):
        X_hot[:, offset] = dataX[offset:offset + n_windows]

    Y_hot = dataY[sequence_length - 1:]
    return X_hot, Y_hot

In [None]:
# Turn the frame matrix and its one-hot labels into overlapping windows.
X_hot, Y_hot = makeHot(dataX=dataX, dataY=dataY, sequence_length=sequence_length)

In [None]:
# initial
from sklearn import model_selection

# Hold out 20% of the windows as a test set, with a fixed seed.
# NOTE(review): makeHot shifts consecutive windows by a single row, and
# train_test_split shuffles by default, so near-identical windows can land
# on both sides of the split. Confirm whether the reported test scores
# should instead come from recordings kept out of training entirely.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X_hot,
    Y_hot,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Shapes of the feature split, then of the label split (train first, test second).
for _train_part, _test_part in ((X_train, X_test), (y_train, y_test)):
    print(_train_part.shape, _test_part.shape)

In [None]:
# initial
# Write the four splits to disk; np.save appends the ".npy" extension.
for _stem, _array in (
    ('./X_train2', X_train),
    ('./X_test2', X_test),
    ('./y_train2', y_train),
    ('./y_test2', y_test),
):
    np.save(_stem, _array)

In [None]:
### Data Loading ###
# Read back the four arrays written by the np.save cell, so the cells below
# can be run without recomputing the spectrograms and windows.
# Each name is rebound right after its file is read, one array at a time.
X_train = np.load('./X_train2.npy')
X_test = np.load('./X_test2.npy')
y_train = np.load('./y_train2.npy')
y_test = np.load('./y_test2.npy')

# Convolutional Layer

In [None]:
# Clear the default graph so that re-running the model cells below starts
# from an empty graph instead of adding a second copy of every op.
tf.reset_default_graph()

In [None]:
# Input placeholder declared directly in the 4-D layout the conv layers need:
# (batch, sequence_length, n_mels, n_channels). Previously X was a flat
# placeholder that was immediately rebound to a reshape op, which left the
# real placeholder unreachable and made every feed override an intermediate
# tensor. The arrays fed by run_train / cross_validate are already 4-D.
X = tf.placeholder(tf.float32, shape=[None, sequence_length, n_mels, n_channels])
# One-hot labels.
Y = tf.placeholder(tf.float32, shape=[None, n_classes])

# Two conv + max-pool stages. SAME padding combined with pooling stride 1
# keeps the spatial size at sequence_length x n_mels throughout.
conv1 = tf.layers.conv2d(inputs=X, filters=1, kernel_size=[3, 3],
                         padding="SAME", activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2],
                                padding="SAME", strides=1)

conv2 = tf.layers.conv2d(inputs=pool1, filters=1, kernel_size=[3, 3],
                         padding="SAME", activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2],
                                padding="SAME", strides=1)

# Flatten for the dense layers; the trailing "*1" is the filter count of
# conv2 and must be updated if that layer's `filters` changes.
flat = tf.reshape(pool2, [-1, sequence_length*n_mels*1])

In [None]:
# Fully connected head on top of the flattened conv features.
dense2 = tf.layers.dense(inputs=flat, units=625, activation=tf.nn.relu)
# Output width follows n_classes so it always matches the label placeholder Y
# (it was hard-coded to 3, the current value of n_classes).
logits = tf.layers.dense(inputs=dense2, units=n_classes)

# Mean softmax cross-entropy between the logits and the one-hot labels,
# minimised with Adam at the configured learning rate.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [None]:
# Shape check before the channel axis is appended in the next cell.
print(X_train.shape)

In [None]:
# Append a trailing channel axis of size 1 so the arrays match the 4-D
# network input (batch, frames, mel bands, channels).
X_train2 = X_train.reshape(X_train.shape + (1,))
X_test2 = X_test.reshape(X_test.shape + (1,))

# Class probabilities derived from the trained logits. This used to be an
# extra fully connected layer stacked on the logits; that layer is not part
# of `cost`, so the optimizer never updated it and anything computed from
# Y_pred (e.g. the `accuracy` op) scored an untrained projection.
# NOTE(review): this removes that layer's variables from the graph, so
# checkpoints written with the old graph contain two variables that no
# longer exist here.
Y_pred = tf.nn.softmax(logits)

In [None]:
# Initialiser for every variable defined so far; run_train executes it at
# the start of each training run.
init = tf.global_variables_initializer()

# Accuracy op: share of rows where the arg-max of Y_pred equals the arg-max
# of the one-hot label.
correct_prediction = tf.equal(
    tf.argmax(Y_pred, axis=1),
    tf.argmax(Y, axis=1),
)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))

In [None]:
### Save Model ###
# Path prefix handed to saver.save() in the run cell.
model_path = './spec'
# Created after the model cells above so that the variables they define exist
# when the Saver is constructed.
saver = tf.train.Saver()

# Run Model

In [None]:
###########################
batch_size = 64
cost_history = np.empty(shape=[1], dtype=float)

with tf.Session() as session:
    result = cross_validate(session)
    print ("Cross-validation result: %s" % result)

    # NOTE(review): run_train re-initialises the variables for every fold, so
    # the weights evaluated and saved below are those of the last fold only
    # (trained on a subset of the training data). Confirm whether a final fit
    # on the full training set is wanted before saving.

    # Accuracy on the training data. This figure used to be printed as
    # "Test accuracy" although it is computed from X_train2 / y_train.
    train_pred = np.argmax(session.run(logits, feed_dict={X: X_train2}), axis=1)
    train_true = np.argmax(y_train, axis=1)
    print ("Train accuracy: %f" % accuracy_score(train_true, train_pred))

    saver.save(session, model_path)

    # Held-out predictions; y_true / y_pred are read by the evaluation cell.
    # The arg-max is taken in NumPy so no new ops are added to the graph.
    y_pred = np.argmax(session.run(logits, feed_dict={X: X_test2}), axis=1)
    y_true = np.argmax(y_test, axis=1)
    print ("Test accuracy: %f" % accuracy_score(y_true, y_pred))

# Show prediction results on the test set

In [None]:
# The metric functions used here are imported in the import cell at the top
# of the notebook, so they are not re-imported in this cell.

# NOTE(review): with average='micro' on single-label multi-class data the
# F-score is expected to equal the accuracy printed on the next line;
# consider average='macro' if a class-balanced figure is wanted.
p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
print("Accuracy: ", accuracy_score(y_true, y_pred))

# Integer sample positions for the x axis. np.linspace(0, n, n) placed the
# points n/(n-1) apart, so they drifted away from the sample index.
sample_idx = np.arange(len(y_pred))

### print true graph###
fig = plt.figure(figsize=(15,9))
ax = fig.add_subplot(1,1,1)
ax.plot(sample_idx, y_true)
ax.set_title("True class per test sample")
ax.set_xlabel("Test sample index")
ax.set_ylabel("Class label")

### print pred graph###
fig = plt.figure(figsize=(15,9))
ax = fig.add_subplot(1,1,1)
ax.plot(sample_idx, y_pred)
ax.set_title("Predicted class per test sample")
ax.set_xlabel("Test sample index")
ax.set_ylabel("Class label")

print(classification_report(y_true, y_pred))
print(confusion_matrix(y_true, y_pred))