In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import IPython.display
import librosa.display
import numpy as np
import librosa
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
import glob

# Root folder of the recorded audio dataset; edit this single line to run on another machine.
DATA_DIR = 'C:/Users/Jaesung/Desktop/코딩/Python실습/K-SW-PJT/RW_AUDIO_DATA_2018_Update/RW_AUDIO_DATA_2018_Update'
uav_path = DATA_DIR + '/JUNE_01_PHANTOMS/*.wav'
none_path = DATA_DIR + '/JUNE_02_BACKGROUND/background_0*.wav'

# glob returns matches in arbitrary order; sorting makes the order in which the
# files are later concatenated (and therefore the chunk boundaries) reproducible.
uav_files = sorted(glob.glob(uav_path))
none_files = sorted(glob.glob(none_path))

In [3]:
# Preprocessing constants.
# NOTE(review): none of the visible cells reference these names; load() and mfcc4()
# rely on their own defaults, which carry the same values — TODO confirm before editing.
CHUNK_SIZE = 8192  # same value as the chunk_size default of mfcc4()
SR = 44100  # same value as the sr default of load() and mfcc4()
N_MFCC = 16  # same value as the n_mfcc default of mfcc4()

In [4]:
def load(files, sr=44100):
    """Load every audio file in `files` and join them into one 1-D signal.

    Args:
        files: sequence of audio file paths; they are concatenated in the given order.
        sr: target sampling rate passed to `librosa.load`.

    Returns:
        1-D numpy array holding the samples of all files back to back.

    Raises:
        ValueError: if `files` is empty (for example when a glob pattern matched nothing).
    """
    files = list(files)
    if not files:
        raise ValueError("load() received an empty file list; check the glob pattern/path")
    # Gather the per-file arrays and concatenate once: stacking inside the loop
    # re-copies the whole accumulated signal for every additional file.
    signals = [librosa.load(f, sr=sr)[0] for f in files]
    raw = np.concatenate(signals)
    print(raw.shape)
    return raw

In [5]:
uav_raw = load(uav_files)
none_raw = load(none_files)

(9732096,)
(18579456,)


# Data preprocessing

Chose `mfcc4` among functions below:
- input size 16x16; `n_mfcc=16`, used first 16 data points from 16x17 mfcc of a chunk with size 8192
- chunk size 8192; consecutive chunks overlap by half (a new chunk starts every 4096 samples)

In [6]:
def mfcc4(raw, label, chunk_size=8192, window_size=4096, sr=44100, n_mfcc=16, n_frame=16):
    """Cut `raw` into half-overlapping chunks and compute one MFCC patch per chunk.

    Args:
        raw: 1-D audio signal.
        label: class label attached to every chunk taken from this signal.
        chunk_size: number of samples per chunk; consecutive chunks start
            `chunk_size // 2` samples apart.
        window_size: unused; kept only so existing calls keep working.
        sr: sampling rate handed to `librosa.feature.mfcc`.
        n_mfcc: number of MFCC coefficients per frame.
        n_frame: number of leading frames kept from each chunk.

    Returns:
        (mfcc, y): `mfcc` is a float array of shape (n_chunks, n_mfcc, n_frame);
        `y` has shape (n_chunks,) and repeats `label`.
    """
    print(raw.shape)
    hop = chunk_size // 2
    # With the defaults a full 8192-sample chunk gives 17 frames (see the 16x17 note
    # in the markdown above). As before, demand one frame more than is kept so that
    # the truncated chunk at the end of the signal is dropped.
    min_frames = n_frame + 1
    slices = []
    for start in range(0, len(raw), hop):
        # Pass the audio by keyword: newer librosa releases no longer accept it positionally.
        mfcc_slice = librosa.feature.mfcc(y=raw[start:start + chunk_size], sr=sr, n_mfcc=n_mfcc)  # n_mfcc x frames
        if mfcc_slice.shape[1] < min_frames:
            print("small end:", mfcc_slice.shape)
            continue
        slices.append(mfcc_slice[:, :n_frame])
    # Stack once at the end instead of np.vstack per chunk (which copies everything each time).
    if slices:
        mfcc = np.stack(slices).astype(np.float64)
    else:
        mfcc = np.empty((0, n_mfcc, n_frame))
    y = np.array([label] * len(slices))
    return mfcc, y

In [7]:
mfcc_uav, y_uav = mfcc4(uav_raw, 1)
print(mfcc_uav.shape, y_uav.shape)
mfcc_none, y_none = mfcc4(none_raw, 0)
print(mfcc_none.shape, y_none.shape)


(9732096,)
small end: (16, 9)
(2375, 16, 16) (2375,)
(18579456,)
small end: (16, 9)
(4535, 16, 16) (4535,)


In [8]:
print(mfcc_uav.shape, y_uav.shape)
print(mfcc_none.shape, y_none.shape)

(2375, 16, 16) (2375,)
(4535, 16, 16) (4535,)


In [9]:
X = np.concatenate((mfcc_uav, mfcc_none), axis=0)
y = np.hstack((y_uav, y_none))
print(X.shape, y.shape)

(6910, 16, 16) (6910,)


In [10]:
# One-hot encode the integer labels: row k of the identity matrix is the code of class k.
n_unique_labels = 2
n_labels = y.shape[0]
y_encoded = np.eye(n_unique_labels)[y]
print(y_encoded.shape)

(6910, 2)


In [11]:
from sklearn import model_selection
# Hold out 20% of the chunks as a test set; random_state pins the shuffle.
# NOTE(review): mfcc4 starts a new chunk every chunk_size//2 samples, so neighbouring
# chunks share half of their audio. A random chunk-level split can therefore put
# overlapping chunks into both train and test, which may inflate the test scores —
# consider splitting by file/segment before chunking.
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y_encoded, test_size=0.2, random_state=42)

In [12]:
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(5528, 16, 16) (1382, 16, 16)
(5528, 2) (1382, 2)


In [13]:
# Write the four split arrays to disk, one file per array.
xy_dir = 'C:/Users/Jaesung/Desktop/코딩/Python실습/K-SW-PJT/RW_AUDIO_DATA_2018_Update/RW_AUDIO_DATA_2018_Update/Xy/'
for split_name, split_array in (('X_train', X_train), ('X_test', X_test),
                                ('y_train', y_train), ('y_test', y_test)):
    np.save(xy_dir + split_name + '_chunk_3d', split_array)


In [45]:
# Read the four split arrays back from their .npy files.
xy_dir = 'C:/Users/Jaesung/Desktop/코딩/Python실습/K-SW-PJT/RW_AUDIO_DATA_2018_Update/RW_AUDIO_DATA_2018_Update/Xy/'
X_train, X_test, y_train, y_test = [
    np.load(xy_dir + split_name + '_chunk_3d.npy')
    for split_name in ('X_train', 'X_test', 'y_train', 'y_test')
]


# Tensorflow finally!

## Training

### Parameters
- input data: ?, 16, 16, 1 (batch size, #mfcc, #frame, #channels)
- filter: [3, 3]
- strides: 1(first layer), 2(second layer)

# Experiment 1 - One convolutional layer

#### Experiment 1-1
- learning rate 0.005
- pooling stride 1x1
- #filter 1
- best result among every other settings
- Question: cost increased overall with a little fluctuation during training. (3.16 -> 6.19)

In [34]:
n_mfcc = 16
n_frame = 16
n_classes = 2
n_channels = 1

kernel_size = 3
stride = 1
pad = "SAME"

learning_rate = 0.005
training_epochs = 20

In [35]:
# Feed the MFCC patches directly as (batch, n_mfcc, n_frame, n_channels).
# Previously a flat placeholder was created and X was immediately rebound to a
# reshape of it, so the real placeholder became unreachable and every feed_dict
# overrode an intermediate tensor instead.
X = tf.placeholder(tf.float32, shape=[None, n_mfcc, n_frame, n_channels])
Y = tf.placeholder(tf.float32, shape=[None, n_classes])

conv1 = tf.layers.conv2d(inputs=X, filters=1, kernel_size=[3, 3],
                         padding="SAME", activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2],
                                padding="SAME", strides=1)
# NOTE(review): training=True keeps this dropout active on every run of the graph,
# including test-set prediction — confirm that this is intended.
dropout1 = tf.layers.dropout(inputs=pool1, rate=0.7, training=True)

# One filter, flattened to 16*16*1 = 256 features per sample.
flat = tf.reshape(dropout1, [-1, 16*16*1])

In [36]:
# Fully connected head: 625 ReLU units followed by a 2-unit logit layer.
dense2 = tf.layers.dense(inputs=flat, units=625, activation=tf.nn.relu)
# NOTE(review): training=True means this dropout is also applied when `logits`
# is evaluated for prediction — confirm that this is intended.
dropout2 = tf.layers.dropout(inputs=dense2, rate=0.5, training=True)
logits = tf.layers.dense(inputs=dropout2, units=2)

In [37]:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [38]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [39]:
X_train2 = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test2 = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

In [40]:
batch_size = 1  # per-sample updates; use y_train.shape[0] for full-batch training
n_train = y_train.shape[0]
# Number of batches the inner loop really visits (the last one may be partial),
# so avg_cost is a true mean of the per-batch costs.
total_batch = (n_train + batch_size - 1) // batch_size
cost_values = []

for epoch in range(training_epochs):
    avg_cost = 0
    for i in range(0, n_train, batch_size):
        feed_dict = {X: X_train2[i:i+batch_size, :, :, :], Y: y_train[i:i+batch_size, :]}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        # Record the evaluated value `c`; appending `cost` would store the symbolic tensor.
        cost_values.append(c)
        avg_cost += c / total_batch
    print('Epoch:', '%04d' % (epoch+1), 'cost = ', '{:.9f}'.format(avg_cost))

# Per-batch cost trace as a float array (no uninitialised leading element).
cost_history = np.asarray(cost_values, dtype=float)

Epoch: 0001 cost =  7.972967008
Epoch: 0002 cost =  4.871424558
Epoch: 0003 cost =  6.534099989
Epoch: 0004 cost =  4.753167588
Epoch: 0005 cost =  7.286356338
Epoch: 0006 cost =  4.296921577
Epoch: 0007 cost =  5.501921589
Epoch: 0008 cost =  4.932081379
Epoch: 0009 cost =  4.968451023
Epoch: 0010 cost =  6.016656325
Epoch: 0011 cost =  6.195507080
Epoch: 0012 cost =  5.400600423
Epoch: 0013 cost =  5.406702785
Epoch: 0014 cost =  5.532029402
Epoch: 0015 cost =  4.839555036
Epoch: 0016 cost =  4.847552114
Epoch: 0017 cost =  4.044352142
Epoch: 0018 cost =  5.532412898
Epoch: 0019 cost =  5.280941114
Epoch: 0020 cost =  4.429576121


In [41]:
# Predicted class = index of the larger logit for each test sample.
# NOTE(review): the dropout layers of this experiment are built with training=True,
# so they are still active during this prediction run.
y_pred = sess.run(tf.argmax(logits, 1), feed_dict={X: X_test2})
# y_test is already a NumPy one-hot array: decode it with NumPy rather than
# adding another op to the graph and pushing it through the session.
y_true = np.argmax(y_test, axis=1)

In [42]:
from sklearn.metrics import precision_recall_fscore_support
p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
from sklearn.metrics import accuracy_score
print("Accuracy: ", accuracy_score(y_true, y_pred))

from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_true, y_pred))

F-Score: 0.676
Accuracy:  0.6758321273516642
             precision    recall  f1-score   support

          0       0.67      0.97      0.79       885
          1       0.74      0.15      0.25       497

avg / total       0.69      0.68      0.60      1382

[[858  27]
 [421  76]]


#### Experiment 1-2
- learning rate 0.01
- pooling stride 1x1
- #filter 1
- best result among every other settings
- cost kept fluctuating during training (0.8 -> 1.3) -- why is that?

In [44]:
n_mfcc = 16
n_frame = 16
n_classes = 2
n_channels = 1

kernel_size = 3
stride = 1
pad = "SAME"

learning_rate = 0.01
training_epochs = 20

In [45]:
# Feed the MFCC patches directly as (batch, n_mfcc, n_frame, n_channels).
# Previously a flat placeholder was created and X was immediately rebound to a
# reshape of it, so the real placeholder became unreachable and every feed_dict
# overrode an intermediate tensor instead.
X = tf.placeholder(tf.float32, shape=[None, n_mfcc, n_frame, n_channels])
Y = tf.placeholder(tf.float32, shape=[None, n_classes])

conv1 = tf.layers.conv2d(inputs=X, filters=1, kernel_size=[3, 3],
                         padding="SAME", activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2],
                                padding="SAME", strides=1)
# NOTE(review): training=True keeps this dropout active on every run of the graph,
# including test-set prediction — confirm that this is intended.
dropout1 = tf.layers.dropout(inputs=pool1, rate=0.7, training=True)

# One filter, flattened to 16*16*1 = 256 features per sample.
flat = tf.reshape(dropout1, [-1, 16*16*1])

In [46]:
dense2 = tf.layers.dense(inputs=flat, units=625, activation=tf.nn.relu)
dropout2 = tf.layers.dropout(inputs=dense2, rate=0.5, training=True)
logits = tf.layers.dense(inputs=dropout2, units=2)

In [47]:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [48]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [49]:
X_train2 = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test2 = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

In [50]:
batch_size = 1  # per-sample updates; use y_train.shape[0] for full-batch training
n_train = y_train.shape[0]
# Number of batches the inner loop really visits (the last one may be partial),
# so avg_cost is a true mean of the per-batch costs.
total_batch = (n_train + batch_size - 1) // batch_size
cost_values = []

for epoch in range(training_epochs):
    avg_cost = 0
    for i in range(0, n_train, batch_size):
        feed_dict = {X: X_train2[i:i+batch_size, :, :, :], Y: y_train[i:i+batch_size, :]}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        # Record the evaluated value `c`; appending `cost` would store the symbolic tensor.
        cost_values.append(c)
        avg_cost += c / total_batch
    print('Epoch:', '%04d' % (epoch+1), 'cost = ', '{:.9f}'.format(avg_cost))

# Per-batch cost trace as a float array (no uninitialised leading element).
cost_history = np.asarray(cost_values, dtype=float)

Epoch: 0001 cost =  50.438877749
Epoch: 0002 cost =  62.978497867
Epoch: 0003 cost =  50.151912144
Epoch: 0004 cost =  41.537279006
Epoch: 0005 cost =  33.564248436
Epoch: 0006 cost =  23.532914839
Epoch: 0007 cost =  21.471797288
Epoch: 0008 cost =  48.142434775
Epoch: 0009 cost =  7.857867058
Epoch: 0010 cost =  0.656436613
Epoch: 0011 cost =  0.643210001
Epoch: 0012 cost =  0.643210009
Epoch: 0013 cost =  0.643210009
Epoch: 0014 cost =  0.643210009
Epoch: 0015 cost =  0.643210009
Epoch: 0016 cost =  0.643210009
Epoch: 0017 cost =  0.643210009
Epoch: 0018 cost =  0.643210009
Epoch: 0019 cost =  0.643210009
Epoch: 0020 cost =  0.643210009


In [51]:
# Predicted class = index of the larger logit for each test sample.
# NOTE(review): the dropout layers of this experiment are built with training=True,
# so they are still active during this prediction run.
y_pred = sess.run(tf.argmax(logits, 1), feed_dict={X: X_test2})
# y_test is already a NumPy one-hot array: decode it with NumPy rather than
# adding another op to the graph and pushing it through the session.
y_true = np.argmax(y_test, axis=1)

In [52]:
# learning_rate 0.01, pooling stride 1x1, dropout, cost 0.8 --> 1.3 ....why?
from sklearn.metrics import precision_recall_fscore_support
p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
from sklearn.metrics import accuracy_score
print("Accuracy: ", accuracy_score(y_true, y_pred))

from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_true, y_pred))

F-Score: 0.641
Accuracy:  0.6410998552821997
             precision    recall  f1-score   support

          0       0.64      1.00      0.78       885
          1       1.00      0.00      0.00       497

avg / total       0.77      0.64      0.50      1382

[[885   0]
 [496   1]]


#### Experiment 1-3
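- pooling stride 2x2 intended; note that the `tf.nn.max_pool` call below uses `strides=[1,1,1,1]`, i.e. stride 1x1
- #filter 1
- used code below for the graph
- learning rate 0.005
- NOTE: no training loop is visible among the cells of this experiment; the reported accuracy 0.640 equals 885/1382, the share of the majority (no-UAV) class in the test set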

In [25]:
n_filters = 1 
learning_rate = 0.005
training_epochs = 10

In [17]:
#tf.reset_default_graph()

# Single 4-D placeholder (batch, n_mfcc, n_frame, n_channels). The earlier version
# created a flat placeholder and rebound X to a reshape of it, which left the real
# placeholder unreachable.
X = tf.placeholder(tf.float32, shape=[None, n_mfcc, n_frame, n_channels])
Y = tf.placeholder(tf.float32, shape=[None, n_classes])

# Convolution -> ReLU -> max-pool, all with stride 1 and `pad` padding.
W1 = tf.Variable(tf.random_normal([kernel_size, kernel_size, n_channels, n_filters], stddev=0.01))
L1 = tf.nn.conv2d(X, W1, strides=[1,1,1,1], padding=pad)
L1 = tf.nn.relu(L1)
L1 = tf.nn.max_pool(L1, ksize=[1,2,2,1], strides=[1,1,1,1], padding=pad)
# Flatten per sample. Deriving the width from the hyper-parameters (256 for the
# current settings) makes a changed n_filters fail loudly at the matmul instead of
# silently folding extra feature maps into the batch dimension.
L1 = tf.reshape(L1, [-1, n_mfcc * n_frame * n_filters])

In [18]:
# fully connected layer
# Build the Xavier initializer first and actually hand it to get_variable;
# before, it was created after W2 and never used.
initializer = tf.contrib.layers.xavier_initializer()
W2 = tf.get_variable("W2", shape=[16*16*1, n_classes], initializer=initializer)

In [19]:
b = tf.Variable(tf.random_normal([n_classes]))
hypothesis = tf.matmul(L1, W2) + b

In [20]:
X_train2 = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test2 = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

In [21]:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [23]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [24]:
# learning rate 0.005 epoch 20
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print('Accuracy:', sess.run(accuracy, feed_dict={
      X: X_test2, Y: y_test}))

Accuracy: 0.64037627


In [26]:
# epoch 10
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print('Accuracy:', sess.run(accuracy, feed_dict={
      X: X_test2, Y: y_test}))

Accuracy: 0.64037627


# Experiment 2 - Two convolutional layers

#### Experiment 2-1
- learning rate 0.01
- pooling stride 1x1
- #filter 5 for each conv layer
- detects everything as no uav

In [27]:
tf.reset_default_graph()

In [28]:
n_mfcc = 16
n_frame = 16
n_classes = 2
n_channels = 1

kernel_size = 3
stride = 1
pad = "SAME"

learning_rate = 0.01
training_epochs = 20

In [29]:
# Single 4-D input placeholder (batch, n_mfcc, n_frame, n_channels); the previous
# flat placeholder was immediately shadowed by its own reshape and could not be fed.
X = tf.placeholder(tf.float32, shape=[None, n_mfcc, n_frame, n_channels])
# One-hot class labels.
Y = tf.placeholder(tf.float32, shape=[None, n_classes])

In [30]:
# First conv block: 5 filters of 3x3, 2x2 max-pool with stride 1, then dropout.
conv1 = tf.layers.conv2d(inputs=X, filters=5, kernel_size=[3, 3],
                         padding="SAME", activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2],
                                padding="SAME", strides=1)
# NOTE(review): training=True keeps dropout active on every run of the graph,
# including test-set prediction — confirm that this is intended.
dropout1 = tf.layers.dropout(inputs=pool1, rate=0.7, training=True)


# Second conv block: same layout, but the pool uses stride 2.
conv2 = tf.layers.conv2d(inputs=dropout1, filters=5, kernel_size=[3, 3],
                         padding="SAME", activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2],
                                padding="SAME", strides=2)
dropout2 = tf.layers.dropout(inputs=pool2, rate=0.7, training=True)


In [31]:
print(dropout2)

Tensor("dropout_1/dropout/mul:0", shape=(?, 8, 8, 5), dtype=float32)


In [32]:
flat = tf.reshape(dropout2, [-1,  8*8*5])
dense3 = tf.layers.dense(inputs=flat, units=625, activation=tf.nn.relu)
dropout3 = tf.layers.dropout(inputs=dense3, rate=0.5, training=True)
logits = tf.layers.dense(inputs=dropout3, units=2)

In [33]:
print(logits)

Tensor("dense_1/BiasAdd:0", shape=(?, 2), dtype=float32)


In [34]:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [35]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [36]:
X_train2 = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test2 = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

In [37]:
batch_size = 1 # no batch set yet!
n_train = y_train.shape[0]
# Number of batches the inner loop really visits (the last one may be partial),
# so avg_cost is a true mean of the per-batch costs.
total_batch = (n_train + batch_size - 1) // batch_size
cost_values = []

for epoch in range(training_epochs):
    avg_cost = 0
    for i in range(0, n_train, batch_size):
        feed_dict = {X: X_train2[i:i+batch_size, :, :, :], Y: y_train[i:i+batch_size, :]}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        # Record the evaluated value `c`; appending `cost` would store the symbolic tensor.
        cost_values.append(c)
        avg_cost += c / total_batch
    print('Epoch:', '%04d' % (epoch+1), 'cost = ', '{:.9f}'.format(avg_cost))

# Per-batch cost trace as a float array (no uninitialised leading element).
cost_history = np.asarray(cost_values, dtype=float)

Epoch: 0001 cost =  4.692324619
Epoch: 0002 cost =  0.662341942
Epoch: 0003 cost =  0.643209644
Epoch: 0004 cost =  0.644098513
Epoch: 0005 cost =  0.643209403
Epoch: 0006 cost =  0.643210006
Epoch: 0007 cost =  0.643210009
Epoch: 0008 cost =  0.643210009
Epoch: 0009 cost =  0.643210009
Epoch: 0010 cost =  0.643210009
Epoch: 0011 cost =  0.643210009
Epoch: 0012 cost =  0.643210009
Epoch: 0013 cost =  0.643210009
Epoch: 0014 cost =  0.643210009
Epoch: 0015 cost =  0.643210009
Epoch: 0016 cost =  0.643210009
Epoch: 0017 cost =  0.643210009
Epoch: 0018 cost =  0.643210009
Epoch: 0019 cost =  0.643210009
Epoch: 0020 cost =  0.643210009


In [38]:
# Predicted class = index of the larger logit for each test sample.
# NOTE(review): the dropout layers of this experiment are built with training=True,
# so they are still active during this prediction run.
y_pred = sess.run(tf.argmax(logits, 1), feed_dict={X: X_test2})
# y_test is already a NumPy one-hot array: decode it with NumPy rather than
# adding another op to the graph and pushing it through the session.
y_true = np.argmax(y_test, axis=1)

In [39]:
from sklearn.metrics import precision_recall_fscore_support
p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
from sklearn.metrics import accuracy_score
print("Accuracy: ", accuracy_score(y_true, y_pred))

from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_true, y_pred))

F-Score: 0.64
Accuracy:  0.6403762662807525
             precision    recall  f1-score   support

          0       0.64      1.00      0.78       885
          1       0.00      0.00      0.00       497

avg / total       0.41      0.64      0.50      1382

[[885   0]
 [497   0]]


  'precision', 'predicted', average, warn_for)


#### Experiment 2-2
- learning rate 0.005
- pooling stride 1x1
- #filter 5 for each conv layer
- detects everything as no uav

In [44]:
tf.reset_default_graph()

In [46]:
n_mfcc = 16
n_frame = 16
n_classes = 2
n_channels = 1

kernel_size = 3
stride = 1
pad = "SAME"

learning_rate = 0.005
training_epochs = 20

In [47]:
# Single 4-D input placeholder (batch, n_mfcc, n_frame, n_channels); the previous
# flat placeholder was immediately shadowed by its own reshape and could not be fed.
X = tf.placeholder(tf.float32, shape=[None, n_mfcc, n_frame, n_channels])
# One-hot class labels.
Y = tf.placeholder(tf.float32, shape=[None, n_classes])

In [48]:
# First conv block: 5 filters of 3x3, 2x2 max-pool with stride 1, then dropout.
conv1 = tf.layers.conv2d(inputs=X, filters=5, kernel_size=[3, 3],
                         padding="SAME", activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2],
                                padding="SAME", strides=1)
# NOTE(review): training=True keeps dropout active on every run of the graph,
# including test-set prediction — confirm that this is intended.
dropout1 = tf.layers.dropout(inputs=pool1, rate=0.7, training=True)


# Second conv block: same layout, but the pool uses stride 2.
conv2 = tf.layers.conv2d(inputs=dropout1, filters=5, kernel_size=[3, 3],
                         padding="SAME", activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2],
                                padding="SAME", strides=2)
dropout2 = tf.layers.dropout(inputs=pool2, rate=0.7, training=True)


In [49]:
print(dropout2)

Tensor("dropout_1/dropout/mul:0", shape=(?, 8, 8, 5), dtype=float32)


In [50]:
flat = tf.reshape(dropout2, [-1,  8*8*5])
dense3 = tf.layers.dense(inputs=flat, units=625, activation=tf.nn.relu)
dropout3 = tf.layers.dropout(inputs=dense3, rate=0.5, training=True)
logits = tf.layers.dense(inputs=dropout3, units=2)

In [51]:
print(logits)

Tensor("dense_1/BiasAdd:0", shape=(?, 2), dtype=float32)


In [52]:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [53]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [54]:
X_train2 = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test2 = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

In [55]:
batch_size = 1 # no batch set yet!
n_train = y_train.shape[0]
# Number of batches the inner loop really visits (the last one may be partial),
# so avg_cost is a true mean of the per-batch costs.
total_batch = (n_train + batch_size - 1) // batch_size
cost_values = []

for epoch in range(training_epochs):
    avg_cost = 0
    for i in range(0, n_train, batch_size):
        feed_dict = {X: X_train2[i:i+batch_size, :, :, :], Y: y_train[i:i+batch_size, :]}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        # Record the evaluated value `c`; appending `cost` would store the symbolic tensor.
        cost_values.append(c)
        avg_cost += c / total_batch
    print('Epoch:', '%04d' % (epoch+1), 'cost = ', '{:.9f}'.format(avg_cost))

# Per-batch cost trace as a float array (no uninitialised leading element).
cost_history = np.asarray(cost_values, dtype=float)

Epoch: 0001 cost =  2.503919052
Epoch: 0002 cost =  0.651520257
Epoch: 0003 cost =  0.642271590
Epoch: 0004 cost =  0.642093603
Epoch: 0005 cost =  0.642083272
Epoch: 0006 cost =  0.642083271
Epoch: 0007 cost =  0.642083271
Epoch: 0008 cost =  0.642083271
Epoch: 0009 cost =  0.642083271
Epoch: 0010 cost =  0.642083271
Epoch: 0011 cost =  0.642083271
Epoch: 0012 cost =  0.642083271
Epoch: 0013 cost =  0.642083271
Epoch: 0014 cost =  0.642083271
Epoch: 0015 cost =  0.642083271
Epoch: 0016 cost =  0.642083271
Epoch: 0017 cost =  0.642083271
Epoch: 0018 cost =  0.642083271
Epoch: 0019 cost =  0.642083271
Epoch: 0020 cost =  0.642083271


In [56]:
# Predicted class = index of the larger logit for each test sample.
# NOTE(review): the dropout layers of this experiment are built with training=True,
# so they are still active during this prediction run.
y_pred = sess.run(tf.argmax(logits, 1), feed_dict={X: X_test2})
# y_test is already a NumPy one-hot array: decode it with NumPy rather than
# adding another op to the graph and pushing it through the session.
y_true = np.argmax(y_test, axis=1)

In [57]:
# cost history 5.75->0.64... but low f1 score (overfitting???)
from sklearn.metrics import precision_recall_fscore_support
p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
from sklearn.metrics import accuracy_score
print("Accuracy: ", accuracy_score(y_true, y_pred))

from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_true, y_pred))

F-Score: 0.64
Accuracy:  0.6403762662807525
             precision    recall  f1-score   support

          0       0.64      1.00      0.78       885
          1       0.00      0.00      0.00       497

avg / total       0.41      0.64      0.50      1382

[[885   0]
 [497   0]]


  'precision', 'predicted', average, warn_for)


#### Experiment 2-3
- learning rate 0.01 intended; note that the hyper-parameter cell below sets `learning_rate = 0.005`
- pooling stride 1x1
- #filter 10 for each conv layer
- detects everything as no uav

In [58]:
tf.reset_default_graph()

In [59]:
n_mfcc = 16
n_frame = 16
n_classes = 2
n_channels = 1

kernel_size = 3
stride = 1
pad = "SAME"

learning_rate = 0.005
training_epochs = 20

In [60]:
# Single 4-D input placeholder (batch, n_mfcc, n_frame, n_channels); the previous
# flat placeholder was immediately shadowed by its own reshape and could not be fed.
X = tf.placeholder(tf.float32, shape=[None, n_mfcc, n_frame, n_channels])
# One-hot class labels.
Y = tf.placeholder(tf.float32, shape=[None, n_classes])

In [64]:
# First conv block: 10 filters of 3x3, 2x2 max-pool with stride 1, then dropout.
conv1 = tf.layers.conv2d(inputs=X, filters=10, kernel_size=[3, 3],
                         padding="SAME", activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2],
                                padding="SAME", strides=1)
# NOTE(review): training=True keeps dropout active on every run of the graph,
# including test-set prediction — confirm that this is intended.
dropout1 = tf.layers.dropout(inputs=pool1, rate=0.7, training=True)


# Second conv block: same layout, but the pool uses stride 2.
conv2 = tf.layers.conv2d(inputs=dropout1, filters=10, kernel_size=[3, 3],
                         padding="SAME", activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2],
                                padding="SAME", strides=2)
dropout2 = tf.layers.dropout(inputs=pool2, rate=0.7, training=True)


In [65]:
print(dropout2)

Tensor("dropout_4/dropout/mul:0", shape=(?, 8, 8, 10), dtype=float32)


In [66]:
flat = tf.reshape(dropout2, [-1,  8*8*10])
dense3 = tf.layers.dense(inputs=flat, units=625, activation=tf.nn.relu)
dropout3 = tf.layers.dropout(inputs=dense3, rate=0.5, training=True)
logits = tf.layers.dense(inputs=dropout3, units=2)

In [67]:
print(logits)

Tensor("dense_3/BiasAdd:0", shape=(?, 2), dtype=float32)


In [68]:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [69]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [70]:
X_train2 = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test2 = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

In [71]:
batch_size = 1 # no batch set yet!
n_train = y_train.shape[0]
# Number of batches the inner loop really visits (the last one may be partial),
# so avg_cost is a true mean of the per-batch costs.
total_batch = (n_train + batch_size - 1) // batch_size
cost_values = []

for epoch in range(training_epochs):
    avg_cost = 0
    for i in range(0, n_train, batch_size):
        feed_dict = {X: X_train2[i:i+batch_size, :, :, :], Y: y_train[i:i+batch_size, :]}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        # Record the evaluated value `c`; appending `cost` would store the symbolic tensor.
        cost_values.append(c)
        avg_cost += c / total_batch
    print('Epoch:', '%04d' % (epoch+1), 'cost = ', '{:.9f}'.format(avg_cost))

# Per-batch cost trace as a float array (no uninitialised leading element).
cost_history = np.asarray(cost_values, dtype=float)

Epoch: 0001 cost =  3.538358817
Epoch: 0002 cost =  0.689462223
Epoch: 0003 cost =  0.716845682
Epoch: 0004 cost =  0.743745396
Epoch: 0005 cost =  0.821578845
Epoch: 0006 cost =  0.840159060
Epoch: 0007 cost =  0.673141914
Epoch: 0008 cost =  0.712075466
Epoch: 0009 cost =  0.643888027
Epoch: 0010 cost =  0.680887871
Epoch: 0011 cost =  0.784224287
Epoch: 0012 cost =  0.651777114
Epoch: 0013 cost =  0.642012939
Epoch: 0014 cost =  0.642083110
Epoch: 0015 cost =  0.642083272
Epoch: 0016 cost =  0.691910010
Epoch: 0017 cost =  0.642347266
Epoch: 0018 cost =  0.797738761
Epoch: 0019 cost =  0.642007503
Epoch: 0020 cost =  0.641934533


In [72]:
# Predicted class = index of the larger logit for each test sample.
# NOTE(review): the dropout layers of this experiment are built with training=True,
# so they are still active during this prediction run.
y_pred = sess.run(tf.argmax(logits, 1), feed_dict={X: X_test2})
# y_test is already a NumPy one-hot array: decode it with NumPy rather than
# adding another op to the graph and pushing it through the session.
y_true = np.argmax(y_test, axis=1)

In [73]:
# Settings noted by the author for this run: pooling stride 1x1, dropout, 10 filters, 2 layers.
# NOTE(review): this comment originally said learning_rate 0.01, but the hyper-parameter
# cell of this experiment assigns learning_rate = 0.005 — confirm which value was intended.
# Author's observation: cost 5.75->0.64... but low f1 score (overfitting???)
from sklearn.metrics import precision_recall_fscore_support
# average='micro' pools all samples before computing precision/recall/F-score.
p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
from sklearn.metrics import accuracy_score
print("Accuracy: ", accuracy_score(y_true, y_pred))

from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_true, y_pred))

F-Score: 0.64
Accuracy:  0.6403762662807525
             precision    recall  f1-score   support

          0       0.64      1.00      0.78       885
          1       0.00      0.00      0.00       497

avg / total       0.41      0.64      0.50      1382

[[885   0]
 [497   0]]


  'precision', 'predicted', average, warn_for)


# Experiment 3 - One convolutional layer with no dropout

#### Experiment 3-1
- learning rate 0.005
- pooling stride 1x1
- #filter 1
- best result among every other settings
- no dropout

In [74]:
tf.reset_default_graph()

In [75]:
n_mfcc = 16
n_frame = 16
n_classes = 2
n_channels = 1

kernel_size = 3
stride = 1
pad = "SAME"

learning_rate = 0.005
training_epochs = 20

In [76]:
# Feed the MFCC patches directly as (batch, n_mfcc, n_frame, n_channels).
# Previously a flat placeholder was created and X was immediately rebound to a
# reshape of it, so the real placeholder became unreachable and every feed_dict
# overrode an intermediate tensor instead.
X = tf.placeholder(tf.float32, shape=[None, n_mfcc, n_frame, n_channels])
Y = tf.placeholder(tf.float32, shape=[None, n_classes])

conv1 = tf.layers.conv2d(inputs=X, filters=1, kernel_size=[3, 3],
                         padding="SAME", activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2],
                                padding="SAME", strides=1)
# Dropout is deliberately left out in this experiment.
#dropout1 = tf.layers.dropout(inputs=pool1, rate=0.7, training=True)

# One filter, flattened to 16*16*1 = 256 features per sample.
flat = tf.reshape(pool1, [-1, 16*16*1])

In [78]:
dense2 = tf.layers.dense(inputs=flat, units=625, activation=tf.nn.relu)
#dropout2 = tf.layers.dropout(inputs=dense2, rate=0.5, training=True)
logits = tf.layers.dense(inputs=dense2, units=2)

In [79]:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [80]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [81]:
X_train2 = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test2 = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

In [82]:
batch_size = 1  # per-sample updates; use y_train.shape[0] for full-batch training
n_train = y_train.shape[0]
# Number of batches the inner loop really visits (the last one may be partial),
# so avg_cost is a true mean of the per-batch costs.
total_batch = (n_train + batch_size - 1) // batch_size
cost_values = []

for epoch in range(training_epochs):
    avg_cost = 0
    for i in range(0, n_train, batch_size):
        feed_dict = {X: X_train2[i:i+batch_size, :, :, :], Y: y_train[i:i+batch_size, :]}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        # Record the evaluated value `c`; appending `cost` would store the symbolic tensor.
        cost_values.append(c)
        avg_cost += c / total_batch
    print('Epoch:', '%04d' % (epoch+1), 'cost = ', '{:.9f}'.format(avg_cost))

# Per-batch cost trace as a float array (no uninitialised leading element).
cost_history = np.asarray(cost_values, dtype=float)

Epoch: 0001 cost =  2.803539171
Epoch: 0002 cost =  0.250112894
Epoch: 0003 cost =  0.229849453
Epoch: 0004 cost =  0.254783247
Epoch: 0005 cost =  0.109663360
Epoch: 0006 cost =  0.133262383
Epoch: 0007 cost =  0.141756691
Epoch: 0008 cost =  0.154786206
Epoch: 0009 cost =  0.175715524
Epoch: 0010 cost =  0.169192827
Epoch: 0011 cost =  0.204099491
Epoch: 0012 cost =  0.255600558
Epoch: 0013 cost =  0.185968976
Epoch: 0014 cost =  0.203131254
Epoch: 0015 cost =  0.183626400
Epoch: 0016 cost =  0.133235433
Epoch: 0017 cost =  0.120579422
Epoch: 0018 cost =  0.166078650
Epoch: 0019 cost =  0.121735026
Epoch: 0020 cost =  0.179982836


In [83]:
# Predicted class = index of the larger logit for each test sample.
y_pred = sess.run(tf.argmax(logits, 1), feed_dict={X: X_test2})
# y_test is already a NumPy one-hot array: decode it with NumPy rather than
# adding another op to the graph and pushing it through the session.
y_true = np.argmax(y_test, axis=1)

In [84]:
from sklearn.metrics import precision_recall_fscore_support
p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
from sklearn.metrics import accuracy_score
print("Accuracy: ", accuracy_score(y_true, y_pred))

from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_true, y_pred))

F-Score: 0.967
Accuracy:  0.9667149059334298
             precision    recall  f1-score   support

          0       0.99      0.96      0.97       885
          1       0.93      0.98      0.95       497

avg / total       0.97      0.97      0.97      1382

[[851  34]
 [ 12 485]]


# Experiment 4 - Two convolutional layers with no dropout

#### Experiment 4-1
- learning rate 0.01
- pooling stride 1x1
- #filter 5 for each conv layer
- detects everything as no uav
- no dropout

In [3]:
n_mfcc = 16
n_frame = 16
n_classes = 2
n_channels = 1

kernel_size = 3
stride = 1
pad = "SAME"

learning_rate = 0.01
training_epochs = 20

In [None]:
# Single 4-D input placeholder (batch, n_mfcc, n_frame, n_channels); the previous
# flat placeholder was immediately shadowed by its own reshape and could not be fed.
X = tf.placeholder(tf.float32, shape=[None, n_mfcc, n_frame, n_channels])
# One-hot class labels.
Y = tf.placeholder(tf.float32, shape=[None, n_classes])

In [None]:
conv1 = tf.layers.conv2d(inputs=X, filters=5, kernel_size=[3, 3],
                         padding="SAME", activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2],
                                padding="SAME", strides=1)

conv2 = tf.layers.conv2d(inputs=pool1, filters=5, kernel_size=[3, 3],
                         padding="SAME", activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2],
                                padding="SAME", strides=2)

In [None]:
print(pool2)

In [None]:
flat = tf.reshape(pool2, [-1,  8*8*5])
dense3 = tf.layers.dense(inputs=flat, units=625, activation=tf.nn.relu)
logits = tf.layers.dense(inputs=dense3, units=2)

In [None]:
print(logits)

In [None]:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [None]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [None]:
X_train2 = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test2 = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

Batch size updated to 100 (the experiments above used a batch size of 1).

In [None]:
batch_size = 100
n_train = y_train.shape[0]
# Number of batches the inner loop really visits. With a batch size that does not
# divide the training set evenly the last batch is partial; int(n / batch_size)
# would leave it out of the count and make avg_cost come out too high.
total_batch = (n_train + batch_size - 1) // batch_size
cost_values = []

for epoch in range(training_epochs):
    avg_cost = 0
    for i in range(0, n_train, batch_size):
        feed_dict = {X: X_train2[i:i+batch_size, :, :, :], Y: y_train[i:i+batch_size, :]}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        # Record the evaluated value `c`; appending `cost` would store the symbolic tensor.
        cost_values.append(c)
        avg_cost += c / total_batch
    print('Epoch:', '%04d' % (epoch+1), 'cost = ', '{:.9f}'.format(avg_cost))

# Per-batch cost trace as a float array (no uninitialised leading element).
cost_history = np.asarray(cost_values, dtype=float)

In [None]:
# Predicted class = index of the larger logit for each test sample.
y_pred = sess.run(tf.argmax(logits, 1), feed_dict={X: X_test2})
# y_test is already a NumPy one-hot array: decode it with NumPy rather than
# adding another op to the graph and pushing it through the session.
y_true = np.argmax(y_test, axis=1)

In [None]:
from sklearn.metrics import precision_recall_fscore_support
p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
from sklearn.metrics import accuracy_score
print("Accuracy: ", accuracy_score(y_true, y_pred))

from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_true, y_pred))