In [1]:
# Common imports
import numpy as np
import pandas as pd
import os, sys, glob  
from os.path import isfile, isdir
import librosa
import librosa.display
import librosa.core as core
import scipy.io.wavfile as wavfile

# To plot pretty figures
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
%matplotlib inline


import warnings
warnings.filterwarnings("ignore")   # Silence every warning (this also hides deprecation notices)

os_sep = os.sep 

# `home` is the base directory that the dataset path is built from.
# Note the asymmetry: on Windows it is the shared C:\Users folder (the user
# name is appended by the caller), elsewhere it is the current user's home.
if sys.platform == 'win32':   # if windows 
    home = os.path.join('C:', os.sep, 'Users')      
else:
    # Linux, macOS and any other platform. Previously only "linux"/"linux2"
    # were handled, which left `home` undefined everywhere else and made the
    # next cell fail with a NameError.
    home = os.path.expanduser("~")   # home = os.getenv("HOME")


In [2]:
# Folder that holds the recordings (searched recursively for *.wav files).
# NOTE(review): the user-specific directory is hard-coded; consider moving it
# to a configurable DATA_DIR.
wav_repo = os.path.join(home, '채진영', 'Desktop', 'counterUAV', 'after_data')

# glob returns paths in arbitrary, filesystem-dependent order. Sort them so
# that any later positional use of this list (such as splitting the recordings
# by index) gives the same result on every machine and every run.
wav_data = sorted(glob.glob(os.path.join(wav_repo,'**','*.wav'), recursive=True))

In [3]:
# Sanity check: number of recordings found, followed by the paths themselves.
n_found = len(wav_data)
print(n_found)
wav_data

24


['C:\\Users\\채진영\\Desktop\\counterUAV\\after_data\\20181009_1_100023_person.wav',
 'C:\\Users\\채진영\\Desktop\\counterUAV\\after_data\\20181009_1_101055_person.wav',
 'C:\\Users\\채진영\\Desktop\\counterUAV\\after_data\\20181009_1_101420_person.wav',
 'C:\\Users\\채진영\\Desktop\\counterUAV\\after_data\\20181009_1_101635_person.wav',
 'C:\\Users\\채진영\\Desktop\\counterUAV\\after_data\\20181009_1_103752_car.wav',
 'C:\\Users\\채진영\\Desktop\\counterUAV\\after_data\\20181009_1_104216_car.wav',
 'C:\\Users\\채진영\\Desktop\\counterUAV\\after_data\\20181009_1_104446_car.wav',
 'C:\\Users\\채진영\\Desktop\\counterUAV\\after_data\\20181009_2_102248_person.wav',
 'C:\\Users\\채진영\\Desktop\\counterUAV\\after_data\\20181009_2_102508_person.wav',
 'C:\\Users\\채진영\\Desktop\\counterUAV\\after_data\\20181009_2_102849_person.wav',
 'C:\\Users\\채진영\\Desktop\\counterUAV\\after_data\\20181009_3_110458_person.wav',
 'C:\\Users\\채진영\\Desktop\\counterUAV\\after_data\\20181009_3_110901_drone.wav',
 'C:\\Users\\채진영\\Desktop\

In [4]:
def windows(data, window_size):
    """Yield (start, end) index pairs of half-overlapping windows over `data`.

    Windows advance by ``window_size / 2`` and keep being produced while the
    start index is inside `data`, so the last windows may extend past
    ``len(data)``; callers are expected to discard slices that come out short.

    For an odd `window_size` the step is fractional and the yielded indices
    are truncated to integers.

    Args:
        data: Any sized sequence; only ``len(data)`` is used.
        window_size: Window length in samples. Must be positive.

    Yields:
        Tuples ``(start, end)`` of ints with ``end - start == window_size``
        for even window sizes.

    Raises:
        ValueError: If `window_size` is not positive. The check runs when the
            generator is first advanced. Without it a zero or negative size
            would never move `start` past the end and loop forever.
    """
    if window_size <= 0:
        raise ValueError("window_size must be positive, got %r" % (window_size,))

    step = window_size / 2   # 50 % overlap between consecutive windows
    n_samples = len(data)
    start = 0
    while start < n_samples:
        yield int(start), int(start + window_size)
        start += step

def extract_features(data ,bands = 60, frames = 41):
    """Cut one recording into overlapping windows of mel-spectrogram features.

    The audio is loaded with librosa, split into half-overlapping windows of
    ``512 * (frames - 1)`` samples, and each full-length window is turned into
    a mel spectrogram with `bands` mel bins. Windows shorter than the window
    size (at the end of the clip) are dropped.

    The factor 512 presumably mirrors librosa's default hop length, so that
    each window yields exactly `frames` spectrogram columns; if it does not,
    the final reshape raises.

    Args:
        data: Path of the audio file. The file name must look like
            ``<field0>_<number>_...``; the label is ``<number> - 1``.
        bands: Number of mel bins per frame.
        frames: Number of spectrogram frames expected per window.

    Returns:
        Tuple ``(features, labels)`` where `features` has shape
        ``(n_windows, frames, bands)`` and `labels` is an integer array of
        length ``n_windows`` holding the same label for every window.

    Raises:
        ValueError: If the file name field cannot be parsed as an integer, or
            if the spectrograms do not have `frames` columns.
    """
    window_size = 512 * (frames - 1)
    sound_clip, s = librosa.load(data)

    # Take the file name regardless of whether the path uses '\\' or '/'.
    # Splitting on '\\' alone only isolates the name for Windows-style paths.
    file_name = data.replace('\\', '/').rsplit('/', 1)[-1]
    # NOTE(review): the label comes from the second underscore-separated field
    # (1, 2, 3 in the listed files), not from the trailing class name
    # (person / car / drone). Files of different classes share the same number,
    # so confirm that this field really is the intended target.
    label = int(file_name.split('_')[1]) - 1

    spectrograms = []
    for start, end in windows(sound_clip, window_size):
        signal = sound_clip[start:end]
        if len(signal) != window_size:
            continue   # trailing partial window
        mel = librosa.feature.melspectrogram(y=signal, sr=s, n_mels=bands)
        # (bands, frames) -> (frames, bands): one row per time step.
        spectrograms.append(mel.T)

    # The reshape also gives the right rank when no window was extracted.
    features = np.asarray(spectrograms).reshape(len(spectrograms), frames, bands)
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    labels = np.full(len(spectrograms), label, dtype=int)
    return features, labels

def one_hot_encode(labels, n_unique_labels=3):
    """Convert integer class labels into a one-hot matrix.

    Args:
        labels: Sequence or 1-D integer array of class indices, each in
            ``[0, n_unique_labels)``.
        n_unique_labels: Number of classes, i.e. the width of the result.
            Defaults to 3, the value that used to be hard-coded.

    Returns:
        Float array of shape ``(len(labels), n_unique_labels)`` with a single
        1 per row at the column given by the label.

    Raises:
        IndexError: If a label is ``>= n_unique_labels``.
    """
    n_labels = len(labels)
    encoded = np.zeros((n_labels, n_unique_labels))
    # Fancy indexing sets exactly one entry per row: (row i, column labels[i]).
    encoded[np.arange(n_labels), labels] = 1
    return encoded

In [5]:
# Extract features for every recording, keeping one entry per file so the
# recordings can still be told apart afterwards.
n_wav = len(wav_data)
stacked_features, stacked_labels = [], []

for wav_path in wav_data:
    clip_features, clip_labels = extract_features(wav_path)
    stacked_features.append(clip_features)
    # Labels are stored one-hot encoded.
    stacked_labels.append(one_hot_encode(clip_labels))


In [6]:
# Pack the per-recording feature arrays into a 1-D object array (one element
# per file). The recordings yield different numbers of windows, so the result
# is ragged; calling np.array() on such a list raises ValueError on
# NumPy >= 1.24, hence the explicit object container filled element by element.
clip_feature_arrays = list(stacked_features)
stacked_features = np.empty(len(clip_feature_arrays), dtype=object)
for clip_idx, clip_array in enumerate(clip_feature_arrays):
    stacked_features[clip_idx] = clip_array
stacked_features.shape

(24,)

In [7]:
# Pack the per-recording one-hot label arrays into a 1-D object array (one
# element per file). Their lengths differ between recordings, and np.array()
# on such a ragged list raises ValueError on NumPy >= 1.24, so the object
# container is created explicitly and filled element by element.
clip_label_arrays = list(stacked_labels)
stacked_labels = np.empty(len(clip_label_arrays), dtype=object)
for clip_idx, clip_array in enumerate(clip_label_arrays):
    stacked_labels[clip_idx] = clip_array
stacked_labels.shape

(24,)

In [8]:
# Short aliases: X holds the per-recording features, y the matching labels.
X, y = stacked_features, stacked_labels

In [9]:
# Split by recording: the first 20 files are used for training, the remaining
# ones for validation. Within each part the per-file windows are joined along
# the first axis.
train_part = slice(None, 20)
val_part = slice(20, None)

X_train, y_train = (np.concatenate(arrays[train_part], axis=0) for arrays in (X, y))
X_val, y_val = (np.concatenate(arrays[val_part], axis=0) for arrays in (X, y))


In [10]:
# Training windows after concatenation; extract_features shapes each window as (frames, bands).
X_train.shape

(1641, 41, 60)

In [11]:
# Validation windows after concatenation; same per-window layout as X_train.
X_val.shape

(797, 41, 60)

Keras model

In [12]:
from tensorflow.keras.layers import TimeDistributed, Bidirectional
from tensorflow.keras.layers import LSTM, Dense, Dropout, Activation, Flatten, Reshape, LeakyReLU, ReLU
from tensorflow.keras import layers, models
from tensorflow.keras import regularizers
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras import Sequential

In [13]:
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell

# Start from a clean Keras/TensorFlow state so that re-running the model cells
# does not pile layers onto an old graph. clear_session() is available in both
# TF 1.x and TF 2.x, whereas tf.reset_default_graph() no longer exists in 2.x.
tf.keras.backend.clear_session()

# Training parameters.
# NOTE(review): learning_rate and display_step are not referenced by the
# visible cells (the model is compiled with optimizer='adam' given as a
# string) - confirm whether they are meant to be used.
learning_rate = 0.01
batch_size = 50
display_step = 200

# Network Parameters
n_input = 60     # features per time step (mel bands)
n_steps = 41     # time steps per window (spectrogram frames)
n_hidden = 300   # units in each LSTM layer
n_classes = 3    # width of the softmax output

In [14]:
#build model


# Eight stacked LSTM layers. Every layer but the last returns the full
# sequence so that the next LSTM receives one vector per time step; the last
# one returns only its final state, which feeds the classifier head.
input_lstm = LSTM(units=n_hidden, activation='tanh',
                  input_shape=(n_steps, n_input), return_sequences=True)
hidden_lstms = [
    LSTM(units=n_hidden, activation='tanh', return_sequences=True)
    for _ in range(6)
]
output_lstm = LSTM(units=n_hidden, activation='tanh', return_sequences=False)

model = Sequential(
    [input_lstm]
    + hidden_lstms
    + [
        output_lstm,
        Dropout(0.5),
        # One softmax probability per class.
        Dense(n_classes, activation='softmax'),
    ]
)

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 41, 300)           433200    
_________________________________________________________________
lstm_1 (LSTM)                (None, 41, 300)           721200    
_________________________________________________________________
lstm_2 (LSTM)                (None, 41, 300)           721200    
_________________________________________________________________
lstm_3 (LSTM)                (None, 41, 300)           721200    
_________________________________________________________________
lstm_4 (LSTM)                (None, 41, 300)           721200    
_________________________________________________________________
lstm_5 (LSTM)                (None, 41, 300)           721200    
_________________________________________________________________
lstm_6 (LSTM)                (None, 41, 300)           721200    
__________

In [16]:
# Training is currently disabled: everything in this cell is commented out.
# Before re-enabling, note that
#   * the name `keras` is not imported in the visible cells (only
#     `tensorflow.keras` submodules and `tf`), so `keras.callbacks...` would
#     raise a NameError as written;
#   * `callbacks_list` is built but never passed to `model.fit`.
#callbacks_list = [keras.callbacks.EarlyStopping(monitor='acc', patience=50),
#                  keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
#                                                       factor=0.1, patience=50)]

#model.fit(X_train, y_train, 
#          batch_size=batch_size,validation_data=(X_val, y_val), 
#          epochs = 100, verbose=1)


Train on 1641 samples, validate on 797 samples
Epoch 1/100

KeyboardInterrupt: 