In [1]:
import numpy as np
import librosa
import tensorflow as tf
import glob

from myAudio import Audio


CHUNK_SIZE = 8192
SR = 44100

batch_size = 2048
num_classes = 20            #분류할 사전의 크기 

learning_rate = 0.01
sequence_length = 16 #9         

output_dim = 3
layers = 3 

model_path = '../models/RNN/my_RNN_model_test'

def mfcc(raw, chunk_size=8192, sr=44100, n_mfcc=num_classes):
    mfcc = np.empty((num_classes, 0))
    for i in range(0, len(raw), chunk_size):
        mfcc_slice = librosa.feature.mfcc(raw[i:i+chunk_size], sr=sr, n_mfcc=n_mfcc)
        mfcc = np.hstack((mfcc, mfcc_slice))
    return mfcc
def makeHot(dataX, dataY, sequence_length):
    X_hot_list= []
    Y_hot_tmp = dataY[sequence_length-1:]

    for i in range(0, dataX.shape[0] - sequence_length+1):
        _x = dataX[i:i + sequence_length]
        #if i<10:
            #print(_x, "->", Y_hot_tmp[i])
        X_hot_list.append(_x)

    X_hot = np.array(X_hot_list[:])
    Y_hot= Y_hot_tmp.reshape((len(Y_hot_tmp),n_unique_labels))
    return X_hot[:], Y_hot[:]
def extractFeature(raw):
    dataX = mfcc(raw).T
    X_hot = makeHot(dataX,sequence_length = sequence_length)
    return X_hot[:]
def load(files, sr=44100):
    [raw, sr] = librosa.load(files[0], sr=sr)
    for f in files[1:]:
        [array, sr] = librosa.load(f, sr=sr)
        raw = np.hstack((raw, array))
        
    
    print(raw.shape)
    return raw
###########################################   Model   #########################################

##################################################################################################


In [2]:
X = tf.placeholder(tf.float32, [None, sequence_length,num_classes], name="X")
Y = tf.placeholder(tf.float32, [None, output_dim], name="Y")

keep_prob = tf.placeholder(tf.float32)

cell = tf.contrib.rnn.BasicLSTMCell(num_units=num_classes, state_is_tuple=True)
#cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)
cell = tf.contrib.rnn.MultiRNNCell([cell]*2, state_is_tuple= True)

BatchSize = tf.placeholder(tf.int32, [], name='BatchSize')
initial_state = cell.zero_state(BatchSize, tf.float32)
outputs, _states = tf.nn.dynamic_rnn(cell, X,initial_state=initial_state,dtype=tf.float32)

dense1 = tf.layers.dense(inputs=outputs[:,-1], units=sequence_length*output_dim, activation=tf.nn.relu)

dense2 = tf.layers.dense(inputs=dense1, units=sequence_length*output_dim, activation=tf.nn.relu)
dropout2 = tf.nn.dropout(dense1, keep_prob=keep_prob)

dense3 = tf.layers.dense(inputs=dense2, units=output_dim, activation=tf.nn.relu)

Y_pred= tf.layers.dense(inputs=dense3, units=output_dim)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=Y_pred, labels=Y))
lr = tf.placeholder(tf.float32,shape=(), name='learning_rate')
train = tf.train.AdamOptimizer(lr).minimize(cost)


In [3]:
import glob
uav_path = '../data/44100/Test/Unloaded/*.*'
loaded_path = '../data/44100/Test/Loaded/*.*'
none_path = '../data/44100/Test/Background/*.*'

uav_files = glob.glob(uav_path)
loaded_files = glob.glob(loaded_path)
none_files = glob.glob(none_path)
print(len(uav_files),'개\t', uav_files[0])
print(len(uav_files),'개\t', loaded_files[0])
print(len(none_files), '개\t',none_files[0])
uav_raw = load(uav_files)
loaded_raw = load(loaded_files)
none_raw = load(none_files)

'''
mfcc_uav, y_uav = mfcc4(uav_raw, 1)
print(mfcc_uav.shape, y_uav.shape)
mfcc_none, y_none = mfcc4(none_raw, 0)
print(mfcc_none.shape, y_none.shape)
'''
mfcc_uav = mfcc(uav_raw)
mfcc_loaded = mfcc(loaded_raw)
mfcc_none = mfcc(none_raw)
print(len(mfcc_uav),len(mfcc_loaded),len(mfcc_none) )

# or should we give one label to one chunk?
y_uav = np.ones(mfcc_uav.shape[1], dtype=int)*2
y_loaded = np.ones(mfcc_loaded.shape[1], dtype=int)
y_none =np.zeros(mfcc_none.shape[1], dtype=int)

print(y_uav.shape, y_uav[0])
print(y_loaded.shape, y_loaded[0])
print(y_none.shape, y_none[0])

XX = np.hstack((mfcc_uav, mfcc_loaded))
XX = np.hstack((XX, mfcc_none)).T

y = np.hstack((y_uav, y_loaded))
y = np.hstack((y, y_none))
print(X.shape, y.shape)

n_labels = y.shape[0]
n_unique_labels = 3
y_encoded = np.zeros((n_labels, n_unique_labels))
y_encoded[np.arange(n_labels), y] = 1
print(y_encoded.shape)
#print(y_encoded[0], y_encoded[40000],y_encoded[100000])

dataX = XX
dataY = y_encoded
print(y_encoded)
print(dataX.shape, dataY.shape)

sequence_length = 16 #layer
X_hot, Y_hot = makeHot(dataX, dataY, sequence_length)
print(X_hot.shape, Y_hot.shape)


1 개	 ../data/44100/Test/Unloaded\test_1532716924-p2first.wav
1 개	 ../data/44100/Test/Loaded\WSU_P2_HIGH_LOADED_DOWN.wav
4 개	 ../data/44100/Test/Background\background_06_02_01.wav
(15759463,)
(884736,)
(3538944,)
20 20 20
(32704,) 2
(1836,) 1
(7344,) 0
(?, 16, 20) (41884,)
(41884, 3)
[[0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 ...
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]]
(41884, 20) (41884, 3)
(41869, 16, 20) (41869, 3)


In [4]:
class Data:
    def __init__(self,X,Y,BatchSize):
        self.X = X
        self.Y = Y
        self.len = len(Y)
        self.bs = BatchSize
        self.bs_i = 0
    def getBatchData(self):
        s = self.bs_i
        e = self.bs_i + self.bs
        if e> self.len:
            e -= self.len
            result =  np.vstack((self.X[s:],self.X[:e])), np.vstack((self.Y[s:],self.Y[:e]))
        else:
            result =  self.X[s:e], self.Y[s:e]
            
        self.bs_i = e
        return result
from sklearn import model_selection
X_train, X_test, y_train, y_test = model_selection.train_test_split(X_hot, Y_hot, test_size=0.0, random_state=42)

D = Data(X_train, y_train,2048)
print(D.X.shape)

(41869, 16, 20)


In [5]:
sess = tf.Session()
saver = tf.train.Saver()

saver.restore(sess, model_path)
y_pred = sess.run(tf.argmax(Y_pred,1), feed_dict={X: D.X, BatchSize:len(X_hot), keep_prob:1.0})
y_true = sess.run(tf.argmax(D.Y,1))
print(y_pred.shape, y_true.shape)

from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
print("Accuracy: ", accuracy_score(y_true, y_pred))

print(classification_report(y_true, y_pred))
print(confusion_matrix(y_true, y_pred))


INFO:tensorflow:Restoring parameters from ../models/RNN/my_RNN_model_test
(41869,) (41869,)
F-Score: 0.444
Accuracy:  0.44381284482552724
             precision    recall  f1-score   support

          0       0.71      1.00      0.83      7344
          1       0.04      0.43      0.07      1836
          2       0.91      0.32      0.47     32689

avg / total       0.84      0.44      0.52     41869

[[ 7338     0     6]
 [   27   784  1025]
 [ 2928 19301 10460]]
