# cnn
- <b>`scipy.io.wavfile`</b>
- 0.5s chunk size, 0.25s window 
- mfcc
- background data
- sample rate 44100

## Tensorboard 활용법
- 커맨드 실행 (ex. notebooks 디렉토리에서 실행하면 notebooks/summaries 디렉토리 생성됨) `tensorboard --logdir=summaries`
- http://localhost:6006
- Session안에 Writer 정의
- 트레이닝 루프에 merge 정의 `tf.summary.merge_all()`
- Merge를 넣고 런해서 summary 얻음 `summary = sess.run([merge, cost, optimizer], feed_dict=feed_dict)`
- Add summary `train_writer.add_summary(summary,epoch)`

In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import IPython.display
import librosa.display
import numpy as np
import librosa
import tensorflow as tf
from scipy import signal
import scipy.io.wavfile

## Data load
- uav: only p2 unloaded
- none: some of the background sound and other sounds like gator/train
    - `cnn_test_1532549868.wav`, `cnn_test_1532559744.wav`: KSQ 에어콘 소리 +말하는 소리

In [2]:
import glob
uav_path = '../data/phantom/JUNE_01_PHANTOMS/wavs/p2-unloaded/*.wav'
none_path = '../data/phantom/JUNE_02_BACKGROUND/wavs/background/use/*.wav'
uav_files = glob.glob(uav_path)
none_files = glob.glob(none_path)
print('\n'.join(uav_files))
print('\n'.join(none_files))

../data/phantom/JUNE_01_PHANTOMS/wavs/p2-unloaded/purdue_P2_unloaded_up_down.wav
../data/phantom/JUNE_01_PHANTOMS/wavs/p2-unloaded/WSU_P2_DOWN_UNLOADED.wav
../data/phantom/JUNE_01_PHANTOMS/wavs/p2-unloaded/WSU_P2_UNLOADED_BACK_AND_FORTH.wav
../data/phantom/JUNE_01_PHANTOMS/wavs/p2-unloaded/WSU_P2_UNLOADED_UP_AND_DOWN.wav
../data/phantom/JUNE_02_BACKGROUND/wavs/background/use/background_06_02_01.wav
../data/phantom/JUNE_02_BACKGROUND/wavs/background/use/background_06_02_02.wav
../data/phantom/JUNE_02_BACKGROUND/wavs/background/use/background_06_02_03.wav
../data/phantom/JUNE_02_BACKGROUND/wavs/background/use/background_07_02_17.wav
../data/phantom/JUNE_02_BACKGROUND/wavs/background/use/cnn_test_1532549868.wav
../data/phantom/JUNE_02_BACKGROUND/wavs/background/use/cnn_test_1532559744.wav
../data/phantom/JUNE_02_BACKGROUND/wavs/background/use/eric_survey.wav
../data/phantom/JUNE_02_BACKGROUND/wavs/background/use/gator.wav
../data/phantom/JUNE_02_BACKGROUND/wavs/background/use/train_03.wav

In [3]:
CHUNK_SIZE = 8192
SR = 44100
N_MFCC = 16

In [4]:
def load2(files):
    _, raw = scipy.io.wavfile.read(files[0])
    for f in files[1:]:
        _, array = scipy.io.wavfile.read(f)
        raw = np.hstack((raw, array))
    print(raw.shape)
    return raw

In [5]:
uav_raw = load2(uav_files)
none_raw = load2(none_files)

(3538944,)
(8761977,)


In [6]:
np.save('../data/Xy/uav_p2_unloaded_raw', uav_raw)
np.save('../data/Xy/none_diverse_raw', none_raw)

In [2]:
uav_raw = np.load('../data/Xy/uav_p2_unloaded_raw.npy')
none_raw = np.load('../data/Xy/none_selected_raw.npy')

In [2]:
uav_raw = np.load('../data/Xy/uav_p2_unloaded_raw.npy')
none_raw = np.load('../data/Xy/none_diverse_raw.npy')

In [3]:
uav_raw = uav_raw.astype(float)
none_raw = none_raw.astype(float)

## Data preprocessing
- features: mfcc, delta mfcc, delta2 mfcc, log spectrogram
- 사용한피처: mfcc
- 0.5초 청크, 0.25초 윈도우 슬라이드 (50% 오버랩)

In [2]:
def combine(X_uav, X_none, y_uav, y_none):
    X = np.concatenate((X_uav, X_none), axis=0)
    y = np.concatenate((y_uav, y_none), axis=0)
    return X, y

def onehot(y, n_classes):
    y_encoded = np.zeros((y.shape[0], n_classes))
    y_encoded[np.arange(y.shape[0]), y] = 1
    print(y_encoded.shape)
    return y_encoded

def split_save(X, y, name, save=False):    
        from sklearn import model_selection
        X_train, X_test, y_train, y_test = model_selection.train_test_split(
            X, y, test_size=0.2, random_state=42)
        if save:
            np.save('../data/Xy/X_train_%s'%name, X_train)
            np.save('../data/Xy/X_test_%s'%name, X_test)
            np.save('../data/Xy/y_train_%s'%name, y_train)
            np.save('../data/Xy/y_test_%s'%name, y_test)
        return X_train, X_test, y_train, y_test

def load_Xy(name):
    X_train = np.load('../data/Xy/X_train_%s.npy'%name)
    X_test = np.load('../data/Xy/X_test_%s.npy'%name)
    y_train = np.load('../data/Xy/y_train_%s.npy'%name)
    y_test = np.load('../data/Xy/y_test_%s.npy'%name)
    return X_train, X_test, y_train, y_test

In [8]:
# chunk:한번에 처리하는 오디오 데이터 단위
# window: 슬라이드하는 윈도우 크기
# 50% overlap
# chunk size 44100/2 --> n_frame 43 (n_frame은 mfcc.shape[1]인덱스사이즈)
def mfcc5(raw, label, chunk_size=44100//2, window_size=44100//4, sr=44100, n_mfcc=16, n_frame=43):
    mfcc = np.empty((0, n_mfcc, n_frame))
    y = []
    print(raw.shape)
    i = 0
    while i+chunk_size <= len(raw):
        mfcc_slice = librosa.feature.mfcc(raw[i:i+chunk_size], sr=sr, n_mfcc=n_mfcc)
        if mfcc_slice.shape[1] < n_frame+1:
            print("small end:", mfcc_slice.shape)
            continue
        mfcc_slice = mfcc_slice[:,:-1]
        mfcc_slice = mfcc_slice.reshape((1, mfcc_slice.shape[0], mfcc_slice.shape[1]))
        mfcc = np.vstack((mfcc, mfcc_slice))
        y.append(label)
        i += window_size
    y = np.array(y)
    mfcc = mfcc.reshape(mfcc.shape[0], mfcc.shape[1], mfcc.shape[2], 1)
    y = onehot(y, 2)
    return mfcc, y

In [None]:
# delta_mfcc5
# order 1 아니면 2 
def delta_mfcc5(raw, label, order, chunk_size=44100//2, window_size=44100//4, sr=44100, n_mfcc=16, n_frame=43):
    delta2_mfcc = np.empty((0, n_mfcc, n_frame))
    y = []
    print(raw.shape)
    i = 0
    while i+chunk_size <= len(raw):
        S = librosa.feature.melspectrogram(raw[i:i+chunk_size], sr=sr, n_mels=128)
        log_S = librosa.amplitude_to_db(S, ref=np.max)
        mfcc_slice = librosa.feature.mfcc(S=log_S, sr=sr, n_mfcc=n_mfcc)
        #print(delta_mfcc.shape)
        delta2_mfcc_slice = librosa.feature.delta(mfcc_slice, order=order)
        #print(delta2_mfcc_slice.shape)

        #mfcc_slice = librosa.feature.mfcc(raw[i:i+chunk_size], sr=sr, n_mfcc=n_mfcc)
        if delta2_mfcc_slice.shape[1] < n_frame+1:
            print("small end:", delta2_mfcc_slice.shape)
            continue
        delta2_mfcc_slice = delta2_mfcc_slice[:,:-1]
        delta2_mfcc_slice = delta2_mfcc_slice.reshape((1, delta2_mfcc_slice.shape[0], delta2_mfcc_slice.shape[1]))
        #print(delta2_mfcc_slice.shape)
        #print(delta2_mfcc.shape)
        delta2_mfcc = np.vstack((delta2_mfcc, delta2_mfcc_slice))
        y.append(label)
        i += window_size
    y = np.array(y)
    delta2_mfcc = delta2_mfcc.reshape(delta2_mfcc.shape[0], delta2_mfcc.shape[1], delta2_mfcc.shape[2], 1)
    y = onehot(y, 2)
    return delta2_mfcc, y

In [None]:
# log_spectrogram 계산
# n_frame사이즈 조정
def log_spectrograms(raw, label, chunk_size=44100//2, window_size=44100//4, sr=44100, n_frame=49, n_freqs=442):
    ls = np.empty((0, n_frame, n_freqs))
    y = []
    print(raw.shape)
    i = 0

    while i+chunk_size <= len(raw):
        #(청크개수,freqs=442, time=47)
        ls_slice = log_specgram(raw[i:i+chunk_size], sample_rate=sr)[2]
        if ls_slice.shape[0] < n_frame:
            print("small end:", ls_slice.shape)
            continue
        ls_slice = ls_slice.reshape((1, ls_slice.shape[0], ls_slice.shape[1]))
        #print(ls_slice.shape)
        #print(ls.shape)
        ls = np.vstack((ls, ls_slice))
        y.append(label)
        i += window_size
    y = np.array(y)
    ls = ls.reshape(ls.shape[0], ls.shape[1], ls.shape[2], 1)
    y = onehot(y,2)
    return ls, y

def log_specgram(audio, sample_rate, window_size=20,
                 step_size=10, eps=1e-10):
    nperseg = int(round(window_size * sample_rate / 1e3))
    noverlap = int(round(step_size * sample_rate / 1e3))
    freqs, times, spec = signal.spectrogram(audio,
                                    fs=sample_rate,
                                    window='hann',
                                    nperseg=nperseg,
                                    noverlap=noverlap,
                                    detrend=False)
    return freqs, times, np.log(spec.T.astype(np.float32) + eps)

# log specgram one pass - 한번에...안썼음
def log_specgram_one_pass(raw, label, sr=44100):
    ls = log_specgram(raw, sample_rate=sr)[2]
    ls = ls.reshape(ls.shape[0], ls.shape[1], 1)

    y = [label]*ls.shape[0]
    y = np.ones((ls.shape[0],),dtype=np.int)*label
    print(y.shape)
    y = onehot(y, 2)
    return ls, y

In [None]:
ls_uav = log_spectrograms(uav_raw, 1)#freq 442...
ls_none = log_spectrograms(none_raw, 0)#freq 442...
print(ls_uav[0].shape, ls_uav[1].shape)
print(ls_none[0].shape, ls_none[1].shape)

X_ls, y_ls = combine(ls_uav[0], ls_none[0], ls_uav[1], ls_none[1])

In [None]:
ls_uav = log_specgram_one_pass(uav_raw, 1)#freq 442...
ls_none = log_specgram_one_pass(none_raw, 0)#freq 442...
print(ls_uav[0].shape, ls_uav[1].shape)
print(ls_none[0].shape, ls_none[1].shape)

X_ls, y_ls = combine(ls_uav[0], ls_none[0], ls_uav[1], ls_none[1])

In [None]:
delta2_uav = delta_mfcc5(uav_raw, 1, order=2)
print(delta2_uav[0].shape, delta2_uav[1].shape)
delta2_none = delta_mfcc5(none_raw, 0, order=2)
print(delta2_none[0].shape, delta2_none[1].shape)


X_delta2, y_delta2 = combine(delta2_uav[0], delta2_none[0], delta2_uav[1], delta2_none[1])

In [None]:
delta_uav = delta_mfcc5(uav_raw, 1, order=1)
print(delta_uav[0].shape, delta_uav[1].shape)
delta_none = delta_mfcc5(none_raw, 0, order=1)
print(delta_none[0].shape, delta_none[1].shape)


X_delta, y_delta = combine(delta_uav[0], delta_none[0], delta_uav[1], delta_none[1])

In [10]:
mfcc_uav = mfcc5(uav_raw, 1)
print(mfcc_uav[0].shape, mfcc_uav[1].shape)
mfcc_none = mfcc5(none_raw, 0)
print(mfcc_none[0].shape, mfcc_none[1].shape)


X_mfcc, y_mfcc = combine(mfcc_uav[0], mfcc_none[0], mfcc_uav[1], mfcc_none[1])

(3538944,)
(319, 2)
(319, 16, 43, 1) (319, 2)
(8761977,)
(793, 2)
(793, 16, 43, 1) (793, 2)


#### more background data (+ksquare ac sound)

In [11]:
X_train, X_test, y_train, y_test = split_save(X_mfcc, y_mfcc, 'mfcc5_diverse_0726', save=True)

In [5]:
X_train, X_test, y_train, y_test = load_Xy('mfcc5_diverse_0726')

#### gallagher's dataset selected background data

In [None]:
X_train, X_test, y_train, y_test = split_save(X_mfcc, y_mfcc, 'mfcc5_longwindow_0723', save=True)

In [3]:
X_train, X_test, y_train, y_test = load_Xy('mfcc5_longwindow_0723')

In [None]:
X_train, X_test, y_train, y_test = split_save(X_delta2, y_delta2, 'delta2', save=True)

In [None]:
X_train, X_test, y_train, y_test = load_Xy('delta2')

In [None]:
X_train, X_test, y_train, y_test = split_save(X_delta, y_delta, 'delta', save=True)

In [None]:
X_train, X_test, y_train, y_test = load_Xy('delta')

In [None]:
X_train, X_test, y_train, y_test = split_save(X_ls, y_ls, 'logspec', save=True)

In [None]:
X_train, X_test, y_train, y_test = load_Xy('logspec')

In [4]:
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(703, 16, 43, 1) (176, 16, 43, 1)
(703, 2) (176, 2)


# Tensorflow finally!

## Training

### Parameters
- learning rate, epoch 유의

In [5]:
n_mfcc = 16
n_frame = 43
n_classes = 2
n_channels = 1

learning_rate = 0.001
training_epochs = 500

### Experiment 1 - Two convolutional layer
- filter size: [13,4] n_mfcc가 16이고 frequency 대역이 중요해서 필터는 n_mfcc에 맞게 되도록 길게
- <b>Xavier initializer</b> for both conv layers

In [6]:
X = tf.placeholder(tf.float32, shape=[None,n_mfcc*n_frame*n_channels])
X = tf.reshape(X, [-1, n_mfcc, n_frame, n_channels])
Y = tf.placeholder(tf.float32, shape=[None,n_classes])

In [7]:
initializer = tf.contrib.layers.xavier_initializer()

In [8]:
# rectangular filter
conv1 = tf.layers.conv2d(inputs=X, filters=1, kernel_size=[13, 4],
                         kernel_initializer=initializer,
                         activation=tf.nn.relu)
print(conv1)


pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[3, 3],
                                padding='SAME', strides=1)
print(pool1)

conv2 = tf.layers.conv2d(inputs=pool1, filters=1, kernel_size=[2, 2],
                         kernel_initializer=initializer,
                         padding="SAME", activation=tf.nn.relu)
print(conv2)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2],
                                padding="SAME", strides=2)
print(pool2)



Tensor("conv2d/Relu:0", shape=(?, 4, 40, 1), dtype=float32)
Tensor("max_pooling2d/MaxPool:0", shape=(?, 4, 40, 1), dtype=float32)
Tensor("conv2d_1/Relu:0", shape=(?, 4, 40, 1), dtype=float32)
Tensor("max_pooling2d_1/MaxPool:0", shape=(?, 2, 20, 1), dtype=float32)


In [9]:
flat = tf.reshape(pool2, [-1, 2*20*1])
print(flat)

dense3 = tf.layers.dense(inputs=flat, units=200, activation=tf.nn.relu)
logits = tf.layers.dense(inputs=dense3, units=2)
print(dense3)



Tensor("Reshape_1:0", shape=(?, 40), dtype=float32)
Tensor("dense/Relu:0", shape=(?, 200), dtype=float32)


In [None]:
learning_rate = tf.placeholder(tf.float32, shape=[])

In [10]:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [11]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

#### Tensorboard

In [12]:
# 추적할 값들 저장하는 writer
train_writer = tf.summary.FileWriter( './summaries/5/train ', sess.graph)

In [13]:
# 모든 트레인가능한 변수 추적할거임
for var in tf.trainable_variables():
    tf.summary.histogram(var.name, var)
    
merged_summary = tf.summary.merge_all()

INFO:tensorflow:Summary name conv2d/kernel:0 is illegal; using conv2d/kernel_0 instead.
INFO:tensorflow:Summary name conv2d/bias:0 is illegal; using conv2d/bias_0 instead.
INFO:tensorflow:Summary name conv2d_1/kernel:0 is illegal; using conv2d_1/kernel_0 instead.
INFO:tensorflow:Summary name conv2d_1/bias:0 is illegal; using conv2d_1/bias_0 instead.
INFO:tensorflow:Summary name dense/kernel:0 is illegal; using dense/kernel_0 instead.
INFO:tensorflow:Summary name dense/bias:0 is illegal; using dense/bias_0 instead.
INFO:tensorflow:Summary name dense_1/kernel:0 is illegal; using dense_1/kernel_0 instead.
INFO:tensorflow:Summary name dense_1/bias:0 is illegal; using dense_1/bias_0 instead.


In [14]:
# cost, accuracy도 추적할거임
cost_ph = tf.placeholder(tf.float32,shape=None,name='cost_summary')
cost_summary = tf.summary.scalar('cost', cost_ph)

accuracy_ph = tf.placeholder(tf.float32,shape=None, name='accuracy_summary')
accuracy_summary = tf.summary.scalar('accuracy', accuracy_ph)

# 위에 두개 merge
performance_summary = tf.summary.merge([cost_summary, accuracy_summary])



#### model save/restore

In [15]:
model_path = '../models/cnn/cnn_basic_xavier2_2'
saver = tf.train.Saver()

In [17]:
saver.save(sess, model_path)

'../models/cnn/cnn_basic_xavier2_2'

In [15]:
saver.restore(sess, model_path)

INFO:tensorflow:Restoring parameters from ../models/cnn/cnn_basic_xavier2_2


#### train!

In [17]:
from sklearn.metrics import accuracy_score

batch_size = 100
counter = 0
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(y_train.shape[0] / batch_size)
    for i in range(0, y_train.shape[0], batch_size):
        sess.run(optimizer, feed_dict={learning_rate: 0.01})
        feed_dict={X:X_train[i:i+batch_size,:,:,:], Y:y_train[i:i+batch_size,:]}
        # run merged_summary
        summary1, c, _ = sess.run([merged_summary, cost, optimizer], feed_dict=feed_dict)
        # summary에 값 넣음
        train_writer.add_summary(summary1, counter)
        #cost_history = np.append(cost_history,cost)
        avg_cost += c/total_batch
        counter += 1
    val_pred = sess.run(tf.argmax(logits,1),feed_dict={X: X_test})
    val_true = sess.run(tf.argmax(y_test,1))
    accuracy = accuracy_score(val_pred, val_true)
    print('Epoch:', '%04d' % (epoch+1), 'cost = ', '{:.9f}'.format(avg_cost),
          'validation: {:.4f}'.format(accuracy))
    # run performance summary
    summary2 = sess.run(performance_summary, feed_dict={cost_ph:avg_cost, accuracy_ph:accuracy})
    # add to the writer
    train_writer.add_summary(summary2, counter)




Epoch: 0001 cost =  0.047007716 validation: 0.8864
Epoch: 0002 cost =  0.046119933 validation: 0.8864
Epoch: 0003 cost =  0.032190574 validation: 0.8807
Epoch: 0004 cost =  0.026117134 validation: 0.8807
Epoch: 0005 cost =  0.025748257 validation: 0.8807
Epoch: 0006 cost =  0.024927198 validation: 0.8807
Epoch: 0007 cost =  0.023928186 validation: 0.8807
Epoch: 0008 cost =  0.023551691 validation: 0.8807
Epoch: 0009 cost =  0.023457921 validation: 0.8807
Epoch: 0010 cost =  0.023246897 validation: 0.8750
Epoch: 0011 cost =  0.023038668 validation: 0.8750
Epoch: 0012 cost =  0.022928765 validation: 0.8750
Epoch: 0013 cost =  0.022835743 validation: 0.8750
Epoch: 0014 cost =  0.022726348 validation: 0.8750
Epoch: 0015 cost =  0.022628257 validation: 0.8750
Epoch: 0016 cost =  0.022578938 validation: 0.8750
Epoch: 0017 cost =  0.022500012 validation: 0.8750
Epoch: 0018 cost =  0.022426528 validation: 0.8750
Epoch: 0019 cost =  0.022400617 validation: 0.8750
Epoch: 0020 cost =  0.022321858

Epoch: 0162 cost =  0.021024115 validation: 0.8580
Epoch: 0163 cost =  0.020993811 validation: 0.8580
Epoch: 0164 cost =  0.021051446 validation: 0.8523
Epoch: 0165 cost =  0.021010228 validation: 0.8523
Epoch: 0166 cost =  0.021037781 validation: 0.8523
Epoch: 0167 cost =  0.021014770 validation: 0.8523
Epoch: 0168 cost =  0.020999990 validation: 0.8523
Epoch: 0169 cost =  0.021023346 validation: 0.8523
Epoch: 0170 cost =  0.021000689 validation: 0.8523
Epoch: 0171 cost =  0.021007468 validation: 0.8523
Epoch: 0172 cost =  0.020999211 validation: 0.8523
Epoch: 0173 cost =  0.021000488 validation: 0.8523
Epoch: 0174 cost =  0.020982775 validation: 0.8523
Epoch: 0175 cost =  0.020993829 validation: 0.8523
Epoch: 0176 cost =  0.020982423 validation: 0.8523
Epoch: 0177 cost =  0.020983398 validation: 0.8523
Epoch: 0178 cost =  0.020985730 validation: 0.8523
Epoch: 0179 cost =  0.020974687 validation: 0.8523
Epoch: 0180 cost =  0.020984138 validation: 0.8523
Epoch: 0181 cost =  0.020955173

Epoch: 0323 cost =  0.020855629 validation: 0.8523
Epoch: 0324 cost =  0.020875940 validation: 0.8523
Epoch: 0325 cost =  0.020846363 validation: 0.8580
Epoch: 0326 cost =  0.020868483 validation: 0.8523
Epoch: 0327 cost =  0.020900600 validation: 0.8523
Epoch: 0328 cost =  0.020887380 validation: 0.8523
Epoch: 0329 cost =  0.020875197 validation: 0.8580
Epoch: 0330 cost =  0.020886749 validation: 0.8523
Epoch: 0331 cost =  0.020852446 validation: 0.8523
Epoch: 0332 cost =  0.020858761 validation: 0.8523
Epoch: 0333 cost =  0.020850208 validation: 0.8636
Epoch: 0334 cost =  0.020861770 validation: 0.8523
Epoch: 0335 cost =  0.020892852 validation: 0.8523
Epoch: 0336 cost =  0.020880523 validation: 0.8523
Epoch: 0337 cost =  0.020872776 validation: 0.8636
Epoch: 0338 cost =  0.020875875 validation: 0.8523
Epoch: 0339 cost =  0.020838371 validation: 0.8523
Epoch: 0340 cost =  0.020862238 validation: 0.8523
Epoch: 0341 cost =  0.020836631 validation: 0.8636
Epoch: 0342 cost =  0.020846940

Epoch: 0484 cost =  0.020767187 validation: 0.8693
Epoch: 0485 cost =  0.020754980 validation: 0.8693
Epoch: 0486 cost =  0.020752345 validation: 0.8636
Epoch: 0487 cost =  0.020750845 validation: 0.8636
Epoch: 0488 cost =  0.020746413 validation: 0.8636
Epoch: 0489 cost =  0.020751640 validation: 0.8636
Epoch: 0490 cost =  0.020739541 validation: 0.8636
Epoch: 0491 cost =  0.020808878 validation: 0.8693
Epoch: 0492 cost =  0.020774503 validation: 0.8636
Epoch: 0493 cost =  0.020764625 validation: 0.8636
Epoch: 0494 cost =  0.020784502 validation: 0.8636
Epoch: 0495 cost =  0.020757615 validation: 0.8636
Epoch: 0496 cost =  0.020772843 validation: 0.8636
Epoch: 0497 cost =  0.020761348 validation: 0.8636
Epoch: 0498 cost =  0.020762557 validation: 0.8636
Epoch: 0499 cost =  0.020761311 validation: 0.8636
Epoch: 0500 cost =  0.020769470 validation: 0.8636


In [18]:
y_pred = sess.run(tf.argmax(logits,1),feed_dict={X: X_test})
y_true = sess.run(tf.argmax(y_test,1))

### Results
- mfcc
- long frame (0.5sec), long window (0.25)
- batch 100
- rectangular filter size
- epoch 2500
- learning rate 0.001

##### 어쩌다 잘된날...!

In [13]:
from sklearn.metrics import precision_recall_fscore_support
p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
from sklearn.metrics import accuracy_score
print("Accuracy: ", accuracy_score(y_true, y_pred))

from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_true, y_pred))

F-Score: 0.972
Accuracy:  0.9715909090909091
             precision    recall  f1-score   support

          0       0.98      0.97      0.98       113
          1       0.95      0.97      0.96        63

avg / total       0.97      0.97      0.97       176

[[110   3]
 [  2  61]]


##### Xavier init 사용했을때
- 197 에폭부터 accuracy 쭉 유지됨

In [21]:
from sklearn.metrics import precision_recall_fscore_support
p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
from sklearn.metrics import accuracy_score
print("Accuracy: ", accuracy_score(y_true, y_pred))

from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_true, y_pred))

F-Score: 0.966
Accuracy:  0.9659090909090909
             precision    recall  f1-score   support

          0       0.96      0.99      0.97       113
          1       0.98      0.92      0.95        63

avg / total       0.97      0.97      0.97       176

[[112   1]
 [  5  58]]


##### Xavier init 사용했을때 2
- 473 에폭부터 accuracy 쭉 유지됨

In [19]:
from sklearn.metrics import precision_recall_fscore_support
p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
from sklearn.metrics import accuracy_score
print("Accuracy: ", accuracy_score(y_true, y_pred))

from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_true, y_pred))

F-Score: 0.875
Accuracy:  0.875
             precision    recall  f1-score   support

          0       0.92      0.88      0.90       113
          1       0.81      0.86      0.83        63

avg / total       0.88      0.88      0.88       176

[[100  13]
 [  9  54]]


### more background data!
- KSQ 에어콘 소리 포함...!

In [22]:
from sklearn.metrics import precision_recall_fscore_support
p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
from sklearn.metrics import accuracy_score
print("Accuracy: ", accuracy_score(y_true, y_pred))

from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_true, y_pred))

F-Score: 0.839
Accuracy:  0.8385650224215246
             precision    recall  f1-score   support

          0       0.90      0.87      0.88       158
          1       0.71      0.75      0.73        65

avg / total       0.84      0.84      0.84       223

[[138  20]
 [ 16  49]]
