# SnapPoint 신경망

In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras import utils

from PIL import Image
import os
import PIL.Image as pilimg
import numpy as np
import pandas as pd
from keras.preprocessing import image

import matplotlib.pyplot as plt

Using TensorFlow backend.


In [25]:
data_x = np.load('npy_data/data.npy')
data_y = np.load('npy_data/data_label.npy')

In [26]:
mfcc_x = np.load('npy_data/mfcc_data_x.npy')
mfcc_x.shape

(19401, 18, 40)

In [27]:
np.random.seed(1337)
np.random.shuffle(mfcc_x)
np.random.seed(1337)
np.random.shuffle(data_y)

In [28]:
data_x.shape

(19401, 8820)

In [30]:
data_y

array([0., 0., 1., ..., 1., 0., 0.])

In [31]:
n=int(len(mfcc_x)*0.85)
n

16490

In [32]:
mfcc_x[n:].shape

(2911, 18, 40)

In [33]:
train_x = mfcc_x[:n]
train_y = data_y[:n]
test_x = mfcc_x[n:]
test_y = data_y[n:]

In [34]:
from keras.utils import to_categorical
# Step 1. Data Preprocessing
train_x=np.reshape(train_x,(-1,train_x.shape[1],train_x.shape[2],1))
test_x=np.reshape(test_x,(-1,test_x.shape[1],test_x.shape[2],1))
train_y = utils.to_categorical(train_y,10)
test_y = utils.to_categorical(test_y,10)

In [35]:
train_x.shape
train_y.shape

(16490, 10)

In [36]:
pool_size = (2, 2)  # size of pooling area for max pooling
kernel_size = (3, 3)  # convolution kernel size
np.random.seed(1337)

model = tf.keras.models.Sequential()
model.add(Conv2D(32, kernel_size, padding='same', input_shape=(18,40,1)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Dropout(0.5))

model.add(Conv2D(64, kernel_size, padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Dropout(0.5))

model.add(Conv2D(128, kernel_size, padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Dropout(0.5))

model.add(Flatten())
model.add(Dense(600))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(10))
model.add(Activation('softmax'))

model.compile (optimizer= 'adam', 
                loss='categorical_crossentropy', metrics = ['accuracy'])

In [37]:
model.fit(train_x, train_y, batch_size=128, epochs=12, verbose=2, validation_split=0.2)

Train on 13192 samples, validate on 3298 samples
Epoch 1/12
13192/13192 - 1s - loss: 2.9380 - accuracy: 0.7397 - val_loss: 0.5587 - val_accuracy: 0.7835
Epoch 2/12
13192/13192 - 1s - loss: 0.2907 - accuracy: 0.8900 - val_loss: 0.4442 - val_accuracy: 0.8599
Epoch 3/12
13192/13192 - 1s - loss: 0.2212 - accuracy: 0.9183 - val_loss: 0.2958 - val_accuracy: 0.9215
Epoch 4/12
13192/13192 - 1s - loss: 0.1736 - accuracy: 0.9336 - val_loss: 0.1951 - val_accuracy: 0.9645
Epoch 5/12
13192/13192 - 1s - loss: 0.1514 - accuracy: 0.9438 - val_loss: 0.2130 - val_accuracy: 0.9588
Epoch 6/12
13192/13192 - 1s - loss: 0.1345 - accuracy: 0.9500 - val_loss: 0.1604 - val_accuracy: 0.9751
Epoch 7/12
13192/13192 - 1s - loss: 0.1247 - accuracy: 0.9557 - val_loss: 0.1032 - val_accuracy: 0.9857
Epoch 8/12
13192/13192 - 1s - loss: 0.1072 - accuracy: 0.9606 - val_loss: 0.1273 - val_accuracy: 0.9845
Epoch 9/12
13192/13192 - 1s - loss: 0.1057 - accuracy: 0.9613 - val_loss: 0.0876 - val_accuracy: 0.9900
Epoch 10/12
131

<tensorflow.python.keras.callbacks.History at 0x190ae143b88>

In [38]:
test_loss, test_acc = model.evaluate(test_x, test_y, verbose=2)
print('test_loss = ', test_loss, 'test_acc = ', test_acc)

predictions = model.predict(test_x)

2911/1 - 0s - loss: 0.0764 - accuracy: 0.9924
test_loss =  0.0628766495617998 test_acc =  0.9924425


In [257]:
model.save('SnapPoint.h5')

In [41]:
import glob
import librosa
def cutting_sound(filepath):
    batch_size = 0
    data_height = 18
    data_width = 40
    train = np.zeros((batch_size, data_height, data_width))

    file_list = glob.glob(filepath + '*.wav')
    for file_name in file_list:
        y, sr = librosa.load(file_name, sr=40000)
        y_mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40, hop_length=500)
        _, beat_frames = librosa.beat.beat_track(y=y, sr=sr, hop_length=500)
        for frame_number in beat_frames:
            listSound = np.expand_dims(y_mfcc.T[frame_number-1:frame_number+4],axis=0)
            train = np.concatenate((train,listSound),axis=0)
    return train

In [290]:
sample=cutting_sound("./test/")
sample=np.reshape(sample,(-1,sample.shape[1],sample.shape[2],1))
predictions = model.predict(sample)
sample_y = utils.to_categorical(np.ones(8),10)
sample_y

array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)

In [291]:
cnt=0
for i in predictions:
    if np.argmax(i) == 1:
        print("great ", cnt+1)
    else:
        print("failed")
    cnt+=1

great  1
great  2
great  3
great  4
great  5
great  6
great  7
great  8


In [288]:
predictions

array([[1.67478658e-02, 9.82653916e-01, 8.57273044e-05, 5.33437851e-05,
        8.33375889e-05, 3.09740535e-05, 5.13193008e-05, 2.78371572e-05,
        6.94553773e-06, 2.58727203e-04],
       [3.99518646e-02, 9.53156352e-01, 6.98683027e-04, 9.01997846e-04,
        1.22854800e-03, 9.12750431e-04, 5.59262873e-04, 7.77408713e-04,
        1.52117093e-04, 1.66110659e-03],
       [1.26863718e-01, 8.54404211e-01, 1.46091043e-03, 3.98336444e-03,
        3.80505878e-03, 8.16942484e-04, 2.38740584e-03, 1.78354804e-03,
        3.59150261e-04, 4.13575396e-03],
       [7.32165277e-02, 9.18803096e-01, 9.09890921e-04, 1.57846848e-03,
        1.18899497e-03, 6.57666998e-04, 1.00478681e-03, 8.83436122e-04,
        2.54236511e-04, 1.50281773e-03],
       [1.50058803e-03, 9.98321116e-01, 3.01190230e-05, 3.73838084e-05,
        3.61244638e-05, 5.80593951e-06, 2.88122501e-05, 1.09512284e-05,
        1.53256929e-06, 2.75999701e-05],
       [2.63508886e-01, 5.65124154e-01, 1.76612996e-02, 2.87296921e-02,
   