In [1]:
import librosa
import wave
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
import os

##### Configuration and shared notebook state #####
DATA_PATH = "./data"  # root folder that load_wave_generator() scans

# Empty placeholders for the train/test split; load_wave_generator() declares
# these names global and overwrites them with the real arrays.
X_train, X_test = [], []
Y_train, Y_test = [], []

# Class counter; updated by load_wave_generator() while it walks DATA_PATH.
tf_classes = 0

def load_wave_generator(path):
    """Build a frame-level MFCC dataset from the class folders under ``path``.

    Every sub-directory of ``path`` is one class. Each ``.wav`` file inside it
    is loaded with librosa and converted to 20-coefficient MFCC frames
    (hop = 0.01 * sr samples, FFT window = 0.02 * sr samples). Every frame
    becomes one sample, labelled with the one-hot vector of its folder.

    Results are published through the module-level globals ``X_train``,
    ``X_test``, ``Y_train``, ``Y_test`` and ``tf_classes`` (number of class
    folders), and the split is also written to ``./data.npy``.

    Args:
        path: Directory containing one sub-directory per class.

    Raises:
        ValueError: If no ``.wav`` file is found under any class folder.
    """
    global X_train, X_test, Y_train, Y_test, tf_classes

    # Only real sub-directories are classes (stray files such as .DS_Store are
    # skipped). Sorting makes the folder -> class-index mapping deterministic
    # instead of depending on the order os.listdir() happens to return.
    folders = sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )
    n_classes = len(folders)

    feature_chunks = []  # one (frames, 20) MFCC array per wav file
    label_chunks = []    # matching (frames, n_classes) one-hot arrays

    for class_index, folder in enumerate(folders):
        folder_path = os.path.join(path, folder)
        files = os.listdir(folder_path)
        # Report the folder name and how many entries it contains.
        print("Foldername :",folder,"-",len(files),"파일")

        for wav in files:
            if not wav.endswith(".wav"):
                continue
            y, sr = librosa.load(os.path.join(folder_path, wav))
            # Transpose so that rows are frames and columns are coefficients.
            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20, hop_length=int(sr*0.01),n_fft=int(sr*0.02)).T

            one_hot = np.zeros((len(mfcc), n_classes), dtype=int)
            one_hot[:, class_index] = 1

            feature_chunks.append(mfcc)
            label_chunks.append(one_hot)

    if not feature_chunks:
        raise ValueError("no .wav files found under %r" % (path,))

    X_data = np.concatenate(feature_chunks)
    Y_label = np.concatenate(label_chunks)
    # Set (rather than increment) so that calling the function twice is safe.
    tf_classes = n_classes

    print("X_data :",np.shape(X_data))
    print("Y_label :",np.shape(Y_label))
    # NOTE(review): frames from the same recording can land in both train and
    # test, so the test accuracy is optimistic for unseen recordings.
    X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_label)

    # The four arrays have different shapes, so store them in an explicit
    # object array; np.save on the bare tuple fails on recent NumPy versions.
    xy = np.empty(4, dtype=object)
    xy[0], xy[1], xy[2], xy[3] = X_train, X_test, Y_train, Y_test
    np.save("./data.npy",xy)
    

# Build the dataset; this fills the X_*/Y_* globals and tf_classes.
load_wave_generator(DATA_PATH)

# Summarise the class count and the shape of each split.
print(tf_classes,"개의 클래스!!")
for split_name, split_data in (
    ("X_train :", X_train),
    ("Y_train :", Y_train),
    ("X_test :", X_test),
    ("Y_test :", Y_test),
):
    print(split_name, np.shape(split_data))



Foldername : 0 - 20 파일




Foldername : 1 - 20 파일
Foldername : 2 - 20 파일
Foldername : 3 - 20 파일
Foldername : 4 - 20 파일
X_data : (54264, 20)
Y_label : (54264, 5)
5 개의 클래스!!
X_train : (40698, 20)
Y_train : (40698, 5)
X_test : (13566, 20)
Y_test : (13566, 5)


In [2]:
##################  Speaker recognition: fully-connected NN version ##################
# data.npy holds a pickled object array of four differently-shaped arrays, so
# NumPy >= 1.16.3 needs allow_pickle=True to read it.
# SECURITY: unpickling executes arbitrary code; only load a data.npy that this
# notebook produced itself.
X_train, X_test, Y_train, Y_test = np.load("./data.npy", allow_pickle=True)
X_train = X_train.astype("float")
X_test = X_test.astype("float")

# Take the class count from the one-hot label width, so this cell also works
# on a fresh kernel that only has the cached data.npy.
tf_classes = Y_train.shape[1]

# Release the session left over from a previous run of this cell, if any.
_previous_sess = globals().get("sess")
if _previous_sess is not None:
    _previous_sess.close()

# v1
# Drop every previously created graph node; Jupyter keeps its context between
# cell runs, so without this the variables below would be defined twice.
tf.reset_default_graph()
tf.set_random_seed(777)
learning_rate = 0.001
training_epochs = 200
keep_prob = tf.placeholder(tf.float32)  # dropout keep probability (0.7 train, 1 eval)
sd = 1 / np.sqrt(20)  # stddev for the bias initialisers (1/sqrt(n) with n = 20)

# Each sample is one MFCC frame with 20 coefficients.
X = tf.placeholder(tf.float32, [None, 20])
Y = tf.placeholder(tf.float32, [None, tf_classes])


def _dense_layer(inputs, index, in_dim, out_dim, activation=None):
    """Create fully-connected layer ``index`` and return (weights, bias, output).

    Weights are named ``w<index>`` (Xavier initialised) and biases
    ``b<index>`` (normal, stddev ``sd``), matching the original variable
    names. When ``activation`` is given, the activation is followed by
    dropout driven by the ``keep_prob`` placeholder; otherwise the raw affine
    output is returned.
    """
    weights = tf.get_variable("w%d" % index, shape=[in_dim, out_dim],
                              initializer=tf.contrib.layers.xavier_initializer())
    bias = tf.Variable(tf.random_normal([out_dim], mean=0, stddev=sd), name="b%d" % index)
    outputs = tf.matmul(inputs, weights) + bias
    if activation is not None:
        outputs = tf.nn.dropout(activation(outputs), keep_prob=keep_prob)
    return weights, bias, outputs


# Hidden layers 1-7. Layer 2 uses tanh, all the others ReLU.
# NOTE(review): the original comment on layer 2 said ReLU while the code used
# tanh; the code is kept as-is. Confirm which one was intended.
W1, b1, L1 = _dense_layer(X, 1, 20, 256, tf.nn.relu)
W2, b2, L2 = _dense_layer(L1, 2, 256, 256, tf.nn.tanh)
W3, b3, L3 = _dense_layer(L2, 3, 256, 256, tf.nn.relu)
W4, b4, L4 = _dense_layer(L3, 4, 256, 128, tf.nn.relu)
W5, b5, L5 = _dense_layer(L4, 5, 128, 128, tf.nn.relu)
W6, b6, L6 = _dense_layer(L5, 6, 128, 128, tf.nn.relu)
W7, b7, L7 = _dense_layer(L6, 7, 128, 128, tf.nn.relu)

# Output layer: raw logits, no activation and no dropout.
W8, b8, hypothesis = _dense_layer(L7, 8, 128, tf_classes)

# Y is a placeholder, so the _v2 op (which may back-propagate into labels)
# trains exactly like the deprecated op it replaces.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    logits=hypothesis, labels=Y))

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluation ops are built once here so that later cells can reuse `accuracy`.
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
is_correct = correct_prediction  # original name, kept as an alias
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# NOTE: despite its name, `batch_size` is the NUMBER of equal chunks the
# training set is cut into (np.split needs an exact divisor), not the number
# of rows per batch. The later training cells rely on that meaning.
x_len = len(X_train)

if x_len % 2 == 0:
    batch_size = 2
elif x_len % 3 == 0:
    batch_size = 3
else:
    batch_size = 1

split_X = np.split(X_train,batch_size)
split_Y = np.split(Y_train,batch_size)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

for epoch in range(training_epochs):
    avg_cost = 0
    for i in range(batch_size):
        batch_xs = split_X[i]
        batch_ys = split_Y[i]
        feed_dict = {X:batch_xs, Y:batch_ys, keep_prob: 0.7}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / batch_size

    print('Epoch:', '%04d' % (epoch), 'cost =', '{:.9f}'.format(avg_cost))

# Dropout is disabled (keep_prob = 1) for evaluation.
print("Accuracy: ", sess.run(accuracy, feed_dict={X: X_test, Y:Y_test, keep_prob:1}))

print('Learning Finished!')

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Epoch: 0000 cost = 1.806586623
Epoch: 0001 cost = 1.687602818
Epoch: 0002 cost = 1.654644847
Epoch: 0003 cost = 1.636134982
Epoch: 0004 cost = 1.633174717
Epoch: 0005 cost = 1.624652088
Epoch: 0006 cost = 1.619630754
Epoch: 0007 cost = 1.618709266
Epoch: 0008 cost = 1.612985969
Epoch: 0009 cost = 1.608114600
Epoch: 0010 cost = 1

In [3]:
# Training only: re-run this cell to train for another `training_epochs`
# epochs, watching the cost to decide when to stop.
for epoch in range(training_epochs):
    avg_cost = 0
    for batch_xs, batch_ys in zip(split_X, split_Y):
        feed_dict = {X:batch_xs, Y:batch_ys, keep_prob: 0.7}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        # `batch_size` is the number of chunks in split_X / split_Y.
        avg_cost += c / batch_size
    print('Epoch:', '%04d' % (epoch), 'cost =', '{:.9f}'.format(avg_cost))

# Reuse the `accuracy` op created in the model cell; rebuilding it here would
# add duplicate nodes to the graph on every re-run of this cell.
print("Accuracy: ", sess.run(accuracy, feed_dict={X: X_test, Y:Y_test, keep_prob:1}))

print('Learning Finished!')


Epoch: 0000 cost = 0.320977494
Epoch: 0001 cost = 0.318241999
Epoch: 0002 cost = 0.323594540
Epoch: 0003 cost = 0.318467900
Epoch: 0004 cost = 0.315076470
Epoch: 0005 cost = 0.319164157
Epoch: 0006 cost = 0.314166978
Epoch: 0007 cost = 0.316449389
Epoch: 0008 cost = 0.312153250
Epoch: 0009 cost = 0.314399913
Epoch: 0010 cost = 0.309893131
Epoch: 0011 cost = 0.315916792
Epoch: 0012 cost = 0.308318987
Epoch: 0013 cost = 0.310395285
Epoch: 0014 cost = 0.311116755
Epoch: 0015 cost = 0.309693947
Epoch: 0016 cost = 0.305347294
Epoch: 0017 cost = 0.305240914
Epoch: 0018 cost = 0.302133217
Epoch: 0019 cost = 0.309332758
Epoch: 0020 cost = 0.307028860
Epoch: 0021 cost = 0.304714024
Epoch: 0022 cost = 0.304688767
Epoch: 0023 cost = 0.303506508
Epoch: 0024 cost = 0.306443810
Epoch: 0025 cost = 0.300378606
Epoch: 0026 cost = 0.300659299
Epoch: 0027 cost = 0.302543953
Epoch: 0028 cost = 0.301655203
Epoch: 0029 cost = 0.297092944
Epoch: 0030 cost = 0.299972668
Epoch: 0031 cost = 0.295625404
Epoch: 0

In [4]:
# Training only (second pass): same loop again, continuing from the current
# weights for another `training_epochs` epochs while watching the cost.
for epoch in range(training_epochs):
    avg_cost = 0
    for batch_xs, batch_ys in zip(split_X, split_Y):
        feed_dict = {X:batch_xs, Y:batch_ys, keep_prob: 0.7}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        # `batch_size` is the number of chunks in split_X / split_Y.
        avg_cost += c / batch_size
    print('Epoch:', '%04d' % (epoch), 'cost =', '{:.9f}'.format(avg_cost))

# Reuse the `accuracy` op created in the model cell; rebuilding it here would
# add duplicate nodes to the graph on every re-run of this cell.
print("Accuracy: ", sess.run(accuracy, feed_dict={X: X_test, Y:Y_test, keep_prob:1}))

print('Learning Finished!')

Epoch: 0000 cost = 0.225638971
Epoch: 0001 cost = 0.222127765
Epoch: 0002 cost = 0.226730965
Epoch: 0003 cost = 0.226748623
Epoch: 0004 cost = 0.223086998
Epoch: 0005 cost = 0.222402737
Epoch: 0006 cost = 0.223859757
Epoch: 0007 cost = 0.222078249
Epoch: 0008 cost = 0.221331477
Epoch: 0009 cost = 0.218945146
Epoch: 0010 cost = 0.222047679
Epoch: 0011 cost = 0.220785998
Epoch: 0012 cost = 0.221992075
Epoch: 0013 cost = 0.218559809
Epoch: 0014 cost = 0.222764201
Epoch: 0015 cost = 0.219104469
Epoch: 0016 cost = 0.223196648
Epoch: 0017 cost = 0.217967391
Epoch: 0018 cost = 0.223534048
Epoch: 0019 cost = 0.223019280
Epoch: 0020 cost = 0.219170161
Epoch: 0021 cost = 0.219099492
Epoch: 0022 cost = 0.220327310
Epoch: 0023 cost = 0.220915020
Epoch: 0024 cost = 0.215582646
Epoch: 0025 cost = 0.216632172
Epoch: 0026 cost = 0.217530042
Epoch: 0027 cost = 0.217409648
Epoch: 0028 cost = 0.215097569
Epoch: 0029 cost = 0.218768761
Epoch: 0030 cost = 0.218093969
Epoch: 0031 cost = 0.215680078
Epoch: 0

In [5]:
# Persist the variables of the current session as a checkpoint. The call is
# left as the cell's last expression so the returned path is displayed.
saver = tf.train.Saver()
saver.save(sess, save_path='./my_voice_model2')

'./my_voice_model2'

In [6]:
# Evaluate the trained network on a single recording of one known speaker.
#
# Class index -> speaker, as listed by the notebook author:
#   0 정유경
#   1 배철수
#   2 이윤진
#   3 강정윤
#   4 임찬주
TARGET_CLASS = 2  # expected class index for the recording loaded below

y, sr = librosa.load("./test_이윤진.wav")

# Same MFCC parameters as the training data; rows are frames.
# Deliberately NOT stored in X_test / Y_test: the training cells evaluate on
# those globals, and overwriting them would make a later re-run of those cells
# report accuracy on this one file instead of the real test split.
sample_X = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20, hop_length=int(sr*0.01),n_fft=int(sr*0.02)).T

# Every frame of the clip gets the same one-hot label. The width follows the
# model's class count rather than a hard-coded 5.
sample_Y = np.zeros((len(sample_X), tf_classes), dtype=int)
sample_Y[:, TARGET_CLASS] = 1

print(np.shape(sample_X))
print(np.shape(sample_Y))

# Take the arg-max in NumPy so that re-running this cell adds no graph nodes.
frame_logits = sess.run(hypothesis, feed_dict={X: sample_X, keep_prob:1})
predicted_classes = np.argmax(frame_logits, axis=1)

print("predict")
# Number of frames assigned to each class.
print(pd.Series(predicted_classes).value_counts())
# `accuracy` is the op created in the model cell.
print("Accuracy: ", sess.run(accuracy, feed_dict={X: sample_X, Y:sample_Y, keep_prob:1}))




(533, 20)
(533, 5)
predict
2    521
1      7
3      3
4      2
dtype: int64
Accuracy:  0.97748595
