In [1]:
import os
import time
import random
import numpy as np
import pandas as pd
import librosa
import tensorflow as tf


  from ._conv import register_converters as _register_converters


In [2]:
from tensorflow.python.platform import gfile
from sklearn.utils import shuffle

In [3]:
batch_size = 100
iterations = 2000
epoch = 100
eval_every = 5
test_iteration = 15
height = 20
width = 44
num_labels = 0
train_path = "G:/datasets/train"
test_path = "G:/datasets/test"
learning_rate = 0.001
logdir = '/log'
test_logdir = '/test_log'
label_to_index_map = {}

In [4]:
def init(path) :
    labels = os.listdir(path)
    index = 0
    for label in labels :
        label_to_index_map[label] = index
        index+=1
    global num_labels
    num_labels = len(label_to_index_map)

In [5]:
def encoding_labels(label) :
    encoding = [0] * len(label_to_index_map)
    encoding[label_to_index_map[label]] = 1
    return encoding

In [6]:
def get_mfccs(wave_path, pad_width = width) :
    wave, sr = librosa.load(wave_path, mono=True)
    mfccs = librosa.feature.mfcc(y=wave, sr=sr, n_mfcc=height)
    mfccs = np.pad(mfccs, ((0,0), (0, pad_width - len(mfccs[0]))), mode = 'constant')
    return mfccs
                   

In [7]:
def get_data(path) :
    y = []
    X = []
    path = os.path.join(path, '*', '*.wav')
    waves = gfile.Glob(path)
    for wave_path in waves :
        _, label = os.path.split(os.path.dirname(wave_path))
        X.append(get_mfccs(wave_path))
        y.append(encoding_labels(label))
        if len(y) % 400 == 0 :
            print(label)
    return X, y
        
    

In [9]:
def my_model(input) :
    with tf.name_scope("Conv1") :
        input4d = tf.reshape(input, [-1, height, width, 1])
        w1 = tf.Variable(tf.truncated_normal([9, 8, 1, 44], stddev=0.01), name="W")
        b1 = tf.Variable(tf.zeros(44), name="B")
        conv1 = tf.nn.conv2d(input4d, w1, strides=[1,1,1,1], padding="SAME")
        act1 = tf.nn.relu(conv1 + b1)
        #drop1 = tf.nn.dropout(act1, dropout)
        max_pool1 = tf.nn.max_pool(act1, ksize=[1,2,2,1], strides=[1,2,2,1], padding="SAME")
    
    with tf.name_scope("Conv2") :
        w2 = tf.Variable(tf.truncated_normal([3, 4, 44, 44], stddev=0.01), name="W")
        b2 = tf.Variable(tf.zeros(44), name="B")
        conv2 = tf.nn.conv2d(max_pool1, w2, strides=[1,1,1,1], padding="SAME")
        act2 = tf.nn.relu(conv2+b2)
        #drop2 = tf.nn.dropout(act2, dropout)
        max_pool2 = tf.nn.max_pool(act2, ksize=[1,2,2,1], strides=[1,2,2,1], padding="SAME")
        
    shape_of_conv2 = max_pool2.get_shape()
    count = int(shape_of_conv2[1] * shape_of_conv2[2] * shape_of_conv2[3])
    reshaped_output = tf.reshape(max_pool2, [-1, count])
    
    with tf.name_scope("FC1") :
        inputFC1_count = count
        outputFC1_count = inputFC1_count // 5
        w3 = tf.Variable(tf.truncated_normal([inputFC1_count, outputFC1_count ], stddev = 0.01))
        b3 = tf.Variable(tf.zeros(outputFC1_count))
        fc1 = tf.add(tf.matmul(reshaped_output, w3), b3)
        fc1 = tf.nn.relu(fc1)
   
    with tf.name_scope("FC2") :
        inputFC2_count = outputFC1_count
        outputFC2_count = inputFC2_count // 4
        w4 = tf.Variable(tf.truncated_normal([inputFC2_count, outputFC2_count], stddev = 0.01))
        b4 = tf.Variable(tf.zeros(outputFC2_count))
        fc2 = tf.add(tf.matmul(fc1, w4), b4)
        fc2 = tf.nn.relu(fc2)
     
        
    with tf.name_scope("FC3") :
        inputFC3_count = outputFC2_count
        outputFC3_count = num_labels
        w5 = tf.Variable(tf.truncated_normal([inputFC3_count, outputFC3_count], stddev = 0.01))
        b5 = tf.Variable(tf.zeros(outputFC3_count))
        fc3 = tf.add(tf.matmul(fc2, w5), b5)
    
    return fc3

In [17]:
def main() :
    tf.reset_default_graph()
    
    X = tf.placeholder(tf.float32, shape=(None, height, width), name="input")
    y = tf.placeholder(tf.int32, shape=(None, num_labels), name="labels")
    
    #dropout = tf.placeholder(tf.float32, name="dropout")
    
    logits = my_model(X)
    
    with tf.name_scope("loss") :
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
        tf.summary.scalar("loss", loss)
        
    with tf.name_scope("train") :
        optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
        train_step = optimizer.minimize(loss)
        
    with tf.name_scope("accuracy") :
        predicted = tf.argmax(logits, 1)
        truth = tf.argmax(y, 1)
        correct_prediction = tf.equal(predicted, truth)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        confusion_matrix = tf.confusion_matrix(truth, predicted, num_classes = num_labels)
        tf.summary.scalar("accuracy", accuracy)
        
    with tf.Session() as sess :
        summary = tf.summary.merge_all()
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(logdir)
        writer.add_graph(sess.graph)

        print("Понеслось\n")
        X_train, y_train = get_data(train_path)
        X_test, y_test = get_data(test_path)
        number = len(X_train)
        start_time = time.time()
        for i in range(epoch) :
            X_train, y_train = shuffle(X_train, y_train)
            for l in range(0, number, batch_size) :
                end = l + batch_size
                X_batch, y_batch = X_train[l:end], y_train[l:end]
                sess.run(train_step, feed_dict = {X: X_batch, y: y_batch})
                
            num_examples = len(X_test)
            total_accuracy = 0
            for k in range(0, num_examples,batch_size) :
                X_batchT, y_batchT = X_test[k:k+batch_size], y_test[k:k+batch_size]
                res_accuracy = sess.run(accuracy, feed_dict={X: X_batchT, y: y_batchT})
                total_accuracy += (res_accuracy * len(X_batchT))
                if i % (eval_every * 20) == 0 :
                    saver.save(sess, "./smthnn.ckpt")
            print("Epoch:", i, " Accuracy:", total_accuracy / num_examples )
            print(df)

        print("\nTotal training time", time.time()-start_time)

In [18]:
init(train_path)
main()

Понеслось

bed
bed
bed
bed
bird
bird
bird
bird
cat
cat
cat
cat
dog
dog
dog
dog
down
down
down
down
down
down
eight
eight
eight
eight
eight
eight
five
five
five
five
five
five
four
four
four
four
four
four
go
go
go
go
go
go


UnboundLocalError: local variable 'X_batchT' referenced before assignment