In [1]:
import glob
import os
import librosa
import matplotlib.pyplot as plt
import numpy as np

import tensorflow as tf
%matplotlib inline


In [2]:
# Global matplotlib look-and-feel: ggplot theme plus Ubuntu fonts and
# per-element font sizes, applied in one shot.
plt.style.use('ggplot')
plt.rcParams.update({
    'font.family': 'serif',
    'font.serif': 'Ubuntu',
    'font.monospace': 'Ubuntu Mono',
    'font.size': 12,
    'axes.labelsize': 11,
    'axes.labelweight': 'bold',
    'axes.titlesize': 12,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 11,
    'figure.titlesize': 13,
})

In [3]:
def windows(data, window_size):
    """Yield ``(start, end)`` index pairs for half-overlapping windows.

    Consecutive windows advance by ``window_size / 2``; both bounds are
    truncated to ``int`` before being yielded. ``end`` may run past
    ``len(data)``, so callers must check the slice length themselves.

    Args:
        data: Any sized sequence; only ``len(data)`` is used.
        window_size: Window length in samples; must be positive.

    Yields:
        Tuples ``(start, end)`` of integer indices.

    Raises:
        ValueError: If ``window_size`` is not positive (the step would be
            zero or negative and the generator would never terminate).
    """
    if window_size <= 0:
        raise ValueError("window_size must be positive, got %r" % (window_size,))
    step = window_size / 2
    start = 0
    while start < len(data):
        yield int(start), int(start + window_size)
        start += step

In [4]:
def _log_mel_row(signal, bands):
    """Return the log-scaled mel spectrogram of ``signal`` as a ``(1, N)`` row."""
    melspec = librosa.feature.melspectrogram(y=signal, n_mels=bands)
    # ``logamplitude`` was replaced by ``power_to_db`` in newer librosa
    # releases; fall back to the old name when the new one is missing.
    to_db = getattr(librosa, "power_to_db", None) or librosa.logamplitude
    logspec = to_db(melspec)
    return logspec.T.flatten()[:, np.newaxis].T


def extract_features(parent_dir,sub_dirs1,sub_dirs2,file_ext="*",bands = 60, frames = 41):
    """Build two-channel log-mel features from every matching audio file.

    Files are looked up as ``parent_dir/<sub_dirs1>/<sub_dirs2>/<file_ext>``.
    Each clip is cut into half-overlapping windows of ``512 * (frames - 1)``
    samples; windows shorter than that are skipped. The label is the text
    before the first underscore of the file name, converted to ``int``.

    Args:
        parent_dir: Root directory of the dataset.
        sub_dirs1: Iterable of first-level directory names.
        sub_dirs2: Iterable of second-level directory names.
        file_ext: Glob pattern for file names inside each leaf directory.
        bands: Number of mel bands.
        frames: Number of spectrogram frames per window.

    Returns:
        ``(features, labels)`` where ``features`` has shape
        ``(n_windows, bands, frames, 2)`` (channel 0 = log-mel values,
        channel 1 = ``librosa.feature.delta`` of channel 0) and ``labels``
        is an integer array with one entry per window.

    Raises:
        ValueError: If a file name does not start with an integer label.
    """
    window_size = 512 * (frames - 1)
    log_specgrams = []
    labels = []
    for d_name in sub_dirs1:
        for sub_dir in sub_dirs2:
            for fn in glob.glob(os.path.join(parent_dir, d_name, sub_dir, file_ext)):
                sound_clip, _ = librosa.load(fn)
                # Parse the label from the file name itself so the result does
                # not depend on the path depth or the OS path separator.
                label = os.path.basename(fn).split('_')[0]
                print(fn)
                for (start, end) in windows(sound_clip, window_size):
                    signal = sound_clip[start:end]
                    if len(signal) != window_size:
                        continue  # trailing partial window
                    log_specgrams.append(_log_mel_row(signal, bands))
                    labels.append(label)

    # NOTE(review): each row was flattened from the transposed
    # (frames, bands) spectrogram but is reshaped here as (bands, frames),
    # so the axes do not line up with the original spectrogram. Left as-is
    # to stay compatible with previously saved features; confirm before
    # changing.
    log_specgrams = np.asarray(log_specgrams).reshape(len(log_specgrams),bands,frames,1)
    features = np.concatenate((log_specgrams, np.zeros(np.shape(log_specgrams))), axis = 3)
    for i in range(len(features)):
        features[i, :, :, 1] = librosa.feature.delta(features[i, :, :, 0])

    # ``np.int`` was only an alias of the builtin and no longer exists in
    # current NumPy; the builtin ``int`` gives the same dtype.
    return np.array(features), np.array(labels, dtype=int)

In [5]:
def one_hot_encode(labels):
    """One-hot encode a 1-D array of class labels.

    Columns follow the sorted unique label values, so the labels do not
    have to be the contiguous range ``0..k-1``. For labels that already
    are ``0..k-1`` the column index equals the label value.

    Args:
        labels: 1-D array-like of class labels.

    Returns:
        Float array of shape ``(len(labels), n_unique_labels)`` with a
        single 1 per row.

    Raises:
        ValueError: If ``labels`` is not one-dimensional (for example when
            an already encoded matrix is passed in again).
    """
    labels = np.asarray(labels)
    if labels.ndim != 1:
        raise ValueError("labels must be 1-D, got shape %r" % (labels.shape,))
    classes, class_index = np.unique(labels, return_inverse=True)
    n_labels = labels.shape[0]
    encoded = np.zeros((n_labels, classes.shape[0]))
    encoded[np.arange(n_labels), class_index.ravel()] = 1
    return encoded

In [6]:
# Extract features from the "f1" folder of each listed top-level directory.
parent_dir = 'New Dataset'
sub_dirs1 = ["rachit", "abhi"]
sub_dirs2 = ["f1"]
features, labels = extract_features(
    parent_dir=parent_dir,
    sub_dirs1=sub_dirs1,
    sub_dirs2=sub_dirs2,
)


New Dataset/rachit/f1/37_rachit_18.wav
New Dataset/rachit/f1/37_rachit_19.wav
New Dataset/rachit/f1/37_rachit_20.wav
New Dataset/rachit/f1/37_rachit_21.wav
New Dataset/rachit/f1/37_rachit_22.wav
New Dataset/rachit/f1/37_rachit_23.wav
New Dataset/rachit/f1/37_rachit_24.wav
New Dataset/rachit/f1/37_rachit_25.wav
New Dataset/rachit/f1/37_rachit_26.wav
New Dataset/rachit/f1/37_rachit_27.wav
New Dataset/rachit/f1/37_rachit_28.wav
New Dataset/rachit/f1/37_rachit_29.wav
New Dataset/rachit/f1/37_rachit_30.wav
New Dataset/rachit/f1/37_rachit_31.wav
New Dataset/rachit/f1/37_rachit_32.wav
New Dataset/rachit/f1/37_rachit_33.wav
New Dataset/rachit/f1/37_rachit_34.wav
New Dataset/rachit/f1/37_rachit_35.wav
New Dataset/rachit/f1/37_rachit_36.wav
New Dataset/rachit/f1/37_rachit_37.wav
New Dataset/rachit/f1/37_rachit_01.wav
New Dataset/rachit/f1/37_rachit_02.wav
New Dataset/rachit/f1/37_rachit_03.wav
New Dataset/rachit/f1/37_rachit_04.wav
New Dataset/rachit/f1/37_rachit_05.wav
New Dataset/rachit/f1/37_

In [7]:
# Shape of the freshly extracted features: (windows, bands, frames, channels).
print(features.shape)

(2193, 60, 41, 2)


In [13]:
import pickle
# Persist the merged feature array. The ``with`` block guarantees the file is
# closed even when ``pickle.dump`` raises (the recorded run of this cell ended
# in MemoryError).
with open('Features_updated_CNN.pickle', 'wb') as f:
    pickle.dump(features_1, f, pickle.HIGHEST_PROTOCOL)

MemoryError: 

In [17]:
import h5py
# Store the merged feature array in HDF5; the context manager closes the file
# even if writing the dataset fails.
with h5py.File('Updated_CNN_Features.h5', 'w') as h5f:
    h5f.create_dataset('dataset_1', data=features_1)

In [18]:
import h5py
# Store the merged label array in HDF5; the context manager closes the file
# even if writing the dataset fails.
with h5py.File('Updated_CNN_Lables.h5', 'w') as h5f:
    h5f.create_dataset('dataset_2', data=labels_1)

In [18]:
import pickle
# Persist the merged label array; ``with`` closes the file even on failure.
# ``labels_1`` must already exist in the kernel (it is loaded and extended in
# other cells of this notebook).
with open('Lables_updated_CNN.pickle', 'wb') as f:
    pickle.dump(labels_1, f, pickle.HIGHEST_PROTOCOL)

NameError: name 'labels_1' is not defined

In [8]:
import pickle
# Load the previously saved labels and features. Each file is opened in its
# own ``with`` block so the handle is released as soon as it has been read.
# NOTE: ``pickle.load`` executes arbitrary code from the file; only load
# pickles produced by a trusted run of this notebook.
with open('Lables_CNN.pickle', 'rb') as pkl_file:
    labels_1 = pickle.load(pkl_file)
with open('Features_CNN.pickle', 'rb') as pkl_file:
    features_1 = pickle.load(pkl_file)

In [9]:
# Compare the previously saved feature set with the newly extracted one.
print(features_1.shape)
print(features.shape)

(72100, 60, 41, 2)
(2193, 60, 41, 2)


In [10]:
# Append the new windows to the saved set along the sample axis.
# Not idempotent: running this cell again appends `features` a second time.
features_1 = np.concatenate([features_1, features])


In [11]:
# Shape of the merged feature array.
print(features_1.shape)

(74293, 60, 41, 2)


In [12]:
# Append the new labels to the saved ones and report the resulting length.
# Not idempotent: running this cell again appends `labels` a second time.
labels_1 = np.concatenate([labels_1, labels])
print(labels_1.shape)

(74293,)


In [None]:
# Fold class 36 into index 0 in place, using a boolean mask over the array.
labels[labels == 36] = 0

In [None]:
# Shape of the feature array that feeds the train/test split.
print(features.shape)

In [None]:
# Replace the integer labels with their one-hot matrix.
# Not idempotent: running this cell twice feeds the one-hot matrix back into
# one_hot_encode.
labels = one_hot_encode(labels)

In [None]:
# Spot-check one encoded row.
# NOTE(review): index 71500 is beyond the 2193 rows extracted in this
# notebook's recorded run; presumably written when `labels` held the full
# dataset — confirm which array this cell should inspect.
print(labels[71500])

In [None]:
def weight_variable(shape):
    """Return a ``tf.Variable`` initialised from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a ``tf.Variable`` of the given shape with every entry set to 1.0."""
    return tf.Variable(tf.constant(1.0, shape=shape))

def conv2d(x, W):
    """Convolve ``x`` with filter ``W`` using strides ``[1, 2, 2, 1]`` and SAME padding."""
    stride = [1, 2, 2, 1]
    return tf.nn.conv2d(x, W, strides=stride, padding='SAME')

def apply_convolution(x,kernel_size,num_channels,depth):
    """Apply one square convolution with bias followed by ReLU.

    Creates a ``[kernel_size, kernel_size, num_channels, depth]`` filter and
    a ``[depth]`` bias, then returns ``relu(conv2d(x, filter) + bias)``.
    """
    filter_shape = [kernel_size, kernel_size, num_channels, depth]
    kernel = weight_variable(filter_shape)
    offset = bias_variable([depth])
    pre_activation = tf.add(conv2d(x, kernel), offset)
    return tf.nn.relu(pre_activation)

def apply_max_pool(x,kernel_size,stride_size):
    """Max-pool ``x`` with a square window and square stride, SAME padding."""
    window = [1, kernel_size, kernel_size, 1]
    step = [1, stride_size, stride_size, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

In [None]:
# Random ~70/30 train/test split. A dedicated, seeded generator makes the
# split identical on every run without touching NumPy's global random state.
SPLIT_SEED = 42
rnd_indices = np.random.RandomState(SPLIT_SEED).rand(len(labels)) < 0.70
train_x = features[rnd_indices]
train_y = labels[rnd_indices]
test_x = features[~rnd_indices]
test_y = labels[~rnd_indices]

In [None]:
# Input geometry. `frames` must be defined here because the placeholder
# cell reads it; it was previously commented out, which raises NameError on
# a fresh kernel.
frames = 41
bands = 60

feature_size = bands * frames  # 2460 = 60 x 41
num_labels = 36
num_channels = 2

# Network and optimisation settings.
batch_size = 1000
kernel_size = 30
depth = 20
num_hidden = 200

learning_rate = 0.01
training_iterations = 1000

In [None]:
# Graph inputs: a batch of feature tensors and their one-hot targets.
X = tf.placeholder(tf.float32, shape=[None, bands, frames, num_channels])
Y = tf.placeholder(tf.float32, shape=[None, num_labels])

# Single convolution + ReLU layer.
cov = apply_convolution(X, kernel_size, num_channels, depth)

# Flatten the convolution output to one vector per sample.
shape = cov.get_shape().as_list()
flat_units = shape[1] * shape[2] * shape[3]
cov_flat = tf.reshape(cov, [-1, flat_units])

# Fully connected sigmoid layer.
f_weights = weight_variable([shape[1] * shape[2] * depth, num_hidden])
f_biases = bias_variable([num_hidden])
hidden_pre = tf.matmul(cov_flat, f_weights) + f_biases
f = tf.nn.sigmoid(hidden_pre)

# Softmax output layer.
out_weights = weight_variable([num_hidden, num_labels])
out_biases = bias_variable([num_labels])
logits = tf.matmul(f, out_weights) + out_biases
y_ = tf.nn.softmax(logits)

In [None]:
# Shapes of the held-out features and their labels.
print(test_x.shape)
print(test_y.shape)

In [None]:
# Summed cross-entropy between the one-hot targets and the softmax output.
# The probabilities are clipped away from zero first: a saturated softmax
# would otherwise give log(0) = -inf and turn the loss into NaN.
cross_entropy = -tf.reduce_sum(Y * tf.log(tf.clip_by_value(y_, 1e-10, 1.0)))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cross_entropy)

# Accuracy: share of samples whose arg-max prediction matches the target.
correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [None]:
# Train for `training_iterations` mini-batches and record the loss of each.
# Costs are collected in a list and converted once at the end: starting from
# np.empty([1]) would leave an uninitialised value as the first entry, and
# np.append copies the whole array on every step.
cost_history = []
session = tf.Session()
init = tf.global_variables_initializer()
session.run(init)

# Number of valid batch start offsets; at least 1 so the modulo below never
# divides by zero (or a negative number) when the training set is not larger
# than one batch.
batch_span = max(train_y.shape[0] - batch_size, 1)
for itr in range(training_iterations):
    print(itr, end=" ")
    offset = (itr * batch_size) % batch_span
    batch_x = train_x[offset:(offset + batch_size), :, :, :]
    batch_y = train_y[offset:(offset + batch_size), :]

    _, c = session.run([optimizer, cross_entropy], feed_dict={X: batch_x, Y: batch_y})
    cost_history.append(c)

cost_history = np.asarray(cost_history, dtype=float)



In [None]:
# Continue training for another `training_iterations` mini-batches on the
# existing session; the per-step cost is not recorded here.
# The divisor is clamped to at least 1 so the offset computation cannot
# divide by zero when the training set is not larger than one batch.
batch_span = max(train_y.shape[0] - batch_size, 1)
for itr in range(training_iterations):
    print(itr, end=" ")
    offset = (itr * batch_size) % batch_span
    batch_x = train_x[offset:(offset + batch_size), :, :, :]
    batch_y = train_y[offset:(offset + batch_size), :]

    _, c = session.run([optimizer, cross_entropy], feed_dict={X: batch_x, Y: batch_y})
 

In [None]:
# Quick check: accuracy on only the first 100 held-out samples.
test_size=test_x.shape[0]
print(test_size)
print('Test accuracy: ',round(session.run(accuracy, feed_dict={X: test_x[:100], Y: test_y[:100]}) , 3))    # single fetch, so session.run() returns one value, not a tuple
    

In [None]:
# Evaluate the whole test set in batches of `batch_size`.
# The batch count is rounded up so the final partial batch is included;
# with floor division nothing was evaluated at all when the test set was
# smaller than one batch.
n = (test_size + batch_size - 1) // batch_size
weighted_correct = 0.0
for step in range(n):
    offset = step * batch_size
    batch_data = test_x[offset:(offset + batch_size), :, :, :]
    batch_labels = test_y[offset:(offset + batch_size)]
    batch_accuracy = session.run(accuracy, feed_dict={X: batch_data, Y: batch_labels})
    print('Test accuracy: ', round(batch_accuracy, 3))
    # Weight by batch length so a short last batch does not skew the mean.
    weighted_correct += batch_accuracy * len(batch_labels)

if test_size:
    print('Overall test accuracy: ', round(weighted_correct / test_size, 3))


In [None]:
# Plot the recorded training cost per iteration on explicit figure/axes objects.
fig, ax = plt.subplots(figsize=(15, 10))
ax.plot(cost_history)
ax.axis([0, training_iterations, 0, np.max(cost_history)])
plt.show()