### Importing libraries

In [1]:
import glob
import os
import pickle

import IPython.display as ipd
import librosa
import librosa.display
import librosa.feature
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from scipy.io.wavfile import read

### Creating Dataset - First 11 classes excluding silence

In [None]:
# Builds the speech dataset: X collects one fixed-length waveform per clip and
# Y one integer label per clip (0-9 for the required words, 10 for any other word).

# Number of samples every clip is padded or trimmed to
CLIP_LENGTH = 16000


def word_from_folder(folder_path):
    """Return the last component of ``folder_path``, i.e. the word the folder holds.

    Both forward slashes and backslashes are treated as separators and trailing
    separators are ignored, so the result is the same for the paths glob returns
    on Windows and on POSIX systems.
    """
    parts = [part for part in folder_path.replace("\\", "/").split("/") if part]
    return parts[-1] if parts else ""


def fit_to_length(samples, length=CLIP_LENGTH):
    """Return ``samples`` as an array with exactly ``length`` entries.

    Shorter inputs are zero-padded at the end; longer inputs are truncated
    (a negative pad width would otherwise make np.pad raise).
    """
    samples = np.asarray(samples)
    if len(samples) >= length:
        return samples[:length]
    return np.pad(samples, (0, length - len(samples)), 'constant', constant_values=0)


# Collecting all subdirectories
folders = sorted(glob.glob('./train/audio/*/'))

# Input X, output Y, and the (word, label) pair recorded for each folder
X = []
Y = []
labels_map = []

# Words required for the competition
reqd_words = ['yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop', 'go']

# Next free label for a required word. Labels are handed out in the order the
# folders are visited (sorted folder order), not in the order of reqd_words.
label = 0

# Words not in the required list are labelled as class 10: unknown
unknown_class = 10

# Not including background noise data.
# NOTE(review): this assumes the background-noise folder sorts first - confirm.
for word_folder in folders[1:]:

    # Extracting the word from the folder name
    folder_name_cleaned = word_from_folder(word_folder)

    # Collecting all files in a folder
    files = sorted(glob.glob(str(word_folder) + "/*.wav"))

    # For each file, reading the sampling rate and samples with
    # scipy.io.wavfile.read and storing the fixed-length samples in X
    for file in files:
        sr, bits = read(file)
        X.append(fit_to_length(bits))

    # Required words get the next label from 0-9; anything else is class 10
    if folder_name_cleaned in reqd_words:
        folder_label = label
        label += 1
    else:
        folder_label = unknown_class

    Y.extend([folder_label] * len(files))
    labels_map.append((folder_name_cleaned, folder_label))

    # Keeping track of the process
    print('Collected '+str(folder_name_cleaned)+' Folder')

np.save('labels_map.npy', labels_map)

### Shape of X, Y and label analysis

In [None]:
# Turn the collected lists into arrays; Y becomes a single-column 2-D array
X, Y = np.array(X), np.array(Y)
Y = np.reshape(Y, (Y.shape[0], 1))

# Summary of the dataset dimensions
print('Shape of X: ', X.shape)
print('Shape of Y: ', Y.shape)
print('Number of samples: ', X.shape[0])
print('Number of bits in each sample: ', X.shape[1])

# One (word, label) tuple per line
print('\nWords and their corresponding numerical labels:\n ')
for word_label_pair in labels_map:
    print(word_label_pair)

### Creating 1-second noise samples (5000 random windows per background-noise file)

In [25]:
# Builds the noise class (label 11) by cutting random fixed-length windows out
# of every recording in the first folder under ./train/audio/.

NOISE_LABEL = 11
WINDOW = 16000            # samples per window
WINDOWS_PER_FILE = 5000   # windows drawn from each recording
MAX_START = 150000        # start offsets are drawn from [0, MAX_START)


def sample_noise_windows(signal, n_windows=WINDOWS_PER_FILE, window=WINDOW, max_start=MAX_START):
    """Cut up to ``n_windows`` random windows of ``window`` samples out of ``signal``.

    Start offsets are distinct and drawn from ``[0, max_start)``, clamped so that
    a window never runs past the end of the recording. A recording shorter than
    ``window`` yields a single window, zero-padded at the end.

    Returns a list of 1-D arrays, each ``window`` samples long.
    """
    signal = np.asarray(signal)
    # Number of valid start offsets; always at least one so short recordings still contribute
    n_starts = max(1, min(max_start, len(signal) - window + 1))

    windows = []
    for start in np.random.permutation(n_starts)[:n_windows]:
        chunk = signal[start:start + window]
        if len(chunk) < window:
            # Pad the slice itself (not the whole recording) up to the window length
            chunk = np.pad(chunk, (0, window - len(chunk)), 'constant', constant_values=0)
        windows.append(chunk)
    return windows


folders = sorted(glob.glob('./train/audio/*/'))

# NOTE(review): the first folder in sorted order is assumed to hold the
# background-noise recordings - confirm against the dataset layout.
if folders:
    files_noise = sorted(glob.glob(str(folders[0]) + "/*.wav"))
else:
    files_noise = []
    print('No folders found under ./train/audio/ - the noise arrays will be empty')

X_noise = []
Y_noise = []

for noise_file in files_noise:
    sr, y = read(noise_file)
    noise_windows = sample_noise_windows(y)
    X_noise.extend(noise_windows)
    Y_noise.extend([NOISE_LABEL] * len(noise_windows))

X_noise = np.array(X_noise)
Y_noise = np.array(Y_noise)
Y_noise = Y_noise.reshape(Y_noise.shape[0], 1)
print('Shape of noise batch input: ', X_noise.shape)
print('Shape of noise batch output: ', Y_noise.shape)

Shape of noise batch input:  (5000, 16000)
Shape of noise batch output:  (5000, 1)




In [35]:
# Save the noise windows and their labels; create the output folder first so
# the cell also works on a fresh checkout
os.makedirs('intermediate_pickles', exist_ok=True)
with open('intermediate_pickles/X_silence.pkl', 'wb') as x_file:
    pickle.dump(X_noise, x_file)
with open('intermediate_pickles/Y_silence.pkl', 'wb') as y_file:
    pickle.dump(Y_noise, y_file)

# Audio player for one noise window. It has to be the last expression of the
# cell, otherwise the notebook discards it without rendering anything.
ipd.Audio(X_noise[2000], rate=16000)

### Shuffling the noise samples

In [None]:
# One random ordering of the row indices, applied to both arrays so that every
# noise window stays paired with its label
shuffle_noise = np.random.permutation(len(X_noise))
X_n, Y_n = X_noise[shuffle_noise, :], Y_noise[shuffle_noise, :]

### Saving files

In [None]:
# Objects to persist, keyed by the file each one is written to
pickle_outputs = {
    'intermediate_pickles/X_noise_shuffled.pkl': X_n,
    'intermediate_pickles/Y_noise_shuffled.pkl': Y_n,
    'intermediate_pickles/X.pkl': X,
    'intermediate_pickles/Y.pkl': Y,
}

# Create the output folder first so the cell also works on a fresh checkout
os.makedirs('intermediate_pickles', exist_ok=True)

for pickle_path, payload in pickle_outputs.items():
    with open(pickle_path, 'wb') as handle:
        pickle.dump(payload, handle)

### Reloading saved pickle files

In [4]:
def load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes.
    """
    with open(path, "rb") as handle:
        # pickle can run arbitrary code while loading: only use this on files
        # written by this notebook, never on files from an untrusted source
        return pickle.load(handle)


X = load_pickle("intermediate_pickles/X.pkl")
Y = load_pickle("intermediate_pickles/Y.pkl")
X_n = load_pickle("intermediate_pickles/X_noise_shuffled.pkl")
Y_n = load_pickle("intermediate_pickles/Y_noise_shuffled.pkl")

print('Shape of X: ',X.shape)
print('Shape of Y: ',Y.shape)
print('Shape of noise batch input: ',X_n.shape)
print('Shape of noise batch output: ',Y_n.shape)

Shape of X:  (64721, 16000)
Shape of Y:  (64721, 1)


In [2]:
#Extra 5000 noise samples as silence class

# Reload the shuffled noise windows and labels; the with-blocks close each file
# once it has been read. pickle can run arbitrary code while loading, so only
# load files written by this notebook.
with open("intermediate_pickles/X_noise_shuffled.pkl", "rb") as handle:
    X_n = pickle.load(handle)
with open("intermediate_pickles/Y_noise_shuffled.pkl", "rb") as handle:
    Y_n = pickle.load(handle)

# The same random row indices are used for both arrays so windows and labels
# stay aligned; at most 5000 rows are kept
rand = np.random.permutation(X_n.shape[0])
X_silence_noised = X_n[rand[0:5000]]
Y_silence_noised = Y_n[rand[0:5000]]

# Create the output folder first so the cell also works on a fresh checkout
os.makedirs('data', exist_ok=True)
np.save('data/X_silence_noised.npy',X_silence_noised)
np.save('data/Y_silence_noised.npy',Y_silence_noised)

# Audio player for the first noise window (rendered as the cell's last expression)
ipd.Audio(X_n[0],rate=16000)