In [1]:
import h5py
import numpy as np
import os
import copy

import shutil
import datetime
from tqdm import tqdm
import sys

from utils import *

import tensorflow as tf
from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio
from tensorflow.python.platform import gfile
from tensorflow.python.ops import io_ops

In [2]:
data_dir = "/workspace/dataset/speech/train/audio"
val_list = os.path.join(data_dir, '../validation_list.txt')
test_list = os.path.join(data_dir, '../testing_list.txt')

BACKGROUND_NOISE_DIR_NAME = '_background_noise_'
desired_samples = 16000

search_path = os.path.join(data_dir, '*', '*.wav')

label_words = ['silence', 'yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop', 'go']

data = {'train':[], 'val':[]} 
labels = {'train':[], 'val':[]}

'''wav processor'''
with tf.name_scope('wav_loader'):
    wav_filename = tf.placeholder(tf.string, [])
    wav_loader = io_ops.read_file(wav_filename)
    wav_decoder = contrib_audio.decode_wav(wav_loader, desired_channels=1, desired_samples=desired_samples)

In [3]:
val_files = []
with open(val_list) as f:
    val_files = f.read().splitlines()

test_files = []
with open(test_list) as f:
    test_files = f.read().splitlines()

In [4]:
dataset_files=gfile.Glob(search_path)
ndata = len(dataset_files)

with tf.Session() as sess:
    with tqdm(total=ndata, file=sys.stdout) as pbar:
        for i in xrange(ndata):
            wav_path = dataset_files[i]
            p, category = os.path.split(os.path.dirname(wav_path))
            category = category.lower()

            # ignore the background noise folder
            if category == BACKGROUND_NOISE_DIR_NAME:
                continue
            
            # find the dataset split membership 
            _, fname = os.path.split(wav_path)
            
            f = os.path.join(category, fname)
            if f in test_files:
                split_cat = 'val'
            else:
                split_cat = 'train'
                
            # update the category list
            if category not in label_words:
                label_words.append(category)
                
            # decode the wav
            wav_data = sess.run(
                        wav_decoder,
                        feed_dict={wav_filename: wav_path})
            
            # assign to the splitted dataset
            label = word2label(category, label_words)
            data[split_cat].append(wav_data.audio)
            labels[split_cat].append(label)
            
            pbar.update(1)
            
print('loading dataset is finished!')  

100%|█████████▉| 64721/64727 [14:16<00:00, 75.53it/s]  
loading dataset is finished!


In [5]:
if not os.path.exists('data'):
    os.makedirs( 'data' )
    
h5f = h5py.File("data/dataset.hdf5", "w")

h5f.create_dataset('X_train', data=data['train'])
h5f.create_dataset('X_val', data=data['val'])
h5f.create_dataset('Y_train', data=labels['train'])
h5f.create_dataset('Y_val', data=labels['val'])

h5f.close()

In [8]:
f = open('data/categories.txt', 'w')
for item in label_words:
    f.write("%s\n" % item)
f.close()

silence
yes
no
up
down
left
right
on
off
stop
go
bed
happy
sheila
three
eight
cat
dog
five
house
four
nine
marvin
six
zero
two
wow
bird
tree
one
seven
