## Agenda
[Configure](#Configure)  
[Training_data](#Training_data)  
[Test_data](#Test_data)

In [1]:
import os
os.chdir("/home/maikfangogoair/tensorflow/tensorflow/examples/speech_commands")

In [55]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os.path
import sys

import numpy as np
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf

import input_data
import models
from tensorflow.python.platform import gfile

import re
import io

## Configure
[back to top](#Agenda)

In [3]:
wanted_words="yes,no,up,down,left,right,on,off,stop,go"
sample_rate=16000#Expected sample rate of the wavs
clip_duration_ms=1000#Expected duration in milliseconds of the wavs
window_size_ms=30.0#How long each spectrogram timeslice is
window_stride_ms=10.0#How long each spectrogram timeslice is
dct_coefficient_count=40#How many bins to use for the MFCC fingerprint
data_url="http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz"
data_dir="/home/maikfangogoair/tmp/label_data"
silence_percentage=10.0 #How much of the training data should be silence.
unknown_percentage=10.0 #How much of the training data should be unknown words.
validation_percentage=10 #What percentage of wavs to use as a validation set.
testing_percentage=10 #What percentage of wavs to use as a test set.
time_shift_ms=100.0 #Range to randomly shift the training audio by in time.
batch_size=100
background_frequency=0.8#How many of the training samples have background noise mixed in.
background_volume=0.1#How loud the background noise should be, between 0 and 1

## Training_data
[back to top](#Agenda)

In [4]:
model_settings = models.prepare_model_settings(
    len(input_data.prepare_words_list(wanted_words.split(','))),
    sample_rate, clip_duration_ms, window_size_ms,
    window_stride_ms, dct_coefficient_count)
audio_processor = input_data.AudioProcessor(
    data_url, data_dir, silence_percentage,
    unknown_percentage,
    wanted_words.split(','), validation_percentage,
    testing_percentage, model_settings)
fingerprint_size = model_settings['fingerprint_size']
label_count = model_settings['label_count']
time_shift_samples = int((time_shift_ms * sample_rate) / 1000)

40 98


In [5]:
print(fingerprint_size)
print(label_count)
print(time_shift_samples)
print(model_settings)
print(audio_processor.set_size("training"))
print(audio_processor.set_size("validation"))
print(audio_processor.set_size("testing"))

3920
12
1600
{'desired_samples': 16000, 'fingerprint_size': 3920, 'dct_coefficient_count': 40, 'sample_rate': 16000, 'window_size_samples': 480, 'label_count': 12, 'window_stride_samples': 160, 'spectrogram_length': 98}
22246
3093
3081


In [6]:
sess = tf.InteractiveSession()

In [7]:
train_fingerprints, train_ground_truth = audio_processor.get_data(
    -1, 0, model_settings, background_frequency,
    background_volume, time_shift_samples, 'training', sess)
validation_fingerprints, validation_ground_truth = (
            audio_processor.get_data(-1, 0, model_settings, 0.0,
                                     0.0, 0, 'validation', sess))
test_fingerprints, test_ground_truth = audio_processor.get_data(
    -1, 0, model_settings, 0.0, 0.0, 0, 'testing', sess)

In [8]:
print(train_fingerprints.shape[0])
print(validation_fingerprints.shape[0])
print(test_fingerprints.shape[0])

22246
3093
3081


In [9]:
dir="/home/maikfangogoair/tmp/save/"
np.save(dir+"train_fingerprints.npy",train_fingerprints)
np.save(dir+"train_ground_truth.npy",train_ground_truth)
np.save(dir+"validation_fingerprints.npy",validation_fingerprints)
np.save(dir+"validation_ground_truth.npy",validation_ground_truth)
np.save(dir+"test_fingerprints.npy",test_fingerprints)
np.save(dir+"test_ground_truth.npy",test_ground_truth)

In [10]:
print(audio_processor.words_list)
print(audio_processor.word_to_index)
print(len(audio_processor.background_data))

['_silence_', '_unknown_', 'yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop', 'go']
{'house': 1, 'tree': 1, 'three': 1, 'go': 11, 'wow': 1, 'cat': 1, 'zero': 1, 'eight': 1, 'nine': 1, 'two': 1, 'dog': 1, 'down': 5, 'happy': 1, 'on': 8, 'four': 1, 'left': 6, 'up': 4, 'off': 9, 'no': 3, 'seven': 1, 'stop': 10, 'yes': 2, 'six': 1, 'bed': 1, 'right': 7, 'one': 1, 'sheila': 1, 'five': 1, '_silence_': 0, 'bird': 1, 'marvin': 1}
6


In [11]:
print(len(audio_processor.data_index['training']))
print(train_ground_truth.shape)

22246
(22246,)


In [12]:
print([x['label']for x in audio_processor.data_index['training'][:20]])
print(train_ground_truth[:20])
print([audio_processor.words_list[int(x)] for x in train_ground_truth[:20]])

['bird', 'no', 'no', 'go', 'no', '_silence_', 'down', 'happy', 'right', 'no', 'wow', 'on', 'right', 'off', 'on', 'go', 'go', '_silence_', 'no', 'no']
[  1.   3.   3.  11.   3.   0.   5.   1.   7.   3.   1.   8.   7.   9.   8.
  11.  11.   0.   3.   3.]
['_unknown_', 'no', 'no', 'go', 'no', '_silence_', 'down', '_unknown_', 'right', 'no', '_unknown_', 'on', 'right', 'off', 'on', 'go', 'go', '_silence_', 'no', 'no']


## Test_data
[back to top](#Agenda)

In [15]:
test_wav = os.listdir('/home/maikfangogoair/test/audio/')
directory = '/home/maikfangogoair/test/audio/'
audio_processor.data_index['real_test'] = []
for file_name in test_wav:
    line = {'file': directory + file_name, 'label': 'no'}
    audio_processor.data_index['real_test'].append(line)

In [16]:
len(audio_processor.data_index['real_test'])

158538

In [17]:
real_test_fingerprints, real_test_ground_truth = audio_processor.get_data(
    -1, 0, model_settings, 0.0, 0.0, 0, 'real_test', sess)

In [21]:
size = real_test_fingerprints.shape[0]

158538

In [59]:
#purge
for x in [x for x in os.listdir(dir) if len(re.findall('real_test_fingerprints_.+.npy', x))>0]:
    print("removing %s" % (dir+x))
    os.remove(dir+x)
for x in [x for x in os.listdir(dir) if len(re.findall('real_test_ground_truth_.+.npy', x))>0]:
    print("removing %s" % (dir+x))
    os.remove(dir+x)
for x in [x for x in os.listdir(dir) if len(re.findall('real_test_file_.+.txt', x))>0]:
    print("removing %s" % (dir+x))
    os.remove(dir+x)

removing /home/maikfangogoair/tmp/save/real_test_fingerprints_0.npy
removing /home/maikfangogoair/tmp/save/real_test_fingerprints_47559.npy
removing /home/maikfangogoair/tmp/save/real_test_fingerprints_31706.npy
removing /home/maikfangogoair/tmp/save/real_test_fingerprints_15853.npy
removing /home/maikfangogoair/tmp/save/real_test_fingerprints_63412.npy
removing /home/maikfangogoair/tmp/save/real_test_fingerprints_79265.npy
removing /home/maikfangogoair/tmp/save/real_test_ground_truth_47559.npy
removing /home/maikfangogoair/tmp/save/real_test_ground_truth_63412.npy
removing /home/maikfangogoair/tmp/save/real_test_ground_truth_31706.npy
removing /home/maikfangogoair/tmp/save/real_test_ground_truth_0.npy
removing /home/maikfangogoair/tmp/save/real_test_ground_truth_15853.npy
removing /home/maikfangogoair/tmp/save/real_test_file_47559.txt
removing /home/maikfangogoair/tmp/save/real_test_file_15853.txt
removing /home/maikfangogoair/tmp/save/real_test_file_0.txt
removing /home/maikfangogoai

In [60]:
steps = 10 
forward = size // steps
start = 0
for i in range(0, size, forward):
    end = min(i+forward, size)
    print("saving from %f to %f" % (i, end) )
    np.save(dir+"real_test_fingerprints_"+ str(i) +".npy",real_test_fingerprints[i:end])
    np.save(dir+"real_test_ground_truth_"+ str(i) +".npy",real_test_ground_truth[i:end])
    with io.open(dir+"real_test_file_"+ str(i) +".txt",'w') as f:
        for x in audio_processor.data_index['real_test'][i:end]:
            f.write(x["file"].split("/")[-1]+'\n')

saving from 0.000000 to 15853.000000
saving from 15853.000000 to 31706.000000
saving from 31706.000000 to 47559.000000
saving from 47559.000000 to 63412.000000
saving from 63412.000000 to 79265.000000
saving from 79265.000000 to 95118.000000
saving from 95118.000000 to 110971.000000
saving from 110971.000000 to 126824.000000
saving from 126824.000000 to 142677.000000
saving from 142677.000000 to 158530.000000
saving from 158530.000000 to 158538.000000
