Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
…, #46, #47, and #48 Merge of pull requests #49, #50, and #52. Fixes issues #2, #4, #11, #12, #46, #47, and #48
- Loading branch information
Showing
13 changed files
with
1,221 additions
and
334 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,5 @@ | |
*.pyc | ||
.DS_Store | ||
/logs | ||
/data/ted/TEDLIUM_release2 | ||
/data/ted/TEDLIUM_release2.tar.gz |
Large diffs are not rendered by default.
Oops, something went wrong.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import numpy as np | ||
import scipy.io.wavfile as wav | ||
|
||
from python_speech_features import mfcc | ||
|
||
def audiofile_to_input_vector(audio_filename, numcep, numcontext): | ||
# Load wav files | ||
fs, audio = wav.read(audio_filename) | ||
|
||
# Get mfcc coefficients | ||
orig_inputs = mfcc(audio, samplerate=fs, numcep=numcep) | ||
|
||
# For each time slice of the training set, we need to copy the context this makes | ||
# the numcep dimensions vector into a numcep + 2*numcep*numcontext dimensions | ||
# because of: | ||
# - numcep dimensions for the current mfcc feature set | ||
# - numcontext*numcep dimensions for each of the past and future (x2) mfcc feature set | ||
# => so numcep + 2*numcontext*numcep | ||
train_inputs = np.array([], np.float32) | ||
train_inputs.resize((orig_inputs.shape[0], numcep + 2*numcep*numcontext)) | ||
|
||
# Prepare pre-fix post fix context (TODO: Fill empty_mfcc with MCFF of silence) | ||
empty_mfcc = np.array([]) | ||
empty_mfcc.resize((numcep)) | ||
|
||
# Prepare train_inputs with past and future contexts | ||
time_slices = range(train_inputs.shape[0]) | ||
context_past_min = time_slices[0] + numcontext | ||
context_future_max = time_slices[-1] - numcontext | ||
for time_slice in time_slices: | ||
### Reminder: array[start:stop:step] | ||
### slices from indice |start| up to |stop| (not included), every |step| | ||
# Pick up to numcontext time slices in the past, and complete with empty | ||
# mfcc features | ||
need_empty_past = max(0, (context_past_min - time_slice)) | ||
empty_source_past = list(empty_mfcc for empty_slots in range(need_empty_past)) | ||
data_source_past = orig_inputs[max(0, time_slice - numcontext):time_slice] | ||
assert(len(empty_source_past) + len(data_source_past) == numcontext) | ||
|
||
# Pick up to numcontext time slices in the future, and complete with empty | ||
# mfcc features | ||
need_empty_future = max(0, (time_slice - context_future_max)) | ||
empty_source_future = list(empty_mfcc for empty_slots in range(need_empty_future)) | ||
data_source_future = orig_inputs[time_slice + 1:time_slice + numcontext + 1] | ||
assert(len(empty_source_future) + len(data_source_future) == numcontext) | ||
|
||
if need_empty_past: | ||
past = np.concatenate((empty_source_past, data_source_past)) | ||
else: | ||
past = data_source_past | ||
|
||
if need_empty_future: | ||
future = np.concatenate((data_source_future, empty_source_future)) | ||
else: | ||
future = data_source_future | ||
|
||
past = np.reshape(past, numcontext*numcep) | ||
now = orig_inputs[time_slice] | ||
future = np.reshape(future, numcontext*numcep) | ||
|
||
train_inputs[time_slice] = np.concatenate((past, now, future)) | ||
assert(len(train_inputs[time_slice]) == numcep + 2*numcep*numcontext) | ||
|
||
# Whiten inputs (TODO: Should we whiten) | ||
train_inputs = (train_inputs - np.mean(train_inputs))/np.std(train_inputs) | ||
|
||
# Return results | ||
return train_inputs |
This file was deleted.
Oops, something went wrong.
File renamed without changes.
Oops, something went wrong.