In [1]:
import os
# Backend and device selection. These variables are set before `import keras`
# in the next cell because Keras and Theano read them when they are imported.
os.environ['KERAS_BACKEND'] = 'theano'
# Disabled alternative for pinning the visible GPU:
#os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Ask Theano for the device named cuda0.
# NOTE(review): presumably 'device=cpu' is the CPU setting -- confirm in the Theano docs.
os.environ['THEANO_FLAGS'] = 'device=cuda0'

In [2]:
import keras
import numpy as np

Using Theano backend.
Using cuDNN version 5110 on context None
Mapped name None to device cuda0: GeForce GTX 1080 Ti (0000:02:00.0)


### Data and weight loaders

In [3]:
from glob import glob
from random import Random
import json

# Fixed-seed generator that pick_sample_files uses to shuffle its candidates, so a
# fresh kernel draws the same subset. Its state advances on every shuffle, so calling
# pick_sample_files again without re-running this cell gives a different subset.
rng = Random(42)

In [4]:
import kenlm
import beamsearch
reload(beamsearch)
from utils import argmax_decode, word_error_rate, for_tf_or_th
from beamsearch import beam_decode, beam_decode_u

# KenLM model whose score() is used by best_lm_alternative to rank the beam-search
# alternatives of each utterance.
lm = kenlm.Model('data/lm/lm.binary')

def iterate_weights(model_path):
    """Yield the paths of the ``*.h5`` weight files directly inside ``model_path``.

    Paths are yielded in sorted order. ``glob`` returns its matches in an
    arbitrary, filesystem-dependent order, which made iterating over saved
    checkpoints non-reproducible between runs and machines.

    model_path: directory holding the saved weights; a trailing path
        separator is optional.

    Yields path strings, each one ``model_path`` joined with a file name.
    """
    for weight_file in sorted(glob(os.path.join(model_path, '*.h5'))):
        yield weight_file

def pick_sample_files(desc_file, count, min_duration, max_duration):
    """Pick up to ``count`` randomly ordered samples whose duration is in range.

    ``desc_file`` is read as JSON lines: one JSON document per line, each
    providing at least a ``'duration'`` entry. Blank lines are skipped; before,
    an empty or trailing blank line made ``json.loads`` raise.

    desc_file: path of the description file.
    count: maximum number of samples to return.
    min_duration, max_duration: inclusive bounds on ``sample['duration']``.

    Returns a list of at most ``count`` decoded samples. The order comes from
    shuffling with the module-level ``rng``, so the selection depends on that
    generator's current state.
    """
    with open(desc_file) as f:
        # Whitespace-only lines are not JSON documents, so they are dropped here.
        metadata = [json.loads(line) for line in f if line.strip()]
    legitimates = [sample for sample in metadata
                   if min_duration <= sample['duration'] <= max_duration]
    rng.shuffle(legitimates)
    return legitimates[:count]

def test_generator(datagen, test_samples, batch_size=64, normalize=True):
    """Yield zero-padded feature batches for ``test_samples``, in their given order.

    All samples are featurized (and optionally normalized) up front, when the
    generator is first advanced; batches are then sliced from those lists.

    datagen: object providing ``featurize(path)`` and ``normalize(feature)``.
        Each feature is used as a 2-D array: ``shape[0]`` is the length that
        gets padded and ``shape[1]`` of the first feature is the batch width.
    test_samples: re-iterable collection of dicts with ``'text'``,
        ``'duration'`` and ``'key'`` entries; ``'key'`` is what is passed to
        ``featurize``.
    batch_size: maximum number of samples per batch; the last batch may be
        smaller.
    normalize: when true, every feature goes through ``datagen.normalize``.

    Yields dicts with:
        'x': float32 array of shape (n, longest length in batch, width);
             positions past a sample's own length stay zero.
        'y', 'path', 'duration': the texts, keys and durations of the batch.
    """
    texts = [s['text'] for s in test_samples]
    durations = [s['duration'] for s in test_samples]
    paths = [s['key'] for s in test_samples]
    features = [datagen.featurize(p) for p in paths]
    if normalize:
        features = [datagen.normalize(f) for f in features]

    # Stepping the range by batch_size replaces the float ceil() batch count.
    for start in range(0, len(features), batch_size):
        stop = start + batch_size
        batch_features = features[start:stop]
        max_length = max(f.shape[0] for f in batch_features)
        batch_array = np.zeros((len(batch_features), max_length, features[0].shape[1]),
                               dtype='float32')
        for row, feature in enumerate(batch_features):
            # Left-align each sample; the remainder of the row is zero padding.
            batch_array[row, :feature.shape[0], :] = feature
        yield {'x': batch_array,
               'y': texts[start:stop],
               'path': paths[start:stop],
               'duration': durations[start:stop]}

def best_lm_alternative(true_sentence, wer, predictions, verbose=False):
    """Choose the alternative that the language model scores highest.

    predictions is a list of (sentence, probability) tuples; only the sentence
    is used here. The first entry is treated as the current prediction and
    ``wer`` as its word error rate.

    Returns (sentence, wer): the highest-scoring sentence according to
    ``lm.score`` (the earliest one on ties) and its word error rate. ``wer``
    is returned unchanged when that sentence equals the first prediction;
    otherwise it is recomputed against ``true_sentence``.
    """
    top_sentence, top_score = None, np.finfo('float32').min
    for candidate, _probability in predictions:
        candidate_score = lm.score(candidate)
        if candidate_score > top_score:
            top_sentence, top_score = candidate, candidate_score

    # Nothing to recompute when the language model agrees with the first prediction.
    if top_sentence == predictions[0][0]:
        if verbose:
            print("langauge model didn't change prediction")
        return top_sentence, wer

    rescored_wer = word_error_rate([true_sentence], [top_sentence], decoded=True)[0]
    if verbose:
        print("langauge model changed prediction, WER changed from {old_wer} to {new_wer}".format(
            old_wer=wer, new_wer=rescored_wer))
    return top_sentence, rescored_wer

def evaluate(batch_generator, output_fn, learning_phase=False, use_lm=False, beam_width=12):
    """Decode every batch, print per-utterance and aggregate WER, and return results.

    batch_generator: iterable of dicts with 'x', 'y', 'duration' and 'path'
        entries, as produced by ``test_generator``.
    output_fn: callable invoked as ``output_fn([batch['x'], learning_phase])``;
        the first element of its result is used as the network output.
    learning_phase: forwarded to ``output_fn`` unchanged.
    use_lm: when true, each utterance's beam-search alternatives are rescored
        with ``best_lm_alternative`` and the LM-chosen texts and WERs are the
        ones reported; otherwise the argmax decoding is reported.
    beam_width: beam width passed to ``beam_decode_u`` (only used with use_lm).

    Returns ``(mtp_net_out, pred_texts, all_wers, references)`` where
    ``mtp_net_out``, ``pred_texts`` and ``references`` belong to the LAST batch
    only and ``all_wers`` is the list of per-batch WER arrays. For an empty
    generator this is ``(None, [], [], [])`` and no summary is printed
    (previously this case raised a NameError).
    """
    all_nolm_wers, all_lm_wers = [], []
    # The list that is reported and returned. It is an alias, not a copy, so it
    # follows the appends made inside the loop.
    all_wers = all_lm_wers if use_lm else all_nolm_wers
    # Defaults so that an empty generator still reaches the return statement.
    mtp_net_out, pred_texts, references = None, [], []

    for batch in batch_generator:
        references = batch['y']
        net_out = output_fn([batch['x'], learning_phase])[0]
        # for_tf_or_th chooses between the raw output and its axes-0/1 swap
        # (presumably by active backend -- confirm in utils); the result is
        # indexed per sample on its first axis below.
        mtp_net_out = for_tf_or_th(net_out, net_out.swapaxes(0, 1))
        pred_texts = [argmax_decode(o) for o in mtp_net_out]
        nolm_wers = word_error_rate(references, pred_texts, True)
        all_nolm_wers.append(nolm_wers)

        if use_lm:
            # list(...) keeps the alternatives indexable, which
            # best_lm_alternative needs, even where zip is lazy.
            rescored = [
                best_lm_alternative(
                    references[i], nolm_wers[i],
                    list(zip(*beam_decode_u(mtp_net_out[i, :, :], beam_width, normalize=True))))
                for i in range(mtp_net_out.shape[0])
            ]
            pred_texts, lm_wers = zip(*rescored)
            all_lm_wers.append(np.array(lm_wers))

        for i, y in enumerate(references):
            print('r:{}\np:{}\n{}: WER: {}, DURATION: {}, PATH: {}'.format(
                y, pred_texts[i], i, all_wers[-1][i], batch['duration'][i], batch['path'][i]))
        print('batch mean WER: {}'.format(all_wers[-1].mean()))

    if all_wers:  # np.concatenate raises on an empty list
        if use_lm:
            print('LM WER: {} No LM WER: {}'.format(
                np.concatenate(all_lm_wers).mean(), np.concatenate(all_nolm_wers).mean()))
        else:
            # This summary used to be formatted but never printed.
            print('whole mean WER: {}'.format(np.concatenate(all_wers).mean()))
    return mtp_net_out, pred_texts, all_wers, references

### Customize data generator

In [16]:
# Description file of the split to evaluate; keep exactly one assignment active.
# NOTE(review): machine-specific absolute paths.
test_desc = '/home/reith/deepspeech/ba-dls-deepspeech/descs/test-clean.json'
#test_desc = '/home/reith/deepspeech/ba-dls-deepspeech/descs/test-other.json'
#test_desc = '/home/reith/deepspeech/ba-dls-deepspeech/descs/dev-clean.json'

In [6]:
from data_generator import DataGenerator
# Supplies featurize()/normalize() to test_generator. Presumably its normalization
# statistics come from one of the two cells further down (fit or reload) -- confirm.
datagen = DataGenerator()

In [18]:
# Up to 1024 samples with 0 <= duration <= 30, shuffled with the seeded `rng`.
test_samples = pick_sample_files(test_desc, 1024, 0, 30)

Normalize by input data

In [8]:
# NOTE(review): machine-specific absolute path.
train_desc = '/home/reith/deepspeech/ba-dls-deepspeech/descs/train-clean-360.json'
# presumably 15 is a maximum sample duration -- confirm against DataGenerator.load_train_data
datagen.load_train_data(train_desc, 15)
# presumably fits the normalization statistics on 100 training samples -- confirm
# against DataGenerator.fit_train
datagen.fit_train(100)

Or load them

In [17]:
# presumably restores previously saved normalization statistics stored under the
# name '860-1000' -- confirm against DataGenerator.reload_norm
datagen.reload_norm('860-1000')

### Load model

#### Theano mode

Load and test weights of a half-phoneme model

In [10]:
# Directory the weight files are loaded from below. The commented paths are
# apparently the directories of earlier runs (22, 23) -- confirm.
#model_dir = '/home/reith/deepspeech/ba-dls-deepspeech/models/22-cont-23-i9696-lr1e-4-train-360-dur15/'
#model_dir = '/home/reith/deepspeech/ba-dls-deepspeech/models/23-cont-i2494-joingrus-dur15-nobn-lr5e-5/'
model_dir = '/home/reith/deepspeech/ba-dls-deepspeech/models/24-cont-train-860'

A summary of training procedure:
- 7 Epochs of dual phoneme-text on train-100 (20)
- 3 Epochs on train-500 for phoneme fine-tuning (21)
- 3 Epochs on train-500 for text fine-tuning (22)
- 2 Epochs on train-360 (23)
- 2 Epochs on train-360 dropping the phoneme branch and batch normalization (24)

make half phoneme model 

In [10]:
from model_wrp import HalfPhonemeModelWrapper
# Build the half-phoneme model and the output function that evaluate() calls.
# The GRU cell below rebinds the same three names, so run only one of the two.
model_wrp = HalfPhonemeModelWrapper()
model = model_wrp.compile(nodes=1000, conv_context=5, recur_layers=5)
output_fn = model_wrp.compile_output_fn()

  self.model = Model(input=acoustic_input, output=[phoneme_out, text_out])


or gru model

In [11]:
from model_wrp import GruModelWrapper
# Alternative to the half-phoneme cell above: GRU model without batch normalization.
# Rebinds model_wrp, model and output_fn.
model_wrp = GruModelWrapper()
model = model_wrp.compile(nodes=1000, conv_context=5, recur_layers=5, batch_norm=False)
output_fn = model_wrp.compile_output_fn()

  activation=for_tf_or_th('softmax', 'linear')
  self.model = Model(input=acoustic_input, output=[network_output])


In [14]:
# Load one specific weight file from model_dir; the disabled line loads
# 'best-val-weights.h5' from the same directory instead.
# model.load_weights(os.path.join(model_dir, 'best-val-weights.h5'))
model.load_weights(os.path.join(model_dir, 'model_19336_weights.h5'))

#### Tensorflow model

A summary of training procedure:
- 3 Epochs of dual phoneme-text on train-100 by dropout of 0.3 and leaky relu factor of 0.05 (40)
- 5 Epochs on train-100 for phoneme fine-tuning (41)
- 5 Epochs on train-100 for text fine-tuning (42)
- 5 Epochs on train-360 (43)
- 5 Epochs on train-860 dropping the phoneme branch and batch normalization, with dropout reduced to 0.1 (44)
- 20 Epochs on train-860 reduced learning rate down to 5e-5 and for samples up to 20 seconds long (45)

In [9]:
# NOTE(review): rebinds model_dir from the Theano section, so run only one of the
# two "Load model" sections per kernel session.
model_dir = '/home/reith/deepspeech/ba-dls-deepspeech/models/44-cont-45-i14490-dur20-lr5e-5'

In [10]:
from model_wrp import GruModelWrapper
# GRU model built with dropout 0.1, lirelu_alpha 0.05 and no batch normalization.
# NOTE(review): the first cell sets KERAS_BACKEND to 'theano'; presumably that has
# to be changed before running this "Tensorflow model" section -- confirm.
model_wrp = GruModelWrapper()
model = model_wrp.compile(nodes=1000, conv_context=5, recur_layers=5, dropout=.1, lirelu_alpha=.05, batch_norm=False)
output_fn = model_wrp.compile_output_fn()

  activation=for_tf_or_th('softmax', 'linear')
  self.model = Model(input=acoustic_input, output=[network_output])


In [11]:
# Load the 'best-val-weights.h5' file from the model_dir set two cells above.
model.load_weights(os.path.join(model_dir, 'best-val-weights.h5'))

In [None]:
model.summary()

### Evaluate model

In [19]:
# Argmax decoding only (no language model). `res` is the tuple returned by
# evaluate: last-batch network output, last-batch predictions, the list of
# per-batch WER arrays, and the last-batch reference texts.
res = evaluate(test_generator(datagen, test_samples, normalize=True), output_fn, use_lm=False)

r:the condition is that i will be permitted to make luther talk american streamline him so to speak because you will never get people whether in or outside the lutheran church actually to read luther unless we make him talk as he would talk today to americans
p:the condition is that i will be permitted to make lutratalk american stream line him certof speak because you will never get people whether ann aroutside the liter in church actually to read luther an must be mak in talk as he would talk to day to americans
0: WER: 0.0778210116732, DURATION: 16.1250625, PATH: /mnt/ml-data/LibriSpeech/test-clean/2830/3979/2830-3979-0001.wav
r:we are to hear christ who has been appointed by the father as our divine teacher
p:we are to hear crist to his benapranted by the father as our divine teacher
1: WER: 0.0838709677419, DURATION: 4.88, PATH: /mnt/ml-data/LibriSpeech/test-clean/2830/3980/2830-3980-0052.wav
r:in the communities of the western culture this point is at present found among the lowe

r:they knew no north no south no east no west they stood positively by the constitution and would have nothing to do in the bloody strife between brothers unless indeed they were summoned by the authority to which they had already once loyally responded to furnish men and arms for their country's need
p:they knew no north no south no east now esk they saod positively by the constitution and whan have nothing to do in the bloody strife between brothers inless in deed theye were summoned by the authority too hich they had already once loedly responded to furnish men in arms for they countries need
0: WER: 0.0653266331658, DURATION: 17.84, PATH: /mnt/ml-data/LibriSpeech/test-clean/4077/13754/4077-13754-0005.wav
r:as the ambassador of a government is honored for his office and not for his private person so the minister of christ should exalt his office in order to gain authority among men
p:as the ambassador of a government is honored for his office and not for his private person so the mi

r:it is hardly necessary to say more of them here
p:it is hardly necessary to say more of them here
0: WER: 0.0, DURATION: 3.545, PATH: /mnt/ml-data/LibriSpeech/test-clean/8463/287645/8463-287645-0001.wav
r:there were a few rose bushes however and a number of apple trees probably the descendants of those planted by the reverend mister blackstone the first settler of the peninsula that half mythological personage who rides through our early annals seated on the back of a bull
p:there were a few rose bushes however and a number of aple trees probably the descendants of those planted by the reverent o mister blackstone the first settleer of the pinnansula that half mithalogical personage who rides through our early annals seated on the back of a ball
1: WER: 0.0310786106033, DURATION: 16.27, PATH: /mnt/ml-data/LibriSpeech/test-clean/1221/135767/1221-135767-0023.wav
r:edison held that the electricity sold must be measured just like gas or water and he proceeded to develop a meter
p:ever si

r:independent of her fortune she has beauty to captivate the heart of any man and with all her follies she has a frankness in her manner an unaffected wisdom in her thoughts a vivacity in her conversation and withal a softness in her demeanour that might alone engage the affections of a man of the nicest sentiments and the strongest understanding
p:in the pen en of her fortune she has beauty to captivate the heart of any man and with all or follies she has a frankness in her manner and unaffected wisdom in her fhoughts of a vacity an her conversation and withall a softness in her demeanour that might alone angage the affections of a man of the nicest sentiments and the strongest understanding
0: WER: 0.0373563218391, DURATION: 21.735, PATH: /mnt/ml-data/LibriSpeech/test-clean/4992/23283/4992-23283-0012.wav
r:know then son of my heart that this fainting lady is your real bride i say real because she is the one whom your father and i have chosen for you and the portrait was a pretence
p:

r:in autumn the wood cutters always came and felled some of the largest trees
p:in autumn the wood cotters always came and felled some of te largest trees
0: WER: 0.0201342281879, DURATION: 6.42, PATH: /mnt/ml-data/LibriSpeech/test-clean/672/122797/672-122797-0007.wav
r:when she finished alexander shook himself out of a reverie
p:when she finished alexander shook himself out of a reverye
1: WER: 0.0172413793103, DURATION: 3.76, PATH: /mnt/ml-data/LibriSpeech/test-clean/4446/2273/4446-2273-0027.wav
r:yet these thoughts affected hester prynne less with hope than apprehension
p:yet these thoughts affected hester prim less with hope than apprehension
2: WER: 0.041095890411, DURATION: 4.825, PATH: /mnt/ml-data/LibriSpeech/test-clean/1221/135766/1221-135766-0002.wav
r:cried the young ladies and they quickly put out the fire
p:cried the young ladies and they quickly put out the fire 
3: WER: 0.00884955752212, DURATION: 4.0, PATH: /mnt/ml-data/LibriSpeech/test-clean/672/122797/672-122797-0032.

r:i don't anticipate
p:i don't anticipate 
0: WER: 0.027027027027, DURATION: 2.175, PATH: /mnt/ml-data/LibriSpeech/test-clean/121/127105/121-127105-0030.wav
r:not only this but on the table i found a small ball of black dough or clay with specks of something which looks like sawdust in it
p:not only this but on the table i found a small ball of black door wore clay with specks of something which looks like sawdustint 
1: WER: 0.0579150579151, DURATION: 7.065, PATH: /mnt/ml-data/LibriSpeech/test-clean/1580/141083/1580-141083-0012.wav
r:he worked me very hard he wanted to be beating me all the time
p:he worked me very hard he wanted to be beating me all the time
2: WER: 0.0, DURATION: 4.325, PATH: /mnt/ml-data/LibriSpeech/test-clean/8463/287645/8463-287645-0010.wav
r:only for a minute or so
p:only for a minute or so 
3: WER: 0.0212765957447, DURATION: 1.98, PATH: /mnt/ml-data/LibriSpeech/test-clean/1580/141083/1580-141083-0051.wav
r:ain't they the greatest
p:ain't they the greates
4: WER

r:her hair is still like flax and her blue eyes are just like a baby's and she has the same three freckles on her little nose and talks about going back to her bains de mer
p:her hair est still like flax and er blue eyes or just like a babies and she has the same three freckles on her little knose and talks about going back to her band a maire
0: WER: 0.0647058823529, DURATION: 9.645, PATH: /mnt/ml-data/LibriSpeech/test-clean/4446/2273/4446-2273-0016.wav
r:asked phronsie in intense interest slipping down out of polly's arms and crowding up close to jasper's side
p:asked pronzie and entense interests lipping down out of polly's arms and crowding up close o jasper side
1: WER: 0.0710900473934, DURATION: 6.79, PATH: /mnt/ml-data/LibriSpeech/test-clean/237/126133/237-126133-0014.wav
r:mainhall liked alexander because he was an engineer
p:main holl like alexander because he was an ungininer
2: WER: 0.0873786407767, DURATION: 3.495, PATH: /mnt/ml-data/LibriSpeech/test-clean/4446/2271/4446-22

r:mornin girls hope ye feel as well as ye look
p:marning girls ol pe vill as well is e look
0: WER: 0.209302325581, DURATION: 3.275, PATH: /mnt/ml-data/LibriSpeech/test-clean/8555/284447/8555-284447-0009.wav
r:though thrown into prison for this enterprise and detained some time he was not discouraged but still continued by his countenance and protection to infuse spirit into the distressed royalists
p:thoh thrown into prison for this enterprise and detained some time he was not discouraged but still continued by his countence and protection to enfuse spirit into the distressed rioalists
1: WER: 0.0263157894737, DURATION: 17.74, PATH: /mnt/ml-data/LibriSpeech/test-clean/8224/274381/8224-274381-0000.wav
r:constantine easily believed that the heretics who presumed to dispute his opinions or to oppose his commands were guilty of the most absurd and criminal obstinacy and that a seasonable application of moderate severities might save those unhappy men from the danger of an everlasting cond

r:their diving stone poised on its rude supports and rocking under their plunges and the rough hewn stones of the sloping breakwater over which they scrambled in their horseplay gleamed with cold wet lustre
p:ther diving stone posed on its rood upports and rocking under their plunges thand the rough hum stones of the sloping breake water over which they scrambled in their horse play gleamed with cold wet luster
0: WER: 0.0562347188264, DURATION: 13.37, PATH: /mnt/ml-data/LibriSpeech/test-clean/1089/134691/1089-134691-0021.wav
r:all the morning they trudged up the mountain path and at noon unc and ojo sat on a fallen tree trunk and ate the last of the bread which the old munchkin had placed in his pocket
p:all the morning they trudged up the mountain path and at noon uncan ojo sat on a fallen tree trunk and ete the last of the bread which the old munchkuin had placed in his pocket
1: WER: 0.0140845070423, DURATION: 10.49, PATH: /mnt/ml-data/LibriSpeech/test-clean/1284/1180/1284-1180-000

r:he stood a moment bewildered then turned and rushed upon the island a great sheet of dazzling sunlight swept the place and beneath lay a mighty mass of olive green thick tall wet and willowy
p:he stood a moment be woldered then turned and rushed upon the island a great sheet of dazzling sunlight swept the place and beneath lay a mighty mass of olive green they tall wet and willaway
0: WER: 0.0288713910761, DURATION: 12.46, PATH: /mnt/ml-data/LibriSpeech/test-clean/1995/1837/1995-1837-0014.wav
r:now that too is over
p:now that two is over
1: WER: 0.05, DURATION: 3.03, PATH: /mnt/ml-data/LibriSpeech/test-clean/672/122797/672-122797-0065.wav
r:why are we to be divided
p:why are wed to be divided
2: WER: 0.0204081632653, DURATION: 2.11, PATH: /mnt/ml-data/LibriSpeech/test-clean/3575/170457/3575-170457-0002.wav
r:for in the times before the great flood athens was the greatest and best of cities and did the noblest deeds and had the best constitution of any under the face of heaven
p:four 

r:the earth is not devoid of resemblance to a jail
p:the earth is not devoid of hisemblance to a chail
0: WER: 0.0721649484536, DURATION: 3.99, PATH: /mnt/ml-data/LibriSpeech/test-clean/4507/16021/4507-16021-0043.wav
r:in person welcome aboard professor your cabin is waiting for you
p:in person walkaboid forfessor your cabin is waiting for you
1: WER: 0.138211382114, DURATION: 4.395, PATH: /mnt/ml-data/LibriSpeech/test-clean/8463/294828/8463-294828-0032.wav
r:the meter continued in general service during eighteen ninety nine and probably up to the close of the century
p:the meter continued and general service during eighteen ninety nine and probably up to the close of the century
2: WER: 0.0135746606335, DURATION: 7.085, PATH: /mnt/ml-data/LibriSpeech/test-clean/2300/131720/2300-131720-0036.wav
r:paul answers the man who is named jesus christ and the son of god gave himself for our sins
p:paulionsers the man who is named jesus crist and the son of god gave himself for her since
3: WER:

r:when a private in the eighth cavalry he had been on the point of quitting the army at twenty eight years of age but unexpectedly he had been appointed orderly to captain servadac
p:when a private in the egpth cavalry he had been on the point of quitting the army at twenty eight years of age but unexpectedly he had been appointed orderly to captain servadac
0: WER: 0.00845070422535, DURATION: 10.73, PATH: /mnt/ml-data/LibriSpeech/test-clean/5105/28233/5105-28233-0008.wav
r:what could he do he caught up everything which would betray him and he rushed into your bedroom to conceal himself
p:what could he do he caught up everything which would betray him and he rushed into your bedroom to conceal himself 
1: WER: 0.00436681222707, DURATION: 5.73, PATH: /mnt/ml-data/LibriSpeech/test-clean/1580/141083/1580-141083-0037.wav
r:by degrees all his happiness all his brilliancy subsided into regret and uneasiness so that his limbs lost their power his arms hung heavily by his sides and his head dr

r:but take it whilst i live and wear montfichet's shield in the days when my eyes can be rejoiced by so brave a sight for you will ne'er disgrace our scutcheon i warrant me
p:but take it will stilive and wer montfeches yhe old in the days when my eyes can be rejoiced by so bravasight for you will never desgrat sours duchan i warran't me
0: WER: 0.129129129129, DURATION: 10.515, PATH: /mnt/ml-data/LibriSpeech/test-clean/61/70970/61-70970-0004.wav
r:he set off abruptly for the bull walking rapidly lest his father's shrill whistle might call him back and in a few moments he had rounded the curve at the police barrack and was safe
p:he set off abruptly for the bull walking rapidly lest his father' shrill whistle might call him back and an a few moments he had rounded the curve at the police barrack and was safe
1: WER: 0.00826446280992, DURATION: 11.6, PATH: /mnt/ml-data/LibriSpeech/test-clean/1089/134691/1089-134691-0002.wav
r:it's been on only two weeks and i've been half a dozen times a

r:said missus horton a few minutes after
p:said missus horton a few minutes after
0: WER: 0.0, DURATION: 3.12, PATH: /mnt/ml-data/LibriSpeech/test-clean/4992/23283/4992-23283-0002.wav
r:from the blackness behind the light they heard a voice warrenton's
p:from the blackness behind the light they heard a voice warrantons
1: WER: 0.0229007633588, DURATION: 4.03, PATH: /mnt/ml-data/LibriSpeech/test-clean/61/70970/61-70970-0029.wav
r:is it better than anywhere else
p:is it better than anywhere else 
2: WER: 0.015873015873, DURATION: 2.84, PATH: /mnt/ml-data/LibriSpeech/test-clean/3729/6852/3729-6852-0030.wav
r:he gave way to the others very readily and retreated unperceived by the squire and mistress fitzooth to the rear of the tent
p:he gave way to the others very readily and retreated unperceived by the squire and mistress fitzooth to the rear of the tent
3: WER: 0.0, DURATION: 6.375, PATH: /mnt/ml-data/LibriSpeech/test-clean/61/70968/61-70968-0011.wav
r:they are all sketches made about t

r:if the count were on board a strange fatality was bringing him to the presence of his rival
p:if the count were on board a strange fetality was bringing him to the presence of his rival
0: WER: 0.010989010989, DURATION: 6.015, PATH: /mnt/ml-data/LibriSpeech/test-clean/5105/28240/5105-28240-0004.wav
r:have you not met them anywhere
p:have you not met them anywhere
1: WER: 0.0, DURATION: 2.765, PATH: /mnt/ml-data/LibriSpeech/test-clean/672/122797/672-122797-0009.wav
r:there was something in his air and manner that betrayed to the scout the utter confusion of the state of his mind
p:there was something in his air in temanner that betrayed to the scout the utter confusion of the state of his mind
2: WER: 0.0308370044053, DURATION: 6.285, PATH: /mnt/ml-data/LibriSpeech/test-clean/1320/122617/1320-122617-0003.wav
r:you will say that a woman has no need of such a caution there can be no peril in it for her
p:you will say that i woman has no need of such a caution there can be no peril in it

r:shall i never miss home talk and blessing and the common kiss that comes to each in turn nor count it strange when i look up to drop on a new range of walls and floors another home than this
p:shall i never miss home talk and blessing and the common kiss the comes to each in turn nor countid strange when i look up tho drop on a new range of walls and floors another home than this
0: WER: 0.0184696569921, DURATION: 14.755, PATH: /mnt/ml-data/LibriSpeech/test-clean/908/31957/908-31957-0004.wav
r:for some years it was not found feasible to operate motors on alternating current circuits and that reason was often urged against it seriously
p:for some years it was not found feasible to operate motors on alternating current circuits and that reason was often urged against it seriously
1: WER: 0.0, DURATION: 9.605, PATH: /mnt/ml-data/LibriSpeech/test-clean/2300/131720/2300-131720-0009.wav
r:is thee going to the yearly meeting ruth asked one of the girls
p:is thee going to the early meeting r

In [47]:
# Same samples, now with beam search (width 27) and language-model selection among
# the beam alternatives. A new test_generator is created, so all samples are
# featurized again. Rebinds `res`.
res = evaluate(test_generator(datagen, test_samples, normalize=True), output_fn, beam_width=27, use_lm=True)

r:poyser is not at home is he
p:poisoris not at home asse
0: WER: 0.192307692308, DURATION: 2.21, PATH: /mnt/ml-data/LibriSpeech/test-clean/2094/142345/2094-142345-0047.wav
r:this missus poyser said blushing and believing that the captain was really interested in her milk pans and would adjust his opinion of her to the appearance of her dairy
p:this missus poisor said blushing and believing that the captain was really interested in her miutpans and would edjust his opinion of her to the appearance of her dairy
1: WER: 0.0326409495549, DURATION: 10.01, PATH: /mnt/ml-data/LibriSpeech/test-clean/2094/142345/2094-142345-0059.wav
r:then is the time to introduce a meal on the stage
p:then is the time to entroduce a meal on the stage
2: WER: 0.0204081632653, DURATION: 2.86, PATH: /mnt/ml-data/LibriSpeech/test-clean/7176/92135/7176-92135-0037.wav
r:at tea time they were sad and silent and the meal went away untouched by any of the three
p:and ti time they were sad and silent and the meal went 

r:the king's ears were now open to montrose's counsels who proposed none but the boldest and most daring agreeably to the desperate state of the royal cause in scotland
p:the kings ears were now open to montroses councels who proposed none but the boldest and most daring agreeably to the desperate state of the royal cause in scotland
0: WER: 0.0121212121212, DURATION: 13.085, PATH: /mnt/ml-data/LibriSpeech/test-clean/8224/274381/8224-274381-0003.wav
r:in other words these three men took down the lectures which luther addressed to his students in the course of galatians and roerer prepared the manuscript for the printer
p:an other words these three men took down the lectures which lotheradressed to his tents in the course of glations and roar prepared the manuscript for the princer
1: WER: 0.0542168674699, DURATION: 9.44, PATH: /mnt/ml-data/LibriSpeech/test-clean/2830/3979/2830-3979-0008.wav
r:had eva crasweller not been good looking had jack been still at college had sir kennington ova

r:the delawares are children of the tortoise and they outstrip the deer
p:the delawares are children of the tortis and the outstripped the deer
0: WER: 0.0434782608696, DURATION: 3.855, PATH: /mnt/ml-data/LibriSpeech/test-clean/1320/122617/1320-122617-0022.wav
r:their distinctive characters however display one broad and unfailing difference
p:there it istinctive characters however display one broad and unfailing difference
1: WER: 0.05, DURATION: 5.5, PATH: /mnt/ml-data/LibriSpeech/test-clean/7729/102255/7729-102255-0023.wav
r:the genealogies which you have recited to us out of your own annals solon are a mere children's story
p:the genialogies which wu have recited thil earts out of your own annal solen i emuwed children story
2: WER: 0.164179104478, DURATION: 7.815, PATH: /mnt/ml-data/LibriSpeech/test-clean/2961/961/2961-961-0011.wav
r:luis was out of danger in a fortnight in a month he rose from his bed and during all that time he was visited daily by his mother and grandmother and 

r:but i didn't know you've only to tell me now
p:but i didn't know you've only to tell me now 
0: WER: 0.0112359550562, DURATION: 3.245, PATH: /mnt/ml-data/LibriSpeech/test-clean/4446/2275/4446-2275-0032.wav
r:the crampness and the poverty are all intended
p:the cramtness in the poverty are an tended
1: WER: 0.113636363636, DURATION: 3.23, PATH: /mnt/ml-data/LibriSpeech/test-clean/1188/133604/1188-133604-0040.wav
r:he never loses sight of the purpose of his epistle
p:he never lose his sight of the purpose of his apistl
2: WER: 0.0588235294118, DURATION: 2.675, PATH: /mnt/ml-data/LibriSpeech/test-clean/2830/3980/2830-3980-0060.wav
r:tabby had lived with them for ten or twelve years and was as charlotte expressed it one of the family
p:tabby had lived with them for tenortollv years and was as sharlotte expressed it one of the family
3: WER: 0.0452261306533, DURATION: 6.525, PATH: /mnt/ml-data/LibriSpeech/test-clean/3575/170457/3575-170457-0047.wav
r:thrusting open the proper entrance of 

r:i never see lou's scythe over here
p:i never see loose sight over here
0: WER: 0.194029850746, DURATION: 2.875, PATH: /mnt/ml-data/LibriSpeech/test-clean/237/134493/237-134493-0010.wav
r:and what through the left hand window
p:and what through the left hand windows
1: WER: 0.0133333333333, DURATION: 2.64, PATH: /mnt/ml-data/LibriSpeech/test-clean/2094/142345/2094-142345-0004.wav
r:thought kills me that i am not thought to leap large lengths of miles when thou art gone but that so much of earth and water wrought i must attend time's leisure with my moan receiving nought by elements so slow but heavy tears badges of either's woe
p:thought kills me that i am not thought to leave large length of miles when thou art gone but that so much of earth and water wrought i must attend times lesure with my moan receiving not by elements so slow but heavy tears badgers of eithers woe
2: WER: 0.0222222222222, DURATION: 23.505, PATH: /mnt/ml-data/LibriSpeech/test-clean/121/123852/121-123852-0003.wav

r:paul came later and is beneath us
p:parking later in his beneath us
0: WER: 0.28125, DURATION: 2.48, PATH: /mnt/ml-data/LibriSpeech/test-clean/2830/3980/2830-3980-0003.wav
r:why should the mistress of the vales of har utter a sigh
p:why should the mistress of the veils of har utterasigh
1: WER: 0.0727272727273, DURATION: 4.06, PATH: /mnt/ml-data/LibriSpeech/test-clean/908/157963/908-157963-0009.wav
r:delivered in a strong tone of assent announced the gratification the savage would receive in witnessing such an exhibition of weakness in an enemy so long hated and so much feared
p:delivered in a strong tone of a cent andnounced the gratification the savage would receive and witnessing such an exhibition of weatness and an enemy so long hated and so much feared
2: WER: 0.0360110803324, DURATION: 10.755, PATH: /mnt/ml-data/LibriSpeech/test-clean/1320/122617/1320-122617-0013.wav
r:his troubled blue eyes glanced at each of us and finally rested with an expression of blank dismay upon banni

r:one of mister hopkins's first tasks after calling his faithful henchmen around him was to make a careful canvass of the voters of his district to see what was still to be accomplished
p:one ofister hopkins first tasks after calling his faithful inchman around him was to make a careful kilndess of the voters of his district to see what was still to be accomplished
0: WER: 0.0497237569061, DURATION: 10.355, PATH: /mnt/ml-data/LibriSpeech/test-clean/6829/68771/6829-68771-0001.wav
r:and one more this morning
p:and one more this morning 
1: WER: 0.0196078431373, DURATION: 2.06, PATH: /mnt/ml-data/LibriSpeech/test-clean/1580/141084/1580-141084-0022.wav
r:there was a unanimous groan at this and much reproach after which in his preoccupied way he explained
p:there was a unanimous groan at this and much reproach after which in his preoccupied way he explained
2: WER: 0.0, DURATION: 7.725, PATH: /mnt/ml-data/LibriSpeech/test-clean/121/127105/121-127105-0003.wav
r:give me a check for a hundred 

r:anyhow we'll leave instructions to ship the whole menagerie to france
p:anyhow will eveenstroctions to ship the whome and aguri to france
0: WER: 0.164179104478, DURATION: 4.53, PATH: /mnt/ml-data/LibriSpeech/test-clean/8463/294828/8463-294828-0019.wav
r:departing from five hundred thousand throats three cheers burst forth in succession
p:departing from five hundred thousand throats three cheers burst forth in succession
1: WER: 0.0, DURATION: 5.37, PATH: /mnt/ml-data/LibriSpeech/test-clean/8463/294828/8463-294828-0037.wav
r:who taught you to scrub a floor i should like to know
p:who tight you toscrib u floor asiould like to know
2: WER: 0.126213592233, DURATION: 3.155, PATH: /mnt/ml-data/LibriSpeech/test-clean/2094/142345/2094-142345-0018.wav
r:the mystery of their origin their capacity for evolving latent faculties of crime and the steady vitality with which they survive the hearse and speak their deep mouthed malignities in every new born generation have associated them somehow in

r:i did not even take the precaution of smoking up the chimney
p:i did not even take the precaution of smoking up the chimney 
0: WER: 0.00826446280992, DURATION: 3.47, PATH: /mnt/ml-data/LibriSpeech/test-clean/5683/32866/5683-32866-0025.wav
r:paul takes pride in his ministry not to his own praise but to the praise of god
p:paltecs pridin his ministry not to his own phrase but to the praise of god
1: WER: 0.0718954248366, DURATION: 4.84, PATH: /mnt/ml-data/LibriSpeech/test-clean/2830/3980/2830-3980-0008.wav
r:most people talk too much so it is a relief to find one who talks too little
p:most people talk too much so it is a relief to find one who talks to little
2: WER: 0.00662251655629, DURATION: 5.115, PATH: /mnt/ml-data/LibriSpeech/test-clean/1284/1181/1284-1181-0015.wav
r:have your will child if the boy also wills it montfichet answered feeling too ill to oppose anything very strongly just then
p:have your will child if the boy also wills iten montby shay answered feeling too ill to

r:effects of the increased use and disuse of parts
p:iffeccidyancreased use and dis use of parts 
0: WER: 0.217391304348, DURATION: 3.565, PATH: /mnt/ml-data/LibriSpeech/test-clean/5142/36586/5142-36586-0004.wav
r:the count had thrown himself back on his seat leaning his shoulders against the partition of the tent and remained thus his face buried in his hands with heaving chest and restless limbs
p:they count had the rown himself back on his seat leaning his shoulders against the partition of the tent and remained bus his face buried in his hands with heaving chest and restlesslimbs he
1: WER: 0.0265957446809, DURATION: 13.16, PATH: /mnt/ml-data/LibriSpeech/test-clean/6930/75918/6930-75918-0005.wav
r:so saying she led the way on tiptoe followed by the children out of the room and round by a circuitous route to the piazza there
p:so saying she led the way untiptoe followed by the children out of the room and round by a sircuitous root to the piazza there
2: WER: 0.0314960629921, DURATI

r:the chaos in which his ardour extinguished itself was a cold indifferent knowledge of himself
p:the chaos in which his ardor extinguished itself was a cold in different knowledge of himself
0: WER: 0.010752688172, DURATION: 6.73, PATH: /mnt/ml-data/LibriSpeech/test-clean/1089/134686/1089-134686-0008.wav
r:to meet the needs of this conflict wretchedness has invented a language of combat which is slang
p:to beat the needs of this conflect wretchedness has invented an language of combat which is slaing
1: WER: 0.0412371134021, DURATION: 9.215, PATH: /mnt/ml-data/LibriSpeech/test-clean/4507/16021/4507-16021-0025.wav
r:not once did he comment on the length or the hardships of a journey
p:not once to he comment on the length of the hardships of the journey
2: WER: 0.0814814814815, DURATION: 4.17, PATH: /mnt/ml-data/LibriSpeech/test-clean/8463/294828/8463-294828-0009.wav
r:the last drop fly as luck would have it caught just in the corner of the hawk's angrily open beak hooking itself firmly

r:young fitzooth had been commanded to his mother's chamber so soon as he had come out from his converse with the squire
p:young fit youth had been commanded to his mother's chamber so soon as he had come out from his converse with the squire
0: WER: 0.0210970464135, DURATION: 6.075, PATH: /mnt/ml-data/LibriSpeech/test-clean/61/70970/61-70970-0000.wav
r:yes all alone by himself asserted jasper vehemently and winking furiously to the others to stop their laughing he did now truly phronsie
p:yes all alone by himself a certed jasper beametly and winking puriously to the others to stop their laughing he did now truly froncy
1: WER: 0.0746268656716, DURATION: 9.34, PATH: /mnt/ml-data/LibriSpeech/test-clean/237/126133/237-126133-0015.wav
r:to celebrate the arrival of her son silvia gave a splendid supper to which she had invited all her relatives and it was a good opportunity for me to make their acquaintance
p:to celebrate the arrival of her son sylvia gave a splendid supper to which she ha

r:to fade away like morning beauty from her mortal day down by the river of adona her soft voice is heard and thus her gentle lamentation falls like morning dew
p:to fade away like morning beauty from her mortal day down by the river of a dona her soft voices heard and thus her gentle lamentation falls like morning due
0: WER: 0.0222222222222, DURATION: 12.62, PATH: /mnt/ml-data/LibriSpeech/test-clean/908/157963/908-157963-0000.wav
r:the word of our god shall stand forever
p:the word of ourgod shall stand forever
1: WER: 0.012987012987, DURATION: 3.625, PATH: /mnt/ml-data/LibriSpeech/test-clean/2830/3979/2830-3979-0012.wav
r:three seconds before the arrival of j b hobson's letter i no more dreamed of chasing the unicorn than of trying for the northwest passage
p:three seconds before the arrival of jbi hopsans letter i know more dreamed of chasing the unucon than of trying for the northwest passage
2: WER: 0.0510948905109, DURATION: 9.19, PATH: /mnt/ml-data/LibriSpeech/test-clean/8463/2

r:i love thee freely as men strive for right i love thee purely as they turn from praise
p:i love thee freely as men strive for right i love thee purely as they turned from prayse
0: WER: 0.0229885057471, DURATION: 8.515, PATH: /mnt/ml-data/LibriSpeech/test-clean/908/31957/908-31957-0023.wav
r:here lay the reading of the riddle with infinite work and pain some one had dug a canal from the lagoon to the creek into which the former had drained by a long and crooked way thus allowing it to empty directly
p:here lay the reading of the widdle with infinite work and pain some one had done a a nol from the lagoon to the great into which the former had drained by a long and cooked way thus allowing itto empty directly
1: WER: 0.0498812351544, DURATION: 12.825, PATH: /mnt/ml-data/LibriSpeech/test-clean/1995/1837/1995-1837-0018.wav
r:it is only a pencil outline by edward burne jones in illustration of the story of psyche it is the introduction of psyche after all her troubles into heaven
p:it is

r:stephen's heart began slowly to fold and fade with fear like a withering flower
p:stevens heart began slowly to fold and fade with fear like a withering flower
0: WER: 0.025641025641, DURATION: 6.615, PATH: /mnt/ml-data/LibriSpeech/test-clean/1089/134686/1089-134686-0031.wav
r:the condition is that i will be permitted to make luther talk american streamline him so to speak because you will never get people whether in or outside the lutheran church actually to read luther unless we make him talk as he would talk today to americans
p:the condition is that i will be permitted to make litheratalk american stream line ham certof speak because you will never get people whether ann aroutside the luteran church actually to read luther and must be makcontalk as he would talk to day to americans
1: WER: 0.0834951456311, DURATION: 16.1250625, PATH: /mnt/ml-data/LibriSpeech/test-clean/2830/3979/2830-3979-0001.wav
r:many of these affiliated gentlemen of leisure are at the same time lesser men of 

r:the vicious character of sin is brought out by the words who gave himself for our sins
p:the vicious character of san is brought out by the words who gave himself for her since
0: WER: 0.0520231213873, DURATION: 5.5550625, PATH: /mnt/ml-data/LibriSpeech/test-clean/2830/3980/2830-3980-0069.wav
r:the old servant told him quietly as they crept back to gamewell that this passage way led from the hut in the pleasance to sherwood and that geoffrey for the time was hiding with the outlaws in the forest
p:the oldservant told him quietly as they crept back to gamwell that this passage way let from the hut in the pleasant to shurewood and that jeffrey for the time was hiding with the outlaws in the forest
1: WER: 0.0320987654321, DURATION: 10.4, PATH: /mnt/ml-data/LibriSpeech/test-clean/61/70970/61-70970-0038.wav
r:the governor on his part becoming doubtful of the legality of employing missouri militia to enforce kansas laws was also eager to secure the help of federal troops
p:the gavernor on

In [41]:
# One "<text> <score>" line per candidate returned by beam_decode_u for
# slice 49 of res[0] (second argument 8 — presumably the beam width).
print('\n'.join(
    '{s} {p}'.format(s=beam[0], p=beam[1])
    for beam in zip(*beam_decode_u(res[0][:, 49, :], 8))
))

ulme letmteaebh es ra pe c heo knsn 169.558395386
ulme letmteaebh es ra pe c eo knsn 169.513000488
ulme letmteaebh es rae pe c heo knsn 169.449188232
ulme letmteaebh es rae pe c eo knsn 169.403793335
ulme letmteaebh s ra pe c heo knsn 169.402740479
ultme letmteaebh es ra pe c heo knsn 169.377624512
ulme letmteaebh es ra pe c heo ksn 169.311843872
ulme letmteaebh es ra pe c eo ksn 169.266448975


In [389]:
# One "<text> <score>" line per candidate returned by beam_decode_u for
# slice 49 of res[0], this time with eps=.5 and normalize=True.
print('\n'.join(
    '{s} {p}'.format(s=beam[0], p=beam[1])
    for beam in zip(*beam_decode_u(res[0][:, 49, :], 20, eps=.5, normalize=True))
))

oh little cloud the vertun said i charged thee to tell me why thou complain is now when in one hour thou fataway then we shall seek thee but not fine ah fell as like ty thee 16591.984375
oh little cloud the virtun said i charged thee to tell me why thou complain is now when in one hour thou fataway then we shall seek thee but not fine ah fell as like ty thee 16591.9765625
oh little cloud the vertin said i charged thee to tell me why thou complain is now when in one hour thou fataway then we shall seek thee but not fine ah fell as like ty thee 16591.9257812
oh little cloud the virtin said i charged thee to tell me why thou complain is now when in one hour thou fataway then we shall seek thee but not fine ah fell as like ty thee 16591.9160156
oh little cloud the vertun said i charge thee to tell me why thou complain is now when in one hour thou fataway then we shall seek thee but not fine ah fell as like ty thee 16591.7851562
oh little cloud the virtun said i charge thee to tell me why t

In [378]:
# One "<text> <score>" line per candidate returned by beam_decode_u for
# slice 1 of res[0] with normalize=True.
print('\n'.join(
    '{s} {p}'.format(s=beam[0], p=beam[1])
    for beam in zip(*beam_decode_u(res[0][:, 1, :], 20, normalize=True))
))

ah the swab the cruel squart 12610.1259766
ah the swab the qruel squart 12609.7929688
ah the swab the cruel squar 12609.7626953
ah the swam the cruel squart 12609.5244141
ah the swab the cruel squarp 12609.5
ah the swab the qruel squar 12609.4296875
ah the swab the cruel swuart 12609.3925781
ah the swab othe cruel squart 12609.2734375
ah the swam the qruel squart 12609.1914062
ah the swab the cruel squant 12609.1806641
ah the swab the qruel squarp 12609.1669922
ah the swam the cruel squar 12609.1601562
ah the swab the cruel squamt 12609.1191406
ah the swab the qruel swuart 12609.0595703
ah the swab the cruel swuar 12609.0283203
ah the swap the cruel squart 12608.9619141
ah the swab othe qruel squart 12608.9404297
ah the swab othe cruel squar 12608.9101562
ah the swab the cruel squat 12608.9101562
ah the swam the cruel squarp 12608.8984375


In [386]:
for i in range(1, 100000, 1000):
    eps = 1000 / i
    preds = [e[0] for e in zip(*beam_decode_u(res[0][:, 1, :], 20, normalize=True)) if 'swamp' in e[0]]
    if preds:
        print preds, eps

In [371]:
# Smallest value stored in res[0][1] (presumably the lowest log-probability
# emitted for that slice — TODO confirm what res[0] holds).
res[0][1].min()

-11.001603

In [377]:
import beamsearch
reload(beamsearch)
from beamsearch import beam_decode_u

In [374]:
# thus idleness is the mother
# thus i don't lissisthe mother
def edits(word):
    """Return the set of strings exactly one simple edit away from `word`.

    The edits considered are: deleting one character, swapping two adjacent
    characters, replacing one character with a lowercase ASCII letter, and
    inserting a lowercase ASCII letter at any position (including the very
    beginning and the very end of the word).

    The result may contain `word` itself (e.g. replacing a letter by the
    same letter).
    """
    letters = 'abcdefghijklmnopqrstuvwxyz'
    # Every way of cutting the word in two, including the empty head and the empty tail.
    splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
    deletes = [head + tail[1:] for head, tail in splits if tail]
    transposes = [head + tail[1] + tail[0] + tail[2:] for head, tail in splits if len(tail) > 1]
    replaces = [head + c + tail[1:] for head, tail in splits if tail for c in letters]
    # No `if tail` filter here: the split with an empty tail is the one that
    # appends a letter at the end of the word (e.g. 'swam' -> 'swamp').
    inserts = [head + c + tail for head, tail in splits for c in letters]
    return set(deletes + transposes + replaces + inserts)

def edits_n(word, n):
    """Apply `edits` to `word` n times in a row and return the resulting set.

    Starts from the set {word}; each round replaces the current set with the
    union of `edits(w)` over all of its members. With n == 0 the result is
    just {word}.
    """
    es = set([word])
    for _ in range(n):
        # set().union(*...) also copes with an empty frontier, where
        # reduce() without an initial value would raise TypeError.
        es = set().union(*(edits(w) for w in es))
    return es

def words(text):
    """Split `text` on runs of whitespace and return the tokens as a list."""
    tokens = text.split()
    return tokens

def known_words(words):
    """Return, as a set, the members of `words` that appear in the global WORDS."""
    recognised = set()
    for candidate in words:
        if candidate in WORDS:
            recognised.add(candidate)
    return recognised

def candidate_words(word):
    """Return plausible vocabulary words for `word`.

    Preference order: the word itself if `known_words` accepts it, otherwise
    the known words among `edits_n(word, 1)`, otherwise the known words among
    `edits_n(word, 2)`. If none of these yields anything, falls back to the
    one-element list `[word]`.
    """
    exact = known_words([word])
    if exact:
        return exact
    for distance in (1, 2):
        nearby = known_words(edits_n(word, distance))
        if nearby:
            return nearby
    return [word]

# Quick check: list the correction candidates produced for 'swam'.
list(candidate_words("swam"))

['swam']

In [336]:
# Build the vocabulary consulted by known_words(): every whitespace-separated
# token found in the word list file.
with open('./data/lm/words.txt') as vocab_file:
    vocab_text = vocab_file.read()
WORDS = set(words(vocab_text))

In [None]:
# r:a ring of amethyst i could not wear here plainer to my sight than that first kiss
# p:a ring of amathyst i could not wear here plainer two my sight then that first kits

In [197]:
# Let best_lm_alternative choose among the candidates that beam_decode_u
# returns for sample 3 (12 requested, normalize=True).
# NOTE(review): res[3][3] / res[2][3] are presumably the reference text and the
# current prediction for that sample — confirm against the cell that builds res.
best_lm_alternative(res[3][3], res[2][3], zip(*beam_decode_u(res[0][:, 3, :], 12, normalize=True)))

she doesn't take up with anybody you know
she doesn't take up with anybody you know
langauge model changed prediction, WER changed from 0.0243902439024 to 0.0


"she doesn't take up with anybody you know"

In [187]:
# Same comparison for sample 46, decoding with normalize=False, followed by
# res[1][46] for side-by-side inspection.
print(best_lm_alternative(
    res[3][46],
    res[2][46],
    zip(*beam_decode_u(res[0][:, 46, :], 12, normalize=False))
))
print(res[1][46])

sir i have it in command to inform your excellency that you have been appointed governor of the crown colony which is called britannula
sir i have in command to anform your excellency that you have been appointed governor of the crown colony which is called britain mula
langauge model changed prediction, WER changed from 0.0334572490706 to 0.0334572490706
sir i have in command to anform your excellency that you have been appointed governor of the crown colony which is called britain mula
sir i have in command to anform your excellency that you have been appointed governor of the crown colony which is called britaan mula


In [165]:
import edit_distance
# Hand-written reference / prediction pair for a one-off word_error_rate check.
ref = 'there is no danger of the modern commentators on the timaeus falling into the absurdities of the neo platonists'
# NOTE: this first variant ("to meas") is overwritten by the next assignment
# ("to mes") and is therefore never scored.
pre = 'there is old danger of the madern commontychers un ther to meas falling into dubsurdities of the newo platinists'
pre = 'there is old danger of the madern commontychers un ther to mes falling into dubsurdities of the newo platinists'
#print edit_distance.SequenceMatcher(ref, pre).ratio()
# First element of word_error_rate's result for the single (ref, pre) pair;
# decoded=True presumably signals that the inputs are already plain text — TODO confirm.
word_error_rate([ref], [pre], decoded=True)[0]

0.16216216216216217

#### custom samples

In [None]:
# Two hand-picked utterances, each described by the "duration" / "text" / "key"
# fields that test_generator reads from a sample.
samples = [
    {
        "duration": 4.905,
        "text": "he began a confused complaint against the wizard who had vanished behind the curtain on the left",
        "key": "/mnt/ml-data/LibriSpeech/test-clean/61/70968/61-70968-0000.wav",
    },
    {
        "duration": 3.61,
        "text": "give not so earnest a mind to these mummeries child",
        "key": "/mnt/ml-data/LibriSpeech/test-clean/61/70968/61-70968-0001.wav",
    },
]

In [None]:
# Run evaluate over the batches that test_generator builds from the custom
# samples (normalize=True is already test_generator's default; spelled out here).
evaluate(test_generator(datagen, samples, normalize=True), output_fn)
