In [4]:
import numpy as np
from tqdm.auto import tqdm
from time import time
import json, pickle, os, string, kenlm, json
from collections import defaultdict, Counter
from itertools import groupby
import Levenshtein as Lev
import math


In [2]:
def lse(*args):
  """
  Stable log sum exp.
  """
  if all(a == -float('inf') for a in args):
      return -float('inf')
  a_max = max(args)
  lsp = math.log(sum(math.exp(a - a_max)
                      for a in args))
  return a_max + lsp

In [3]:
#s1 = True text
#s2 = predicted text

def wer_(s1, s2):
    """
    Computes the Word Error Rate, defined as the edit distance between the
    two provided sentences after tokenizing to words.
    Arguments:
        s1 (string): space-separated sentence
        s2 (string): space-separated sentence
    """

    # build mapping of words to integers
    b = set(s1.split() + s2.split())
    word2char = dict(zip(b, range(len(b))))

    # map the words to a char array (Levenshtein packages only accepts
    # strings)
    w1 = [chr(word2char[w]) for w in s1.split()]
    w2 = [chr(word2char[w]) for w in s2.split()]
    
    return Lev.distance(''.join(w1), ''.join(w2))

def cer_(s1, s2):
    """
    Computes the Character Error Rate, defined as the edit distance.

    Arguments:
        s1 (string): space-separated sentence
        s2 (string): space-separated sentence
    """
    s1, s2, = s1.replace(' ', ''), s2.replace(' ', '')

    return Lev.distance(s1, s2)




#When using the above implementation, use the code belove to calculate the wer in percentatge: 
#pred = list of ouput prediction of model (it is the text) # example [" MY NAME IS HEMANT", " I AM A GOD"]
# total_wer = 0
# for x in range(len(pred)):
#     transcript, reference = data_[x][1], pred[x]
#     wer_inst = wer(transcript, reference)
#     total_wer += float(wer_inst)
# print("WER is : ",total_wer/len(pred),"%")
# wer_(pred,true)/len(pred.split(' '))*100

In [4]:
out = np.load("out.npy")[0]
with open("true.txt", "r") as f:
    reference = f.read().strip()
    
with open("pred.txt", "r") as f:
    transcript = f.read().strip()

In [5]:
def ctc_best_path(out,labels):
    "implements best path decoding as shown by Graves"
    out = [labels[i] for i in np.argmax(out, axis=1) if i!=labels[-1]]
    o = ""
    for i,j in groupby(out):
        o = o + i
    return o.replace("_","")

In [6]:
labels = "_'ABCDEFGHIJKLMNOPQRSTUVWXYZ "

In [7]:
gred_txt = ctc_best_path(out,labels)
print(gred_txt)
np.round(wer_(gred_txt,reference)/len(reference.split(' '))*100,3)

AND CHARGED IFEVER HE MIGHT FIND SIR GAWANE AND SIR UWANE TO BRING THEM TO THE COURT AGAIN AND THEN WERE THEY ALL GLAD AND SO PRAY DHAS OR MORE HOUSE TO RIDE WITH THEM TO THE KING'S COURT


23.077

### WORD LM Implementation

In [179]:
lm_w = kenlm.LanguageModel('/home/hemant/asr_wm/lm/lm.binary')

In [532]:
def ctc_beam_search(out,labels, prune=0.0001, k=20, lm=None,alpha=0.3,beta=12):
    
    bc_i = 0 # blank/special charatcter index 
    F = out.shape[1]
    dummy_ = np.vstack((np.zeros(F), out))
    out = np.log(np.vstack((np.zeros(F), out)))
    steps = out.shape[0]
    
    pb, pnb = defaultdict(Counter), defaultdict(Counter)
    pb[0][''], pnb[0][''] = 0, -float("inf")
    prev_beams = ['']
    for t in tqdm(range(1,steps)):
        pruned_alphabet = [labels[i] for i in np.where(dummy_[t] > prune)[0]]
        for b in prev_beams:
            
            for c_t in pruned_alphabet:
                index = labels.index(c_t)
                if c_t == "_": #Extending with a blank
                    pb[t][b] += lse(pb[t][b],out[t][index]+pb[t-1][b], out[t][index]+pnb[t-1][b])
                    
                    continue
                
                else:
                    i_plus = b + c_t
                    if len(b.replace(' ', '')) > 0  and c_t == b[-1]: #Extending with the same character as the last one
                        pnb[t][b] = lse(out[t][index]+pnb[t-1][b],pnb[t][b])
                        pnb[t][i_plus] = lse(pnb[t][i_plus],out[t][index]+pb[t-1][b])
                    elif c_t == " " and len(b.replace(' ', '')) > 0 : # LM constraints
                        
                        pnb[t][i_plus] = lse(out[t][index]+pb[t-1][b],out[t][index] + pnb[t-1][b])
                    else:
                        pnb[t][i_plus] = lse(pnb[t][i_plus],out[t][index]+pb[t-1][b], out[t][index]+pnb[t-1][b])

#                     If the new beam is not in the previous beams
                    if i_plus not in prev_beams:
                        pb[t][i_plus] = lse(pb[t][i_plus],out[t][labels.index("_")]+pb[t - 1][i_plus], out[t][labels.index("_")]+ pnb[t - 1][i_plus])
                        pnb[t][i_plus] = lse(pnb[t][i_plus],out[t][index] + pnb[t - 1][i_plus])

        ptot = pb[t] + pnb[t]
#         print(t,'---',ptot)
        for i in ptot.keys():
            if len(i.replace(' ','' )) > 0 and i[-1] == ' ':
                words = i.strip()
                prob = ([i for i in lm.full_scores(words,eos=False,bos=False)][-1][0])*alpha
                word_inser = beta*np.log(len(i.strip().split(' '))+1)
                print('+++',len(i.strip().split(' ')),'___',i.strip().split(' '),prob,word_inser,lse(ptot[i],prob,word_inser),ptot[i])
                ptot[i] = lse(ptot[i],prob,word_inser)

        prev_beams = sort_beam(ptot,k)
#         print('\n\n\n')
    return prev_beams[0], pb, pnb


In [535]:
beam_txt, pb, pnb = ctc_beam_search(out[:],labels,0.0000,k=10,lm=lm_w,alpha=0,beta=0)

  


HBox(children=(FloatProgress(value=0.0, max=681.0), HTML(value='')))

+++ 1 ___ ['ITAND'] -0.0 0.0 1.1989399467368642 0.2750521142990805
+++ 1 ___ ['ATAND'] -0.0 0.0 1.1989399438820922 0.275052107107723
+++ 1 ___ ['HTAND'] -0.0 0.0 1.198939943094716 0.2750521051242718
+++ 1 ___ ['STAND'] -0.0 0.0 1.1989399428291712 0.27505210445534645
+++ 1 ___ ['NTAND'] -0.0 0.0 1.1989399426903233 0.27505210410558023
+++ 1 ___ ['WTAND'] -0.0 0.0 1.1989399424203189 0.27505210342542097
+++ 1 ___ ['LTAND'] -0.0 0.0 1.198939942369599 0.27505210329765406
+++ 1 ___ ['MTAND'] -0.0 0.0 1.1989399422730558 0.27505210305445593
+++ 1 ___ ['RTAND'] -0.0 0.0 1.1989399422486875 0.2750521029930706
+++ 1 ___ ['TAND'] -0.0 0.0 1.1989399421848537 0.275052102832269
+++ 1 ___ ['ITAND'] -0.0 0.0 4.467958160921548 4.44474948447552
+++ 1 ___ ['ATAND'] -0.0 0.0 4.46795815407908 4.4447494774723895
+++ 1 ___ ['HTAND'] -0.0 0.0 4.467958152191855 4.444749475540852
+++ 1 ___ ['STAND'] -0.0 0.0 4.467958151555382 4.444749474889435
+++ 1 ___ ['NTAND'] -0.0 0.0 4.467958151222585 4.444749474548824
+++ 1 

+++ 4 ___ ['STAND', 'CHARGED', 'IF', 'EVER'] -0.0 0.0 4.218777966189105 4.188902881421674
+++ 4 ___ ['NTAND', 'CHARGED', 'IF', 'EVER'] -0.0 0.0 4.218777966105596 4.188902881335633
+++ 4 ___ ['WTAND', 'CHARGED', 'IF', 'EVER'] -0.0 0.0 4.218777965943198 4.188902881168309
+++ 4 ___ ['LTAND', 'CHARGED', 'IF', 'EVER'] -0.0 0.0 4.218777965912693 4.18890288113688
+++ 4 ___ ['MTAND', 'CHARGED', 'IF', 'EVER'] -0.0 0.0 4.218777965854628 4.188902881077053
+++ 4 ___ ['RTAND', 'CHARGED', 'IF', 'EVER'] -0.0 0.0 4.218777965839971 4.188902881061952
+++ 4 ___ ['TAND', 'CHARGED', 'IF', 'EVER'] -0.0 0.0 4.218777965801577 4.188902881022393
+++ 4 ___ ['ITAND', 'CHARGED', 'IF', 'EVER'] -0.0 0.0 3.9520707020578154 3.912883054022669
+++ 4 ___ ['ITAND', 'CHARGED', 'IF', 'EVER'] -0.0 0.0 1.6833727873936999 1.2189648804715327
+++ 4 ___ ['ATAND', 'CHARGED', 'IF', 'EVER'] -0.0 0.0 3.952070700349827 3.9128830522464195
+++ 4 ___ ['ATAND', 'CHARGED', 'IF', 'EVER'] -0.0 0.0 1.6833727873931703 1.2189648804706903
+++ 4 

+++ 6 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT'] -0.0 0.0 4.542734010799223 4.521215611401031
+++ 6 ___ ['ATAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT'] -0.0 0.0 4.542734009889171 4.521215610471183
+++ 6 ___ ['HTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT'] -0.0 0.0 4.542734009638169 4.521215610214721
+++ 6 ___ ['STAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT'] -0.0 0.0 4.542734009553517 4.521215610128229
+++ 6 ___ ['NTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT'] -0.0 0.0 4.542734009509256 4.521215610083004
+++ 6 ___ ['WTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT'] -0.0 0.0 4.542734009423183 4.52121560999506
+++ 6 ___ ['LTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT'] -0.0 0.0 4.542734009407015 4.521215609978539
+++ 6 ___ ['MTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT'] -0.0 0.0 4.542734009376239 4.5212156099470935
+++ 6 ___ ['RTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT'] -0.0 0.0 4.542734009368468 4.521215609939154
+++ 6 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGH

+++ 8 ___ ['STAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR'] -0.0 0.0 2.57479806446742 2.409524279097104
+++ 8 ___ ['NTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR'] -0.0 0.0 2.5747980644603885 2.4095242790888083
+++ 8 ___ ['WTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR'] -0.0 0.0 2.574798064446713 2.4095242790726754
+++ 8 ___ ['LTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR'] -0.0 0.0 2.574798064444144 2.409524279069645
+++ 8 ___ ['MTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR'] -0.0 0.0 2.574798064439256 2.409524279063878
+++ 8 ___ ['RTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR'] -0.0 0.0 2.574798064438021 2.409524279062421
+++ 8 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR'] -0.0 0.0 2.574798064434788 2.409524279058607
+++ 8 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR'] -0.0 0.0 2.824225238422309 2.6978575543069008
+++ 8 ___ ['ITAND', 'CHARGED', 'IF', '

+++ 10 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND'] -0.0 0.0 2.6325231800836693 2.477279312334384
+++ 10 ___ ['ATAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND'] -0.0 0.0 2.6325231800812525 2.4772793123315613
+++ 10 ___ ['HTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND'] -0.0 0.0 2.632523180080585 2.477279312330782
+++ 10 ___ ['STAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND'] -0.0 0.0 2.63252318008036 2.477279312330519
+++ 10 ___ ['NTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND'] -0.0 0.0 2.632523180080243 2.4772793123303822
+++ 10 ___ ['WTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND'] -0.0 0.0 2.632523180080014 2.477279312330115
+++ 10 ___ ['LTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND'] -0.0 0.0 2.632523180079971 2.4772793123300643
+++ 10 ___ ['MTAND', 'C

+++ 12 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE'] -0.0 0.0 5.560296948221514 5.55257191951235
+++ 12 ___ ['ATAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE'] -0.0 0.0 5.560296948221466 5.552571919512301
+++ 12 ___ ['HTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE'] -0.0 0.0 5.560296948221452 5.552571919512287
+++ 12 ___ ['STAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE'] -0.0 0.0 5.560296948221449 5.5525719195122845
+++ 12 ___ ['NTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE'] -0.0 0.0 5.560296948221446 5.552571919512282
+++ 12 ___ ['WTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE'] -0.0 0.0 5.560296948221442 5.552571919512277
+++ 12 ___ ['LTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FI

+++ 15 ___ ['RTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', 'TO', '', 'BRING'] -0.0 0.0 3.4295121495972727 3.3625114111885166
+++ 15 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', 'BRING'] -0.0 0.0 3.3898563315548316 3.320048442058045
+++ 14 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', 'TO', 'BRING'] -0.0 0.0 3.389052940174811 3.319186938717166
+++ 14 ___ ['ATAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', 'TO', 'BRING'] -0.0 0.0 3.3890529401748073 3.3191869387171624
+++ 14 ___ ['STAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', 'TO', 'BRING'] -0.0 0.0 3.389052940174807 3.3191869387171615
+++ 15 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', 'BRRING'] -

+++ 18 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO'] -0.0 0.0 1.1004261424463093 0.005431720876731579
+++ 17 ___ ['NTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', 'TO', '', 'BRING', 'THEM', 'TO'] -0.0 0.0 1.1050750014521844 0.019264172079018316
+++ 17 ___ ['NTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', 'TO', '', 'BRING', 'THEM', 'TO'] -0.0 0.0 1.1004261424463093 0.005431720876731579
+++ 19 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE'] -0.0 0.0 4.326031600587762 4.299235972143748
+++ 19 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE'] -0.0 0.0 4.326029273176385 4.299233581524784
+++ 18 ___ ['NTAND'

+++ 21 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN'] -0.0 0.0 6.673822256178981 6.671291937902696
+++ 21 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN'] -0.0 0.0 6.313558380780419 6.309928650309658
+++ 20 ___ ['NTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN'] -0.0 0.0 6.312036293880461 6.308401024375948
+++ 20 ___ ['RTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN'] -0.0 0.0 6.303782860684694 6.3001174082861775
+++ 20 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', 'BRING', 'THEM',

+++ 24 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN'] -0.0 0.0 4.869534916444504 4.854061955458366
+++ 24 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN'] -0.0 0.0 4.869060024067457 4.853579656162138
+++ 23 ___ ['NTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN'] -0.0 0.0 4.869058356288167 4.853577962364119
+++ 23 ___ ['RTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN'] -0.0 0.0 4.869049356622568 4.853568822295573
+++ 23 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', '

+++ 27 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL'] -0.0 0.0 4.478685509869452 4.455727335021227
+++ 27 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL'] -0.0 0.0 4.478516511185937 4.455554411216497
+++ 26 ___ ['NTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL'] -0.0 0.0 4.478515917767938 4.455553804014723
+++ 26 ___ ['RTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL'] -0.0 0.0 4.478

+++ 28 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD'] -0.0 0.0 2.703724956079539 2.559957026541082
+++ 28 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD'] -0.0 0.0 2.703676029948736 2.559900535427842
+++ 27 ___ ['NTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD'] -0.0 0.0 2.7036758581609885 2.559900337077435
+++ 27 ___ ['RTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THE

+++ 30 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO'] -0.0 0.0 4.3590052889144095 4.333090164567555
+++ 30 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO'] -0.0 0.0 2.5619014218430975 2.394292167593269
+++ 30 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO'] -0.0 0.0 4.358949320078218 4.333032726292246
+++ 30 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'C

+++ 31 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', 'PRAYD'] -0.0 0.0 1.116083284720573 0.05152311177305906
+++ 31 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', 'PRAYD'] -0.0 0.0 1.1036266639602617 0.014968318613235701
+++ 31 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', 'PRAYD'] -0.0 0.0 1.1160831658904196 0.051522767217457154
+++ 31 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '

+++ 33 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', 'PRAYD', 'DHAYS', 'OR'] -0.0 0.0 1.1373944132406741 0.1121064240658671
+++ 33 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', 'PRAYD', 'DHAYS', 'OR'] -0.0 0.0 1.125994352727324 0.07999457809384815
+++ 33 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', 'PRAYD', 'DHAYSS', 'OR'] -0.0 0.0 1.137091851270302 0.11126268383011106
+++ 33 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'G

+++ 35 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', 'PRAYD', 'DHAYS', 'OR', 'MORE', 'HOUUSE'] -0.0 0.0 4.524177220505111 4.502251330001338
+++ 35 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', 'PRAYD', 'DHAYSS', 'OR', 'MORE', 'HOUUSE'] -0.0 0.0 4.524177120657105 4.502251227939897
+++ 35 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', 'PRAYD', 'DHAYSS', 'OR', 'MORE', 'HOUUSE'] -0.0 0.0 4.524059910227799 4.502131419030438
+++ 34 ___ ['NTAND', 'CHARGED'

+++ 38 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', 'PRAYD', 'DHAYS', 'OR', 'MORE', 'HOUUSE', '', 'TO', 'RIDE'] -0.0 0.0 4.019511476113236 3.98292687129982
+++ 38 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', 'PRAYD', 'DHAYS', 'OR', 'MORE', 'HOUUSE', '', 'TO', 'RIDE'] -0.0 0.0 1.631015533223628 1.1343205923238766
+++ 38 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', 'PRAYD', 'DHAYSS', 'OR', 'MORE', 'HOUUSE', '', 'TO', 'RIDE'] -0.0 0.0 4.0195114761

+++ 39 ___ ['NTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', 'PRAYD', 'DHAYSS', 'OR', 'MORE', 'HOUSE', '', 'TO', 'RIDE', 'WITH', 'THEM'] -0.0 0.0 1.110983018194548 0.03666233802625247
+++ 39 ___ ['NTAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', 'PRAYD', 'DHAYSS', 'OR', 'MORE', 'HOUSE', '', 'TO', 'RIDE', 'WITH', 'THEM'] -0.0 0.0 1.0992218831709284 0.0018276698234495439
+++ 40 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', 'PRAYD', 'DHAYS', 'OR', 'MORE', 'HOUUSE', '', 'TO',

+++ 43 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', 'PRAYD', 'DHAYSS', 'OR', 'MORE', 'HOUUSE', '', 'TO', 'RIDE', 'WITH', 'THEM', 'TO', 'THE', "KING'S"] -0.0 0.0 6.489058991717526 6.486014404365902
+++ 43 ___ ['TAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', 'PRAYD', 'DHAYSS', 'OR', 'MORE', 'HOUUSE', '', 'TO', 'RIDE', 'WITH', 'THEM', 'TO', 'THE', "KING'S"] -0.0 0.0 1.7999295794606982 1.3985246305347536
+++ 43 ___ ['ITAND', 'CHARGED', 'IF', 'EVER', 'HE', 'MIGHT', 'FIND', 'SIR', 'GAWAINE', 'AND', 'SIR', 'UWAINE', '', 'TO', '', 'BRING', 'THEM', 'TO', 'THE', 'COURT', 'AGAAIN', '', 'AND', 'THEN', 'WERE', 'THEY', 'ALL', 'GLAD', 'AND', 'SO', '

In [523]:
print(beam_txt)
np.round(wer_(beam_txt,reference)/len(reference.split(' '))*100,3)

                                                                                                                                                                                                       Z


100.0

In [524]:
len(beam_txt.split()), len(reference.split())

(1, 39)

In [None]:
lm_w = kenlm.LanguageModel('/home/hemant/sopi_deep/lm/3_gram_phase2.arpa')

In [17]:
def sort_beam(ptot,k):
    if len(ptot) < k:
        return [i for i in ptot.keys()]
    else:
        dict_ = sorted(dict((v,k) for k,v in ptot.items()).items(),reverse=True)[:k]
        return [i[1] for i in dict_]

#using WORD LM
def ctc_beam_search(out,labels, prune=0.0001, k=20, lm=None,alpha=0.3,beta=12):
    "implements CTC Prefix Search Decoding Algo13.043478260869565%'rithm as shown by Graves"
    '''
    out = ctc output
    labels = string of labels
    prune = prune the ctc output
    k=beam-width
    lm=word age model used
    alpha,beta = hyper-parameters
    '''

    bc_i = 0 # blank/special charatcter index 
    F = out.shape[1]
    out = np.log(np.vstack((np.zeros(F), out)))
    steps = out.shape[0]
    
    # time steps: ([beam, score]), for each time step we have a number of beams and each beam has a score.
    pb, pnb = defaultdict(Counter), defaultdict(Counter) 

#     pb[0][''], pnb[0][''] = 1, 0
    pb[0][''], pnb[0][''] = 0, -float("inf")
    prev_beams = ['']
#     print(out)
    for t in range(1,steps):
        pruned_alphabet = [labels[i] for i in np.where(out[t] > prune)[0]]
        for b in prev_beams:
            
            for c_t in labels: #pruned_alphabet:
                index = labels.index(c_t)
                #Collapsing case (copy case as the last character in the beam)
                if c_t == "_": #Extending with a blank
                    pb[t][b] = lse(pb[t][b],out[t][index]+pb[t-1][b], out[t][index]+pnb[t-1][b])
                    continue
                else:
                    i_plus = b + c_t
                    if len(b) > 0 and c_t == b[-1]: #Extending with the same character as the last one
                        pnb[t][b] = lse(out[t][index]+pnb[t-1][b],pnb[t][b])
                        pnb[t][i_plus] = lse(pnb[t][i_plus],out[t][index]*pb[t-1][b])
                    #Extending case as the last character is different
                    elif c_t == " " and len(b.replace(' ', '')) > 0 : # LM constraints
                        prob = [i[0] for i in lm.full_scores(i_plus,eos=False,bos=False)][-1]
                        lm_p = (10**prob)**alpha
                        pnb[t][i_plus] += lm_p*out[t][index]*(pb[t-1][b] + pnb[t-1][b])*(len(b.split())+1)**beta
                    else:
                        pnb[t][i_plus] = lse(pnb[t][i_plus],out[t][index]+pb[t-1][b], out[t][index]+pnb[t-1][b])
                    # If the new beam is not in the previous beams
                    if i_plus not in prev_beams:
                        pb[t][i_plus] = lse(pb[t][i_plus],out[t][labels.index("_")]+pb[t - 1][i_plus], out[t][labels.index("_")]+ pnb[t - 1][i_plus])
                        pnb[t][i_plus] = lse(pnb[t][i_plus],out[t][index] + pnb[t - 1][i_plus])

        ptot = pb[t] + pnb[t]
#         print(t,'---',pb,'\n',pnb,'\n',ptot,'\n')
        prev_beams = sort_beam(ptot,k)
    
    return prev_beams[0], pb, pnb

beam_txt, pb, pnb = ctc_beam_search(out[:15],labels,0,k=2,lm=lm_w,alpha=0,beta=0)







In [None]:
F = out.shape[1]
np.log(np.vstack((np.zeros(F), out)))[:15][1]

In [None]:
beam_txt, pb, pnb = ctc_beam_search(out,labels,0,k=100,lm=lm_w,alpha=0,beta=0)
print(beam_txt)
wer_(beam_txt,reference)/len(reference.split(' '))*100

In [None]:
beam_txt

In [None]:
np.log(out)

In [None]:
import os
os.chdir("/home/hemant/E2E_NER-Through-Speech/S2T/")
from opts import add_decoder_args, add_inference_args
from utils import load_model
import os
# from ctc_decoders import *
import argparse

import numpy as np
import torch
from tqdm import tqdm
from data.data_loader import SpectrogramParser

from opts import add_decoder_args, add_inference_args
from utils import load_model

In [None]:
decoding = 'beam_w'
prune = 0.00001
beam_width = 50
alpha = 1
beta = 4
lm = kenlm.LanguageModel('/home/hemant/asr_wm/data/ner/align_wav_txt/4_gram.arpa')

torch.set_grad_enabled(False)
device = torch.device("cuda")
model = load_model(device, "/home/hemant/asr_wm/models/deep/final.pth")
spect_parser = SpectrogramParser(model.audio_conf, normalize=True)

torch.cuda.set_device(int(0))
with open("/home/hemant/asr_wm/dev.csv","r") as f:
    csv = f.readlines()

total_cer, total_wer, num_tokens, num_chars = 0, 0, 0, 0
lm = lm_w
output = []
from tqdm.auto import tqdm
for i in tqdm(csv[:1]):
    audio_path, reference_path = i.split(",")

    spect = spect_parser.parse_audio(audio_path).contiguous()
    spect = spect.view(1, 1, spect.size(0), spect.size(1))
    spect = spect.to(device)

    input_sizes = torch.IntTensor([spect.size(3)]).int()
    out, output_sizes = model(spect, input_sizes)
    out = out.cpu().detach().numpy()[0]

    transcript = ctc_beam_search(out,labels,prune,beam_width,lm,alpha,beta)
#     transcript = ctc_best_path(out,labels)
    with open(reference_path.replace("\n",""),"r") as f:
        reference = f.readline()
        
#     break
    output.append([transcript,reference])
    wer_inst = wer_(transcript,reference)
    cer_inst = cer_(transcript, reference)
    total_wer += wer_inst
    total_cer += cer_inst
    num_tokens += len(reference.split(' '))
    num_chars += len(reference.replace(' ', ''))

        
wer = (float(total_wer) / num_tokens)*100
cer = (float(total_cer) / num_chars)*100
print('Test Summary \t'
    'Average WER {wer:.3f}\t'
    'Average CER {cer:.3f}\t?'.format(wer=wer, cer=cer))

In [None]:
alpha, beta = np.linspace(0.2,2,15), np.linspace(2,10,5)

In [None]:
alpha

In [None]:
beta

In [None]:
alpha, beta = np.linspace(0.2,2,15), np.linspace(0,6,15)
values = [[i,j] for j in beta for i in alpha]

total_cer, total_wer, num_tokens, num_chars = 0, 0, 0, 0
lm = lm_w
output = []

for alpha,beta in tqdm(values):
    for i in csv:
        audio_path, reference_path = i.split(",")

        spect = spect_parser.parse_audio(audio_path).contiguous()
        spect = spect.view(1, 1, spect.size(0), spect.size(1))
        spect = spect.to(device)

        input_sizes = torch.IntTensor([spect.size(3)]).int()
        out, output_sizes = model(spect, input_sizes)
        out = out.cpu().detach().numpy()[0]

        if decoding == "greedy": transcript = ctc_best_path(out,labels)
        elif decoding == "beam_w": transcript = ctc_beam_search(out,labels,prune,beam_width,lm,alpha,beta)
        elif decoding == "beam_c": transcript = ctc_beam_search_clm(out,labels,prune,beam_width,lm,alpha=alpha,beta=beta)

        with open(reference_path.replace("\n",""),"r") as f:
            reference = f.readline()
        wer_inst = wer_(transcript,reference)
        cer_inst = cer_(transcript, reference)
        total_wer += wer_inst
        total_cer += cer_inst
        num_tokens += len(reference.split(' '))
        num_chars += len(reference.replace(' ', ''))
    np.save("/home/hemant/ctc_decoders/abw.npy",np.array(output))
    wer = (float(total_wer) / num_tokens)*100
    cer = (float(total_cer) / num_chars)*100
    output.append([alpha,beta,wer])
    print("aplha: ",alpha,"beta: ",beta)
    print('Test Summary \t'
        'Average WER {wer:.3f}\t'
        'Average CER {cer:.3f}\t'.format(wer=wer, cer=cer))