# Error Visualization

- Quantifying Errors
	- From predetermined test cases:  
For each word, count how many times it was *correctly* and *incorrectly* interpreted.
- Classifying Errors
	- Misinterpretation of words  
For each word, log the transcription and record what the word was misinterpreted as  
→ count the occurrences of each misinterpretation  
→ find the most common error for a given word


In [2]:
import os
import numpy
import glob
import jiwer
import copy

def idx_to_file(idx):
    """Turn a dash-separated id into a slash-separated relative directory.

    The last dash-separated component is dropped and the remaining ones are
    joined with "/", e.g. "61-70968-0000" -> "61/70968". An id without any
    dash yields the empty string.
    """
    # str.split always returns at least one element, so the unpacking is safe.
    *parent_parts, _last = idx.split("-")
    return "/".join(parent_parts)


def read_transcription(fpath):
    """Return the first line of the text file at ``fpath``.

    The line is returned exactly as read, i.e. including its trailing
    newline when the file has one. An empty file yields an empty string.

    Raises:
        OSError: if the file cannot be opened.
    """
    # The context manager closes the handle even if reading raises.
    with open(fpath) as file:
        return file.readline()


In [3]:
import jiwer

def preprocess_text(text):
    """Normalise a sentence before it is scored.

    The text is lower-cased and then passed, in order, through the jiwer
    transforms listed below. The same function is applied to both the
    reference text and the model transcription so they are compared on
    equal footing.
    """
    transforms = (
        jiwer.RemoveMultipleSpaces(),
        jiwer.ExpandCommonEnglishContractions(),
        jiwer.RemoveWhiteSpace(replace_by_space=True),
        # Strip must come last: replacing whitespace by spaces can leave a
        # trailing space behind.
        jiwer.Strip(),
    )

    normalised = text.lower()
    for transform in transforms:
        normalised = transform(normalised)
    return normalised


In [4]:
from asr_evaluation.asr_evaluation import asr_evaluation

In [5]:
# Directory that is searched recursively for "*.trans.txt" reference files.
root_dir = "LibriSpeech/test-clean/"

# Model name; it is embedded in the transcription file name
# (<utterance-id>.<model_dir>.transcription.txt).
model_dir = "deepspeech"
# model_dir = "finetuned_deepspeech"

wers = []   # one WER value per evaluated utterance
infos = []  # confusion details for every utterance whose WER is not 0

# Building the pattern with os.path.join means root_dir works with or without
# a trailing slash.
trans_pattern = os.path.join(root_dir, "**", "*.trans.txt")

for filename in glob.iglob(trans_pattern, recursive=True):
    # `with` closes the file even when the ValueError below aborts the loop.
    with open(filename) as file:
        for line in file:
            # Each line is "<utterance-id> <reference words ...>".
            fields = line.split()
            if not fields:
                # Blank line: nothing to evaluate.
                continue
            idx = fields[0]
            reference_text = " ".join(fields[1:])

            fname = os.path.join(root_dir, idx_to_file(idx), idx)
            # Audio paths are derived for reference; they are not read here.
            flac_path = fname + ".flac"
            wav_path = fname + ".wav"
            transcription_path = fname + "." + model_dir + ".transcription.txt"

            if not os.path.exists(transcription_path):
                # A missing transcription would silently bias the average,
                # so fail loudly instead of skipping the utterance.
                raise ValueError("missing transcription: " + transcription_path)

            transcription = read_transcription(transcription_path)

            # Normalise both sides identically before scoring.
            transcription = preprocess_text(transcription)
            reference_text = preprocess_text(reference_text)

            wer = jiwer.wer(reference_text, transcription)
            wers.append(wer)
            if wer != 0:
                # Only imperfect transcriptions carry confusion information.
                evaluation = asr_evaluation.ASREvaluation()
                evaluation.detect_word_error(reference_text, transcription)
                confusion = evaluation.get_confusions()
                infos.append({"confusion": confusion, "reference_text": reference_text, "transcription": transcription})

if wers:
    print(f"Average WER: {100 * sum(wers) / len(wers):.2f}%")
else:
    # Avoid a ZeroDivisionError when the glob matched nothing.
    print(f"No utterances found under {root_dir!r}; nothing to evaluate.")


Average WER: 8.26%


In [6]:
def print_info(info):
    """Print one evaluated utterance together with its word confusions.

    ``info`` is a dict with the keys "reference_text", "transcription" and
    "confusion". The confusion dict holds three sequences under
    "substitution", "insertion" and "deletion"; a section is printed only
    when its sequence is non-empty.
    """
    print("Reference: \t", info["reference_text"])
    print("Transcription: \t", info["transcription"])
    print("Confusion:")

    confusion = info["confusion"]

    substitutions = confusion["substitution"]
    if len(substitutions) > 0:
        print("\tSubstitution:")
        for entry in substitutions:
            print(f"\t\t{entry['word_reference']:10s} -> {entry['word_substitution']:10s} \t count: {entry['count']}")

    insertions = confusion["insertion"]
    if len(insertions) > 0:
        print("\tInsertion:")
        for entry in insertions:
            print(f"\t\t{entry['word']:10s} \t count: {entry['count']}")

    deletions = confusion["deletion"]
    if len(deletions) > 0:
        print("\tDeletion:")
        for entry in deletions:
            print(f"\t\t{entry['word']:10s} \t count: {entry['count']}")


In [7]:
# Inspect the first utterance that was recorded with a non-zero WER.
print_info(infos[0])

Reference: 	 he knew the silver fleece his and zora is must be ruined
Transcription: 	 he knew the silver fleece his enormous be ruined
Confusion:
	Substitution:
		must       -> enormous   	 count: 1
	Deletion:
		and        	 count: 1
		zora       	 count: 1
		is         	 count: 1


In [8]:
# Inspect the second utterance that was recorded with a non-zero WER.
print_info(infos[1])


Reference: 	 he panted to know if she too knew or knew and cared not or cared and knew not
Transcription: 	 he wanted to know if she too knew or knew and cared not or cared and knew not
Confusion:
	Substitution:
		panted     -> wanted     	 count: 1


In [9]:
# Inspect the sixth utterance that was recorded with a non-zero WER.
print_info(infos[5])


Reference: 	 the squares of cotton sharp edged heavy were just about to burst to bolls
Transcription: 	 the squares of cotton sharp edge heavy were just about to burst to bulls
Confusion:
	Substitution:
		edged      -> edge       	 count: 1
		bolls      -> bulls      	 count: 1


In [29]:
# Maps each reference word to {substituted word: total count} accumulated
# over every utterance in `infos`.
substitution_errors = {}

for info in infos:
    for substitution in info["confusion"]["substitution"]:
        word_reference = substitution["word_reference"]
        word_substitution = substitution["word_substitution"]
        count = substitution["count"]
        per_word = substitution_errors.setdefault(word_reference, {})
        per_word[word_substitution] = per_word.get(word_substitution, 0) + count


## sort things inside a substitution error
# The dict must be rebuilt: assigning to a loop variable would leave the
# stored inner dicts unsorted.
substitution_errors = {
    word: dict(sorted(substitutions.items(),
                key=lambda item: item[1], reverse=True))
    for word, substitutions in substitution_errors.items()
}

## sort words based on the highest occurrence
# max() makes the ranking independent of the inner ordering.
substitution_errors = dict(sorted(substitution_errors.items(),
            key=lambda item: max(item[1].values()), reverse=True))

def print_substitution_error(substitution_errors, limit=2):
    """Print, for every word, up to ``limit`` of its recorded substitutions.

    ``substitution_errors`` maps a reference word to a dict of
    {substituted word: count}. Entries are printed in the iteration order of
    the dicts, so the caller decides the ranking. A ``limit`` of zero or
    less prints only the headers.
    """
    for word, substitution in substitution_errors.items():
        print("Word: ", word)
        print("Substitution: ")
        # Clamp at 0 so a negative limit shows nothing instead of slicing
        # from the end.
        shown = list(substitution.items())[:max(limit, 0)]
        for substituted_word, occurrences in shown:
            print(f"\t{substituted_word:10s} count: {occurrences}")


# Show the substitutions for every word, using the default limit of 2 per word.
print_substitution_error(substitution_errors)


Word:  in
Substitution: 
	and        count: 44
	living     count: 1
Word:  a
Substitution: 
	the        count: 34
	and        count: 3
Word:  this
Substitution: 
	the        count: 21
	guess      count: 1
Word:  an
Substitution: 
	and        count: 13
	on         count: 1
Word:  too
Substitution: 
	to         count: 10
	two        count: 3
Word:  two
Substitution: 
	to         count: 8
	lotto      count: 1
Word:  the
Substitution: 
	a          count: 7
	me         count: 2
Word:  men
Substitution: 
	man        count: 6
	then       count: 1
Word:  boolooroo
Substitution: 
	bolero     count: 6
	booleroo   count: 4
Word:  anyone
Substitution: 
	one        count: 6
Word:  uncas
Substitution: 
	once       count: 6
	on         count: 1
Word:  color
Substitution: 
	colour     count: 5
Word:  has
Substitution: 
	had        count: 5
	is         count: 2
Word:  it
Substitution: 
	i          count: 5
	was        count: 1
Word:  o
Substitution: 
	of         count: 5
	oh         count: 4
Word:  his