# Error Visualization

- Quantifying Errors
	- From determined test cases:  
For each word, measure the number of times it is *correctly* and *incorrectly* interpreted
- Classifying Errors
	- Misinterpretation of the words  
For each word, log the transcription, i.e. what the word was misinterpreted as  
→ calculate the occurrence of each misinterpretation  
→ find the most common error for a given word


In [1]:
import os, pathlib
import numpy
import glob
import jiwer
import collections
import helper

# TODO: remove warning, put text preprocessing as helper




In [2]:
class Data:
    """Named collection of paired reference and transcription texts.

    Every text is passed through ``helper.preprocess_text`` before being
    stored. The two lists are expected to stay index-aligned: entry ``i`` of
    ``reference`` belongs with entry ``i`` of ``transcription``.
    """

    def __init__(self, name):
        self.name = name
        self.reference = []
        self.transcription = []

    def preprocess_text(self, text):
        """Return *text* after running it through ``helper.preprocess_text``."""
        cleaned = helper.preprocess_text(text)
        return cleaned

    def get_name(self):
        """Return the dataset name given at construction."""
        return self.name

    def get_reference(self):
        """Return the list of stored reference texts."""
        return self.reference

    def get_transcription(self):
        """Return the list of stored transcription texts."""
        return self.transcription

    def add_reference(self, reference):
        """Preprocess *reference* and append it to the reference list."""
        cleaned = self.preprocess_text(reference)
        self.reference.append(cleaned)

    def add_transcription(self, transcription):
        """Preprocess *transcription* and append it to the transcription list."""
        cleaned = self.preprocess_text(transcription)
        self.transcription.append(cleaned)

    def add_reference_transcription(self, reference, transcription):
        """Append one reference/transcription pair, keeping both lists aligned."""
        self.add_reference(reference)
        self.add_transcription(transcription)

    def length(self):
        """Return the number of stored pairs; both lists must be the same size."""
        n_reference = len(self.reference)
        assert n_reference == len(self.transcription)
        return n_reference

    def print_reference_transcription(self, i):
        """Print pair number *i*; an index outside the reference list prints nothing."""
        if i < 0 or i >= len(self.reference):
            return
        print("Reference:   \t: ", self.reference[i])
        print("Transcription: \t: ", self.transcription[i])

    def print_head(self):
        """Print the first stored pair."""
        self.print_reference_transcription(i=0)

    def print_tail(self):
        """Print the last stored pair."""
        last = self.length() - 1
        self.print_reference_transcription(i=last)


In [3]:
def read_librispeech_data(root_dir="LibriSpeech/test-clean/", model_dir="deepspeech"):
    """Load LibriSpeech references together with their stored transcriptions.

    Every ``*.trans.txt`` file found recursively under *root_dir* is read.
    Each non-empty line is split into an utterance id (first token) and the
    reference text (remaining tokens). The matching transcription is looked up
    at ``<root_dir>/<id parts except the last, joined by "/">/<id>.<model_dir>.transcription.txt``.

    Args:
        root_dir: Directory that is searched for ``*.trans.txt`` files.
        model_dir: Model name used in the transcription file suffix.

    Returns:
        A ``Data`` object named ``"librispeech"`` holding all pairs.

    Raises:
        ValueError: If the transcription file for an utterance does not exist.
    """
    data = Data("librispeech")
    pattern = os.path.join(root_dir, "**", "*.trans.txt")

    for filename in glob.iglob(pattern, recursive=True):
        # The context manager closes the file even when ValueError is raised.
        with open(filename) as file:
            for line in file:
                tokens = line.split()
                if not tokens:
                    # Blank line: there is no utterance id to look up.
                    continue

                idx = tokens[0]
                reference_text = " ".join(tokens[1:])

                fid = "/".join(idx.split("-")[:-1])  # idx to file id

                fname = os.path.join(root_dir, fid, idx)
                transcription_path = fname + "." + model_dir + ".transcription.txt"
                if not os.path.exists(transcription_path):
                    raise ValueError("missing transcription: " + transcription_path)

                transcription = helper.read_transcription(transcription_path)
                data.add_reference_transcription(reference_text, transcription)

    return data


In [4]:
# Load the LibriSpeech pairs and show the first and last pair,
# separated by a blank line.
data = read_librispeech_data()
data.print_head()
print()
data.print_tail()

Reference:   	:  eleven oclock had struck it was a fine clear night they were the only persons on the road and they sauntered leisurely along to avoid paying the price of fatigue for the recreation provided for the toledans in their valley or on the banks of their river
Transcription: 	:  eleven oclock had struck it was a fine clear night there were the only persons on the road and they sauntered leisurely along to avoid paying the price of fatigue for the recreation provided for the toledans in the valley or on the banks of their river

Reference:   	:  then the leader parted from the line
Transcription: 	:  then the leader parted from the line


In [5]:
def read_corpus(corpus_fpath: str) -> list:
    """Read a corpus file and return its lines without the line terminator.

    Args:
        corpus_fpath: Path of the text file to read, one text per line.

    Returns:
        A list with one string per line of the file, in file order.
    """
    # The context manager guarantees the handle is closed after reading.
    with open(corpus_fpath) as file:
        # Strip only the newline, so a final line that lacks one keeps its
        # last character.
        return [line.rstrip("\n") for line in file]

def read_crossasr_data():
    """Load the CrossASR europarl references with their stored transcriptions.

    References come from the corpus file, one per line. The transcription of
    the reference on line ``k`` (1-based) is read from ``<k>.txt`` inside the
    transcription directory of the configured TTS/ASR pair.

    Returns:
        A ``Data`` object named ``"crossasr"`` holding all pairs.
    """
    tts_name = "rv"
    asr_name = "deepspeech"
    transcription_dir = os.path.join(
        "CrossASR/europarl-seed2021/data/transcription", tts_name, asr_name)

    data = Data("crossasr")
    references = read_corpus("CrossASR/europarl-seed2021/corpus/europarl-20000.txt")

    # Transcription files are numbered from 1, in corpus order.
    for number, reference in enumerate(references, start=1):
        transcription_path = os.path.join(transcription_dir, f"{number}.txt")
        transcription = helper.read_transcription(transcription_path)
        data.add_reference_transcription(reference, transcription)

    return data



In [6]:
# Load the CrossASR pairs and show the first and last pair,
# separated by a blank line.
data = read_crossasr_data()
data.print_head()
print()
data.print_tail()

Reference:   	:  in the european year for intercultural dialogue we should also recognize the importance of cultural industries in creating awareness and understanding of other cultures and therefore their importance for social cohesion
Transcription: 	:  in the european year for inter cultural dialogue we should also recognize the importance of cultural industries in creating awareness and understanding of other cultures and therefore their importance for social cohesion

Reference:   	:  it must be organised by a single body responsible for ensuring that it is applied comprehensively consistently and effectively
Transcription: 	:  it must be organized by a single body responsible for ensuring that it is applied comprehensively consistently and effectively


In [7]:
from asr_evaluation.asr_evaluation import asr_evaluation

class Analyzer(object):
    """Compute, print and save per-word error statistics for a dataset.

    The result of the most recent analysis is cached on the instance. The
    cache is keyed by the dataset name only, so two different datasets that
    share a name are treated as the same dataset.
    """

    def __init__(self):
        ## used for caching
        self.name = None
        self.infos = []
        self.word_count = collections.Counter()
        self.word_accuracy = None
        self.common_errors = None

    @staticmethod
    def _sorted_desc(mapping, key):
        """Return a new dict with the items of *mapping* ordered by *key*, largest first."""
        return dict(sorted(mapping.items(), key=key, reverse=True))

    ## The ``data`` annotations are quoted so that the class can be defined
    ## even when ``Data`` is not yet in scope.
    def analyze(self, data: "Data"):
        """Collect word counts and confusion details for *data*.

        Args:
            data: Dataset exposing ``get_name``, ``get_reference`` and
                ``get_transcription``.

        Returns:
            A tuple ``(infos, word_count)``. ``infos`` has one dict per pair
            whose WER is non-zero, with the keys ``"confusion"``,
            ``"reference"`` and ``"transcription"``. ``word_count`` is a
            ``Counter`` of the words of all reference texts.
        """
        ## get from cache if it is already computed before
        if data.get_name() == self.name:
            return self.infos, self.word_count

        infos = []
        word_count = collections.Counter()

        for reference, transcription in zip(data.get_reference(), data.get_transcription()):

            ## create statistics for word counter
            word_count.update(reference.split())

            ## create statistics for errors (only pairs that actually differ)
            if jiwer.wer(reference, transcription) != 0:
                evaluation = asr_evaluation.ASREvaluation()
                evaluation.detect_word_error(reference, transcription)
                infos.append({
                    "confusion": evaluation.get_confusions(),
                    "reference": reference,
                    "transcription": transcription,
                })

        ## update the cache
        self.name = data.get_name()
        self.infos = infos
        self.word_count = word_count
        ## drop statistics derived from the previous dataset
        self.word_accuracy = None
        self.common_errors = None

        return infos, word_count

    def get_word_accuracy(self, data: "Data"):
        """Calculate the accuracy of every reference word, in percent.

        The accuracy of a word is ``100 - 100 * round(errors / occurrences, 2)``
        where ``errors`` is the summed count of its substitution and deletion
        entries and ``occurrences`` is its count in the reference texts.

        Returns:
            A dict mapping word to accuracy, ordered from highest to lowest.
        """
        ## use caching if it is already computed before
        if data.get_name() == self.name and self.word_accuracy is not None:
            return self.word_accuracy

        infos, word_count = self.analyze(data)

        ## Sum every entry: one reference word may appear in several entries
        ## of the same sentence (e.g. substituted by different words), and
        ## all of them count as errors of that word.
        error_count = collections.Counter()
        for info in infos:
            confusion = info["confusion"]
            for entry in confusion["substitution"]:
                error_count[entry["word_reference"]] += entry["count"]
            for entry in confusion["deletion"]:
                error_count[entry["word"]] += entry["count"]

        word_accuracy = {}
        for word, occurrences in word_count.items():
            errors = error_count[word]
            if errors == 0:
                ## no recorded error: every occurrence was predicted correctly
                word_accuracy[word] = 100.0
                continue
            curr_word_accuracy = 100.0 - (100 * round(errors / occurrences, 2))
            assert 0 <= curr_word_accuracy <= 100
            word_accuracy[word] = curr_word_accuracy

        ## sort the word accuracy based on the value
        word_accuracy = self._sorted_desc(word_accuracy, key=lambda item: item[1])

        ## update cache
        self.word_accuracy = word_accuracy

        return word_accuracy

    def save_word_accuracy(self, word_accuracy, fpath):
        """Write *word_accuracy* to *fpath*, a header line then one word per line.

        Missing parent directories of *fpath* are created.
        """
        os.makedirs(pathlib.Path(fpath).parent.absolute(), exist_ok=True)
        with open(fpath, 'w') as file:
            file.write(f"{'Word':15s} {'Accuracy'}\n")
            for word, accuracy in word_accuracy.items():
                file.write(f"{word:15s} {accuracy}\n")

    def print_lowest_word_accuracy(self, word_accuracy, limit=10):
        """Print the last *limit* entries of *word_accuracy* (fewer if it is shorter)."""
        items = list(word_accuracy.items())
        print(f"{'Word':15s} {'Accuracy'}")
        ## clamp the start so a large limit cannot produce negative indices
        for word, accuracy in items[max(0, len(items) - limit):]:
            print(f"{word:15s} {accuracy}")

    def print_highest_word_accuracy(self, word_accuracy, limit=10):
        """Print the first *limit* entries of *word_accuracy* (fewer if it is shorter)."""
        items = list(word_accuracy.items())
        print(f"{'Word':15s} {'Accuracy'}")
        for word, accuracy in items[:max(limit, 0)]:
            print(f"{word:15s} {accuracy}")

    def get_most_common_errors(self, data: "Data"):
        """Aggregate substitution errors per reference word.

        Returns:
            A dict mapping each substituted reference word to a dict
            ``{substituted word: total count}``. The inner dicts are ordered
            by count, largest first; the outer dict is ordered by the count
            of each word's most frequent substitution, largest first.
        """
        infos, _ = self.analyze(data)

        ## get the result from caching if it is already computed before
        if data.get_name() == self.name and self.common_errors is not None:
            return self.common_errors

        totals = {}
        for info in infos:
            for entry in info["confusion"]["substitution"]:
                substitutions = totals.setdefault(
                    entry["word_reference"], collections.Counter())
                substitutions[entry["word_substitution"]] += entry["count"]

        ## sort things inside a substitution error
        common_errors = {
            word: self._sorted_desc(substitutions, key=lambda item: item[1])
            for word, substitutions in totals.items()
        }

        ## sort words based on the highest occurrence (first value of each
        ## inner dict, which is already sorted)
        common_errors = self._sorted_desc(
            common_errors, key=lambda item: next(iter(item[1].values())))

        self.common_errors = common_errors

        return common_errors

    def print_common_error(self, common_errors, limit=2, print_limit=16):
        """Print the top substitutions of the first words in *common_errors*.

        Args:
            common_errors: Mapping as returned by ``get_most_common_errors``.
            limit: Maximum number of substitutions printed per word.
            print_limit: Maximum number of words printed.
        """
        for count, (word, common) in enumerate(common_errors.items()):
            if count >= print_limit:
                break
            print("Word: ", word)
            for substitution, occurrences in list(common.items())[:max(limit, 0)]:
                print(f"\t{substitution:10s} count: {occurrences}")

    def save_common_errors(self, common_errors, fpath):
        """Write every word of *common_errors* with all its substitutions to *fpath*.

        Missing parent directories of *fpath* are created.
        """
        os.makedirs(pathlib.Path(fpath).parent.absolute(), exist_ok=True)
        with open(fpath, 'w') as file:
            for word, common in common_errors.items():
                file.write(f"Word: {word}\n")
                for substitution, occurrences in common.items():
                    file.write(f"\t{substitution:10s} count: {occurrences}\n")
            


In [8]:
# Compute the per-word accuracy on LibriSpeech, save it to a text file and
# print both ends of the ranking.
analyzer = Analyzer()

data = read_librispeech_data()

word_accuracy = analyzer.get_word_accuracy(data)
fpath = "output/librispeech/word_accuracy.txt"
analyzer.save_word_accuracy(word_accuracy, fpath)

# word_accuracy is ordered from highest to lowest accuracy, so the lowest
# entries are at its end.
print("=== Lowest Accuracy Rate")
analyzer.print_lowest_word_accuracy(word_accuracy)
print()

print("=== Highest Accuracy Rate")
analyzer.print_highest_word_accuracy(word_accuracy)
print()

=== Lowest Accuracy Rate
Word            Accuracy
mo              0.0
schooled        0.0
iridescent      0.0
toothed         0.0
teal            0.0
frequenter      0.0
clutching       0.0
talons          0.0
sank            0.0
feeding         0.0

=== Highest Accuracy Rate
Word            Accuracy
oclock          100.0
struck          100.0
fine            100.0
clear           100.0
only            100.0
persons         100.0
road            100.0
sauntered       100.0
leisurely       100.0
along           100.0



In [9]:
# Aggregate the substitution errors on LibriSpeech (reusing the analyzer and
# data from the previous cell), save them and print the top entries.
print("=== Most common errors")
common_errors = analyzer.get_most_common_errors(data)
fpath = "output/librispeech/common_errors.txt"
analyzer.save_common_errors(common_errors, fpath)
analyzer.print_common_error(common_errors)
print()


=== Most common errors
Word:  in
	and        count: 44
	an         count: 3
Word:  a
	the        count: 34
	of         count: 4
Word:  and
	in         count: 28
	a          count: 3
Word:  this
	the        count: 21
	spilling   count: 1
Word:  an
	and        count: 13
	on         count: 1
Word:  too
	to         count: 10
	two        count: 3
Word:  two
	to         count: 8
	lotto      count: 1
Word:  is
	as         count: 7
	his        count: 4
Word:  the
	a          count: 7
	to         count: 2
Word:  uncas
	once       count: 6
	one        count: 1
Word:  thee
	the        count: 6
	he         count: 4
Word:  of
	a          count: 6
	at         count: 2
Word:  anyone
	one        count: 6
Word:  men
	man        count: 6
	then       count: 1
Word:  boolooroo
	bolero     count: 6
	booleroo   count: 4
Word:  has
	had        count: 5
	as         count: 2



In [10]:
# Repeat the word accuracy report for the CrossASR data, using a fresh
# Analyzer instance.
data = read_crossasr_data()

analyzer = Analyzer()
word_accuracy = analyzer.get_word_accuracy(data)
fpath = "output/crossasr/word_accuracy.txt"
analyzer.save_word_accuracy(word_accuracy, fpath)


print("=== Lowest Accuracy Rate")
analyzer.print_lowest_word_accuracy(word_accuracy)
print()

print("=== Highest Accuracy Rate")
analyzer.print_highest_word_accuracy(word_accuracy)
print()


=== Lowest Accuracy Rate
Word            Accuracy
ayuso           0.0
overexploited   0.0
oft             0.0
scams           0.0
maroni          0.0
formers         0.0
café            0.0
shah            0.0
masood          0.0
tunisias        0.0

=== Highest Accuracy Rate
Word            Accuracy
dialogue        100.0
should          100.0
cultural        100.0
creating        100.0
understanding   100.0
other           100.0
cultures        100.0
therefore       100.0
union           100.0
international   100.0



In [11]:
# Aggregate the substitution errors on CrossASR (reusing the analyzer and
# data from the previous cell), save them and print the top entries.
print("=== Most common errors")

common_errors = analyzer.get_most_common_errors(data)
fpath = "output/crossasr/common_errors.txt"
analyzer.save_common_errors(common_errors, fpath)

analyzer.print_common_error(common_errors)
print()


=== Most common errors
Word:  and
	in         count: 1842
	an         count: 154
Word:  today
	day        count: 336
	to         count: 8
Word:  is
	as         count: 281
	his        count: 42
Word:  a
	the        count: 281
	of         count: 80
Word:  has
	had        count: 265
	have       count: 8
Word:  favour
	favor      count: 207
	to         count: 1
Word:  rapporteur
	reporter   count: 183
	rapier     count: 16
Word:  cooperation
	operation  count: 163
	ation      count: 1
Word:  member
	members    count: 149
	ember      count: 2
Word:  programmes
	programs   count: 145
	program    count: 7
Word:  s
	as         count: 144
	us         count: 67
Word:  as
	is         count: 140
	his        count: 13
Word:  thank
	think      count: 138
	every      count: 2
Word:  ensure
	insure     count: 128
	insured    count: 8
Word:  mr
	r          count: 123
	m          count: 37
Word:  eu
	e          count: 120
	u          count: 74

