# Benchmark report

## Utility functions

In [8]:
import pandas as pd
import json

def make_bold(txt):
    """Wrap *txt* in the ANSI escape codes that switch bold on and back off."""
    bold_on = '\033[1m'
    reset = '\033[0m'
    return '{}{}{}'.format(bold_on, txt, reset)

def _read_json(path):
    """Parse and return the JSON document stored at *path* (read as UTF-8)."""
    with open(path, 'r', encoding='utf-8') as handle:
        return json.load(handle)


def gen_results(dataframe, cohort=None):
    """Build a one-row-per-engine summary of WER, intent F1 and entity F1.

    Args:
        dataframe: Table holding one ``<engine>_wer`` column per engine; the
            mean of each of those columns is reported as the engine's WER.
        cohort: Suffix used to select the metrics files, which are read from
            ``data/intent_metrics_<engine>_<cohort>.json`` and
            ``data/entity_metrics_<engine>_<cohort>.json`` relative to the
            current working directory.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Engines``,
        ``Word Error Rate (WER)``, ``Intent Accuracy (IA)`` and
        ``Entity Accuracy (EA)``. Despite the "Accuracy" labels, the intent
        column holds the ``"weighted avg"`` → ``"f1-score"`` value and the
        entity column holds the ``"f1"`` value of the respective JSON files.

    Raises:
        FileNotFoundError: If a metrics file for an engine/cohort pair is
            missing.
        KeyError: If an expected column or JSON key is absent.
    """
    engines = ["indic_conformer", "google_speech", "slang_google_speech"]

    wer = [dataframe[engine + "_wer"].mean() for engine in engines]

    # All intent files are read before any entity file, so a missing file is
    # reported in the same order as when each metric had its own loop.
    intent_scores = [
        _read_json(f'data/intent_metrics_{engine}_{cohort}.json')["weighted avg"]["f1-score"]
        for engine in engines
    ]
    entity_scores = [
        _read_json(f'data/entity_metrics_{engine}_{cohort}.json')["f1"]
        for engine in engines
    ]

    return pd.DataFrame(
        {
            "Engines": engines,
            "Word Error Rate (WER)": wer,
            "Intent Accuracy (IA)": intent_scores,
            # Label was previously truncated to "Entity Accuracy (E".
            "Entity Accuracy (EA)": entity_scores,
        }
    )

# Allow up to 500 rows when pandas displays a frame.
pd.options.display.max_rows = 500

## Raw transcription + Slang Normalizer comparisons

In [9]:
# Load the cohort 1 metrics CSV and show its per-engine summary table.
df = pd.read_csv(filepath_or_buffer='data/ASR_quality_metrics_1.csv')

gen_results(dataframe=df, cohort=1)

Unnamed: 0,Engines,Word Error Rate (WER),Intent Accuracy (IA),Entity Accuracy (E
0,indic_conformer,0.259178,0.697644,0.551685
1,google_speech,0.161722,0.73192,0.63911
2,slang_google_speech,0.161433,0.73192,0.63911


## Slang Normalizer + NeMo comparisons

In [10]:
# Load the cohort 2 metrics CSV and show its per-engine summary table.
df2 = pd.read_csv(filepath_or_buffer='data/ASR_quality_metrics_2.csv')

gen_results(dataframe=df2, cohort=2)

Unnamed: 0,Engines,Word Error Rate (WER),Intent Accuracy (IA),Entity Accuracy (E
0,indic_conformer,0.308179,0.724729,0.571271
1,google_speech,0.16487,0.73192,0.638771
2,slang_google_speech,0.163579,0.73192,0.636943


## Slang Normalizer + Number Parser comparisons

In [11]:
# Load the cohort 3 metrics CSV and show its per-engine summary table.
df3 = pd.read_csv(filepath_or_buffer='data/ASR_quality_metrics_3.csv')

gen_results(dataframe=df3, cohort=3)

Unnamed: 0,Engines,Word Error Rate (WER),Intent Accuracy (IA),Entity Accuracy (E
0,indic_conformer,0.302173,0.713823,0.579151
1,google_speech,0.161598,0.734378,0.644786
2,slang_google_speech,0.161309,0.734378,0.644786
