# Step 2.14: Getting Descriptive Statistics

This code will produce a number of descriptive statistics about the CSVs finalized in Step 2.13, including:

1. Percent of feature/non-feature correctness in ASR outputs
2. Bias ratio for feature/non-feature correctness in ASR outputs
3. Word Error Rate (WER) for feature/non-feature in ASR outputs

## Required Packages

The following packages are necessary to run this code: os, [pandas](https://pypi.org/project/pandas/)

## Initial Setup

In [None]:
# Import required packages
import pandas as pd
import os

In [None]:
# Placeholder locations of the three gold-standard CSVs; replace each "path"
# with the real file location before running.
# NOTE(review): the notebook introduction cites Step 2.13 as the source of
# these CSVs, whereas this cell previously cited Step 2.11 -- confirm which.
aint_file_path = "path"
be_file_path = "path"
done_file_path = "path"

# Load the gold-standard dataframes, one per feature, in ain't / be / done order.
aint_gs_df, be_gs_df, done_gs_df = map(
    pd.read_csv, (aint_file_path, be_file_path, done_file_path)
)

# Habitual/Non-Habitual Be

### Create Dataframes

In [None]:
# Split the be data on its 'Habituality' flag: rows flagged 1 form the
# habitual be dataframe, rows flagged 0 form the non-habitual be dataframe.
_habituality_flag = be_gs_df['Habituality']

habitual_df = be_gs_df.loc[_habituality_flag.eq(1)]
non_habitual_df = be_gs_df.loc[_habituality_flag.eq(0)]

### Get Correct/Incorrect Percentages

In [None]:
def _correct_incorrect_pct(subset, asr):
    """Return (correct %, incorrect %) for one ASR system within subset.

    A row counts as correct when the system's
    '<asr>_transcription_cleaned_correctness' value equals 1 and as incorrect
    when it equals 0. Both shares use the full row count of subset as the
    denominator and are plain Python floats; an empty subset raises
    ZeroDivisionError.
    """
    flags = subset[f'{asr}_transcription_cleaned_correctness']
    total = len(subset)
    # len()-based counting keeps the results as built-in floats, so a later
    # division by a zero share still raises ZeroDivisionError.
    incorrect = len(subset[flags == 0]) / total * 100
    correct = len(subset[flags == 1]) / total * 100
    return correct, incorrect


# Non-Habitual Be: share of correct / incorrect transcriptions per ASR system
amazon_non_habitual_correct, amazon_non_habitual_incorrect = _correct_incorrect_pct(non_habitual_df, 'amazon')
deepspeech_non_habitual_correct, deepspeech_non_habitual_incorrect = _correct_incorrect_pct(non_habitual_df, 'deepspeech')
google_non_habitual_correct, google_non_habitual_incorrect = _correct_incorrect_pct(non_habitual_df, 'google')
IBMWatson_non_habitual_correct, IBMWatson_non_habitual_incorrect = _correct_incorrect_pct(non_habitual_df, 'IBMWatson')
microsoft_non_habitual_correct, microsoft_non_habitual_incorrect = _correct_incorrect_pct(non_habitual_df, 'microsoft')

# Habitual Be: share of correct / incorrect transcriptions per ASR system
amazon_habitual_correct, amazon_habitual_incorrect = _correct_incorrect_pct(habitual_df, 'amazon')
deepspeech_habitual_correct, deepspeech_habitual_incorrect = _correct_incorrect_pct(habitual_df, 'deepspeech')
google_habitual_correct, google_habitual_incorrect = _correct_incorrect_pct(habitual_df, 'google')
IBMWatson_habitual_correct, IBMWatson_habitual_incorrect = _correct_incorrect_pct(habitual_df, 'IBMWatson')
microsoft_habitual_correct, microsoft_habitual_incorrect = _correct_incorrect_pct(habitual_df, 'microsoft')

### Create Lists

In [None]:
# Pair each system's percentages as [correct, incorrect] -- the same order as
# the row labels of the percentage dataframes -- for habitual and non-habitual be.
amazon_habitual_list, amazon_non_habitual_list = (
    [amazon_habitual_correct, amazon_habitual_incorrect],
    [amazon_non_habitual_correct, amazon_non_habitual_incorrect],
)
deepspeech_habitual_list, deepspeech_non_habitual_list = (
    [deepspeech_habitual_correct, deepspeech_habitual_incorrect],
    [deepspeech_non_habitual_correct, deepspeech_non_habitual_incorrect],
)
google_habitual_list, google_non_habitual_list = (
    [google_habitual_correct, google_habitual_incorrect],
    [google_non_habitual_correct, google_non_habitual_incorrect],
)
IBMWatson_habitual_list, IBMWatson_non_habitual_list = (
    [IBMWatson_habitual_correct, IBMWatson_habitual_incorrect],
    [IBMWatson_non_habitual_correct, IBMWatson_non_habitual_incorrect],
)
microsoft_habitual_list, microsoft_non_habitual_list = (
    [microsoft_habitual_correct, microsoft_habitual_incorrect],
    [microsoft_non_habitual_correct, microsoft_non_habitual_incorrect],
)

### Create Percentage Dataframes

In [None]:
# Empty tables: one column per ASR system, one row per outcome.
_asr_systems = ["amazon", "deepspeech", "google", "IBMWatson", "microsoft"]
_outcome_rows = ["correct", "incorrect"]

habitual_percentage_df = pd.DataFrame(index=_outcome_rows, columns=_asr_systems)
non_habitual_percentage_df = pd.DataFrame(index=_outcome_rows, columns=_asr_systems)

### Add Information to Percentage Dataframes

In [None]:
# Fill each system's column with its [correct, incorrect] percentages and
# round the finished tables to two decimal places.
habitual_percentage_df = habitual_percentage_df.assign(
    amazon=amazon_habitual_list,
    deepspeech=deepspeech_habitual_list,
    google=google_habitual_list,
    IBMWatson=IBMWatson_habitual_list,
    microsoft=microsoft_habitual_list,
).round(2)

non_habitual_percentage_df = non_habitual_percentage_df.assign(
    amazon=amazon_non_habitual_list,
    deepspeech=deepspeech_non_habitual_list,
    google=google_non_habitual_list,
    IBMWatson=IBMWatson_non_habitual_list,
    microsoft=microsoft_non_habitual_list,
).round(2)

### Create Bias Ratio Dataframe

In [None]:
# Bias ratio: correct percentage of non-habitual be divided by the correct
# percentage of habitual be, one value per ASR system.
_be_ratio_row = "Non-Habitual/Habitual"
be_bias_ratio_df = pd.DataFrame(
    columns=["amazon", "deepspeech", "google", "IBMWatson", "microsoft"],
    index=[_be_ratio_row],
)

for _asr, _non_habitual_pct, _habitual_pct in (
    ("amazon", amazon_non_habitual_correct, amazon_habitual_correct),
    ("deepspeech", deepspeech_non_habitual_correct, deepspeech_habitual_correct),
    ("google", google_non_habitual_correct, google_habitual_correct),
    ("IBMWatson", IBMWatson_non_habitual_correct, IBMWatson_habitual_correct),
    ("microsoft", microsoft_non_habitual_correct, microsoft_habitual_correct),
):
    try:
        be_bias_ratio_df.at[_be_ratio_row, _asr] = _non_habitual_pct / _habitual_pct
    except ZeroDivisionError:
        # A zero habitual share makes the ratio undefined; store a message instead.
        be_bias_ratio_df.at[_be_ratio_row, _asr] = "No Habitual Be correct"

### Create Word Error Rate Dataframe

In [None]:
# Mean word error rate before and after the feature, per ASR system.
_wer_systems = ["amazon", "deepspeech", "google", "IBMWatson", "microsoft"]
# Row label in the summary table -> tag used inside the source column name.
_wer_positions = {"pre-feature": "preFeature", "post-feature": "postFeature"}

habitual_pre_post_WER_df = pd.DataFrame(columns=_wer_systems, index=list(_wer_positions))
non_habitual_pre_post_WER_df = pd.DataFrame(columns=_wer_systems, index=list(_wer_positions))

# Non-habitual be is filled first, then habitual be; missing WER values are
# skipped when averaging.
for _summary, _instances in (
    (non_habitual_pre_post_WER_df, non_habitual_df),
    (habitual_pre_post_WER_df, habitual_df),
):
    for _asr in _wer_systems:
        for _row_label, _tag in _wer_positions.items():
            _wer_column = f"{_asr}_transcription_cleaned_{_tag}_WER"
            _summary.at[_row_label, _asr] = _instances[_wer_column].mean(skipna=True)

# Completive/Non-Completive Done

### Create Dataframes

In [None]:
# Split the done data on its 'Completive' flag: rows flagged 1 form the
# completive done dataframe, rows flagged 0 form the non-completive one.
_completive_flag = done_gs_df['Completive']

completive_df = done_gs_df.loc[_completive_flag.eq(1)]
non_completive_df = done_gs_df.loc[_completive_flag.eq(0)]

### Get Correct/Incorrect Percentages

In [None]:
def _correct_incorrect_pct(subset, asr):
    """Return (correct %, incorrect %) for one ASR system within subset.

    A row counts as correct when the system's
    '<asr>_transcription_cleaned_correctness' value equals 1 and as incorrect
    when it equals 0. Both shares use the full row count of subset as the
    denominator and are plain Python floats; an empty subset raises
    ZeroDivisionError.
    """
    flags = subset[f'{asr}_transcription_cleaned_correctness']
    total = len(subset)
    # len()-based counting keeps the results as built-in floats, so a later
    # division by a zero share still raises ZeroDivisionError.
    incorrect = len(subset[flags == 0]) / total * 100
    correct = len(subset[flags == 1]) / total * 100
    return correct, incorrect


# Non-Completive Done: share of correct / incorrect transcriptions per ASR system
amazon_non_completive_correct, amazon_non_completive_incorrect = _correct_incorrect_pct(non_completive_df, 'amazon')
deepspeech_non_completive_correct, deepspeech_non_completive_incorrect = _correct_incorrect_pct(non_completive_df, 'deepspeech')
google_non_completive_correct, google_non_completive_incorrect = _correct_incorrect_pct(non_completive_df, 'google')
IBMWatson_non_completive_correct, IBMWatson_non_completive_incorrect = _correct_incorrect_pct(non_completive_df, 'IBMWatson')
microsoft_non_completive_correct, microsoft_non_completive_incorrect = _correct_incorrect_pct(non_completive_df, 'microsoft')

# Completive Done: share of correct / incorrect transcriptions per ASR system
amazon_completive_correct, amazon_completive_incorrect = _correct_incorrect_pct(completive_df, 'amazon')
deepspeech_completive_correct, deepspeech_completive_incorrect = _correct_incorrect_pct(completive_df, 'deepspeech')
google_completive_correct, google_completive_incorrect = _correct_incorrect_pct(completive_df, 'google')
IBMWatson_completive_correct, IBMWatson_completive_incorrect = _correct_incorrect_pct(completive_df, 'IBMWatson')
microsoft_completive_correct, microsoft_completive_incorrect = _correct_incorrect_pct(completive_df, 'microsoft')

### Create Lists

In [None]:
# Pair each system's percentages as [correct, incorrect] -- the same order as
# the row labels of the percentage dataframes -- for completive and
# non-completive done.
amazon_completive_list, amazon_non_completive_list = (
    [amazon_completive_correct, amazon_completive_incorrect],
    [amazon_non_completive_correct, amazon_non_completive_incorrect],
)
deepspeech_completive_list, deepspeech_non_completive_list = (
    [deepspeech_completive_correct, deepspeech_completive_incorrect],
    [deepspeech_non_completive_correct, deepspeech_non_completive_incorrect],
)
google_completive_list, google_non_completive_list = (
    [google_completive_correct, google_completive_incorrect],
    [google_non_completive_correct, google_non_completive_incorrect],
)
IBMWatson_completive_list, IBMWatson_non_completive_list = (
    [IBMWatson_completive_correct, IBMWatson_completive_incorrect],
    [IBMWatson_non_completive_correct, IBMWatson_non_completive_incorrect],
)
microsoft_completive_list, microsoft_non_completive_list = (
    [microsoft_completive_correct, microsoft_completive_incorrect],
    [microsoft_non_completive_correct, microsoft_non_completive_incorrect],
)

### Create Percentage Dataframes

In [None]:
# Empty tables: one column per ASR system, one row per outcome.
_asr_systems = ["amazon", "deepspeech", "google", "IBMWatson", "microsoft"]
_outcome_rows = ["correct", "incorrect"]

completive_percentage_df = pd.DataFrame(index=_outcome_rows, columns=_asr_systems)
non_completive_percentage_df = pd.DataFrame(index=_outcome_rows, columns=_asr_systems)

### Add Information to Percentage Dataframes

In [None]:
# Fill each system's column with its [correct, incorrect] percentages and
# round the finished tables to two decimal places.
completive_percentage_df = completive_percentage_df.assign(
    amazon=amazon_completive_list,
    deepspeech=deepspeech_completive_list,
    google=google_completive_list,
    IBMWatson=IBMWatson_completive_list,
    microsoft=microsoft_completive_list,
).round(2)

non_completive_percentage_df = non_completive_percentage_df.assign(
    amazon=amazon_non_completive_list,
    deepspeech=deepspeech_non_completive_list,
    google=google_non_completive_list,
    IBMWatson=IBMWatson_non_completive_list,
    microsoft=microsoft_non_completive_list,
).round(2)

### Create Bias Ratio Dataframe

In [None]:
# Bias ratio: correct percentage of non-completive done divided by the correct
# percentage of completive done, one value per ASR system.
_done_ratio_row = "Non-Completive/Completive"
done_bias_ratio_df = pd.DataFrame(
    columns=["amazon", "deepspeech", "google", "IBMWatson", "microsoft"],
    index=[_done_ratio_row],
)

for _asr, _non_completive_pct, _completive_pct in (
    ("amazon", amazon_non_completive_correct, amazon_completive_correct),
    ("deepspeech", deepspeech_non_completive_correct, deepspeech_completive_correct),
    ("google", google_non_completive_correct, google_completive_correct),
    ("IBMWatson", IBMWatson_non_completive_correct, IBMWatson_completive_correct),
    ("microsoft", microsoft_non_completive_correct, microsoft_completive_correct),
):
    try:
        done_bias_ratio_df.at[_done_ratio_row, _asr] = _non_completive_pct / _completive_pct
    except ZeroDivisionError:
        # A zero completive share makes the ratio undefined; store a message instead.
        done_bias_ratio_df.at[_done_ratio_row, _asr] = "No Completive Done correct"

### Create Word Error Rate Dataframe

In [None]:
# Done WER: mean word error rate before and after the feature, per ASR system.
_wer_systems = ["amazon", "deepspeech", "google", "IBMWatson", "microsoft"]
# Row label in the summary table -> tag used inside the source column name.
_wer_positions = {"pre-feature": "preFeature", "post-feature": "postFeature"}

completive_pre_post_WER_df = pd.DataFrame(columns=_wer_systems, index=list(_wer_positions))
non_completive_pre_post_WER_df = pd.DataFrame(columns=_wer_systems, index=list(_wer_positions))

# Non-completive done is filled first, then completive done; missing WER
# values are skipped when averaging.
for _summary, _instances in (
    (non_completive_pre_post_WER_df, non_completive_df),
    (completive_pre_post_WER_df, completive_df),
):
    for _asr in _wer_systems:
        for _row_label, _tag in _wer_positions.items():
            _wer_column = f"{_asr}_transcription_cleaned_{_tag}_WER"
            _summary.at[_row_label, _asr] = _instances[_wer_column].mean(skipna=True)

# Ain't/Non-Ain't

### Create Dataframes

In [None]:
# Split the ain't data on its 'Aint_NonAint' flag: rows flagged 1 form the
# ain't dataframe, rows flagged 0 form the non-ain't dataframe.
_aint_flag = aint_gs_df['Aint_NonAint']

aint_df = aint_gs_df.loc[_aint_flag.eq(1)]
non_aint_df = aint_gs_df.loc[_aint_flag.eq(0)]

### Get Correct/Incorrect Percentages

In [None]:
def _correct_incorrect_pct(subset, asr):
    """Return (correct %, incorrect %) for one ASR system within subset.

    A row counts as correct when the system's
    '<asr>_transcription_cleaned_correctness' value equals 1 and as incorrect
    when it equals 0. Both shares use the full row count of subset as the
    denominator and are plain Python floats; an empty subset raises
    ZeroDivisionError.
    """
    flags = subset[f'{asr}_transcription_cleaned_correctness']
    total = len(subset)
    # len()-based counting keeps the results as built-in floats, so a later
    # division by a zero share still raises ZeroDivisionError.
    incorrect = len(subset[flags == 0]) / total * 100
    correct = len(subset[flags == 1]) / total * 100
    return correct, incorrect


# Non-ain't: share of correct / incorrect transcriptions per ASR system
amazon_non_aint_correct, amazon_non_aint_incorrect = _correct_incorrect_pct(non_aint_df, 'amazon')
deepspeech_non_aint_correct, deepspeech_non_aint_incorrect = _correct_incorrect_pct(non_aint_df, 'deepspeech')
google_non_aint_correct, google_non_aint_incorrect = _correct_incorrect_pct(non_aint_df, 'google')
IBMWatson_non_aint_correct, IBMWatson_non_aint_incorrect = _correct_incorrect_pct(non_aint_df, 'IBMWatson')
microsoft_non_aint_correct, microsoft_non_aint_incorrect = _correct_incorrect_pct(non_aint_df, 'microsoft')

# ain't: share of correct / incorrect transcriptions per ASR system
amazon_aint_correct, amazon_aint_incorrect = _correct_incorrect_pct(aint_df, 'amazon')
deepspeech_aint_correct, deepspeech_aint_incorrect = _correct_incorrect_pct(aint_df, 'deepspeech')
google_aint_correct, google_aint_incorrect = _correct_incorrect_pct(aint_df, 'google')
IBMWatson_aint_correct, IBMWatson_aint_incorrect = _correct_incorrect_pct(aint_df, 'IBMWatson')
microsoft_aint_correct, microsoft_aint_incorrect = _correct_incorrect_pct(aint_df, 'microsoft')

### Create Lists

In [None]:
# Pair each system's percentages as [correct, incorrect] -- the same order as
# the row labels of the percentage dataframes -- for ain't and non-ain't.
amazon_aint_list, amazon_non_aint_list = (
    [amazon_aint_correct, amazon_aint_incorrect],
    [amazon_non_aint_correct, amazon_non_aint_incorrect],
)
deepspeech_aint_list, deepspeech_non_aint_list = (
    [deepspeech_aint_correct, deepspeech_aint_incorrect],
    [deepspeech_non_aint_correct, deepspeech_non_aint_incorrect],
)
google_aint_list, google_non_aint_list = (
    [google_aint_correct, google_aint_incorrect],
    [google_non_aint_correct, google_non_aint_incorrect],
)
IBMWatson_aint_list, IBMWatson_non_aint_list = (
    [IBMWatson_aint_correct, IBMWatson_aint_incorrect],
    [IBMWatson_non_aint_correct, IBMWatson_non_aint_incorrect],
)
microsoft_aint_list, microsoft_non_aint_list = (
    [microsoft_aint_correct, microsoft_aint_incorrect],
    [microsoft_non_aint_correct, microsoft_non_aint_incorrect],
)

### Create Percentage Dataframes

In [None]:
# Empty tables: one column per ASR system, one row per outcome.
_asr_systems = ["amazon", "deepspeech", "google", "IBMWatson", "microsoft"]
_outcome_rows = ["correct", "incorrect"]

aint_percentage_df = pd.DataFrame(index=_outcome_rows, columns=_asr_systems)
non_aint_percentage_df = pd.DataFrame(index=_outcome_rows, columns=_asr_systems)

### Add Information to Percentage Dataframes

In [None]:
# Fill each system's column with its [correct, incorrect] percentages and
# round the finished tables to two decimal places.
aint_percentage_df = aint_percentage_df.assign(
    amazon=amazon_aint_list,
    deepspeech=deepspeech_aint_list,
    google=google_aint_list,
    IBMWatson=IBMWatson_aint_list,
    microsoft=microsoft_aint_list,
).round(2)

non_aint_percentage_df = non_aint_percentage_df.assign(
    amazon=amazon_non_aint_list,
    deepspeech=deepspeech_non_aint_list,
    google=google_non_aint_list,
    IBMWatson=IBMWatson_non_aint_list,
    microsoft=microsoft_non_aint_list,
).round(2)

### Create Bias Ratio Dataframe

In [None]:
# Bias ratio: correct percentage of non-ain't divided by the correct
# percentage of ain't, one value per ASR system.
_aint_ratio_row = "Non-aint/aint"
aint_bias_ratio_df = pd.DataFrame(
    columns=["amazon", "deepspeech", "google", "IBMWatson", "microsoft"],
    index=[_aint_ratio_row],
)

for _asr, _non_aint_pct, _aint_pct in (
    ("amazon", amazon_non_aint_correct, amazon_aint_correct),
    ("deepspeech", deepspeech_non_aint_correct, deepspeech_aint_correct),
    ("google", google_non_aint_correct, google_aint_correct),
    ("IBMWatson", IBMWatson_non_aint_correct, IBMWatson_aint_correct),
    ("microsoft", microsoft_non_aint_correct, microsoft_aint_correct),
):
    try:
        aint_bias_ratio_df.at[_aint_ratio_row, _asr] = _non_aint_pct / _aint_pct
    except ZeroDivisionError:
        # A zero ain't share makes the ratio undefined; store a message instead.
        aint_bias_ratio_df.at[_aint_ratio_row, _asr] = "No ain't correct"

### Create Word Error Rate Dataframe

In [None]:
# WER: mean word error rate before and after the feature, per ASR system.
_wer_systems = ["amazon", "deepspeech", "google", "IBMWatson", "microsoft"]
# Row label in the summary table -> tag used inside the source column name.
_wer_positions = {"pre-feature": "preFeature", "post-feature": "postFeature"}

aint_pre_post_WER_df = pd.DataFrame(columns=_wer_systems, index=list(_wer_positions))
non_aint_pre_post_WER_df = pd.DataFrame(columns=_wer_systems, index=list(_wer_positions))

# Non-ain't is filled first, then ain't; missing WER values are skipped when
# averaging.
for _summary, _instances in (
    (non_aint_pre_post_WER_df, non_aint_df),
    (aint_pre_post_WER_df, aint_df),
):
    for _asr in _wer_systems:
        for _row_label, _tag in _wer_positions.items():
            _wer_column = f"{_asr}_transcription_cleaned_{_tag}_WER"
            _summary.at[_row_label, _asr] = _instances[_wer_column].mean(skipna=True)

## Exporting Dataframes to CSV Files

This will export the dataframes to CSV files.

In [None]:
# Designate the output directory where the CSVs will be stored.
# Replace the "path" placeholder; ending the value with a path separator
# (e.g. "results/") keeps the exported file names inside that directory.
csv_output_path = "path"

### Feature: Ain't

In [None]:
# Export the ain't summary tables (percent correct, bias ratio, WER) as CSVs
# inside the csv_output_path directory.
# os.path.join supplies the separator when csv_output_path has no trailing one,
# so the files land inside that directory instead of beside it under a name
# fused with the directory name.
for _table, _csv_name in (
    (aint_percentage_df, "aint_variations_percentCorrect.csv"),
    (non_aint_percentage_df, "nonAint_variations_percentCorrect.csv"),
    (aint_bias_ratio_df, "aint_variations_biasRatio.csv"),
    (aint_pre_post_WER_df, "aint_variations_summaryWER.csv"),
    (non_aint_pre_post_WER_df, "nonAint_variations_summaryWER.csv"),
):
    _table.to_csv(os.path.join(csv_output_path, _csv_name))

### Feature: Be

In [None]:
# Export the be summary tables (percent correct, bias ratio, WER) as CSVs
# inside the csv_output_path directory.
# os.path.join supplies the separator when csv_output_path has no trailing one,
# so the files land inside that directory instead of beside it under a name
# fused with the directory name.
for _table, _csv_name in (
    (habitual_percentage_df, "habitualBe_percentCorrect.csv"),
    (non_habitual_percentage_df, "nonHabitualBe_percentCorrect.csv"),
    (be_bias_ratio_df, "be_biasRatio.csv"),
    (habitual_pre_post_WER_df, "habitualBe_summaryWER.csv"),
    (non_habitual_pre_post_WER_df, "nonHabitualBe_summaryWER.csv"),
):
    _table.to_csv(os.path.join(csv_output_path, _csv_name))

### Feature: Done

In [None]:
# Export the done summary tables (percent correct, bias ratio, WER) as CSVs
# inside the csv_output_path directory.
# os.path.join supplies the separator when csv_output_path has no trailing one,
# so the files land inside that directory instead of beside it under a name
# fused with the directory name.
for _table, _csv_name in (
    (completive_percentage_df, "completiveDone_percentCorrect.csv"),
    (non_completive_percentage_df, "nonCompletiveDone_percentCorrect.csv"),
    (done_bias_ratio_df, "done_biasRatio.csv"),
    (completive_pre_post_WER_df, "completiveDone_summaryWER.csv"),
    (non_completive_pre_post_WER_df, "nonCompletiveDone_summaryWER.csv"),
):
    _table.to_csv(os.path.join(csv_output_path, _csv_name))