In [13]:
import csv
import os
import glob




In [14]:

def calculate_average_wer(csv_files):
    """Return the mean of the 'WER' column over all data rows of the given CSV files.

    Every file is first read as UTF-8. If decoding fails, a notice is printed
    and the file is re-read from the start as ISO-8859-1. Values from a file
    are only added to the running totals once that file has been read
    completely, so rows decoded before a mid-file UnicodeDecodeError are not
    counted twice by the fallback read.

    Args:
        csv_files: Iterable of paths to CSV files whose header row contains a
            'WER' column.

    Returns:
        The average WER as a float, or 0 when no data rows were found.

    Raises:
        ValueError: If a non-empty file has no 'WER' column, or a WER cell
            cannot be converted to float.
        IndexError: If a non-blank data row is too short to contain the WER
            column.
        OSError: If a file cannot be opened.
    """
    total_wer = 0
    num_samples = 0

    for csv_file in csv_files:
        try:
            wer_values = _read_wer_values(csv_file, 'utf-8')
        except UnicodeDecodeError:
            print(f'UnicodeDecodeError encountered in file {csv_file}, trying with ISO-8859-1 encoding.')
            wer_values = _read_wer_values(csv_file, 'ISO-8859-1')

        # Commit only after the whole file decoded cleanly; values are added
        # one by one so the floating-point sum matches a plain row-by-row sum.
        for wer in wer_values:
            total_wer += wer
        num_samples += len(wer_values)

    if num_samples > 0:
        return total_wer / num_samples
    return 0


def _read_wer_values(csv_file, encoding):
    """Return the 'WER' column of one CSV file as a list of floats."""
    with open(csv_file, 'r', newline='', encoding=encoding) as file:
        reader = csv.reader(file)
        header = next(reader, None)  # None when the file has no rows at all
        if header is None:
            return []
        wer_index = header.index('WER')  # ValueError if the column is missing

        # csv.reader yields [] for blank lines; those carry no sample.
        return [float(row[wer_index]) for row in reader if row]

In [15]:
def append_average_wer_to_csv(pattern, average_wer, output_csv):
    """Append a (pattern, average WER) row to output_csv.

    If output_csv is not an existing regular file when the function is
    called, a 'Pattern', 'Average WER' header row is written before the
    data row.

    Args:
        pattern: Value stored in the 'Pattern' column.
        average_wer: Value stored in the 'Average WER' column.
        output_csv: Path of the CSV file to append to (opened in append mode
            as UTF-8).
    """
    pending_rows = [[pattern, average_wer]]

    # Existence has to be checked before open(): append mode creates the file.
    if not os.path.isfile(output_csv):
        pending_rows.insert(0, ['Pattern', 'Average WER'])

    with open(output_csv, 'a', newline='', encoding='utf-8') as out_handle:
        csv.writer(out_handle).writerows(pending_rows)

def process_files_with_pattern(pattern, input_path, output_csv):
    """Average the WER of all '<pattern>-*.csv' files and append it to output_csv.

    Files are looked up directly inside input_path. When none match, a
    message is printed and output_csv is left untouched.

    Args:
        pattern: File-name prefix; files named '<pattern>-*.csv' are used.
        input_path: Directory that is searched for matching CSV files.
        output_csv: CSV file that receives the (pattern, average WER) row.
    """
    wer_files = glob.glob(os.path.join(input_path, f'{pattern}-*.csv'))

    if wer_files:
        mean_wer = calculate_average_wer(wer_files)
        append_average_wer_to_csv(pattern, mean_wer, output_csv)
        print(f'Average WER for pattern {pattern} saved to {output_csv}')
    else:
        print(f'No files found for pattern {pattern}')



In [16]:
# Example usage: for every immediate subdirectory of dataset_folder, average
# the WER of the matching '<name>-*.csv' files in input_path and append the
# result to output_csv.
# Paths are built with os.path.join so the cell is not tied to Windows path
# separators; on Windows the resulting strings are unchanged.
input_path = os.path.join("outputs", "libri_dataset_outputs", "noise_wer", "without_group")
output_csv = os.path.join("outputs", "libri_dataset_outputs", "noise_wer", "grouped", "noise_added_WER_output.csv")
dataset_folder = os.path.join("dataset", "LibriSpeech", "test-clean")

# NOTE: rows are appended to output_csv, so re-running this cell adds
# duplicate rows unless the file is removed first.
# os.scandir yields entries in arbitrary order; sorted() makes the row order
# of output_csv reproducible between runs and machines.
for level1 in sorted(f.name for f in os.scandir(dataset_folder) if f.is_dir()):
    pattern = level1
    process_files_with_pattern(pattern, input_path, output_csv)

Average WER for pattern 260 saved to outputs\libri_dataset_outputs\noise_wer\grouped\noise_added_WER_output.csv
Average WER for pattern 2830 saved to outputs\libri_dataset_outputs\noise_wer\grouped\noise_added_WER_output.csv
UnicodeDecodeError encountered in file outputs\libri_dataset_outputs\noise_wer\without_group\2961-961.csv, trying with ISO-8859-1 encoding.
Average WER for pattern 2961 saved to outputs\libri_dataset_outputs\noise_wer\grouped\noise_added_WER_output.csv
Average WER for pattern 3570 saved to outputs\libri_dataset_outputs\noise_wer\grouped\noise_added_WER_output.csv
Average WER for pattern 3575 saved to outputs\libri_dataset_outputs\noise_wer\grouped\noise_added_WER_output.csv
Average WER for pattern 3729 saved to outputs\libri_dataset_outputs\noise_wer\grouped\noise_added_WER_output.csv
Average WER for pattern 4077 saved to outputs\libri_dataset_outputs\noise_wer\grouped\noise_added_WER_output.csv
Average WER for pattern 4446 saved to outputs\libri_dataset_outputs\no

Average WER for pattern 8230 saved to outputs\libri_dataset_outputs\noise_wer\grouped\noise_added_WER_output.csv
Average WER for pattern 8455 saved to outputs\libri_dataset_outputs\noise_wer\grouped\noise_added_WER_output.csv
Average WER for pattern 8463 saved to outputs\libri_dataset_outputs\noise_wer\grouped\noise_added_WER_output.csv
Average WER for pattern 8555 saved to outputs\libri_dataset_outputs\noise_wer\grouped\noise_added_WER_output.csv
Average WER for pattern 908 saved to outputs\libri_dataset_outputs\noise_wer\grouped\noise_added_WER_output.csv
