In [1]:
import logging

from pydub import AudioSegment

import pandas as pd

# AudioSegment.ffmpeg = "C:\\installed\\ffmpeg\\bin\\ffmpeg.exe"
# AudioSegment.converter = "C:\\installed\\ffmpeg\\bin\\ffmpeg.exe"

from watson_developer_cloud import TextToSpeechV1

import glob
import json
from random import shuffle

from datetime import datetime

import os
import numpy as np

import csv

import global_constants

import re

import function_library

# Rohun's IBM credential
# Module-level Watson Text-to-Speech client used by synthesize_number().
# The username and password are read from the project-local global_constants
# module rather than being written into the notebook.
tts = TextToSpeechV1(
    username=global_constants.IBM_USERNAME,
    password=global_constants.IBM_PASSWORD,
)

def synthesize_number(number, attempts=2):
    """Return the spoken form of ``number`` as an AudioSegment.

    The text is synthesized with the module-level ``tts`` client. The request
    is tried up to ``attempts`` times (default 2, matching the previous
    hand-written retry); every failure is logged. When all attempts fail,
    250 ms of silence is returned instead so that stitching can continue.

    Parameters:
        number: value to speak; converted with ``str()`` before synthesis.
        attempts: maximum number of synthesis requests to make.

    Returns:
        AudioSegment built from the synthesized bytes, or 250 ms of silence
        when no attempt succeeded.
    """
    for attempt in range(1, attempts + 1):
        try:
            byte_audio = tts.synthesize(str(number), accept='audio/wav', voice="en-US_AllisonVoice")
        except Exception as error:
            # Best effort: record the failure and retry instead of aborting the stitch.
            logging.warning("Speech synthesis attempt %d of %d for %r failed: %s",
                            attempt, attempts, number, error)
        else:
            break
    else:
        # No attempt succeeded (or attempts < 1): fall back to a short silence.
        return AudioSegment.silent(duration=250)

    # The sample_width, frame_rate and the channels are empirical values. No values in documentation.
    sound = AudioSegment(
        data=byte_audio,
        sample_width=2,
        frame_rate=22050,
        channels=1
    )

    return sound


def stich_audio_files(file_list, silence, post_number_silence, location, tag_extension):
    """Concatenate the listed clips, each announced by its spoken 1-based position.

    For every name in ``file_list`` the clip
    ``"user_study_output\\" + location + name + tag_extension`` is loaded and
    ``silence + <spoken position> + post_number_silence + <clip>`` is appended
    to the result. A clip that cannot be loaded or processed is logged with
    ``logging.error`` and skipped; its position number is not reused.

    Returns:
        The stitched AudioSegment (empty when nothing could be added).
    """
    combined = AudioSegment.empty()

    for position, clip_name in enumerate(file_list, start=1):
        try:
            clip_path = "user_study_output\\" + location + clip_name + tag_extension
            clip = AudioSegment.from_file(clip_path, format="wav")
            spoken_position = synthesize_number(position)
            combined = combined + (silence + spoken_position + post_number_silence + clip)
        except Exception as error:
            logging.error(str(error))

    return combined



def crop_and_save_to_wav(file_name):
    """Cut milliseconds 3000-6000 out of an mp3 and save five attenuated WAV copies.

    The copies are written next to the input as ``<stem>_<value>.wav`` for
    ``value`` in 0, 10, 20, 30, 40, where ``value`` is subtracted from the
    cropped segment before export.

    Parameters:
        file_name: path of the mp3 file to crop.
    """
    # splitext only removes a real extension of the last path component; the
    # previous rfind(".") dropped the final character of extension-less names
    # and could cut at a dot inside a directory name.
    stem, _extension = os.path.splitext(file_name)
    audio = AudioSegment.from_file(file_name, format="mp3")[3000:6000]

    for value in [0, 10, 20, 30, 40]:
        reduced_audio = audio - value
        reduced_audio.export(stem + "_" + str(value) + ".wav", format="wav")

        
def decrease_beep_strength():
    """Export five progressively quieter copies of ``data_input\\beep.wav``.

    Each pass subtracts a further 10 from the running segment and writes it
    to ``data_input\\beep_<i>.wav`` for ``i`` in 0..4, so the copies are
    reduced by 10, 20, 30, 40 and 50 relative to the original.
    """
    current = AudioSegment.from_file("data_input\\" + 'beep.wav', 'wav')
    for level in range(5):
        current = current - 10
        target = "data_input\\beep_" + str(level) + ".wav"
        current.export(out_f=target, format="wav")
        

    
def read_files(file_list, allowed_values, columns):
    """Reads files with the specified Noise types in allowed values.

    Every file in ``file_list`` is parsed as a header-less CSV whose first
    ``len(columns)`` columns are named ``columns``. The non-empty frames are
    combined, and only rows whose ``noise_type`` is in ``allowed_values`` and
    whose ``noise`` differs from -1 are kept.

    Parameters:
        file_list: iterable of CSV paths.
        allowed_values: accepted values for the ``noise_type`` column.
        columns: column names; must include ``noise_type`` and ``noise``.

    Returns:
        The filtered DataFrame. When no file contributes any row, an empty
        frame with ``columns`` is returned.
    """
    frames = []
    for file_name in file_list:
        data_frame = pd.read_csv(file_name, names=columns, usecols=list(range(len(columns))))
        if len(data_frame) == 0:
            continue
        frames.append(data_frame)

    # Concatenate once: growing a frame inside the loop is quadratic and
    # DataFrame.append no longer exists in pandas 2.x.
    if frames:
        result_dataframe = pd.concat(frames, ignore_index=True)
    else:
        result_dataframe = pd.DataFrame(columns=columns)

    keep = result_dataframe["noise_type"].isin(allowed_values) & (result_dataframe["noise"] != -1)
    return result_dataframe[keep]

In [96]:
'''Parameters that are being used in this exercise'''

# Data Set Iteration number
data_version = "4"

# Captcha Version. Options - 2, 3a, 3b, 4
captcha_type = "4"

# Log-file name suffixes that are searched for every captcha version.
filter_keyword_list = [
    "REFACTORED_White_YT_VERSION_" + captcha_type,
    "REFACTORED_White_PODCAST_VERSION_" + captcha_type,
    "REFACTORED_PODCAST_VERSION_" + captcha_type,
]

# Version 3b additionally uses the "REDONE" podcast logs.
if captcha_type == "3b":
    filter_keyword_list.append("REFACTORED_REDONE_PODCAST_VERSION_" + captcha_type)

In [97]:
# add tag and time to name and use json to dump file names. json file uses same name.

# Collect every "selected" log CSV whose name ends with one of the keywords.
complete_file_list = []
for filter_keyword in filter_keyword_list:
    complete_file_list += glob.glob("logs/*selected*" + filter_keyword + ".csv")

# Captcha version 4 logs use a different column layout from the other versions.
column_word = (
    ["name", "version", "original_text", "noise", "complete", "source_type", "noise_type", "transcript", "reduced_word"]
    if captcha_type == "4"
    else ["name", "version", "start", "end", "original_text", "noise", "first_word_easy", "first_confidence", "second_confidence", "source_type", "noise_type", "first_word", "second_word"]
)

# Keep only the rows with White noise (read_files also drops rows whose noise is -1).
file_dataframe = read_files(complete_file_list, ["White"], column_word)

In [98]:
def create_ten(file_dataframe, source, data_version, captcha_type):
    """Copies the N files selected and stored in the log file from the specified source location to "vX" destination folder.

    At most the first 1000 rows of ``file_dataframe`` are used. Each clip
    ``source + name + ".wav"`` is re-exported into a timestamped folder
    ``Test_data\\v<data_version>\\c<captcha_type>_<timestamp>``, and three
    metadata files are written below ``Test_data\\v<data_version>``:

    * ``gt_data/gt<captcha_type>.json``     - clip name plus ground-truth word
    * ``audioname/aname<captcha_type>.json`` - clip names only
    * ``selection/sample<captcha_type>.csv`` - name and captcha type per clip,
      under a four-column header

    The ground-truth word is ``reduced_word`` for captcha type "4"; otherwise
    it is ``first_word`` when ``first_word_easy`` is truthy and
    ``second_word`` when it is not.

    Parameters:
        file_dataframe: rows describing the selected clips.
        source: path prefix prepended verbatim to each clip file name.
        data_version: string used in the ``v<data_version>`` folder name.
        captcha_type: captcha version string.
    """
    NUMBER_TO_SELECT = 1000

    destination = "Test_data\\v" + data_version + "\\c" + captcha_type + "_" + str(datetime.now().timestamp()).replace(".", "")
    os.makedirs(destination, exist_ok=True)

    dictionary_gt = []
    audio_name_list = []
    rows = [["Name", "Captcha", "Word_Detected", "Text_Detected"]]

    for _, row in file_dataframe.head(NUMBER_TO_SELECT).iterrows():
        audio_file = row["name"]

        if captcha_type == "4":
            transcript = row["reduced_word"]
        elif row["first_word_easy"]:
            transcript = row["first_word"]
        else:
            transcript = row["second_word"]

        dictionary_gt.append({"audio": audio_file + ".wav", "gt": transcript})
        audio_name_list.append({"audio": audio_file + ".wav"})
        # Only the first two of the four header columns are filled in here.
        rows.append([audio_file, captcha_type])

        audio = AudioSegment.from_file(source + audio_file + ".wav", format="wav")
        output_path = os.path.join(destination, audio_file + ".wav")
        audio.export(output_path, format="wav")

    # The JSON files are written inside "with" blocks so the handles are
    # closed (and flushed) deterministically.
    gt_folder = "Test_data\\v" + data_version + "\\gt_data"
    os.makedirs(gt_folder, exist_ok=True)
    with open(os.path.join(gt_folder, "gt" + captcha_type + ".json"), "w") as gt_file:
        json.dump(dictionary_gt, gt_file)

    audio_folder = "Test_data\\v" + data_version + "\\audioname"
    os.makedirs(audio_folder, exist_ok=True)
    with open(os.path.join(audio_folder, "aname" + captcha_type + ".json"), "w") as name_file:
        json.dump(audio_name_list, name_file)

    selection_folder = "Test_data\\v" + data_version + "\\selection"
    os.makedirs(selection_folder, exist_ok=True)
    with open(os.path.join(selection_folder, "sample" + captcha_type + ".csv"), "w", newline="") as sample_file:
        csv.writer(sample_file).writerows(rows)

In [99]:
# Number of log rows that survived the filtering in read_files().
print(file_dataframe.shape[0])

# Folder holding the source chunk recordings.
# Alternative source: "/home/riot/Desktop/AudioCaptcha/audio_data/data_chunk_stage/podcast_lecture"
chunk_source = "/home/riot/Desktop/AudioCaptcha/audio_data/data_chunk_stage/lecture"


# Disabled one-off export of the selected clips:
# create_ten(file_dataframe, "C:\\Users\\IBM_ADMIN\\speech_recognition\\data_output_selected\\", data_version, captcha_type)

def extract_file_list(selected_type=None, audioname_directory="/home/riot/Desktop/AudioCaptcha/speech-recognition/Test_Data/v3_3/audioname"):
    '''Select the entries that pass manual filtering.

    Loads ``aname<type>.json`` from ``audioname_directory`` and returns the
    ``"audio"`` value of every entry in it. Supported types are "2", "3b"
    and "4". For 3b the data was combined from two iterations of data
    production.

    Parameters:
        selected_type: captcha type to load; when None, the notebook-level
            ``captcha_type`` is used, as before.
        audioname_directory: folder containing the ``aname*.json`` files;
            defaults to the previously hard-coded v3_3 location.

    Returns:
        List of audio file names.

    Raises:
        ValueError: for any other captcha type (ValueError is a subclass of
            the plain Exception raised previously).
    '''
    if selected_type is None:
        selected_type = captcha_type

    if selected_type not in ("2", "3b", "4"):
        raise ValueError("Unsupported")

    file_name = os.path.join(audioname_directory, "aname" + selected_type + ".json")
    # "with" closes the handle; json.load(open(...)) left that to the garbage collector.
    with open(file_name, "r") as name_file:
        audio_dict = json.load(name_file)

    return [entry["audio"] for entry in audio_dict]

    

def add_noise_and_export(output_directory, noise_to_add, processed_type, audio):
    '''Overlay noise. Export to output folder.

    ``audio`` is padded with 250 ms of silence on both sides, ``noise_to_add``
    is overlaid (looped) on the padded clip, and the result is exported as WAV
    to ``<output_directory>/from_c<captcha_type>/c<processed_type>/file_<index><file_format>``.

    NOTE: ``captcha_type``, ``index`` and ``file_format`` are read from
    notebook-level variables at call time; ``index`` is expected to be the
    loop variable of the calling cell.

    Parameters:
        output_directory: root folder of the generated data set.
        noise_to_add: AudioSegment overlaid on the padded clip.
        processed_type: captcha type label used for the ``c<...>`` sub-folder.
        audio: AudioSegment holding the clip.
    '''
    silence = AudioSegment.silent(duration=250)

    padded = silence + audio + silence
    noisy = padded.overlay(noise_to_add, loop=True)

    target_folder = os.path.join(output_directory, "from_c" + captcha_type, "c" + processed_type)
    # Create the folder first so the export does not fail on a fresh output tree.
    os.makedirs(target_folder, exist_ok=True)

    output = os.path.join(target_folder, "file_" + str(index) + file_format)
    noisy.export(output, format="wav")
    
# Extension appended to every generated clip file name.
file_format = ".wav"

# set output directories.
output_directory = "/home/riot/Desktop/AudioCaptcha/speech-recognition/Test_Data/v4"

# Clip names that passed manual filtering for the configured captcha type.
audio_list = extract_file_list()
print(len(audio_list))

85
33


In [107]:
# Build the v4 data set for the configured captcha_type and write its ground
# truth to <output_directory>/from_c<captcha_type>/gt.csv.
#
# * "3b" and "2": cut each selected clip out of its source chunk, overlay the
#   gain-adjusted noise, and export three variants (c2, c3, c4).
# * "4": copy the already produced clip into the c4 folder.

def _without_suffix(text, suffix):
    """Return ``text`` minus a trailing ``suffix`` (exact match), else ``text`` unchanged.

    str.rstrip(suffix) is not equivalent: it strips any trailing run of the
    characters in ``suffix`` and can therefore eat into the name itself.
    """
    if suffix and text.endswith(suffix):
        return text[:-len(suffix)]
    return text


# Names of audio files that could not be opened by whichever branch runs.
audio_entries_not_found = []

# Ground-truth rows, one per selected clip. A row is recorded before the audio
# is opened, so clips listed in audio_entries_not_found still appear here.
gt_rows = []
if captcha_type == "3b":

    # for each audio item, get row.
    # Greedy match up to the last "_chunk_<digits>"; used as the source chunk file name.
    regex = ".*_chunk_\\d*"

    noise = AudioSegment.from_file(os.path.join("parameter_input", "noise.wav"), format="wav")

    # `index` is also read as a global by add_noise_and_export() to name the output file.
    for index, audio_entry in enumerate(sorted(audio_list)):

        audio_name = _without_suffix(audio_entry, file_format)
        # NOTE(review): assumes exactly one log row per clip name; iloc[0] raises on no match.
        row = file_dataframe[file_dataframe.name == audio_name]

        print(audio_entry)

        name = re.match(regex, audio_name).group(0)

        start = row.start.iloc[0]
        end = row.end.iloc[0]
        # AudioSegment + number applies a gain (pydub), so the logged noise value adjusts the level.
        noise_for_row = noise + row.noise.iloc[0]

        # The "easy" word is the ground truth; the other one is kept as the weak word.
        if row.first_word_easy.iloc[0]:
            gt = row.first_word.iloc[0]
            weak_word = [row.second_word.iloc[0]]
        else:
            gt = row.second_word.iloc[0]
            weak_word = [row.first_word.iloc[0]]

        gt_rows.append([index, audio_entry, gt, str(weak_word)])

        try:
            original_audio = AudioSegment.from_file(os.path.join(chunk_source, name + file_format), "wav")
            # start/end are multiplied by 1000 for millisecond slicing — presumably seconds; TODO confirm.
            audio = original_audio[start * 1000:end * 1000]

            # The clip is written to a fixed scratch file so it can be transcribed.
            audio.export("archive/tmp/abcd_efgh.wav", "wav")

            result = function_library.transcribe_robustly("archive/tmp/abcd_efgh.wav")
            result_dictionary = function_library.get_dict(result)
            word_object_list = function_library.get_word_list(result_dictionary)

            last_two_word_start = None

            # Search from the end for the last occurrence of the first word.
            print("\ntarget word : " + row.first_word.iloc[0])
            for word_object in reversed(word_object_list):
                print(word_object.word)
                if word_object.word == row.first_word.iloc[0]:
                    last_two_word_start = word_object.start_time
                    print("Word Found")
                    break

            print(last_two_word_start)

            if last_two_word_start is None:
                if len(word_object_list) > 1:
                    # Word not found: fall back to the start of the second-to-last transcribed word.
                    print(word_object_list[-1].start_time)
                    print(word_object_list[-2].start_time)
                    last_two_word_start = word_object_list[-2].start_time
                else:
                    # NOTE(review): the other assignments take a start_time from the
                    # transcription of the exported clip, whereas this fallback is derived
                    # from `end`; it is later added to `start` — confirm the intended offset.
                    last_two_word_start = max(0, end - 1)

        except FileNotFoundError as fE:
            audio_entries_not_found.append(name + file_format)
            continue

        print(audio_entry)

        # captcha type 2
        audio = original_audio[(start + last_two_word_start) * 1000:end * 1000]
        add_noise_and_export(output_directory, noise_for_row, '2', audio)

        # captcha type 3
        audio = original_audio[start * 1000:end * 1000]
        add_noise_and_export(output_directory, noise_for_row, '3', audio)

        # captcha type 4
        # End extended by 3 and capped at 25 — presumably the chunk length; TODO confirm.
        min_end = min(25, end + 3)
        audio = original_audio[start * 1000:min_end * 1000]
        add_noise_and_export(output_directory, noise_for_row, "4", audio)

elif captcha_type == "4":
    # for each audio item, get row.
    clip_source = "/home/riot/Desktop/AudioCaptcha/speech-recognition/Test_Data/v3_USED/c4_1499150924805736"

    for index, audio_entry in enumerate(sorted(audio_list)):

        audio_name = _without_suffix(audio_entry, file_format)
        row = file_dataframe[file_dataframe.name == audio_name]

        gt_rows.append([index, audio_entry, row.reduced_word.iloc[0]])

        try:
            original_audio = AudioSegment.from_file(os.path.join(clip_source, audio_entry), "wav")
        except FileNotFoundError as fE:
            audio_entries_not_found.append(audio_entry)
            continue

        print(audio_entry)

        # captcha type 4
        # The clip is re-exported unchanged under its index-based file name.
        output = os.path.join(output_directory, "from_c" + captcha_type, "c4", "file_" + str(index) + file_format)
        original_audio.export(output, format="wav")

elif captcha_type == "2":

    # for each audio item, get row.

    file_format = ".wav"
    # Greedy match up to the last "_chunk_<digits>"; used as the source chunk file name.
    regex = ".*_chunk_\\d*"

    noise = AudioSegment.from_file(os.path.join("parameter_input", "noise.wav"), format="wav")

    # `index` is also read as a global by add_noise_and_export() to name the output file.
    for index, audio_entry in enumerate(sorted(audio_list)):

        audio_name = _without_suffix(audio_entry, file_format)
        row = file_dataframe[file_dataframe.name == audio_name]

        name = re.match(regex, audio_name).group(0)

        start = row.start.iloc[0]
        end = row.end.iloc[0]
        noise_for_row = noise + row.noise.iloc[0]

        # The "easy" word is the ground truth; the other one is kept as the weak word.
        if row.first_word_easy.iloc[0]:
            gt = row.first_word.iloc[0]
            weak_word = [row.second_word.iloc[0]]
        else:
            gt = row.second_word.iloc[0]
            weak_word = [row.first_word.iloc[0]]

        gt_rows.append([index, audio_entry, gt, str(weak_word)])

        try:
            original_audio = AudioSegment.from_file(os.path.join(chunk_source, name + file_format), "wav")
        except FileNotFoundError as fE:
            audio_entries_not_found.append(name + file_format)
            continue

        print(audio_entry)

        # captcha type 2
        audio = original_audio[start * 1000:end * 1000]
        add_noise_and_export(output_directory, noise_for_row, '2', audio)

        # captcha type 3
        # Start moved back by 3, never before the beginning of the chunk.
        max_start = max(start - 3, 0)
        audio = original_audio[max_start * 1000:end * 1000]
        add_noise_and_export(output_directory, noise_for_row, '3', audio)

        # captcha type 4
        # End extended by 3 and capped at 25 — presumably the chunk length; TODO confirm.
        min_end = min(25, end + 3)
        audio = original_audio[max_start * 1000:min_end * 1000]
        add_noise_and_export(output_directory, noise_for_row, "4", audio)

gt_file = os.path.join(output_directory, "from_c" + captcha_type, "gt.csv")
with open(gt_file, "w", newline="") as fp:
    csv.writer(fp).writerows(gt_rows)

American_Civil_War_chunk_54_1497156637776667_count_0_noise_28_noise_type_White.wav
American_revolution_lecture_chunk_107_1497534345207332_count_2_noise_23_noise_type_White.wav
American_revolution_lecture_chunk_47_1497156637776667_count_3_noise_15_noise_type_White.wav
FinneginsWake_chunk_129_1499080254532628_count_0_noise_7_noise_type_White.wav
Google_IO_2017_chunk_196_1497534345207332_count_1_noise_19_noise_type_White.wav
Google_IO_2017_chunk_32_1497156637776667_count_0_noise_23_noise_type_White.wav
Google_IO_2017_chunk_96_1497156637776667_count_0_noise_18_noise_type_White.wav
History_4A_Fall_2007_UC_Berkeley_Lecture_24_Monarchy_at_Rome_The_Age_of_Augustus_20476_chunk_111_1499080254532628_count_0_noise_0_noise_type_White.wav
History_4A_Fall_2007_UC_Berkeley_Lecture_24_Monarchy_at_Rome_The_Age_of_Augustus_20476_chunk_129_1499080254532628_count_0_noise_0_noise_type_White.wav
Mcluhan-Mckenna_1_chunk_27_1499080254532628_count_1_noise_26_noise_type_White.wav
Nikola_Tesla_chunk_15_1497534345

In [106]:
# Display the clip names currently held in audio_list.
audio_list

['American_Civil_War_chunk_54_1497156637776667_count_0_noise_28_noise_type_White.wav',
 'American_revolution_lecture_chunk_107_1497534345207332_count_2_noise_23_noise_type_White.wav',
 'American_revolution_lecture_chunk_47_1497156637776667_count_3_noise_15_noise_type_White.wav',
 'FinneginsWake_chunk_129_1499080254532628_count_0_noise_7_noise_type_White.wav',
 'Google_IO_2017_chunk_196_1497534345207332_count_1_noise_19_noise_type_White.wav',
 'Google_IO_2017_chunk_32_1497156637776667_count_0_noise_23_noise_type_White.wav',
 'Google_IO_2017_chunk_96_1497156637776667_count_0_noise_18_noise_type_White.wav',
 'History_4A_Fall_2007_UC_Berkeley_Lecture_24_Monarchy_at_Rome_The_Age_of_Augustus_20476_chunk_111_1499080254532628_count_0_noise_0_noise_type_White.wav',
 'History_4A_Fall_2007_UC_Berkeley_Lecture_24_Monarchy_at_Rome_The_Age_of_Augustus_20476_chunk_129_1499080254532628_count_0_noise_0_noise_type_White.wav',
 'Mcluhan-Mckenna_1_chunk_27_1499080254532628_count_1_noise_26_noise_type_Whit

In [32]:
# Accumulator for the per-clip loudness values measured in a later cell.
dbfs_list = list()

# Every WAV produced by the initial user study, in random order.
# NOTE: this rebinds complete_file_list, which an earlier cell used for the log CSV paths.
complete_file_list = glob.glob("user_study_output\\user_study_initial_output\\*.wav")
shuffle(complete_file_list)

In [88]:
# Show the version string of the Python interpreter running this kernel.
import sys
sys.version

'3.6.1 |Anaconda 4.4.0 (64-bit)| (default, May 11 2017, 13:09:58) \n[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]'

In [33]:
# Loudness (dBFS) of every collected clip. The list is rebuilt from scratch so
# that re-running this cell does not append the same values a second time.
dbfs_list = [
    AudioSegment.from_file(file_entry, format="wav").dBFS
    for file_entry in complete_file_list
]

In [47]:
# Mean and standard deviation of the clip loudness values.
mean = np.mean(dbfs_list)
std = np.std(dbfs_list)

print(mean)
print(std)

# Count the clips that are louder than one standard deviation above the mean.
threshold = mean + std
number = sum(1 for dbfs_value in dbfs_list if dbfs_value > threshold)

print(threshold)
print(number)

-15.930842618
4.49456667099
-11.436275947
169


In [12]:
def create_stiched_output(selected_file_data, output_tag):
    """Uses selected_file_data for creating a stiched audio file.

    The selection is first dumped to ``stiched_output\\study<output_tag>.json``.
    One stitched WAV is then produced per clip variant ("_W_B.wav",
    "_O_B.wav" and ".wav") from the ``reduced_confidence\\`` location and
    exported to ``stiched_output\\study<output_tag><variant>``.

    Parameters:
        selected_file_data: clip names (without the variant suffix), in playback order.
        output_tag: text appended to the ``study`` output file names.
    """
    output_file_prefix = "stiched_output\\study" + output_tag

    # "with" closes the JSON file deterministically.
    with open(output_file_prefix + ".json", "w") as selection_file:
        json.dump(selected_file_data, selection_file)

    silence = AudioSegment.silent(duration=1000)
    post_number_silence = AudioSegment.silent(duration=500)

    source_location = "reduced_confidence\\"

    for tagged_format in ["_W_B.wav", "_O_B.wav", ".wav"]:
        stitched = stich_audio_files(selected_file_data, silence, post_number_silence, source_location, tagged_format)
        stitched.export(out_f=output_file_prefix + tagged_format, format="wav")

        # The placeholder was previously never filled in ("Done for {} _W_B.wav").
        print("Done for {}".format(tagged_format))

In [35]:
# Bin the video according to the initial confidence of the strong word.

def add_to_appropriate_df(original, variable_set):
    """Distribute the rows of ``original`` over confidence bins.

    Each entry of ``variable_set`` is a dict with ``"low"``, ``"high"`` and
    ``"df"``. A row joins a bin when the confidence of its easy word lies in
    ``(low, high]``: ``first_confidence`` is used where ``first_word_easy``
    is True, ``second_confidence`` where it is False. Matching rows are added
    to the bin's existing ``"df"`` with their original index, first-word
    matches before second-word matches.

    Parameters:
        original: DataFrame with ``first_word_easy``, ``first_confidence``
            and ``second_confidence`` columns.
        variable_set: list of bin dicts; updated in place.

    Returns:
        The same ``variable_set`` list.
    """
    for df_dict in variable_set:
        low, high = df_dict["low"], df_dict["high"]

        first_criteria = (original.first_word_easy == True) & (original.first_confidence > low) & (original.first_confidence <= high)
        second_criteria = (original.first_word_easy == False) & (original.second_confidence > low) & (original.second_confidence <= high)

        # DataFrame.append was removed in pandas 2.0; concatenate once instead.
        # Empty pieces are left out whenever something matched so that they
        # cannot influence the resulting dtypes.
        candidates = [df_dict["df"], original[first_criteria], original[second_criteria]]
        populated = [frame for frame in candidates if len(frame) > 0]
        df_dict["df"] = pd.concat(populated if populated else candidates)

    return variable_set


# Confidence bins of width `step` percent covering 0.70 to 1.00, each starting with an empty frame.
step = 5
variable_set = [
    {"low": lower / 100.0, "high": (lower + step) / 100.0, "df": pd.DataFrame()}
    for lower in range(70, 100, step)
]

# Fill every bin with the matching rows of the log data.
variable_set = add_to_appropriate_df(file_dataframe, variable_set)

# Tag shared by all outputs of this run: fixed prefix plus the current timestamp without its dot.
output_tag = "_Two_Words_" + str(datetime.now().timestamp()).replace(".", "")

def create_audio_by_confidence(number_of_entries=None):
    """Create one stitched output per non-empty confidence bin in ``variable_set``.

    For each bin, at most ``number_of_entries`` clip names are taken from the
    bin's frame and passed to ``create_stiched_output`` with a tag built from
    ``output_tag`` and the bin limits.

    Parameters:
        number_of_entries: maximum number of clips per bin. When None, the
            notebook-level ``Number_of_entries`` is used if it exists;
            otherwise every clip of the bin is used. (The function previously
            read ``Number_of_entries`` unconditionally and raised NameError
            when no cell had defined it.)
    """
    if number_of_entries is None:
        number_of_entries = globals().get("Number_of_entries")

    for df_dict in variable_set:
        df = df_dict["df"]
        if len(df) == 0:
            continue

        extended_tag = output_tag + "_Low_" + str(df_dict["low"]) + "_High_" + str(df_dict["high"])
        # Slicing with None keeps the complete list.
        selected_file_list = list(df.name)[:number_of_entries]

        create_stiched_output(selected_file_list, extended_tag)

        print("Done for : ", extended_tag)

def create_for_complete_list():
    """Stitch one study output from every clip name in ``file_dataframe``, tagged with ``output_tag``."""
    # Shuffling is currently disabled: shuffle(complete_file_data)
    every_name = list(file_dataframe.name)
    create_stiched_output(every_name, output_tag)
    
# Produce the stitched study output for the complete clip list.
create_for_complete_list()

In [None]:
def stich_audio_alternate_files():
    """Stiches 5 second random char Google CAPTCHAs to create 10 second CAPTCHAs.

    The 14 hard-coded clips are paired first-half with second-half (entry i
    with entry i + 7). Both clips of a pair are loaded and concatenated, and
    the pair is recorded under the first clip's name with the two ground-truth
    strings joined. The recorded list is printed; exporting the stitched audio
    is currently disabled.
    """
    items = json.loads('''[ {"audio":"output_149449394454942.wav", "gt":"89016"},
  {"audio":"output_149449657320933.wav", "gt":"43417"},
  {"audio":"output_1494487771961015.wav", "gt":"81330"},
  {"audio":"output_1494493574786677.wav", "gt":"62384"},
  {"audio":"output_1494496272313691.wav", "gt":"73592"},
  {"audio":"output_1494496294872982.wav", "gt":"10105"},
  {"audio":"output_1494496519281245.wav", "gt":"38695"},
  {"audio":"output_1494496601054923.wav", "gt":"80168"},
  {"audio":"output_1494496664266538.wav", "gt":"85780"},
  {"audio":"output_1494497754558948.wav", "gt":"99358"},
  {"audio":"output_1494497804519805.wav", "gt":"08884"},
  {"audio":"output_1494497859133929.wav", "gt":"47232"},
  {"audio":"output_1494497882023238.wav", "gt":"93916"},
  {"audio":"output_1494497934197222.wav", "gt":"24001"}]''')

    # Source of the 5 character clips and (for the disabled export) the target folder.
    c1_source_data = "Test_Data\\v1\\c1\\Google_Captcha_Demo_5_CHAR\\"
    c1_output_data = "Test_Data\\v3_2\\c1\\"

    combined_entries = []

    for first, second in zip(items[:7], items[7:]):
        stiched_result = AudioSegment.empty()
        for part in (first, second):
            stiched_result = stiched_result + AudioSegment.from_file(c1_source_data + part["audio"], format="wav")

        combined_entries.append({"audio": first["audio"], "gt": first["gt"] + second["gt"]})

        # stiched_result.export(c1_output_data + first["audio"], "wav")

    print(str(combined_entries))

# Pair up the hard-coded 5 character clips and print the combined ground truth.
stich_audio_alternate_files()

In [85]:
def create_filtered_aname_file(file_text, data_version, captcha_type):
    """Write the clip names listed in ``file_text`` to an ``aname`` JSON file.

    ``file_text`` holds one clip file name per line. Surrounding whitespace is
    stripped and blank lines are ignored (they previously produced
    ``{"audio": ""}`` entries). The result is a list of ``{"audio": <name>}``
    objects written to
    ``Test_Data/v<data_version>/audioname/aname<captcha_type>.json``; the
    folder is created when missing.

    Parameters:
        file_text: newline-separated clip file names.
        data_version: string used in the ``v<data_version>`` folder name.
        captcha_type: captcha version string used in the file name.
    """
    audio_name_list = [
        {"audio": line.strip()}
        for line in file_text.split("\n")
        if line.strip()
    ]

    audio_folder = "Test_Data/v" + data_version + "/audioname"
    os.makedirs(audio_folder, exist_ok=True)
    # "with" closes the handle; json.dump(..., open(...)) left that to the garbage collector.
    with open(os.path.join(audio_folder, "aname" + captcha_type + ".json"), "w") as name_file:
        json.dump(audio_name_list, name_file)
    
    
# Record the clip names listed below (one per line) for data version "3_3" and
# captcha type "4", i.e. in Test_Data/v3_3/audioname/aname4.json.
create_filtered_aname_file('''American_Civil_War_chunk_54_1497156637776667_count_0_noise_28_noise_type_White.wav
American_revolution_lecture_chunk_107_1497534345207332_count_2_noise_23_noise_type_White.wav
American_revolution_lecture_chunk_47_1497156637776667_count_3_noise_15_noise_type_White.wav
FinneginsWake_chunk_129_1499080254532628_count_0_noise_7_noise_type_White.wav
Google_IO_2017_chunk_196_1497534345207332_count_1_noise_19_noise_type_White.wav
Google_IO_2017_chunk_32_1497156637776667_count_0_noise_23_noise_type_White.wav
Google_IO_2017_chunk_96_1497156637776667_count_0_noise_18_noise_type_White.wav
History_4A_Fall_2007_UC_Berkeley_Lecture_24_Monarchy_at_Rome_The_Age_of_Augustus_20476_chunk_111_1499080254532628_count_0_noise_0_noise_type_White.wav
History_4A_Fall_2007_UC_Berkeley_Lecture_24_Monarchy_at_Rome_The_Age_of_Augustus_20476_chunk_129_1499080254532628_count_0_noise_0_noise_type_White.wav
Mcluhan-Mckenna_1_chunk_27_1499080254532628_count_1_noise_26_noise_type_White.wav
Nikola_Tesla_chunk_15_1497534345207332_count_0_noise_22_noise_type_White.wav
Nikola_Tesla_chunk_30_1497156637776667_count_1_noise_7_noise_type_White.wav
Shakespeare_chunk_38_1497156637776667_count_0_noise_23_noise_type_White.wav
Shakespeare_chunk_60_1497156637776667_count_0_noise_10_noise_type_White.wav
TerenceMckenna-TrueHallucinations02-16_chunk_58_1499080254532628_count_0_noise_14_noise_type_White.wav
TerenceMckenna-TrueHallucinations04-16_chunk_57_1499080254532628_count_0_noise_25_noise_type_White.wav
TerenceMckenna-TrueHallucinations06-16_chunk_63_1499080254532628_count_0_noise_21_noise_type_White.wav
TerenceMckenna-TrueHallucinations06-16_chunk_97_1499080254532628_count_0_noise_13_noise_type_White.wav
TerenceMckenna-TrueHallucinations08-16_chunk_21_1499080254532628_count_0_noise_0_noise_type_White.wav
TerenceMckenna-TrueHallucinations08-16_chunk_3_1499080254532628_count_1_noise_17_noise_type_White.wav
TerenceMckenna-TrueHallucinations08-16_chunk_41_1499080254532628_count_0_noise_9_noise_type_White.wav
TerenceMckenna-TrueHallucinations08-16_chunk_82_1499080254532628_count_0_noise_6_noise_type_White.wav
TerenceMckenna-TrueHallucinations08-16_chunk_82_1499080254532628_count_1_noise_26_noise_type_White.wav
TerenceMckenna-TrueHallucinations09-16_chunk_63_1499080254532628_count_0_noise_11_noise_type_White.wav
TerenceMckenna-TrueHallucinations09-16_chunk_64_1499080254532628_count_0_noise_7_noise_type_White.wav
TerenceMckenna-TrueHallucinations10-16_chunk_14_1499080254532628_count_0_noise_13_noise_type_White.wav
TerenceMckenna-TrueHallucinations10-16_chunk_14_1499080254532628_count_1_noise_19_noise_type_White.wav
TerenceMckenna-TrueHallucinations12-16_chunk_63_1499080254532628_count_0_noise_29_noise_type_White.wav
TerenceMckenna-TrueHallucinations12-16_chunk_64_1499080254532628_count_0_noise_18_noise_type_White.wav
TerenceMckenna-TrueHallucinations15-16_chunk_46_1499080254532628_count_0_noise_31_noise_type_White.wav
TheVoynichManuscript_chunk_117_1499080254532628_count_1_noise_0_noise_type_White.wav
Western_Philosophy_chunk_58_1497534345207332_count_0_noise_15_noise_type_White.wav
Yoshua_Bengio_chunk_59_1497156637776667_count_1_noise_0_noise_type_White.wav''', "3_3", "4")    