In [1]:
import speech_recognition as sr
import pandas as pd
import os 
from multiprocessing import Pool, cpu_count
from functools import partial
from tqdm import tqdm


# Transcribe a folder of the dataset with a transcriber

In [9]:
def transcribe(song_path, transcriber):
    """Transcribe one audio file and write the text to a mirrored path.

    The destination path is derived from ``song_path`` by replacing the
    substring ``'dataset'`` with ``transcriber`` and swapping the file
    extension for ``.txt``. If the destination already exists the file is
    skipped, so an interrupted run can be resumed.

    Args:
        song_path: Path of an audio file that ``sr.AudioFile`` can open.
        transcriber: Name of the recognition engine. Only ``'sphinx'`` is
            implemented; any other value produces an empty transcription.

    Returns:
        None. The transcription (possibly empty) is written to disk.
    """
    dest_path = song_path.replace('dataset', transcriber)
    # Swap only the trailing extension. A plain str.replace on the extension
    # text would also rewrite matching directory names or file stems
    # (e.g. a folder called 'wav').
    dest_path = os.path.splitext(dest_path)[0] + '.txt'

    # Already transcribed: nothing to do.
    if os.path.exists(dest_path):
        return

    r = sr.Recognizer()
    with sr.AudioFile(song_path) as source:
        audio = r.record(source)  # read the entire audio file

    # here depending on transcriber argument we will switch on different methods
    if transcriber == 'sphinx':
        try:
            transcription = r.recognize_sphinx(audio)
        except Exception:
            # Best effort: a failed recognition yields an empty transcription.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit stop the worker.
            transcription = ''
    else:
        transcription = ''

    dest_dir = os.path.dirname(dest_path)
    if dest_dir:
        # exist_ok tolerates concurrent workers creating the same folder,
        # while real failures (e.g. permissions) are no longer hidden.
        os.makedirs(dest_dir, exist_ok=True)

    with open(dest_path, "w") as text_file:
        text_file.write(transcription)  # TODO: check
    return


def dataset_transcription(dataset_csv_path, transcriber):
    """Transcribe every audio file listed in a dataset CSV, in parallel.

    Reads the CSV at ``dataset_csv_path``, takes its ``'path'`` column as the
    list of audio files, and runs ``transcribe`` on each one in a process pool
    with one worker per CPU, showing a progress bar.

    Args:
        dataset_csv_path: Path of a CSV file that has a ``'path'`` column.
        transcriber: Engine name forwarded to ``transcribe``.

    Returns:
        None.
    """
    orig_df = pd.read_csv(dataset_csv_path)
    audio_path_list = orig_df['path'].to_list()
    f_part = partial(transcribe, transcriber=transcriber)
    # The context manager terminates the workers on exit, including when the
    # loop is interrupted (e.g. KeyboardInterrupt), so no processes are leaked.
    # On the normal path every task has finished once the iterator is drained.
    with Pool(cpu_count()) as pool:
        results = pool.imap_unordered(f_part, audio_path_list)
        for _ in tqdm(results, total=len(audio_path_list)):
            pass
    return

## Run from here

In [12]:
# Datasets to transcribe; alternative: ['train-clean-360']
dataset_name = ['test-clean_train_5']
for a in dataset_name:
    # Same engine for every dataset.
    trans = 'sphinx'
    # CSV listing the audio files of this dataset.
    in_csv_path = ''.join(['/nas/home/cborrelli/speech_forensics/csv/', a, '.csv'])
    dataset_transcription(in_csv_path, trans)



  0%|          | 0/272 [00:00<?, ?it/s][A[A

  0%|          | 1/272 [00:07<34:13,  7.58s/it][A[A

  1%|          | 2/272 [00:09<27:06,  6.02s/it][A[A

  1%|          | 3/272 [00:10<19:40,  4.39s/it][A[A

  1%|▏         | 4/272 [00:12<16:52,  3.78s/it][A[A

  2%|▏         | 5/272 [00:13<12:46,  2.87s/it][A[A

KeyboardInterrupt: 