In [9]:
# see documentation https://www.thepythoncode.com/article/using-speech-recognition-to-convert-speech-to-text-python
# Prerequisites:
# - install pocketsphinx
# - install SpeechRecognition and pydub
# - possibly also install swig and ffmpeg, and add them to the PATH environment variable

import speech_recognition as sr
import os
import glob
from pydub import AudioSegment
from pydub.silence import split_on_silence

# a function that collects the paths of the .wav files matched by a glob pattern
def get_audio_paths(folder_path):
    """
    Expand the glob pattern `folder_path` recursively and return
    a list with every match that is a regular file and whose
    name ends with ".wav" (directories are skipped)
    """
    matches = glob.iglob(str(folder_path), recursive=True)
    return [
        candidate
        for candidate in matches
        if os.path.isfile(candidate) and candidate.endswith(".wav")
    ]

# a function that splits the audio file into chunks
# and applies speech recognition
def get_large_audio_transcription(path, recognizer=None):
    """
    Split a large WAV file into chunks on silence, apply speech
    recognition on each of these chunks and save the transcript.

    path: path of the WAV file to transcribe
    recognizer: optional `sr.Recognizer` used for the recognition; when
        omitted a new one is created, so the function does not depend
        on a module-level recognizer object being defined

    Returns the concatenated text of all recognized chunks. The same
    text is written to a ".txt" file with the same base name as `path`.
    Chunks that raise `sr.UnknownValueError` are reported and skipped.
    """
    if recognizer is None:
        recognizer = sr.Recognizer()
    # open the audio file using pydub
    sound = AudioSegment.from_wav(path)
    # split audio sound where silence is 500 milliseconds or more and get chunks
    chunks = split_on_silence(sound,
        # experiment with this value for your target audio file
        min_silence_len = 500,
        # threshold relative to the loudness of the file (sound.dBFS),
        # adjust this per requirement
        silence_thresh = sound.dBFS-14,
        # keep 500 milliseconds of silence around each chunk, adjustable as well
        keep_silence=500,
    )
    folder_name = "audio-chunks"
    # create a directory to store the audio chunks; the chunk files in it
    # are overwritten on every call because their names only depend on `i`
    os.makedirs(folder_name, exist_ok=True)
    recognized = []
    # process each chunk
    for i, audio_chunk in enumerate(chunks, start=1):
        # export audio chunk and save it in
        # the `folder_name` directory.
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        # read the exported chunk back as audio data
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = recognizer.record(source)
        # try converting it to text
        try:
            text = recognizer.recognize_sphinx(audio_listened)
        except sr.UnknownValueError as e:
            # name the chunk so the failure can be traced back to a file
            print("Error:", chunk_filename, str(e))
            continue
        text = f"{text.capitalize()}. "
        print(chunk_filename, ":", text)
        recognized.append(text)

    whole_text = "".join(recognized)
    # write the transcript next to the audio file; the context manager
    # closes the file even if writing fails
    transcript_path = os.path.splitext(path)[0] + ".txt"
    with open(transcript_path, "w", encoding="utf-8") as f:
        f.write(whole_text)
    # return the text for all chunks detected
    return whole_text

if __name__ == "__main__":
    # speech recognition object, kept in the module-level name `r`
    r = sr.Recognizer()

    # glob pattern for the video database folder; the trailing "**"
    # makes the search descend into all sub-folders
    root_dir = "C:\\Users\\adria\\OneDrive - Universität Zürich UZH\\2021_FS\\Interactive Video Retrieval\\Project\\Video_DB\\**"
    filepath_list = get_audio_paths(root_dir)

    # transcribe every audio file that was found
    for wav_path in filepath_list:
        get_large_audio_transcription(wav_path)

audio-chunks\chunk1.wav : That was jason of c. affiliate we are and what is at least half but it also for two journalists for food walls in revenues were so and now found new phones here what are those bottles of night the outpouring we're out he had and that and home and i think he thought i sit here and had that read what he and bits and it is a voyage of the small business that and which son in law he he has has a high social and and i like to offer things at home who choose it i my son's the anticipated to that he even if it's just my block radius said i hope you i hope it's my own life while sue how hydroponic farm in this possibly or as how we're in the backyard home i've reached out to the man that has been seeing in the wall and it is that comes hands and to leave things like that in the belief and and hope that individuals to stressful situations end it now we've got the late war and what he has won't live show what what what and does it won't do it when to good to what and fe

audio-chunks\chunk66.wav : Yeah i'll be back to the reinhold that. 
audio-chunks\chunk67.wav : Isolates woody's voice did. 
audio-chunks\chunk68.wav : No way an least my ideal into space on some kind of empirical facts. 
audio-chunks\chunk69.wav : They get them. 
audio-chunks\chunk70.wav : The receives got much now. 
audio-chunks\chunk71.wav : This one. 
audio-chunks\chunk72.wav : Hello leaves office wine trying out. 
audio-chunks\chunk73.wav : No. 
audio-chunks\chunk74.wav : It's a whining. 
audio-chunks\chunk75.wav : Fuck. 
audio-chunks\chunk76.wav : The end of christians allow ponds that. 
audio-chunks\chunk77.wav : Uh huh. 
audio-chunks\chunk78.wav : And i'd blow ha-ha-ha it he. 
audio-chunks\chunk79.wav : Automobiles are could've done. 
audio-chunks\chunk80.wav : And. 
audio-chunks\chunk81.wav : Yeah. 
audio-chunks\chunk82.wav : I never swear. 
audio-chunks\chunk83.wav : I'm not. 
audio-chunks\chunk84.wav : Must it always noise to in here. 
audio-chunks\chunk85.wav : About them wi

audio-chunks\chunk41.wav : They'll file citizens of the city of office what'd they fit with you susan one vote won't harm you almost flawless. 
audio-chunks\chunk42.wav : You know source of food gets to stoke bishop. 
audio-chunks\chunk43.wav : Who said it is sure to a to develop ticket subject of questions all good can with specific to the winds fog. 
audio-chunks\chunk44.wav : It will save officials multicultural south korea efficient. 
audio-chunks\chunk45.wav : Contorted officials in prison more themselves up to them off it's about the future for the engine will show the forgiveness as shown here than in the new film of yours from whose didn't do the who the puppet show has the will. 
audio-chunks\chunk46.wav : That love for his house in the pictures were to them. 
audio-chunks\chunk47.wav : In most of the children of his shoulders of instead of one of the verses to love. 
audio-chunks\chunk48.wav : The wishes of the food chain of disruptive for showed corpse was found but what is 

audio-chunks\chunk114.wav : He said to go to the sickles kind of vote is of a g. filled with inches of. 
audio-chunks\chunk115.wav : The bush hopefulness up with that is to sequels of bash to fished with his sickles that was richard. 
audio-chunks\chunk116.wav : The sofas again waters of tall do the job. 
audio-chunks\chunk117.wav : Who was it because the more you don't rush me jump in and focusing to reject auschwitz in which shows in circles. 
audio-chunks\chunk118.wav : Vigil for to the view that that is included. 
audio-chunks\chunk119.wav : That they do up close and also. 
audio-chunks\chunk120.wav : I told you his service for social lubricant bush. 
audio-chunks\chunk121.wav : Beautiful film which is what. 
audio-chunks\chunk122.wav : A delegation to visit us what the shop. 
audio-chunks\chunk123.wav : The the who pleasure to watch it does it's really the midst of sickle the judge. 
audio-chunks\chunk124.wav : But all the fifth one who put the physician who would fill the truth t

audio-chunks\chunk3.wav : Did d. and hand did do and you live when it could go up to us live head that they've one and that i know what a death is now up the phone and its allies who serve so and and who have to sleep so we get this that their beliefs have is the theory of love for stuff is that enough of the of love the type totally up at a i think the government that this is what in that and i can call. 
audio-chunks\chunk1.wav : Look we have. 
audio-chunks\chunk2.wav : Hey have that to have. 
audio-chunks\chunk3.wav : What. 
audio-chunks\chunk4.wav : Oh. 
audio-chunks\chunk5.wav : Oh. 
audio-chunks\chunk6.wav : One of the plan i had an in depth to it that he that. 
audio-chunks\chunk7.wav : Hey hey hey they called them who died a month is shall have the do. 
audio-chunks\chunk8.wav : The. 
audio-chunks\chunk9.wav : Had go alone. 
audio-chunks\chunk10.wav : And you have a right that one capsule and i was and how. 
audio-chunks\chunk11.wav : Not yet. 
audio-chunks\chunk12.wav : And ye