In [4]:
import os
import moviepy.editor as mp
import speech_recognition as sr

def extract_audio(video_file, output_audio_file):
    """Write the audio track of a video file to ``output_audio_file``.

    Args:
        video_file: Path of the video to read.
        output_audio_file: Destination path; passed unchanged to MoviePy's
            ``write_audiofile``.

    Raises:
        ValueError: If the loaded clip exposes no audio track.
    """
    video = mp.VideoFileClip(video_file)
    try:
        # MoviePy leaves ``audio`` as None for clips without an audio stream;
        # fail with a clear message instead of an AttributeError on None.
        if video.audio is None:
            raise ValueError(
                "{0!r} has no audio track to extract".format(video_file)
            )
        video.audio.write_audiofile(output_audio_file)
    finally:
        # Always release the clip's reader resources, even when writing fails.
        video.close()

def convert_mp3_to_wav(mp3_file, output_wav_file):
    """Re-encode an audio file as 16-bit PCM and write it to ``output_wav_file``.

    Args:
        mp3_file: Path of the source audio file.
        output_wav_file: Destination path for the converted audio.
    """
    audio = mp.AudioFileClip(mp3_file)
    try:
        audio.write_audiofile(output_wav_file, codec='pcm_s16le')
    finally:
        # Always release the clip's reader resources, even when writing fails.
        audio.close()

def transcribe_audio(wav_file):
    """Return the Sphinx transcription of ``wav_file``.

    The whole file is loaded into memory and handed to the recognizer's
    ``recognize_sphinx``. If recognition fails with ``UnknownValueError`` or
    ``RequestError``, a message is printed and an empty string is returned
    instead of raising.

    Args:
        wav_file: Path of the audio file to transcribe.

    Returns:
        The recognized text, or ``""`` when recognition fails.
    """
    engine = sr.Recognizer()

    # Capture the full recording first; the source is only needed for this.
    with sr.AudioFile(wav_file) as wav_source:
        recording = engine.record(wav_source)

    try:
        transcript = engine.recognize_sphinx(recording)
    except sr.UnknownValueError:
        print("Sphinx could not understand audio")
        transcript = ""
    except sr.RequestError as err:
        print("Sphinx error; {0}".format(err))
        transcript = ""
    return transcript

def main(video_file):
    """Transcribe the speech in ``video_file`` and save it as a text file.

    Intermediate and output files are written next to the video, sharing its
    path without the extension: ``<base>.mp3``, ``<base>.wav`` and
    ``<base>_transcription.txt``.

    Args:
        video_file: Path of the video to transcribe.
    """
    # All derived files share the video's path minus its extension.
    base_path = os.path.splitext(video_file)[0]

    # Extract audio from video
    audio_file = base_path + ".mp3"
    extract_audio(video_file, audio_file)

    # Convert MP3 to WAV
    wav_file = base_path + ".wav"
    convert_mp3_to_wav(audio_file, wav_file)

    # Transcribe audio
    transcription = transcribe_audio(wav_file)

    # Write transcription to file; an explicit encoding keeps the output
    # independent of the platform's default locale encoding.
    output_text_file = base_path + "_transcription.txt"
    with open(output_text_file, "w", encoding="utf-8") as text_file:
        text_file.write(transcription)

# Entry point: run the pipeline only when this module is executed directly.
if __name__ == "__main__":
    # Hard-coded relative input path; edit it to transcribe a different video.
    video_file = "vid.mp4"  # Provide the video file path directly
    main(video_file)


MoviePy - Writing audio in vid.mp3


                                                                                

MoviePy - Done.
MoviePy - Writing audio in vid.wav


                                                                                

MoviePy - Done.
