<a href="https://colab.research.google.com/github/suhanitatiya/Automated-Transcript-generator/blob/main/video_to_text.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Create a new folder named "Video_to_text" in the root directory of your Google Drive. The outputs for the videos will be saved there.

Please make sure to mount your Google Drive first. Click on the folder icon in the sidebar and select "Mount Drive". <br>
OR <br>
Run the following code cell and then continue to the next steps.

In [None]:
# Mount Google Drive at /content/drive so that paths under it are reachable from this notebook.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!apt-get install ffmpeg

In [None]:
# necessary requirements :
# moviepy : extract audio from video
# whisper : ASR
# gdown : download files from Google drive
!pip install moviepy openai-whisper gdown

In [None]:
import os
import moviepy.editor as mp
import whisper
import gdown

Insert the video links below.<br>
Please make sure the sharing access of each link is set to "Anyone with the link".

In [None]:
# Google Drive share links of the videos to transcribe, one string per video.
# Keep the trailing comma after every entry: two adjacent string literals
# without a comma between them are silently joined into a single string.
video_links = [
    "https://drive.google.com/file/d/1tai6Jch97ywaDYUfjdPf9fk_eFeqfFp8/view?usp=sharing",
]

Insert the path to the directory on the drive where you would like to save the outputs.

In [None]:
# Folder on the mounted drive under which the outputs directory is placed.
DRIVE_OUTPUTS_DIR = "/content/drive/MyDrive/Video_to_text"

In [None]:
ASR_MODEL_NAME = "small"             # Whisper model to load; available models (in increasing complexity): "tiny", "base", "small", "medium", "large"
OUTPUTS_DIR_NAME = "outputs"         # Name of the directory in which the text files are saved

In [None]:
# Full path of the outputs directory: <DRIVE_OUTPUTS_DIR>/<OUTPUTS_DIR_NAME>.
OUTPUTS_DIR = os.path.join(DRIVE_OUTPUTS_DIR, OUTPUTS_DIR_NAME)

In [None]:
# Local file paths of the downloaded videos (starts empty).
video_paths = []

In [None]:
# Start from an empty list on every run so that re-executing this cell does
# not append the same videos a second time.
video_paths = []
for url in video_links:
  # fuzzy=True lets gdown extract the file id from a full Drive share link.
  v_path = gdown.download(url, fuzzy=True)
  if v_path is None:
    # NOTE(review): some gdown versions return None instead of raising when a
    # download fails; such entries are left out so later steps only see files.
    print(f"Could not download: {url}")
    continue
  video_paths.append(v_path)

In [None]:
def convert_video_to_audio(video_path, audio_path):
    """Extract the audio track of a video file and write it to an audio file.

    Args:
        video_path: Path of the video file to read.
        audio_path: Path of the audio file to write.

    Raises:
        ValueError: If the video clip has no audio track.
    """
    video = mp.VideoFileClip(video_path)
    try:
        if video.audio is None:
            # Fail with an explicit message instead of an AttributeError on None.
            raise ValueError(f"No audio track found in video: {video_path}")
        video.audio.write_audiofile(audio_path)
    finally:
        # Always close the clip, even when writing the audio fails, so the
        # resources it holds are released.
        video.close()


In [None]:
def transcribe_audio_to_text(audio_path, model):
    """Transcribe an audio file with the given model and return the text.

    Calls ``model.transcribe(audio_path)`` and returns the ``'text'`` entry
    of the result.
    """
    return model.transcribe(audio_path)['text']

In [None]:
def process_videos(video_paths, output_dir, model):
    """Convert each video to audio, transcribe it, and save the transcript.

    For every video a sub-directory named after the video file (without its
    extension) is created inside ``output_dir``; the extracted ``.wav`` audio
    and the ``.txt`` transcript are written there.

    Args:
        video_paths: Iterable of paths to video files. Empty or ``None``
            entries are skipped.
        output_dir: Directory in which the per-video sub-directories are created.
        model: Object passed to ``transcribe_audio_to_text`` to perform the
            transcription.
    """

    for video_path in video_paths:
        if not video_path:
            # Nothing to process for this entry; skip it instead of crashing
            # in os.path.basename.
            print("\nSkipping an empty video path.")
            continue

        video_filename = os.path.basename(video_path)
        video_name, _ = os.path.splitext(video_filename)

        output_subdir = os.path.join(output_dir, video_name)
        os.makedirs(output_subdir, exist_ok=True)

        audio_path = os.path.join(output_subdir, video_name + '.wav')
        text_path = os.path.join(output_subdir, video_name + '.txt')

        print(f"\nProcessing video: {video_path}")

        # Convert video to audio
        convert_video_to_audio(video_path, audio_path)

        # Convert audio to text
        text = transcribe_audio_to_text(audio_path, model)

        # Save text to file. UTF-8 is set explicitly so transcripts with
        # non-ASCII characters are written the same way on every platform.
        with open(text_path, 'w', encoding='utf-8') as text_file:
            text_file.write(text)

        print(f"Finished processing video: {video_path}")
        print(f"Saved the transcripts to : {text_path}")


In [None]:

# Run the pipeline: announce the start, load the Whisper model once, then
# process every downloaded video into the outputs directory.
print('\nStarting the video to text process...\n')

output_dir = OUTPUTS_DIR
model = whisper.load_model(ASR_MODEL_NAME)

process_videos(video_paths, output_dir, model)


Outputs will be saved to your Drive folder.