In [1]:
# Libraries used in this project.
# Install each one by running the corresponding pip command in a terminal.

# Libraries to install:
# pip install pytube
# pip install moviepy
# pip install SpeechRecognition
# pip install pydub
# pip install git+https://github.com/openai/whisper.git

In [2]:
# Make sure the output directories used by the later cells exist.
import os

# Both paths are relative to the notebook's working directory.
output_audio_dir = "./audios/"
# NOTE(review): "vidoe" is a typo for "video"; the name is kept unchanged
# in case other cells reference it.
output_vidoe_dir = "./videos/"

# exist_ok=True makes this cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_audio_dir, exist_ok=True)
os.makedirs(output_vidoe_dir, exist_ok=True)

In [6]:
# Code to download the video
from pytube import YouTube


def download_video(url, output_path, filename=None):
    '''
    Download a YouTube video using the stream that pytube reports as the
    highest resolution.

    Parameters:
    url (str): The url of the video that will be downloaded
    output_path (str): The directory where the video will be saved
    filename (str, optional): File name forwarded to pytube's download call.
        When None (the default) pytube chooses the name itself, so pass a
        name explicitly if a later step expects a fixed path.

    Returns:
    str or None: The value returned by pytube's download call (the path of
        the saved file), or None if the download failed. Failures are
        reported with a printed message rather than an exception.
    '''
    try:
        yt = YouTube(url)  # Create a YouTube object
        stream = yt.streams.get_highest_resolution()  # Pick the stream to fetch
        if stream is None:
            # Without this guard the failure would surface as an opaque
            # "'NoneType' object has no attribute 'download'" message.
            print("Failed to download: no downloadable stream found")
            return None
        saved_path = stream.download(output_path, filename=filename)  # Download the stream
        print("Download successful")
        return saved_path
    except Exception as e:
        # Best-effort behaviour kept from the original: report and carry on.
        print(f"Failed to download: {e}")
        return None

# Inputs for the download step.
vid_to_download = "https://www.youtube.com/watch?v=-S9OmJlMRFI"  # YouTube URL to fetch
output_path_video = "./videos/"  # Directory that receives the downloaded file

# Fetch the video with the settings above.
download_video(url=vid_to_download, output_path=output_path_video)




Download successful


In [27]:
# Code to get the audio from the video file

from moviepy.editor import VideoFileClip

def extract_audio(video_path, audio_path):
    '''
    Extract the audio track of a video file and write it to disk.

    Parameters:
    video_path (str): The path of the video whose audio will be extracted
    audio_path (str): The path where the audio will be saved

    Returns:
    None

    Raises:
    ValueError: If the opened clip has no audio track (its ``audio``
        attribute is None).
    '''
    video = VideoFileClip(video_path)  # Open the video file
    try:
        audio = video.audio  # Audio track of the clip (None when absent)
        if audio is None:
            raise ValueError(f"No audio track found in video: {video_path}")
        audio.write_audiofile(audio_path)  # Write the audio to the specified path
        print("Audio extraction successful")
    finally:
        # Always release the clip's underlying reader, even if writing fails,
        # so repeated runs in the same kernel do not leak resources.
        video.close()
    
# Inputs for the audio-extraction step.
path_video = "./videos/video.mp4"  # Path of the video file itself (not just its directory)
output_audio_path = "./audios/audio.mp3"  # Destination of the extracted audio

# Pull the audio track out of the video.
extract_audio(video_path=path_video, audio_path=output_audio_path)


chunk:   1%|          | 24/4564 [42:19<133:27:30, 105.83s/it, now=None]

MoviePy - Writing audio in ./audios/audio.mp3


chunk:   1%|          | 24/4564 [42:23<133:38:29, 105.97s/it, now=None]

MoviePy - Done.
Audio extraction successful


In [10]:
import whisper
from pydub import AudioSegment
import os
from tqdm import tqdm  # Import tqdm for the progress bar

def transcribe_with_whisper(audio_path, model_size="tiny", language=None, chunk_length_ms=10000):
    '''
    Transcribe an MP3 file with Whisper, one fixed-length chunk at a time.

    The audio is split with make_chunks, each chunk is exported to a
    temporary MP3 file and transcribed, and the per-chunk texts are joined
    with newlines.

    Parameters:
    audio_path (str): Path of the MP3 file to transcribe
    model_size (str): Name passed to whisper.load_model (default "tiny")
    language (str, optional): Forwarded to model.transcribe. None (the
        default) leaves language selection to Whisper for every chunk
        independently; passing a fixed language keeps all chunks consistent.
    chunk_length_ms (int): Chunk length handed to make_chunks
        (default 10000, i.e. 10-second chunks)

    Returns:
    str: The joined transcription, or "" if any step failed (the error is
        printed instead of raised).
    '''
    import tempfile  # Local import: only this function needs it

    try:
        # Load the model
        model = whisper.load_model(model_size)

        # Split the audio into fixed-length chunks
        audio = AudioSegment.from_mp3(audio_path)
        chunks = make_chunks(audio, chunk_length_ms)

        full_transcription = []
        # A fresh temporary directory is removed automatically on exit, so
        # chunk files never accumulate and stale files from an earlier run
        # cannot be picked up by mistake.
        with tempfile.TemporaryDirectory() as temp_dir:
            for i, chunk in tqdm(enumerate(chunks), total=len(chunks), desc="Transcribing Chunks"):
                # Export chunk to temporary file
                chunk_file = os.path.join(temp_dir, f"chunk{i}.mp3")
                chunk.export(chunk_file, format="mp3")

                # Transcribe the chunk
                result = model.transcribe(chunk_file, language=language)
                full_transcription.append(result["text"])

        # Join all transcribed text with line breaks for better readability
        return "\n".join(full_transcription).strip()
    except Exception as e:
        # Best-effort behaviour kept from the original: report and return "".
        print(f"Error during transcription: {e}")
        return ""

def make_chunks(audio, chunk_length_ms):
    '''
    Split a sliceable object into consecutive pieces of a fixed length.

    Parameters:
    audio: Any object supporting len() and slicing. In this notebook it is
        a pydub AudioSegment, whose length and slice positions are
        presumably in milliseconds (hence the parameter name).
    chunk_length_ms (int): Length of each piece; the last piece may be
        shorter when the total length is not an exact multiple.

    Returns:
    list: The pieces in order; an empty list when ``audio`` is empty.

    Raises:
    ValueError: If chunk_length_ms is not positive. A negative value would
        otherwise silently yield no chunks at all.
    '''
    if chunk_length_ms <= 0:
        raise ValueError(f"chunk_length_ms must be positive, got {chunk_length_ms}")
    return [
        audio[start:start + chunk_length_ms]
        for start in range(0, len(audio), chunk_length_ms)
    ]

# Transcribe the extracted audio with the smallest Whisper model and show the text.
audio_file_path = "./audios/audio.mp3"
transcription = transcribe_with_whisper(audio_file_path, model_size="tiny")
print(transcription)

Transcribing Chunks: 100%|██████████| 21/21 [01:03<00:00,  3.02s/it]

뭐.. 멋있다oi 노래いや 이렇게 화를 다 arrived 이 곡은 너무 멋있 TS approached
 😍
 It's not a silly little moment.
 It's not storm for the car This is a deep and time breath I It's all that we've worked in our end
 Getting no more to lack of one too So I can feel you in my arms But it's gonna come and save
 you, where Voldemort's a man who follows the lives we're going down. You can see it.
 We're going down and in order we're doing the magic
 You were so dancing in the burning room
 I'm going to do a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a little bit of a lit


