In [3]:
# !python -m venv venv
# !pip install -r requirements.txt
# pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113

In [4]:
import os
# Create the directories if they don't exist
output_audio_dir = "./audios/"
if not os.path.exists(output_audio_dir):
    os.makedirs(output_audio_dir)


In [5]:
# Code to download the video
from pytube import YouTube
from tqdm import tqdm  

def download_video(url, output_path):
    '''
    This function is used to download the video from the youtube.
    
    Parameters:
    url (str): The url of the video that will be downloaded
    output_path (str): The path where the video will be saved
    
    Returns:
    None
    '''
    try:
        audiofilename = "audio.mp3"
        yt = YouTube(url) # Create a YouTube object
        stream =  yt.streams.filter(only_audio=True).first() # Get the highest resolution stream
        tqdm(stream.download(output_path=output_path, filename=audiofilename)) # Download the stream
        print("Download successful") 
    except Exception as e:
        print(f"Failed to download: {e}")

vid_to_download = input("Enter the url of the video that you want to download: ")

output_path_video = "./audios/" # This is the path where the video will be saved

download_video(vid_to_download, output_path_video) # Calling the function that downloads the video




  0%|          | 0/44 [00:00<?, ?it/s]

Download successful





In [None]:
import whisper
import torch

def transcribe_with_whisper(audio_path):
    ''' 
    This function is used to transcribe the audio file using the whisper model.
    
    Parameters:
    audio_path (str): The path of the audio that will be transcribed
    
    Returns:
    str: The transcribed text
    '''
    try:
        # Load the model
        model = whisper.load_model('medium',device='cuda') #Differnt models can be used here i.e 'base', 'small', 'medium', 'large'
        result = model.transcribe(audio_path) #Transcribing the audio
        transcribed_text = result["text"] #Getting the text from the result
        return transcribed_text

    except Exception as e:
        print(f"Error during transcription: {e}")
        return ""
    

def format_transcription(text, words_per_line=20):
    '''
    This function is used to format the transcribed text.
    
    Parameters:
    text (str): The text that will be formatted
    words_per_line (int): The number of words per line
    
    Returns:
    str: The formatted text
    '''
    words = text.split()
    formatted_text = "" 
    for i in range(0, len(words), words_per_line): #
        formatted_text += ' '.join(words[i:i+words_per_line]) + "\n"
    return formatted_text

audio_file_path = "./audios/audio.mp3"
transcription = transcribe_with_whisper(audio_file_path)
formatted_transcription = format_transcription(transcription)
print("Transcription Result:\n")
print("--------------------------------------------------")
print(formatted_transcription)
print("--------------------------------------------------")
