In [13]:
!pip install -q openai youtube-transcript-api retry

## Extract Video Transcription from YouTube

In [5]:
from youtube_transcript_api import YouTubeTranscriptApi
def get_transcript(youtube_video_id):
    """
    Fetches the transcript of a YouTube video, prints it, and returns it.

    Args:
        youtube_video_id: Video ID passed through to
            YouTubeTranscriptApi.get_transcript.

    Returns:
        The value returned by YouTubeTranscriptApi.get_transcript for the video.
    """
    transcript = YouTubeTranscriptApi.get_transcript(youtube_video_id)
    print(transcript)
    # Hand the transcript back as well, so callers can process it instead of
    # only reading the printout.
    return transcript

In [9]:
# Fetch and print the transcript for the video with ID "bSRMmpSTJ8E".
get_transcript("bSRMmpSTJ8E")



## Extract Video Transcription using Whisper

- [whisper](https://openai.com/index/whisper/)

In [14]:
!pip install -q moviepy openai-whisper

In [15]:
!pip install -q torch torchvision torchaudio --upgrade


In [4]:
import os
import moviepy.editor as mp
import whisper
import torch

# Set the environment variable to disable the upper limit for memory allocations
# NOTE(review): this assignment runs after `import torch` above — confirm the MPS
# allocator reads the variable late enough for the setting to take effect.
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"

def extract_audio_from_video(video_path, audio_output_path):
    """
    Extracts audio from a video file and saves it to an output path.

    Args:
        video_path: Path of the video file to open with MoviePy.
        audio_output_path: Destination path for the extracted audio file.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = mp.VideoFileClip(video_path)
    try:
        # A clip without an audio track exposes `audio` as None; fail with a
        # clear message instead of an AttributeError on None.
        if video.audio is None:
            raise ValueError(f"No audio track found in video: {video_path}")
        video.audio.write_audiofile(audio_output_path)
    finally:
        # Always release the resources held by the clip, even on failure.
        video.close()

def mps_available():
    """
    Picks the torch device on which the Whisper model should run.

    Returns torch.device("mps") when the MPS backend is both available and
    built (emptying the MPS cache first); otherwise returns torch.device("cpu").
    """
    mps_backend = torch.backends.mps
    if not (mps_backend.is_available() and mps_backend.is_built()):
        return torch.device("cpu")

    torch.mps.empty_cache()  # Drop cached MPS memory before handing out the device
    return torch.device("mps")

def transcribe_audio(audio_path, model_name='base'):
    """
    Transcribes audio using Whisper.

    The model is first loaded on the device chosen by mps_available(). If that
    load fails on a non-CPU device, the model is loaded on CPU instead.

    Args:
        audio_path: Path of the audio file passed to the model's transcribe().
        model_name: Whisper model name handed to whisper.load_model.

    Returns:
        The 'text' entry of the transcription result.

    Raises:
        Exception: Whatever whisper.load_model raised, if the load fails on CPU.
    """
    device = mps_available()
    print(f"Loading model using: {device}")

    try:
        model = whisper.load_model(model_name, device=device)
        print("Model loading complete")
    except Exception as e:
        # A CPU "fallback" only makes sense if the failed attempt was not
        # already on CPU; otherwise surface the original error.
        if device.type == "cpu":
            raise
        print(f"Error: {e}")
        print("Falling back to CPU...")
        # Load model on CPU if there's an issue with MPS
        model = whisper.load_model(model_name, device="cpu")

    print("Transcribing...")
    result = model.transcribe(audio_path)
    return result['text']

def transcribe_video(video_path, audio_output_path='output_audio.wav', model_name='large'):
    """
    Transcribes a video by first writing its audio track to a file.

    The audio is saved to `audio_output_path`, which is then passed to
    transcribe_audio together with `model_name`. Returns the transcribed text.
    """
    # Step 1: dump the audio track to disk.
    extract_audio_from_video(video_path, audio_output_path)
    print("Audio extraction complete, transcription in progress...")

    # Step 2: run Whisper on the extracted audio file.
    text = transcribe_audio(audio_output_path, model_name)
    print("Transcription completed")

    return text

def write_transcripts(video_path, transcript_file_path):
    """
    Transcribes a video and writes the transcript to a text file.

    Args:
        video_path: Path of the video handed to transcribe_video.
        transcript_file_path: Path of the text file to write (opened in 'w' mode).

    Errors raised while writing the file are reported on stdout, not re-raised.
    """
    transcription = transcribe_video(video_path)

    try:
        # Explicit UTF-8 so non-ASCII transcript text does not depend on the
        # platform's default encoding.
        with open(transcript_file_path, 'w', encoding='utf-8') as file:
            print("Writing to file...")
            file.write(transcription)
        print(f"Successfully written text to {transcript_file_path}")
    except Exception as e:
        print(f"An error occurred: {e}")

# Entry point: transcribe the video at video_path and write the text to
# transcript_file_path via write_transcripts.
# NOTE(review): video_path is an absolute, machine-specific path — adjust it
# before running this cell on another machine.
if __name__ == "__main__":
    video_path = '/Users/tekrajchhetri/Downloads/video.mp4'
    transcript_file_path = "GMT20240604-154609_Recording_large.txt"
    write_transcripts(video_path, transcript_file_path)


MoviePy - Writing audio in output_audio.wav


                                                                                                       

MoviePy - Done.
Audio extraction complete, transcription in progress...
Loading model using: mps
Error: Could not run 'aten::empty.memory_format' with arguments from the 'SparseMPS' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'aten::empty.memory_format' is only available for these backends: [CPU, MPS, Meta, QuantizedCPU, QuantizedMeta, MkldnnCPU, SparseCPU, SparseMeta, SparseCsrCPU, SparseCsrMeta, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradHIP, AutogradXLA, AutogradMPS, AutogradIPU, AutogradXPU, AutogradHPU, AutogradVE, AutogradLazy, AutogradMTIA, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, AutogradMeta, 

In [7]:
import torch

In [9]:
# Sanity check: create a tensor of five ones on the "mps" device and display it.
x = torch.ones(5, device=torch.device("mps"))
x

tensor([1., 1., 1., 1., 1.], device='mps:0')