In [None]:
!pip install gradio yt-dlp librosa soundfile huggingsound transformers torch

import functools
import subprocess
import tempfile
from pathlib import Path

import gradio as gr
import librosa
import soundfile as sf
import torch
import yt_dlp
from huggingsound import SpeechRecognitionModel
from transformers import pipeline

@functools.lru_cache(maxsize=1)
def _load_speech_model(device):
    """Load the wav2vec2 English recognizer once per device and reuse it."""
    return SpeechRecognitionModel(
        "jonatasgrosman/wav2vec2-large-xlsr-53-english", device=device
    )


@functools.lru_cache(maxsize=1)
def _load_summarizer():
    """Build the default summarization pipeline once and reuse it."""
    return pipeline('summarization')


def process_video(url, progress=gr.Progress()):
    """Download a video's audio track, transcribe it, and summarize the text.

    All intermediate files (downloaded mp3, converted WAV, 30-block audio
    chunks) live in a per-call temporary directory, so repeated or
    concurrent calls never pick up each other's files and nothing is left
    behind in the working directory.

    Args:
        url: Address of the video, handed to yt-dlp unchanged.
        progress: Gradio progress tracker used to report each pipeline stage.

    Returns:
        The per-slice summaries joined with newlines (one summary per
        1000-character slice of the transcript), or a string starting with
        "Error: " describing the failure.
    """
    if not url or not url.strip():
        return "Error: please provide a YouTube URL"

    try:
        with tempfile.TemporaryDirectory() as workdir_name:
            workdir = Path(workdir_name)

            # Download YouTube video
            progress(0, desc="Downloading video")
            ydl_opts = {
                'format': 'bestaudio/best',
                # Fixed output name inside the scratch directory so the
                # extracted file can be located without globbing.
                'outtmpl': str(workdir / 'audio.%(ext)s'),
                # A playlist URL would otherwise produce several files.
                'noplaylist': True,
                'postprocessors': [{
                    'key': 'FFmpegExtractAudio',
                    'preferredcodec': 'mp3',
                    'preferredquality': '192',
                }],
            }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
            mp3_path = workdir / 'audio.mp3'

            progress(0.2, desc="Converting to WAV")
            # Convert to 16 kHz 16-bit PCM WAV. An argument list (no shell)
            # avoids glob expansion and quoting issues; -y prevents ffmpeg
            # from blocking on an overwrite prompt.
            wav_path = workdir / 'output.wav'
            subprocess.run(
                [
                    'ffmpeg', '-y',
                    '-i', str(mp3_path),
                    '-acodec', 'pcm_s16le',
                    '-ar', '16000',
                    str(wav_path),
                ],
                check=True,
                capture_output=True,
            )

            progress(0.4, desc="Transcribing audio")
            # Transcribe audio
            device = "cuda" if torch.cuda.is_available() else "cpu"
            model = _load_speech_model(device)
            stream = librosa.stream(
                str(wav_path),
                block_length=30,
                frame_length=16000,
                hop_length=16000
            )
            audio_paths = []
            for i, speech in enumerate(stream):
                chunk_path = str(workdir / f'{i}.wav')
                sf.write(chunk_path, speech, 16000)
                audio_paths.append(chunk_path)

            # The chunk files must still exist here, so transcription stays
            # inside the temporary-directory context.
            transcriptions = model.transcribe(audio_paths) if audio_paths else []

        full_transcript = ' '.join(item['transcription'] for item in transcriptions)
        if not full_transcript.strip():
            return "Error: no speech could be transcribed from this video"

        progress(0.8, desc="Summarizing text")
        # Summarize text in 1000-character slices. Stepping the range by the
        # slice size never yields an empty trailing slice, unlike counting
        # len // 1000 + 1 iterations.
        summarization = _load_summarizer()
        summarized_text = []
        for start in range(0, len(full_transcript), 1000):
            piece = full_transcript[start:start + 1000]
            if not piece.strip():
                continue
            out = summarization(piece, min_length=5, max_length=20)
            summarized_text.append(out[0]['summary_text'])

        progress(1.0, desc="Done")
        return '\n'.join(summarized_text)
    except Exception as e:
        # UI boundary: report the failure in the output box instead of
        # raising into Gradio.
        return f"Error: {str(e)}"

# Input and output widgets for the summarizer UI.
_url_box = gr.Textbox(label="YouTube URL")
_summary_box = gr.Textbox(label="Summary")

# Wire process_video to a one-input, one-output web interface.
iface = gr.Interface(
    process_video,
    _url_box,
    _summary_box,
    title="YouTube Video Summarizer",
    description="Enter a YouTube URL to get a summary of the video's content.",
)

# share=True asks Gradio for a temporary public link to the app.
iface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://b11cef05cb3b62303a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


