In [1]:
from __future__ import unicode_literals

import sys
import textwrap

import ffmpeg
import speech_recognition as sr
import yt_dlp
from gtts import gTTS
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import pipeline


  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# Base name of the downloaded audio; ".wav" is appended where the file is opened.
file_name = "output"

# Ask the user which video should be transcribed and summarized.
video_url = input("Enter the video link: ")

In [11]:
def download_from_url(url: str, output_file: str) -> None:
    """Download the best available audio of ``url`` and extract it to WAV.

    The download itself and the conversion are both handled by yt-dlp: the
    raw stream is written to ``output_file`` and the ``FFmpegExtractAudio``
    post-processor then converts it to WAV (the run log in this notebook
    shows the result being written to ``output_file + ".wav"``).

    Args:
        url: Link of the video to download.
        output_file: Value passed to yt-dlp as the ``outtmpl`` option.

    Raises:
        ValueError: If ``url`` is empty or contains only whitespace.
    """
    # Fail fast with a clear message instead of handing a blank link to yt-dlp.
    if not url or not url.strip():
        raise ValueError("url must be a non-empty video link")

    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': output_file,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
        }],
    }

    # No separate ffmpeg step is needed here: the post-processor configured
    # above already performs the conversion to WAV.
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url.strip()])

In [12]:
# Fetch the audio track of the requested video.
download_from_url(url=video_url, output_file=file_name)


[youtube] Extracting URL: https://www.youtube.com/watch?v=vdIRqXd7K3k
[youtube] vdIRqXd7K3k: Downloading webpage
[youtube] vdIRqXd7K3k: Downloading android player API JSON
[info] vdIRqXd7K3k: Downloading 1 format(s): 251
[download] Destination: output
[download] 100% of    3.73MiB in 00:02:22 at 26.90KiB/s  
[ExtractAudio] Destination: output.wav
Deleting original file output (pass -k to keep)


In [13]:
# Recognizer instance used by the following cells to read the audio file
# and to transcribe it.
r = sr.Recognizer()

In [14]:
# Wrap the extracted WAV file so the recognizer can read from it.
wav_path = f"{file_name}.wav"
video = sr.AudioFile(wav_path)

In [15]:
# Open the audio file and read it into `audio`
# (record() is called without a duration or offset).
with video as audio_source:
    audio = r.record(audio_source)

In [16]:
# Transcribe the recorded audio through the recognizer's Whisper method;
# `result` is written to transcript.txt in the next cell.
result = r.recognize_whisper(audio)

In [17]:
with open('transcript.txt', 'w') as f:
    f.write(result)

In [18]:
with open('transcript.txt', 'r') as f:
    text = f.read()

In [19]:
# Single checkpoint name shared by the tokenizer and the model.
checkpoint = "philschmid/bart-large-cnn-samsum"

tokenizer = AutoTokenizer.from_pretrained(checkpoint, model_max_length=1024)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)


In [20]:
summarizer = pipeline(
    "summarization", model=model, tokenizer=tokenizer, framework="pt")


# Break the text into chunks of at most 1024 characters. textwrap.wrap splits
# on whitespace, so a word is never cut in half at a chunk boundary (only a
# single word longer than the limit would be broken), and it returns an empty
# list for empty or whitespace-only text.
chunks = textwrap.wrap(text, width=1024)

# Summarize each chunk independently. The per-chunk results live in their own
# list so that `summary` only ever holds the final string.
chunk_summaries = [
    summarizer(chunk, max_length=150, min_length=30, do_sample=False)[0]['summary_text'].strip()
    for chunk in chunks
]

# Join with a space so the last sentence of one chunk summary does not run
# straight into the first sentence of the next.
summary = ' '.join(chunk_summaries)

# Save the summary to a text file; the encoding is explicit so the result
# does not depend on the platform default.
with open('summary.txt', 'w', encoding='utf-8') as f:
    f.write(summary)

Your max_length is set to 150, but you input_length is only 128. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=64)


In [21]:
# Convert the summary to speech and save it as an MP3 file.
# Stop with a clear message when there is nothing to speak, rather than
# letting the text-to-speech call fail on empty input.
if not summary.strip():
    raise ValueError("summary is empty; nothing to convert to speech")

tts = gTTS(summary)

tts.save("summary.mp3")

: 

Next step: https://colab.research.google.com/drive/1wVVqUPqwiDBUVeWWOUNglpGhU3hg_cbR?usp=sharing#scrollTo=JrK20I32grP6