In [37]:
import copy
import math
import os
from urllib.parse import parse_qs, urlparse

import speech_recognition as sr
import youtube_dl
from pydub import AudioSegment
from pytube import YouTube
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi

In [42]:
def extract_cc(url):
    """
    Extract closed caption using built-in API and joining lines
    
    """
    transcript = YouTubeTranscriptApi.get_transcript(url[17:])
    return ' '.join([i['text'] for i in transcript])

def audio_extraction(url):
    """
    Extract audio from youtube videos to be processed for speech recognition
    
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    title = YouTube(url).title
    print(title)
    urls_list = copy.copy(url)
    print(urls_list)
    u = urls_list.split('/')[-1]
    return title+"-" +u+".mp3"
    
def speech_recognition(audio_file):
    """
    Transcribe extracted youtube video audio file
    
    """
    r = sr.Recognizer()
    clip = sr.AudioFile(audio_file)
    with clip as source:
        r.adjust_for_ambient_noise(source)
        audio = r.record(source)
    return r.recognize_google(audio)
    
def extract_transcription(url):
    """
    Convert Youtube mp3 to wav to be transcribed
    
    """
    input_file = audio_extraction(url)
    output_file = "result.wav"
    sound = AudioSegment.from_mp3(input_file)
    sound.export(output_file, format="wav")
    transcript = speech_recognition(output_file)
    os.remove(output_file)
    os.remove(input_file)
    return transcript

def summarizer(url, model = 'transformers'):
    
    try:
        text = extract_cc(url)
    except:
        text = extract_transcription(url)
    
    n = int(math.log(len(text.split())))*120
    
    summarizer = pipeline("summarization", model="t5-base", tokenizer="t5-base", framework="tf")
    summary_text = summarizer(text, max_length=n, min_length=5, do_sample=False)[0]['summary_text']
    return summary_text


In [43]:
# Demo run: summarize one video given its youtu.be short link. The call is
# the last expression in the cell, so the returned summary string is shown
# as the cell output.
# To look at the raw caption text instead, uncomment:
# text = extract_cc("https://youtu.be/DfzK4zJShZM")
summarizer("https://youtu.be/DfzK4zJShZM")

All model checkpoint layers were used when initializing TFT5ForConditionalGeneration.

All the layers of TFT5ForConditionalGeneration were initialized from the model checkpoint at t5-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.
Token indices sequence length is longer than the specified maximum sequence length for this model (1699 > 512). Running this sequence through the model will result in indexing errors


"ukraine says it has liberated the area surrounding kiev as russia's forces retreat . ukrainian president vladimir zelinski warns this could be the start of a russian offensive to seize the east and south of the country . disturbing reports of civilian deaths are being reported in kievan and chernhiv . andre soldatov is an investigative journalist in vienna who is also an expert on russia ."