# AI-powered video transcription and summarization notebook

---



Repository: https://github.com/martinopiaggi/summarize

In [10]:
# --- User configuration (rendered as a Colab form via the #@param markers) ---
Source = "https://www.youtube.com/watch?v=qFZQPvdL5fQ" #@param {type:"string"}
Type_of_source = "Youtube video or playlist" #@param ['Youtube video or playlist', 'Google Drive video link','Dropbox video link']
# Short aliases used by the rest of the notebook.
Type = Type_of_source
URL = Source

#@markdown ---
#@markdown Insert the API key for the endpoint you want to use (Groq, or OpenAI when `OpenAI_endpoint` is checked). Leave it empty to read the key from the `OPENAI_API_KEY` / `GROQ_API_KEY` environment variable.
# SECURITY: never commit a real key in this field. Any key that was ever
# saved in the notebook must be considered leaked and should be revoked.
api_key = "" #@param {type:"string"}

OpenAI_endpoint = True  #@param {type:"boolean"}
use_Youtube_captions = True #@param {type:"boolean"}

if not api_key:
    # Fall back to the environment so the secret never has to live in the notebook.
    import os
    api_key = os.environ.get("OPENAI_API_KEY" if OpenAI_endpoint else "GROQ_API_KEY", "")



In [2]:
%%capture
#@markdown ## Installation of libraries
#@markdown Installs and imports only the packages required by the options selected above.

import subprocess
import re

import torch
from torch.utils.data import Dataset, DataLoader

# Install and import only what the selected options need. Every branch below
# depends on the form values (Type, use_Youtube_captions, OpenAI_endpoint,
# api_key) set in the configuration cell, so that cell must run first.

# Caption download support, only when captions were requested.
if use_Youtube_captions:
  !pip install youtube-transcript-api
  from youtube_transcript_api import YouTubeTranscriptApi

# Whisper is needed whenever the transcript has to be produced from audio:
# any non-YouTube source, or a YouTube source with captions disabled.
if (not Type == "Youtube video or playlist") or (not use_Youtube_captions):
  !pip install faster-whisper
  from faster_whisper import WhisperModel

# Build the chat-completion client; `client` is used later by summarize().
if OpenAI_endpoint:
  !pip install openai
  import openai
  client = openai.OpenAI(api_key=api_key)
else:
  !pip install groq
  from groq import Groq
  client = Groq(api_key=api_key)

# pytube is installed straight from the GitHub repository (unpinned).
if Type == "Youtube video or playlist":
  !pip install git+https://github.com/pytube/pytube
  from pytube import YouTube

# Mount Google Drive at /gdrive so the download cell can read the file from it.
if Type == "Google Drive video link":
  from google.colab import drive
  drive.mount('/gdrive')

# NOTE(review): ffmpeg is installed only for Dropbox, but the Google Drive
# branch of the download cell also calls ffmpeg — presumably the runtime
# already ships it; confirm.
if Type == ("Dropbox video link"):
  !sudo apt update && sudo apt install ffmpeg


In [11]:
#@markdown ## Video downloads
#@markdown Downloading video sources
# Shared state filled in by this cell and consumed by the later cells.
# Text accumulates the transcript; TextTimestamps is initialised alongside it.
Text = TextTimestamps = ""
# Set to True once a ready-made transcript (YouTube captions) has been fetched.
skip_transcription = False
# Local audio/video files waiting to be transcribed.
video_path_local_list = list()

def seconds_to_time_format(seconds):
    """Render a duration in seconds as a zero-padded ``HH:MM:SS`` string.

    Fractional parts are truncated, not rounded.
    """
    whole_hours, leftover = divmod(seconds, 3600)
    whole_minutes, leftover_seconds = divmod(leftover, 60)
    fields = (whole_hours, whole_minutes, leftover_seconds)
    return ":".join("{:02d}".format(int(field)) for field in fields)


def download_youtube_audio_only(url):
    """Download the audio-only stream of a YouTube video into the current directory.

    An already-downloaded file is not fetched again (``skip_existing=True``).
    Returns the path reported by pytube's ``download``.
    """
    return (
        YouTube(url)
        .streams
        .get_audio_only()
        .download(output_path=".", skip_existing=True)
    )


def download_youtube_captions(url):
    """Fetch English captions for a YouTube video and save them to disk.

    The English track is tried first; if it cannot be fetched, the first
    translatable track is translated to English instead. Each caption becomes
    one ``HH:MM:SS text`` line.

    Args:
        url: A YouTube URL containing an 11-character video id.

    Returns:
        A ``(text, transcript_file_name)`` tuple: the timestamped transcript
        and the name of the ``<video_id>_captions.md`` file it was written to.

    Raises:
        ValueError: if no video id can be extracted from ``url``.
        RuntimeError: if neither an English nor a translatable track exists.
    """
    regex = r'(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})'
    id_match = re.search(regex, url)
    if id_match is None:
        raise ValueError(f"Could not extract a YouTube video id from: {url!r}")
    video_id = id_match.group(1)

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
    except Exception:
        # No directly usable English track: translate the first track that allows it.
        # The track list is only requested here, where it is actually needed.
        transcript = None
        for available_transcript in YouTubeTranscriptApi.list_transcripts(video_id):
            if available_transcript.is_translatable:
                transcript = available_transcript.translate('en').fetch()
                break
        if transcript is None:
            raise RuntimeError(
                f"No English or translatable captions available for video {video_id}"
            )

    text = "".join(
        f"{seconds_to_time_format(entry['start'])} {entry['text'].strip()}\n"
        for entry in transcript
    )

    transcript_file_name = f"{video_id}_captions.md"

    with open(transcript_file_name, 'w', encoding='utf-8') as f:
        # Write the transcript built above (not the notebook-level `Text`,
        # which is still empty when this function runs).
        f.write(text)

    return text, transcript_file_name

# Fetch the selected source. Either the transcript is obtained directly
# (YouTube captions) or a local audio file is queued for Whisper.
if Type == "Youtube video or playlist":
    if use_Youtube_captions:
        # Captions already are the transcript, so the Whisper cell can be skipped.
        Text, transcript_file_name = download_youtube_captions(URL)
        skip_transcription = True
    else:
        video_path_local_list.append(download_youtube_audio_only(URL))

elif Type == "Google Drive video link":
    # URL is used as a path relative to the mounted "My Drive" folder.
    # Extract mono 16 kHz PCM audio from the video.
    subprocess.run(['ffmpeg', '-y', '-i', '/gdrive/My Drive/' + URL, '-vn', '-acodec', 'pcm_s16le',
                    '-ar', '16000', '-ac', '1', 'gdrive_audio.wav'], check=True)
    video_path_local_list.append("gdrive_audio.wav")

elif Type == "Dropbox video link":
    # Download the file from the configured URL (previously an undefined name
    # was passed here, which raised NameError), then extract its audio track.
    subprocess.run(['wget', '-O', 'dropbox_video.mp4', URL], check=True)
    subprocess.run(['ffmpeg', '-y', '-i', 'dropbox_video.mp4', '-vn', '-acodec', 'pcm_s16le',
                    '-ar', '16000', '-ac', '1', 'dropbox_video_audio.wav'], check=True)
    video_path_local_list.append("dropbox_video_audio.wav")


In [4]:
%%capture
# @markdown ## Transcription using Faster Whisper
# @markdown Manually specifying the language can increase speed.

if not skip_transcription:

  language = "en" # @param {type:"string"}
  # @markdown An initial prompt with specific context-aware words and names can improve accuracy.

  initial_prompt = "" # @param {type:"string"}

  import os  # local import: only needed to derive the transcript file name

  # Only the first queued file is transcribed.
  video_path_local = str(video_path_local_list[0])

  # Use the GPU when one is available, otherwise fall back to the CPU
  # instead of failing on runtimes without CUDA.
  device = "cuda" if torch.cuda.is_available() else "cpu"
  model = WhisperModel('small', device=device, compute_type='int8')
  # task="translate" makes Whisper emit English text; "auto" lets it detect the language.
  segments, info = model.transcribe(video_path_local, beam_size=5,
                                    language=None if language == "auto" else language,
                                    task="translate",
                                    initial_prompt=initial_prompt)

  # Swap whatever extension the media file has for ".md". A plain
  # replace(".mp4", ".md") left ".wav" inputs unchanged, so the transcript
  # was written over the audio file itself.
  transcript_file_name = os.path.splitext(video_path_local)[0] + ".md"

  with open(transcript_file_name, 'w', encoding='utf-8') as f:
    # Segments are consumed here; each one is prefixed with its start time.
    for segment in segments:
      start_time = seconds_to_time_format(segment.start)
      Text += f"[{start_time}] {segment.text.strip()} "

    f.write(Text)


In [13]:
# @markdown ## Summarization and elaboration

prompt_type = "Summarization"  # @param ['Summarization', 'Only grammar correction with highlights']

# System prompts, keyed by the option names offered in the form field above.
prompts = {}
prompts['Summarization'] = "Summarize the video transcript excerpt including a concise title that reflects the content. Wrap the title with **markdown bold notation**. Write the summary as if you are continuing a conversation without needing to signal a beginning. Here is the transcript: "
prompts['Only grammar correction with highlights'] = "Repeat the following text correcting any grammatical errors and formatting error. Highlight only the important quote (if there are any) with **markdown bold notation**. Focus solely on the essence of the content as if you are continuing a conversation without using any form of introduction like 'Here's the corrected text:'. Here is the text to fix: "

# The prompt actually sent as the system message by summarize().
summary_prompt = prompts[prompt_type]

def summarize(prompt):
    """Send one transcript chunk to the configured chat endpoint and return the reply text.

    The notebook-level ``summary_prompt`` is used as the system message and
    ``prompt`` as the user message. The model name depends on
    ``OpenAI_endpoint``.
    """
    model_name = "gpt-3.5-turbo" if OpenAI_endpoint else "mixtral-8x7b-32768"
    conversation = [
        {"role": "system", "content": summary_prompt},
        {"role": "user", "content": prompt},
    ]
    response = client.chat.completions.create(
        model=model_name,
        messages=conversation,
        max_tokens=4096,
    )
    first_choice = response.choices[0]
    return first_choice.message.content

def extract_timestamp_ranges(timestamp_chunks):
    """Return the first ``HH:MM:SS`` timestamp found in each chunk.

    Chunks that contain no timestamp contribute nothing, so the result can be
    shorter than the input.
    """
    hhmmss = re.compile(r'(\d{2}:\d{2}:\d{2})')
    first_stamps = []
    for piece in timestamp_chunks:
        found = hhmmss.search(piece)
        if found is not None:
            first_stamps.append(found.group(1))
    return first_stamps

def format_timestamp_link(timestamp):
    """Return ``"<timestamp> - <URL with start time>"`` for the configured video URL.

    Args:
        timestamp: A time in ``HH:MM:SS`` form.

    Returns:
        The timestamp followed by the notebook-level ``URL`` with a ``t=``
        parameter holding the offset in seconds.
    """
    hours, minutes, seconds = map(int, timestamp.split(':'))
    total_seconds = hours * 3600 + minutes * 60 + seconds
    # Start a query string when the URL has none (e.g. youtu.be short links);
    # otherwise extend the existing one.
    separator = "&" if "?" in URL else "?"
    return f"{timestamp} - {URL}{separator}t={total_seconds}"

def process_and_summarize(text):
    """Summarize ``text`` chunk by chunk and write the result to a markdown file.

    The text is cut into 4096-character windows that overlap by 40 characters.
    Each window is summarized with ``summarize`` and, when it contains a
    timestamp, prefixed with a link to that moment. Windows without any
    timestamp are written without a prefix.

    The output file is ``final_dropbox_video.md`` for Dropbox sources;
    otherwise it is the transcript file name with its extension replaced by
    ``_FINAL.md``.

    Args:
        text: The full (timestamped) transcript.
    """
    import os  # local import: only needed to derive the output file name

    chunk_size, overlap_size = 4096, 40
    step = chunk_size - overlap_size
    texts = [text[i:i + chunk_size] for i in range(0, len(text), step)]
    summary = []

    for text_chunk in texts:
        summarized_chunk = summarize(text_chunk)
        # Look the timestamp up per chunk: a combined list drops chunks that
        # have no timestamp, which would shift the labels of all later chunks
        # and eventually index past the end of the list.
        chunk_timestamps = extract_timestamp_ranges([text_chunk])
        if chunk_timestamps:
            summary_piece = format_timestamp_link(chunk_timestamps[0]) + " " + summarized_chunk
        else:
            summary_piece = summarized_chunk
        summary.append(summary_piece + "\n")

    # Save the final summary. The name is derived from the stem so the
    # transcript is never overwritten, even if it does not end in ".md".
    if Type != "Dropbox video link":
        final_name = os.path.splitext(transcript_file_name)[0] + "_FINAL.md"
    else:
        final_name = "final_dropbox_video.md"
    with open(final_name, 'w', encoding='utf-8') as f:
        f.write("\n\n".join(summary))

# Summarize the transcript accumulated in `Text` by the earlier cells and
# write the result to disk.
process_and_summarize(Text)