
https://github.com/martinopiaggi/summarize

In [None]:
# @markdown ## 🔗 **Source Configuration**

# @markdown **Source Type**
Type_of_source = "YouTube Video"  # @param ["YouTube Video", "Google Drive Video Link", "Dropbox Video Link", "Local File"]

# @markdown **Source URL or Path**
Source = "https://www.youtube.com/watch?v=z5W74QC3v2I"  # @param {type:"string"}

# Short aliases for the form values; the later cells read `Type` and `URL`.
Type = Type_of_source
URL = Source

# @markdown **Use YouTube Captions**
# @markdown If the source is a YouTube video, it's recommended to use the available YouTube captions
# @markdown to save on transcription time and API usage.
use_Youtube_captions = True  # @param {type:"boolean"}

# @markdown ---
# @markdown ## 🌐 **API Configuration**
# @markdown The summarization process uses the API key stored under the name `api_key` (or `api_key_groq` when the **Groq** endpoint is selected).
# @markdown Ensure you have set the required environment variables or Colab secrets for your API keys.

api_endpoint = "OpenAI"  # @param ["Groq", "OpenAI", "Custom"]

# Base URL for each selectable endpoint, keyed by the `api_endpoint` choice.
endpoints = {
    "Groq": "https://api.groq.com/openai/v1",
    "OpenAI": "https://api.openai.com/v1",
    "Custom": "https://api.deepseek.com/v1"  # Default custom endpoint
}
base_url = endpoints.get(api_endpoint)

# Model name for each selectable endpoint, keyed the same way as `endpoints`.
models = {
    "Groq": "llama-3.3-70b-versatile",
    "OpenAI": "gpt-4o",
    "Custom": "deepseek-chat"  # Default custom model
}
model = models.get(api_endpoint)

# @markdown ---
# @markdown ## 🎤 **Transcription Settings**
# @markdown The transcription settings are applied only if you want to use Whisper transcription and not YouTube captions.

# @markdown If you plan to use the Whisper API endpoint (only the **Groq** endpoint is supported for now), you have to specify your Groq API key in `api_key_groq`.
# @markdown If using Whisper locally: remember to switch the runtime type to a GPU instance (e.g., T4). Go to **Runtime** > **Change runtime type** and select **GPU** as the hardware accelerator.

transcription_method = "Cloud Whisper"  # @param ["Cloud Whisper", "Local Whisper"]
language = "auto"  # @param {type:"string"}
initial_prompt = ""  # @param {type:"string"}

In [None]:
# @markdown ## 🛠️ Install Dependencies and Set Up Environment

# Install nest_asyncio and the summarizer package; the latter is installed
# from the `feature/refactor-backend` branch of the GitHub repository.
!pip install nest_asyncio
!pip install git+https://github.com/martinopiaggi/summarize.git@feature/refactor-backend

# Import and setup (these imports follow the installs above)
import os
import nest_asyncio
from dotenv import load_dotenv
from summarizer import main, CONFIG
import asyncio

# Apply nest_asyncio for Colab compatibility
nest_asyncio.apply()

# Source-specific Colab setup: mount Google Drive for Drive links, or import
# the `files` helper that the run cell uses to upload a local file.
# `Type` comes from the configuration cell.
if Type == "Google Drive Video Link":
    from google.colab import drive
    drive.mount('/content/drive')
elif Type == "Local File":
    from google.colab import files


def get_api_key():
    """Return the API key for the currently selected ``api_endpoint``.

    When the endpoint is "Groq" the lookup is delegated to
    ``get_groq_api_key``. Otherwise the value stored under the name
    ``api_key`` is read from Colab secrets if ``google.colab`` can be
    imported, or from the environment (after ``load_dotenv()``) if it cannot.

    Raises:
        ValueError: If the looked-up value is missing or empty.
    """
    if api_endpoint == "Groq":
        return get_groq_api_key()

    secret_name = 'api_key'
    try:
        from google.colab import userdata
        resolved = userdata.get(secret_name)
    except ImportError:
        # google.colab is not importable: fall back to .env / environment.
        load_dotenv()
        resolved = os.getenv(secret_name)

    if resolved:
        return resolved
    raise ValueError("API key not found in environment variables or Colab secrets")

def get_groq_api_key():
    """Return the Groq API key stored under the name ``api_key_groq``.

    The value is read from Colab secrets if ``google.colab`` can be imported,
    or from the environment (after ``load_dotenv()``) if it cannot.

    Raises:
        ValueError: If the looked-up value is missing or empty.
    """
    secret_name = 'api_key_groq'
    try:
        from google.colab import userdata
        resolved = userdata.get(secret_name)
    except ImportError:
        # google.colab is not importable: fall back to .env / environment.
        load_dotenv()
        resolved = os.getenv(secret_name)

    if resolved:
        return resolved
    raise ValueError("Groq API key not found in environment variables or Colab secrets")

# Key for the selected summarization endpoint: always required.
api_key = get_api_key()

# The Groq key is only mandatory when Groq is actually going to be used:
# either as the summarization endpoint, or for Cloud Whisper transcription
# when YouTube captions are not being used (see the form text in the
# configuration cell). Requiring it unconditionally would block e.g. an
# OpenAI + YouTube-captions run for users who have no Groq account.
_groq_key_required = api_endpoint == "Groq" or (
    transcription_method == "Cloud Whisper"
    and not (Type == "YouTube Video" and use_Youtube_captions)
)

try:
    groq_api_key = get_groq_api_key()
except Exception as groq_key_error:
    # Broad on purpose: besides the ValueError raised by get_groq_api_key,
    # Colab's `userdata.get` presumably raises its own exception types for a
    # missing or inaccessible secret -- TODO confirm against the Colab docs.
    if _groq_key_required:
        raise
    groq_api_key = None
    print(
        "ℹ️ Groq API key not available "
        f"({groq_key_error}); continuing without it. "
        "Cloud Whisper transcription will not work until `api_key_groq` is set."
    )

# Export the keys as environment variables so that they are also set when
# running on Google Colab (where they were read from Colab secrets).
os.environ['api_key'] = api_key
if groq_api_key is not None:
    # os.environ only accepts strings, so skip the assignment when absent.
    os.environ['api_key_groq'] = groq_api_key

In [None]:
# @markdown ## ⚙️ Configure Summarization Settings
# The values below are passed to the summarizer through `CONFIG` in the run cell.
prompt_type = "Questions and answers"  # @param ['Summarization', 'Only grammar correction with highlights','Distill Wisdom', 'Questions and answers', 'Essay Writing in Paul Graham Style']
parallel_api_calls = 30  # @param {type:"slider", min:1, max:60, step:1}
chunk_size = 18000      # @param {type:"slider", min:2000, max:28000, step:2000}
max_output_tokens = 4096  # @param {type:"slider", min:1024, max:8192, step:1024}

In [None]:
# @markdown ## 🚀 Run Summarization

if Type == "Local File" and not URL:
    print("📁 Please upload your video file...")
    uploaded = files.upload()
    if uploaded:
        URL = list(uploaded.keys())[0]
        print(f"✅ Using uploaded file: {URL}")

# Configure settings
CONFIG.update({
    "type_of_source": Type,
    "source_url_or_path": URL,
    "use_youtube_captions": use_Youtube_captions if Type == "YouTube Video" else False,
    "transcription_method": transcription_method,
    "language": language,
    "initial_prompt": initial_prompt,
    "prompt_type": prompt_type,
    "parallel_api_calls": parallel_api_calls,
    "chunk_size": chunk_size,
    "max_output_tokens": max_output_tokens,
    "base_url": base_url,
    "model": model
})

try:
    print("\n🎬 Starting summarization...\n")
    final_summary = main(CONFIG)

    # Save to file with metadata
    filename = "summary.md"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(f"# Summary for: {URL}\n\n")
        f.write(f"Generated using: {model} at {base_url}\n\n")
        f.write(final_summary)
    print(f"\n💾 Summary saved to {filename}")

except Exception as e:
    print(f"\n❌ Error: {str(e)}")
    if "api_key" in str(e).lower():
        print("\n⚠️ Make sure you've set up your API keys in the configuration cell above!")
    elif "ffmpeg" in str(e).lower():
        print("\n⚠️ Installing ffmpeg...")
        !apt-get update && apt-get install -y ffmpeg
        print("Please run the cell again.")