<a href="https://colab.research.google.com/drive/16sLs1fJ7inP1wKw90zgk7Q_88N4sFU1v?usp=sharing" target="_parent">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# https://github.com/martinopiaggi/summarize

In [None]:
# @markdown ## 🔗 **Source Configuration**

# @markdown **Source Type**
Type_of_source = "YouTube Video"  # @param ["YouTube Video", "Google Drive Video Link", "Dropbox Video Link", "Local File"]

# @markdown **Source URL or Path**
Source = ""  # @param {type:"string"}

# Short aliases for the two form values above; the later cells read `Type`
# (Drive mount check) and both `Type` and `URL` (CONFIG update).
Type = Type_of_source
URL = Source

# @markdown **Use YouTube Captions**
# @markdown If the source is a YouTube video, it is recommended to use the available YouTube captions
# @markdown to save on transcription time and API usage.
use_Youtube_captions = True  # @param {type:"boolean"}

# @markdown ---
# @markdown ## 🌐 **API Configuration**
# @markdown The summarization process uses the API key specified in the `api_key` variable.
# @markdown Ensure you have set the required environment variables or Colab secrets for your API keys.
# NOTE(review): no `api_key` variable is defined in the visible cells; presumably the key is
# resolved by the summarizer package from the environment / Colab secrets — confirm.

api_endpoint = "Groq"  # @param ["Groq", "OpenAI", "Custom"]

# @markdown ---
# @markdown ## 🎤 **Transcription Settings**
# @markdown These settings apply only if you want to use Whisper (Cloud or Local);
# @markdown otherwise, YouTube captions (if available) will be used.

# These three values are forwarded to CONFIG under the keys "transcription_method",
# "language" and "initial_prompt" in the run cell.
# NOTE(review): "auto" presumably requests language auto-detection — confirm against the package.
transcription_method = "Cloud Whisper"  # @param ["Cloud Whisper", "Local Whisper"]
language = "auto"  # @param {type:"string"}
initial_prompt = ""  # @param {type:"string"}

## Install Dependencies and Set Up Environment

In [None]:
# For Google Drive integration (if needed)
if Type == "Google Drive Video Link":
    from google.colab import drive
    drive.mount('/content/drive')

# Install the package and all dependencies
!pip install --quiet git+https://github.com/martinopiaggi/summarize.git

# Import required modules
import os
from dotenv import load_dotenv
from summarizer import main, CONFIG

## Configure Summarization Settings

In [None]:
# @markdown Prompt type to use from prompts.json
prompt_type = "Questions and answers"  # @param ['Summarization', 'Only grammar correction with highlights','Distill Wisdom', 'Questions and answers', 'Essay Writing in Paul Graham Style']
# The numeric settings below are forwarded unchanged to CONFIG (same key names) in the run cell.
# NOTE(review): the units of chunk_size / overlap_size (characters vs. tokens) are not
# visible in this notebook — confirm against the summarizer package.
parallel_api_calls = 30  # @param
chunk_size = 10000      # @param
overlap_size = 20       # @param
max_output_tokens = 4096  # @param

In [None]:
# Fail fast when the form was left at its default (empty) source: an empty
# string cannot identify any video or file, so stop here with a clear message
# instead of handing it to the summarization pipeline.
if not URL or not URL.strip():
    raise ValueError(
        "No source provided: set 'Source' in the configuration cell "
        "to a URL or file path before running the summarization."
    )

# Configure settings: copy the notebook's form values into the CONFIG object
# imported from `summarizer`.
CONFIG.update({
    "type_of_source": Type,
    "source_url_or_path": URL,
    "use_youtube_captions": use_Youtube_captions,
    "api_endpoint": api_endpoint,
    "transcription_method": transcription_method,
    "language": language,
    "initial_prompt": initial_prompt,
    "prompt_type": prompt_type,
    "parallel_api_calls": parallel_api_calls,
    "chunk_size": chunk_size,
    "overlap_size": overlap_size,
    "max_output_tokens": max_output_tokens,
})

# Run summarization and show the result in the cell output.
final_summary = main(CONFIG)
print(final_summary)

# Save to file (UTF-8, overwriting any previous summary in the working directory).
with open("final_summary.md", "w", encoding="utf-8") as f:
    f.write(final_summary)

### Clean folder (optional)

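Use with caution: the command below deletes every `.md` file in the current working directory, including the generated `final_summary.md`.
`!rm *.md`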

In [None]:
# !rm *.md